diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -8138,17 +8138,6 @@ } ], "models": [ - { - "id": "openai/gpt-4o-mini", - "name": "GPT-4o-mini", - "provider_name": "OpenAI", - "cost": 0.6, - "hf_id": null, - "size": null, - "type": "Commercial", - "license": null, - "creation_date": "2024-07-18" - }, { "id": "meta-llama/llama-4-maverick", "name": "Llama 4 Maverick", @@ -8193,6 +8182,17 @@ "license": "Llama3", "creation_date": "2024-04-17" }, + { + "id": "openai/gpt-4o-mini", + "name": "GPT-4o-mini", + "provider_name": "OpenAI", + "cost": 0.6, + "hf_id": null, + "size": null, + "type": "Commercial", + "license": null, + "creation_date": "2024-07-18" + }, { "id": "mistralai/mistral-small-3.1-24b-instruct", "name": "Mistral Small 3.1 24B", @@ -8204,17 +8204,6 @@ "license": "Apache 2.0", "creation_date": "2025-03-11" }, - { - "id": "google/gemini-2.0-flash-001", - "name": "Gemini 2.0 Flash", - "provider_name": "Google", - "cost": 0.4, - "hf_id": null, - "size": null, - "type": "Commercial", - "license": null, - "creation_date": "2025-02-05" - }, { "id": "google/gemma-3-27b-it", "name": "Gemma 3 27B", @@ -8283,22 +8272,6 @@ } ], "scores": [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_from", - "metric": "bleu", - "score": 0.5679608237702286, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_from", - "metric": "chrf", - "score": 0.746881923400435, - "sentence_nr": 0 - }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", @@ -8332,12612 +8305,54404 @@ "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.30676942927198475, + "score": 0.5894973558751632, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4968492831219663, + "score": 0.7562097956860054, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3742128962272385, + "score": 0.3846086976522069, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5924994297544066, + "score": 0.5835344719191324, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.32063971770635635, + "score": 0.5679608237702286, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5206258401513325, + "score": 0.746881923400435, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, + "score": 0.3472596783998825, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, + "score": 0.5880210095195896, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.8780634320789833, + "score": 0.5617561349997696, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.926946700115022, + "score": 0.7132694856647042, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.7964573357809173, + "score": 0.2963216580569375, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.8458636471716781, + "score": 0.5101500486835966, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4226799078177409, + "score": 0.6303545030576861, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5651672709988255, + "score": 0.77785134764153, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.32406433662077544, + "score": 0.15317719477157257, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5243586266504104, + "score": 0.38800976493585004, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.34633672321253084, + "score": 0.6001453932849357, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5378805625051344, + "score": 0.762029391170019, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.5077888484472814, + "score": 0.5157250202457466, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6493197366069867, + "score": 0.7038347316939249, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4318843329340524, + "score": 0.3742128962272385, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6011096108554106, + "score": 0.5924994297544066, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3582301850807646, + "score": 0.32063971770635635, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5380305837807603, + "score": 0.5206258401513325, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3732667150787326, + "score": 0.39086127104761287, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5674650482249737, + "score": 0.6239956806265569, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4641883721676649, + "score": 0.3020679767949182, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6403267149729506, + "score": 0.5246291817407542, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.300740577257699, + "score": 0.30676942927198475, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5272774705181614, + "score": 0.4968492831219663, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3576035471132581, + "score": 0.2516114673955893, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5426399702952437, + "score": 0.5164808837319497, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4422044705926463, + "score": 0.4273817965049865, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6089032707320831, + "score": 0.6016204186733703, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3099603853356145, + "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5209233176748354, + "score": 0.0, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.33210944907163426, + "score": 0.3972267643943283, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5289420578289948, + "score": 0.5952617863931118, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4331131003868224, + "score": 0.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5898969623074624, + "score": 0.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.35580399268816465, + "score": 0.2777551012631926, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5392592206305507, + "score": 0.49423240120783246, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4475435253337274, + "score": 0.29707776197115804, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5956867226653717, + "score": 0.5094838456142188, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5274220384037692, + "score": 0.8780634320789833, "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6765588140322357, + "score": 0.926946700115022, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.39317381456022266, + "score": 0.7964573357809173, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6026058740561834, + "score": 0.8458636471716781, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4166560818400039, + "score": 1.0, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6515522498665886, + "score": 1.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.42734667499155, + "score": 0.9452996322890763, "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6397906518456509, + "score": 0.9463396364218181, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.48930936408255293, + "score": 0.9878765474230741, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.699085629239476, + "score": 0.9958930217841712, "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.07407154448063642, - "sentence_nr": 1 + "score": 0.6537803976048806, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.43145434527321425, - "sentence_nr": 1 + "score": 0.7742226743967544, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.06647168102389285, - "sentence_nr": 1 + "score": 0.738238064391125, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.34350832619898364, - "sentence_nr": 1 + "score": 0.8637738769684485, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 + "score": 1.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12422788549118892, - "sentence_nr": 1 + "score": 0.9878765474230741, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40222210564426, - "sentence_nr": 1 + "score": 0.9958930217841712, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.09735981717515908, - "sentence_nr": 1 + "score": 0.32406433662077544, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.35288934658906385, - "sentence_nr": 1 + "score": 0.5243586266504104, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 + "score": 0.34633672321253084, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 + "score": 0.5378805625051344, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.13714845589364738, - "sentence_nr": 1 + "score": 0.3852076286218103, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.45499281593451946, - "sentence_nr": 1 + "score": 0.5629822759777402, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.11564012893219777, - "sentence_nr": 1 + "score": 0.26021014514167856, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.44599783682350064, - "sentence_nr": 1 + "score": 0.4820043660869366, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 + "score": 0.4226799078177409, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 + "score": 0.5651672709988255, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.3026566818840519, - "sentence_nr": 1 + "score": 0.2502214193201532, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5945859352092411, - "sentence_nr": 1 + "score": 0.49819657249183386, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.441464946158803, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.2521233582161207, - "sentence_nr": 1 + "score": 0.6003092613714627, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.34734422615832194, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.5262645092345396, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.07793031063789554, - "sentence_nr": 1 + "score": 0.3232734746803988, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3700181221537743, - "sentence_nr": 1 + "score": 0.5256417654956012, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0867932999243575, - "sentence_nr": 1 + "score": 0.13576358182705253, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4201964133235075, - "sentence_nr": 1 + "score": 0.41529193531769876, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.08214106568089705, - "sentence_nr": 1 + "score": 0.24926331918525627, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3969463877642616, - "sentence_nr": 1 + "score": 0.4599756430080559, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3793970928219617, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.41649654108052436, - "sentence_nr": 1 + "score": 0.5384125202333925, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.4318843329340524, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3630576975795868, - "sentence_nr": 1 + "score": 0.6011096108554106, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0744904632040495, - "sentence_nr": 1 + "score": 0.3582301850807646, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4111163205685468, - "sentence_nr": 1 + "score": 0.5380305837807603, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.08767210132815903, - "sentence_nr": 1 + "score": 0.44175593938589236, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.40476518002703893, - "sentence_nr": 1 + "score": 0.6111096352841461, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.08616711094288851, - "sentence_nr": 1 + "score": 0.3905612192964119, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3696512763473903, - "sentence_nr": 1 + "score": 0.5861956606716949, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.12894104034845807, - "sentence_nr": 1 + "score": 0.5077888484472814, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4486368934849452, - "sentence_nr": 1 + "score": 0.6493197366069867, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.14738500064905094, - "sentence_nr": 1 + "score": 0.388275825650142, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4659728395318289, - "sentence_nr": 1 + "score": 0.5946895227088745, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.15386029327005746, - "sentence_nr": 1 + "score": 0.47840604738578085, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.43911482594829104, - "sentence_nr": 1 + "score": 0.6297473901472479, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.10070927557742705, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.43718220262892105, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.5009380663759289, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3370100422576744, - "sentence_nr": 1 + "score": 0.6679481474132949, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2087397501881324, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.1946966569103724, - "sentence_nr": 1 + "score": 0.39410069470509135, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0772718393063023, - "sentence_nr": 1 + "score": 0.4640742081615844, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4203683137304257, - "sentence_nr": 1 + "score": 0.6372680189651158, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 + "score": 0.4453094933864169, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 + "score": 0.6148455639799472, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3961285597009415, - "sentence_nr": 2 + "score": 0.4641883721676649, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6148751441350505, - "sentence_nr": 2 + "score": 0.6403267149729506, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 + "score": 0.300740577257699, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 + "score": 0.5272774705181614, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 + "score": 0.4061066499716187, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 + "score": 0.6080346530552228, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.17374951565433233, - "sentence_nr": 2 + "score": 0.31520390441828733, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.45325597884524305, - "sentence_nr": 2 + "score": 0.5666753970394321, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 + "score": 0.3732667150787326, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 + "score": 0.5674650482249737, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.26459538953931094, - "sentence_nr": 2 + "score": 0.2426576141982896, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5272178908335121, - "sentence_nr": 2 + "score": 0.4681164293806726, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.26801022984888695, - "sentence_nr": 2 + "score": 0.3725907668893922, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5654883864995515, - "sentence_nr": 2 + "score": 0.5838909337906717, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 + "score": 0.1229583779881281, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 + "score": 0.3267617054992069, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3563758622144919, - "sentence_nr": 2 + "score": 0.46418585410212687, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6037023613177924, - "sentence_nr": 2 + "score": 0.6257813924169782, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3574583793293068, - "sentence_nr": 2 + "score": 0.21346660402255854, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5924115119819969, - "sentence_nr": 2 + "score": 0.4749873824263006, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 + "score": 0.3284656616594502, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6464467277069994, - "sentence_nr": 2 + "score": 0.5314089060682492, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2158914621804855, - "sentence_nr": 2 + "score": 0.37752144939477184, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5448184155666022, - "sentence_nr": 2 + "score": 0.5494778583745301, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.22292726306270316, - "sentence_nr": 2 + "score": 0.4422044705926463, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5653789747970112, - "sentence_nr": 2 + "score": 0.6089032707320831, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 + "score": 0.3099603853356145, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 + "score": 0.5209233176748354, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18031307339768174, - "sentence_nr": 2 + "score": 0.30939216619448856, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.522164454804456, - "sentence_nr": 2 + "score": 0.5208328629222005, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.21403222128228389, - "sentence_nr": 2 + "score": 0.2417404985264926, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.563121432204311, - "sentence_nr": 2 + "score": 0.4540589962283635, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 + "score": 0.3576035471132581, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 + "score": 0.5426399702952437, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.2999092588227898, - "sentence_nr": 2 + "score": 0.2840596414449913, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5505916495384416, - "sentence_nr": 2 + "score": 0.4892722276483434, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4054983797456263, - "sentence_nr": 2 + "score": 0.344338817815182, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6264774230839022, - "sentence_nr": 2 + "score": 0.5355920179313903, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.28716979381420105, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.4263684749347053, - "sentence_nr": 2 + "score": 0.5171262478660463, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.20051119758906127, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5334791309401924, - "sentence_nr": 2 + "score": 0.05448112815049329, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 + "score": 0.3360010226928493, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 + "score": 0.5216531073745614, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.2562849004088193, - "sentence_nr": 2 + "score": 0.32060183762800015, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.5767019342009202, - "sentence_nr": 2 + "score": 0.5258982083606875, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.3535002370419364, - "sentence_nr": 2 + "score": 0.4331131003868224, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5959879218348465, - "sentence_nr": 2 + "score": 0.5898969623074624, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.35580399268816465, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.5392592206305507, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.2465888500427759, - "sentence_nr": 2 + "score": 0.40724702386633355, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5221084445696768, - "sentence_nr": 2 + "score": 0.5650294312072152, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.35983766090218355, - "sentence_nr": 2 + "score": 0.3741026207881868, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.5862251404739759, - "sentence_nr": 2 + "score": 0.5834523243646894, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 + "score": 0.33210944907163426, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 + "score": 0.5289420578289948, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 + "score": 0.3909683536530208, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 + "score": 0.5861999156017297, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.4100880948326119, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7882997401328445, - "sentence_nr": 3 + "score": 0.5748650910980349, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 + "score": 0.553414625382002, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 + "score": 0.7074940030211, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.25552199116069907, - "sentence_nr": 3 + "score": 0.19250412598108757, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3799133205289109, - "sentence_nr": 3 + "score": 0.4448372401459185, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.3109333640704356, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 + "score": 0.5072760587388273, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 3 + "score": 0.49084794047865243, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6181373706707737, - "sentence_nr": 3 + "score": 0.6606364516519111, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.5274220384037692, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.512762518189388, - "sentence_nr": 3 + "score": 0.6765588140322357, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.39317381456022266, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.6026058740561834, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.2615858282579583, - "sentence_nr": 3 + "score": 0.3229030611977504, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.35447530946908884, - "sentence_nr": 3 + "score": 0.5136703373168134, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4577275269488853, - "sentence_nr": 3 + "score": 0.3113772787307771, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6747054474171109, - "sentence_nr": 3 + "score": 0.5344680037267059, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 + "score": 0.4475435253337274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 + "score": 0.5956867226653717, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15138514598766048, - "sentence_nr": 3 + "score": 0.35103160282487145, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3237497764315872, - "sentence_nr": 3 + "score": 0.5432217848942439, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.27668736912821895, - "sentence_nr": 3 + "score": 0.4831574055451935, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4414406760568898, - "sentence_nr": 3 + "score": 0.6238976883927624, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15604242268653643, - "sentence_nr": 3 + "score": 0.3667951090093586, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.2255928425212252, - "sentence_nr": 3 + "score": 0.574382729364071, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.0895824671662166, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.6159319815107203, - "sentence_nr": 3 + "score": 0.24679712992918926, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.3511508047578372, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 + "score": 0.502364219831564, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.24728515687112834, - "sentence_nr": 3 + "score": 0.4164593261612853, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3088155734423375, - "sentence_nr": 3 + "score": 0.5895619617012146, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.42734667499155, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.78479833664205, - "sentence_nr": 3 + "score": 0.6397906518456509, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 + "score": 0.48930936408255293, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 + "score": 0.699085629239476, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.353203510510529, - "sentence_nr": 3 + "score": 0.49517040114696814, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4910213297498164, - "sentence_nr": 3 + "score": 0.672650019344124, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.4815092081725061, - "sentence_nr": 3 + "score": 0.47426274497383164, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.5820265218174012, - "sentence_nr": 3 + "score": 0.6547171931962555, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 + "score": 0.4166560818400039, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 + "score": 0.6515522498665886, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.23660362391696813, - "sentence_nr": 3 + "score": 0.44697138732796604, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.34152697838249696, - "sentence_nr": 3 + "score": 0.6533822343227146, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.5803563388252858, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7246473808162345, - "sentence_nr": 3 + "score": 0.7397216312303552, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.05034135169161612, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.25001156386121903, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.45495679780282583, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.699735222419999, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, - "sentence_nr": 3 + "score": 0.0025767494884759577, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 3 + "score": 0.022849655955591117, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.49179307081132717, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.6798382116037067, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.5166723256924997, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.7009262917135753, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.07407154448063642, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.43145434527321425, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.12903696060775005, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.456225988032654, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.024459391267874976, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12351824822447692, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.46822754470803873, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15815751066481462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5152611872266766, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.49546288984677567, - "sentence_nr": 4 + "score": 0.4031456247133876, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20669086265781264, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.43795381992037963, - "sentence_nr": 4 + "score": 0.5076721272198604, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17630490037560695, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 + "score": 0.48116430160978857, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15611634095633747, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.40854152133685306, - "sentence_nr": 4 + "score": 0.5075814499747183, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.41213231348812146, - "sentence_nr": 4 + "score": 0.4122750002638689, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15412719160788987, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.5010353699512481, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.20387261486363278, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.4846245724468382, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.06647168102389285, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.39618802899930716, - "sentence_nr": 4 + "score": 0.34350832619898364, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12560672881768975, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 + "score": 0.4969560260291519, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.17077058518804336, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5948724602646328, - "sentence_nr": 4 + "score": 0.5022008374701596, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.10784756064735967, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5042211795038526, - "sentence_nr": 4 + "score": 0.4427230465401631, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12369892692249995, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.44549610902403686, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.27447938256311044, - "sentence_nr": 4 + "score": 0.06656213940646748, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.615291848344044, - "sentence_nr": 4 + "score": 0.38435741328258305, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.175396614619324, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5516607622642397, - "sentence_nr": 4 + "score": 0.49736499605529066, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.15154395847232716, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 + "score": 0.46053919348995803, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1609675245202845, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.33762297226992255, - "sentence_nr": 4 + "score": 0.5069863833094232, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4576529535952892, - "sentence_nr": 4 + "score": 0.4041678259311437, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1290514243115152, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 + "score": 0.4766581477336301, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.2658483576665877, - "sentence_nr": 4 + "score": 0.07595192904514617, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6410540990527072, - "sentence_nr": 4 + "score": 0.41479321394739394, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.24601372576927547, - "sentence_nr": 4 + "score": 0.09735981717515908, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6374693500772332, - "sentence_nr": 4 + "score": 0.35288934658906385, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.08273178236238297, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 + "score": 0.36399666460809255, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13012870333257068, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6562641136790542, - "sentence_nr": 4 + "score": 0.3852835519852091, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.46426595961938383, - "sentence_nr": 4 + "score": 0.3356633416447032, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12422788549118892, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.40222210564426, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.28789057461471257, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.10467757347424328, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.36749853206282146, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0952569581727979, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.38264808953110185, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.13026649757585426, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.41550755035304077, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.039782861678265974, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.1175904695048123, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.3996881234028031, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.16142282195879326, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.2828367156737383, - "sentence_nr": 5 + "score": 0.4147043899005278, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.11564012893219777, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.44599783682350064, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.12601482779921785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.43595665254608706, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.44897710722021167, - "sentence_nr": 5 + "score": 0.12022286401047096, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6862249089515978, - "sentence_nr": 5 + "score": 0.48279986805368713, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.404727200247809, - "sentence_nr": 5 + "score": 0.15350377490367967, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6681898017773897, - "sentence_nr": 5 + "score": 0.47645148444499064, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 + "score": 0.13714845589364738, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 + "score": 0.45499281593451946, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.2704091953828695, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6207272323003366, - "sentence_nr": 5 + "score": 0.38785209659947417, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.5379348324975908, - "sentence_nr": 5 + "score": 0.12848168928706002, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7703766110349561, - "sentence_nr": 5 + "score": 0.4421263683867116, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 + "score": 0.18629760071299903, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 + "score": 0.4381418376415505, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.09198045184317984, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.4598393646838097, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.07798530247118374, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.4006113700211268, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 + "score": 0.14541923959059266, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 + "score": 0.47577612932999147, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.2033664688556054, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.5013013732058768, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.5896613549548209, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.7528914749586836, - "sentence_nr": 5 + "score": 0.2521233582161207, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.24706467963183681, - "sentence_nr": 5 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.4801289744823913, - "sentence_nr": 5 + "score": 0.3182970443542658, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6766690087429765, - "sentence_nr": 5 + "score": 0.5953162569846108, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 + "score": 0.3026566818840519, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.5945859352092411, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.3665134361137304, - "sentence_nr": 5 + "score": 0.18816868192268246, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.6118771029352303, - "sentence_nr": 5 + "score": 0.5179253053631742, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 5 + "score": 0.09629060614977814, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5764325110247531, - "sentence_nr": 5 + "score": 0.43565498999747165, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 - }, + "score": 0.6348509381122925, + "sentence_nr": 1 + }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.28418123342684043, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.2799331151961311, - "sentence_nr": 5 + "score": 0.539816402671069, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.6471892368478446, - "sentence_nr": 5 + "score": 0.3765959322920135, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8142499721936278, - "sentence_nr": 5 + "score": 0.6295826606382191, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 + "score": 0.40801269202545287, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 + "score": 0.6210533025653295, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.2744916220706949, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.542564139748617, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.0867932999243575, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.4201964133235075, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.3969463877642616, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.1897299381066278, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.5086851537953713, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3390387389794623, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6170420596680538, - "sentence_nr": 6 + "score": 0.3833939462124923, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 + "score": 0.07793031063789554, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 + "score": 0.3700181221537743, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.4216890913810254, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6885217194158456, - "sentence_nr": 6 + "score": 0.2718653389257641, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.3327209336079636, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.1804000267306113, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.451798442226037, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.32365795029773287, - "sentence_nr": 6 + "score": 0.1777835117834348, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6509517796070665, - "sentence_nr": 6 + "score": 0.5166806073547074, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.009396473650937872, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.13582906387565688, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.43344913217266734, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.1250453615099799, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.4413612249791572, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.3630576975795868, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.0744904632040495, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.4111163205685468, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5088645484558708, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6991726442472661, - "sentence_nr": 6 + "score": 0.4363130300030932, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4101479464529936, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7041976254287654, - "sentence_nr": 6 + "score": 0.41747276065817185, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_from", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 + "score": 0.41649654108052436, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.07749370908741021, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.3853293582383978, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 + "score": 0.43622390508229153, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.13343258247486778, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.4018842345370629, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.8020845125558708, - "sentence_nr": 6 + "score": 0.4250905063113662, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.595092211343687, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7971172820981081, - "sentence_nr": 6 + "score": 0.36347800793516216, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.06254678076846341, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 + "score": 0.3887428577633272, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.44787223195695314, - "sentence_nr": 6 + "score": 0.1982991730465618, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7968980206907678, - "sentence_nr": 6 + "score": 0.47607372633277156, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.08616711094288851, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.3696512763473903, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 + "score": 0.4486368934849452, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.08825252192863794, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.4377853721520782, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5383680940297331, - "sentence_nr": 6 + "score": 0.05345137572833361, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.786096406361039, - "sentence_nr": 6 + "score": 0.3829169125379508, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 + "score": 0.08767210132815903, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 + "score": 0.40476518002703893, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 + "score": 0.05422898988559086, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 + "score": 0.335890201952113, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.17905278399134197, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.37257295447029826, - "sentence_nr": 7 + "score": 0.34617921188455225, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 + "score": 0.0588222649477664, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 + "score": 0.3642771871011383, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 + "score": 0.10186730973904586, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 + "score": 0.43665642120840553, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.1418524086391329, - "sentence_nr": 7 + "score": 0.08248974616169381, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.38295770773758747, - "sentence_nr": 7 + "score": 0.40456777770242314, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 + "score": 0.10496714075880566, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 + "score": 0.4262440114275301, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.19074380068002203, - "sentence_nr": 7 + "score": 0.117026611061013, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.40566585096277824, - "sentence_nr": 7 + "score": 0.44043117464934733, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.17382347640129553, - "sentence_nr": 7 + "score": 0.15386029327005746, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4061580777885601, - "sentence_nr": 7 + "score": 0.43911482594829104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 - }, + "score": 0.10070927557742705, + "sentence_nr": 1 + }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 + "score": 0.43718220262892105, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.22381487678101888, - "sentence_nr": 7 + "score": 0.11478463129234825, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5249370100068887, - "sentence_nr": 7 + "score": 0.4651957501593415, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.09431297723472011, - "sentence_nr": 7 + "score": 0.07137101582673294, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3616856339096348, - "sentence_nr": 7 + "score": 0.4075406301092705, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 + "score": 0.14738500064905094, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 + "score": 0.4659728395318289, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.10666682719585797, - "sentence_nr": 7 + "score": 0.0643329477522681, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.33462901494141756, - "sentence_nr": 7 + "score": 0.3960585990192623, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.14557808399334188, - "sentence_nr": 7 + "score": 0.17247941414020762, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.36598346755702993, - "sentence_nr": 7 + "score": 0.48320144379865687, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 + "score": 0.057981164297440296, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 + "score": 0.33896784137459673, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.20198948917565754, - "sentence_nr": 7 + "score": 0.09751270821852938, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.34858221035657466, - "sentence_nr": 7 + "score": 0.395617758442078, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.16780109158842918, - "sentence_nr": 7 + "score": 0.06301432444316532, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3968694014697679, - "sentence_nr": 7 + "score": 0.4249725532507508, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 + "score": 0.18248753930464637, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 + "score": 0.4759830743101189, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.22669486951066523, - "sentence_nr": 7 + "score": 0.16678232269832466, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4484451941575473, - "sentence_nr": 7 + "score": 0.4756863623605152, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.11697642623186386, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.37117753637984835, - "sentence_nr": 7 + "score": 0.1946966569103724, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 + "score": 0.0772718393063023, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 + "score": 0.4203683137304257, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.09916009482330297, - "sentence_nr": 7 + "score": 0.08291357159799752, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3032928217006101, - "sentence_nr": 7 + "score": 0.4009694996956877, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.13805615693046389, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.40787998733941394, - "sentence_nr": 7 + "score": 0.3714280466838255, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 + "score": 0.3370100422576744, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.09478705591775652, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.33293232395887284, - "sentence_nr": 7 + "score": 0.3538602132402044, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.19148282873929853, - "sentence_nr": 7 + "score": 0.167672929900467, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4707949702068854, - "sentence_nr": 7 + "score": 0.46910779766306765, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 + "score": 0.32123020755377657, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.14057105892389254, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3028381427383384, - "sentence_nr": 7 + "score": 0.47435308668900444, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.12157241570357182, - "sentence_nr": 7 + "score": 0.08351211898903935, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.4080990097991491, - "sentence_nr": 7 + "score": 0.33030812447506436, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 + "score": 0.07528927678469202, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 + "score": 0.422513417362817, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 + "score": 0.1779993767214403, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 + "score": 0.5054922206303282, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.598931508663349, - "sentence_nr": 8 + "score": 0.3961285597009415, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.7353063745802827, - "sentence_nr": 8 + "score": 0.6148751441350505, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 + "score": 0.4923751299732868, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 + "score": 0.6853756490381199, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 + "score": 0.3996712647649035, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 + "score": 0.6353525755760105, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.4262221594184117, - "sentence_nr": 8 + "score": 0.5115346945020283, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5886657414856064, - "sentence_nr": 8 + "score": 0.7037574715738644, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 + "score": 0.38870674200492367, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 + "score": 0.6484380084879691, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.3315037521841549, - "sentence_nr": 8 + "score": 0.24715873794308874, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.468197879470805, - "sentence_nr": 8 + "score": 0.49051792813181655, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.24508104771894088, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5792139686527714, - "sentence_nr": 8 + "score": 0.5725552336126134, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.22218130727359342, - "sentence_nr": 8 + "score": 0.33608213382072566, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.39929356245904674, - "sentence_nr": 8 + "score": 0.6155314069125684, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.46092611919700416, - "sentence_nr": 8 + "score": 0.20801258614305904, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.6365915338629015, - "sentence_nr": 8 + "score": 0.26703508536995574, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 + "score": 0.35315040956049437, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 + "score": 0.625895188503691, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.26538706048179084, - "sentence_nr": 8 + "score": 0.33713757310040376, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.4982627378595717, - "sentence_nr": 8 + "score": 0.6015500127828212, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.5234484809182233, - "sentence_nr": 8 + "score": 0.17374951565433233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6658297773613274, - "sentence_nr": 8 + "score": 0.45325597884524305, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 + "score": 0.17743299460161885, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 + "score": 0.43071271897416463, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.15161074985415177, - "sentence_nr": 8 + "score": 0.16052654068024738, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3796830006266126, - "sentence_nr": 8 + "score": 0.41580120868053494, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.3815250264738168, - "sentence_nr": 8 + "score": 0.05963579607071745, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6516314751979607, - "sentence_nr": 8 + "score": 0.31139762378406344, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 + "score": 0.11133996756497437, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 + "score": 0.4410280353998367, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.18679710353734788, - "sentence_nr": 8 + "score": 0.11346446511593337, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3876457319870774, - "sentence_nr": 8 + "score": 0.3675317022605926, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.40003810431098236, - "sentence_nr": 8 + "score": 0.10742716472890976, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.5899097408105687, - "sentence_nr": 8 + "score": 0.42694859148910824, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.21812881407613688, - "sentence_nr": 8 + "score": 0.19073363590503933, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3598346059855135, - "sentence_nr": 8 + "score": 0.49895382941569383, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.38047531731529327, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.49485723102957346, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 + "score": 0.14745870033404418, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 + "score": 0.475170637938921, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.24586918158076287, - "sentence_nr": 8 + "score": 0.20590661325770857, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.4658595745396681, - "sentence_nr": 8 + "score": 0.482883619765493, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.4489235959690452, - "sentence_nr": 8 + "score": 0.26801022984888695, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5934678825154104, - "sentence_nr": 8 + "score": 0.5654883864995515, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 + "score": 0.21665407194210906, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 + "score": 0.4344921442639243, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.2735429726790281, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5644723203818537, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 + "score": 0.20223322445648179, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 + "score": 0.5084057058209687, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.26459538953931094, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.5272178908335121, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 + "score": 0.012201453805310429, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 + "score": 0.063050817196087, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.18559542135951204, - "sentence_nr": 9 + "score": 0.25848476545940924, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3804842882867387, - "sentence_nr": 9 + "score": 0.5525933856866961, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.13019082899297843, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 + "score": 0.40512126305429846, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 + "score": 0.24071298960902482, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 + "score": 0.5438509851618877, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.15122189206102096, - "sentence_nr": 9 + "score": 0.2063890416514164, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.26750110507308866, - "sentence_nr": 9 + "score": 0.38567678850872256, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 + "score": 0.3295566054952435, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 + "score": 0.5816133441895466, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.30327872414714485, - "sentence_nr": 9 + "score": 0.2748068612990203, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.49804213541579834, - "sentence_nr": 9 + "score": 0.5326219867738043, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.21685485833927476, - "sentence_nr": 9 + "score": 0.3574583793293068, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3714219747170047, - "sentence_nr": 9 + "score": 0.5924115119819969, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 + "score": 0.37994652561206577, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 + "score": 0.6464467277069994, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 + "score": 0.4206507730319955, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.33752742535974617, - "sentence_nr": 9 + "score": 0.678851303587664, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.112289032173749, - "sentence_nr": 9 + "score": 0.35367180741660353, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.17726100052085036, - "sentence_nr": 9 + "score": 0.6344846206551544, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 + "score": 0.6037023613177924, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2632018059331281, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.39962545473912425, - "sentence_nr": 9 + "score": 0.501302719796297, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.20586736678432452, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3710595252626966, - "sentence_nr": 9 + "score": 0.5693016623172978, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 + "score": 0.27075075499555246, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 + "score": 0.5374328610523021, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.14326513489612383, - "sentence_nr": 9 + "score": 0.1455399826828606, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4034278533385552, - "sentence_nr": 9 + "score": 0.4504825146558032, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12666372160329223, - "sentence_nr": 9 + "score": 0.175866555062937, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.2650373529479294, - "sentence_nr": 9 + "score": 0.2758687846643748, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 + "score": 0.2948978498692003, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 + "score": 0.5529453973837751, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.18294117097472648, - "sentence_nr": 9 + "score": 0.3563982585943877, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.4383387744769579, - "sentence_nr": 9 + "score": 0.6108017012121061, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 9 + "score": 0.22292726306270316, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.2922087191170089, - "sentence_nr": 9 + "score": 0.5653789747970112, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 + "score": 0.09362261118571368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 + "score": 0.3452056942265759, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.20298407172594946, - "sentence_nr": 9 + "score": 0.26930937054323245, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.427376330935813, - "sentence_nr": 9 + "score": 0.5410704185827219, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.1740044679403827, - "sentence_nr": 9 + "score": 0.24634765861867908, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.36375152376157177, - "sentence_nr": 9 + "score": 0.55968513851572, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 + "score": 0.2158914621804855, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 + "score": 0.5448184155666022, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.15626231814206226, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.2918712789926548, - "sentence_nr": 9 + "score": 0.39152357647177133, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.322788951728102, - "sentence_nr": 9 + "score": 0.08175340974854195, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.40263021320001785, - "sentence_nr": 9 + "score": 0.4308342322390109, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 + "score": 0.3185785286756486, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.21397099133614067, - "sentence_nr": 9 + "score": 0.3268233487541633, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3568171392601981, - "sentence_nr": 9 + "score": 0.6084114123608597, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.16925466459550803, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.35912398848424326, - "sentence_nr": 9 + "score": 0.04759937639788563, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 + "score": 0.2500653935141143, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 + "score": 0.585528867886047, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.5745954681260859, - "sentence_nr": 0 + "score": 0.18505378795140082, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.7920051188244848, - "sentence_nr": 0 + "score": 0.5602893668984232, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.6358921902612438, - "sentence_nr": 0 + "score": 0.21403222128228389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.8041899227402122, - "sentence_nr": 0 + "score": 0.563121432204311, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.6299285159340671, - "sentence_nr": 0 + "score": 0.18917620656425485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.7993134129243716, - "sentence_nr": 0 + "score": 0.4346170232980484, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3572514590810421, - "sentence_nr": 0 + "score": 0.18505378795140082, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.40312319760122833, - "sentence_nr": 0 + "score": 0.47051087423292237, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.35059076445515835, - "sentence_nr": 0 + "score": 0.09807167131529582, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.40219803477483124, - "sentence_nr": 0 + "score": 0.4646043403137081, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.41316127706749806, - "sentence_nr": 0 + "score": 0.18031307339768174, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4430321339435623, - "sentence_nr": 0 + "score": 0.522164454804456, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.037874984245935134, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.13083094614009624, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.8780634320789833, - "sentence_nr": 0 + "score": 0.20064110494011925, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.926946700115022, - "sentence_nr": 0 + "score": 0.5205761630334527, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.7964573357809173, - "sentence_nr": 0 + "score": 0.0684792839692368, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.8458636471716781, - "sentence_nr": 0 + "score": 0.3138898863773231, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.35601247064914876, - "sentence_nr": 0 + "score": 0.1573857459340795, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6528728847159075, - "sentence_nr": 0 + "score": 0.5347526444819753, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.40673971192998765, - "sentence_nr": 0 + "score": 0.008180069062416927, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6897190926100627, - "sentence_nr": 0 + "score": 0.04605877529742035, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3707525915417785, - "sentence_nr": 0 + "score": 0.13904320686250593, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6481906761834414, - "sentence_nr": 0 + "score": 0.47169365083525167, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.5724622291345857, - "sentence_nr": 0 + "score": 0.10065329518890631, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6818279156433621, - "sentence_nr": 0 + "score": 0.47259396735190334, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.703373719677874, - "sentence_nr": 0 + "score": 0.4054983797456263, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7784050705257474, - "sentence_nr": 0 + "score": 0.6264774230839022, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.469958733898233, - "sentence_nr": 0 + "score": 0.420450507904553, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5843756060033074, - "sentence_nr": 0 + "score": 0.6503146347305717, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2676232320051144, - "sentence_nr": 0 + "score": 0.43870712112271204, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5440246804235981, - "sentence_nr": 0 + "score": 0.6525926696001584, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3489926819498492, - "sentence_nr": 0 + "score": 0.4207445490015154, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5715668842319502, - "sentence_nr": 0 + "score": 0.6496192656497308, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2786169604662155, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", - "metric": "chrf", - "score": 0.5267252236203236, - "sentence_nr": 0 + "score": 0.2999092588227898, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", + "metric": "chrf", + "score": 0.5505916495384416, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3765213224289163, - "sentence_nr": 0 + "score": 0.2772655014585435, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6469521424555786, - "sentence_nr": 0 + "score": 0.4799723286048352, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3410244689880313, - "sentence_nr": 0 + "score": 0.2516768028374535, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5816669416914216, - "sentence_nr": 0 + "score": 0.49572209766846287, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4234343012313773, - "sentence_nr": 0 + "score": 0.2958351954606211, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6625289905598352, - "sentence_nr": 0 + "score": 0.5202221091638364, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.44219732271776674, - "sentence_nr": 0 + "score": 0.3471036105446511, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6193429426274062, - "sentence_nr": 0 + "score": 0.5663019495273462, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4324680011853555, - "sentence_nr": 0 + "score": 0.009070964338765818, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5877600878871951, - "sentence_nr": 0 + "score": 0.06852404470758497, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4493940083619696, - "sentence_nr": 0 + "score": 0.3212066202235163, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6230960824462234, - "sentence_nr": 0 + "score": 0.5836558214123343, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.6638859619095425, - "sentence_nr": 0 + "score": 0.3359230828063257, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7874224590682172, - "sentence_nr": 0 + "score": 0.5729275692707756, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.6947677373756656, - "sentence_nr": 0 + "score": 0.20051119758906127, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.7941300666655116, - "sentence_nr": 0 + "score": 0.5334791309401924, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.6412098671661826, - "sentence_nr": 0 + "score": 0.24894072982768842, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.7665040244283648, - "sentence_nr": 0 + "score": 0.5212235893093335, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.4845227999608418, - "sentence_nr": 0 + "score": 0.23724642034775328, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5968050469845498, - "sentence_nr": 0 + "score": 0.5175129869169551, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3861375213265022, - "sentence_nr": 0 + "score": 0.1849419409628554, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5122109329134508, - "sentence_nr": 0 + "score": 0.5067677916637257, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.32539921259497445, - "sentence_nr": 0 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5133457276293165, - "sentence_nr": 0 + "score": 0.4263684749347053, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.28822910320599077, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6087031937056202, - "sentence_nr": 1 + "score": 0.4228574070038002, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3880515884750121, - "sentence_nr": 1 + "score": 0.11126509848873964, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6587916715823183, - "sentence_nr": 1 + "score": 0.4338923576538663, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.5142726846179982, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.7344716263345912, - "sentence_nr": 1 + "score": 0.31311320826536454, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.4094748015187699, - "sentence_nr": 1 + "score": 0.17236491061326006, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4288513205758089, - "sentence_nr": 1 + "score": 0.5548663878579595, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.4487746167679644, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.4476730201191672, - "sentence_nr": 1 + "score": 0.3540506408782035, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2836623400057614, - "sentence_nr": 1 + "score": 0.08906092883748383, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.29147337237183046, - "sentence_nr": 1 + "score": 0.4317746285352776, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.16950698451288215, - "sentence_nr": 1 + "score": 0.16994548762775233, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.48668984177868246, - "sentence_nr": 1 + "score": 0.49416616344892494, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.2113054108348111, - "sentence_nr": 1 + "score": 0.3535002370419364, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.44238229987470284, - "sentence_nr": 1 + "score": 0.5959879218348465, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.26207903587847736, - "sentence_nr": 1 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.50073123223194, - "sentence_nr": 1 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4527112325797497, - "sentence_nr": 1 + "score": 0.393613605227227, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6708989870027865, - "sentence_nr": 1 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4556160153884204, - "sentence_nr": 1 + "score": 0.38333108639273095, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6661994452325181, - "sentence_nr": 1 + "score": 0.6252821653079126, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3216756020053242, - "sentence_nr": 1 + "score": 0.2562849004088193, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6141241026166391, - "sentence_nr": 1 + "score": 0.5767019342009202, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3977038258772401, - "sentence_nr": 1 + "score": 0.19851743023355672, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6202897864314184, - "sentence_nr": 1 + "score": 0.49793621556542356, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.37570809340937233, - "sentence_nr": 1 + "score": 0.10954782904363085, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6339141734561076, - "sentence_nr": 1 + "score": 0.5090382887002297, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.559332422592187, - "sentence_nr": 1 + "score": 0.1614809742656655, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.733291190094771, - "sentence_nr": 1 + "score": 0.4145218112165384, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3167585643537871, - "sentence_nr": 1 + "score": 0.2834484329788497, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5076869840147092, - "sentence_nr": 1 + "score": 0.5201572704778937, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3446592076818278, - "sentence_nr": 1 + "score": 0.006569332862878646, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5819912583909785, - "sentence_nr": 1 + "score": 0.060864196135666904, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.23270938096152352, - "sentence_nr": 1 + "score": 0.2756885721075884, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4490269267329941, - "sentence_nr": 1 + "score": 0.5867077870431389, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.44114781827798216, - "sentence_nr": 1 + "score": 0.23380867598952562, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6241365710582877, - "sentence_nr": 1 + "score": 0.5162668764933175, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.529527758323629, - "sentence_nr": 1 + "score": 0.35983766090218355, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6540432510655854, - "sentence_nr": 1 + "score": 0.5862251404739759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.49704232910799745, - "sentence_nr": 1 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6453248294274054, - "sentence_nr": 1 + "score": 0.41020178654369294, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.3542266508664836, - "sentence_nr": 1 + "score": 0.3563982585943877, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5643413028542406, - "sentence_nr": 1 + "score": 0.5378970484635915, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.3479698393875884, - "sentence_nr": 1 + "score": 0.1510722413165652, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5760833125751785, - "sentence_nr": 1 + "score": 0.43592329727028295, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.24373253714463095, - "sentence_nr": 1 + "score": 0.2465888500427759, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.49482039214573803, - "sentence_nr": 1 + "score": 0.5221084445696768, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.3995439803178399, - "sentence_nr": 1 + "score": 0.13240628161243978, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6021193793256325, - "sentence_nr": 1 + "score": 0.3347576434758551, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.2988697040013311, - "sentence_nr": 1 + "score": 0.31372333533981844, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5442522660489195, - "sentence_nr": 1 + "score": 0.5741396495481692, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.500703635659656, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6501904887399698, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2993081268625724, - "sentence_nr": 1 + "score": 0.4583603882613907, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.47777429598730525, - "sentence_nr": 1 + "score": 0.671355324267905, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43330223254789785, - "sentence_nr": 1 + "score": 0.10077062063331403, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5564499529933307, - "sentence_nr": 1 + "score": 0.09760482860544632, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1466607445607986, - "sentence_nr": 1 + "score": 0.2600884210903425, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.36552963821230766, - "sentence_nr": 1 + "score": 0.531430106996609, + "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.6837528314895732, + "score": 0.23380867598952562, "sentence_nr": 2 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.7968789890147058, + "score": 0.5100551662629407, "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7017829861193574, - "sentence_nr": 2 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7743327021667388, - "sentence_nr": 2 + "score": 0.7882997401328445, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6961795371760597, - "sentence_nr": 2 + "score": 0.5806197937310393, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7859480663394858, - "sentence_nr": 2 + "score": 0.7346706700987636, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2360941227140328, - "sentence_nr": 2 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.35939098278145853, - "sentence_nr": 2 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.14118350058219528, - "sentence_nr": 2 + "score": 0.4855332614117322, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.20431837779877604, - "sentence_nr": 2 + "score": 0.5299556742893647, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.1811004938014804, - "sentence_nr": 2 + "score": 0.38249626297768063, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.2649993136544717, - "sentence_nr": 2 + "score": 0.40976234193505356, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.23649053182388327, - "sentence_nr": 2 + "score": 0.369345079296433, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4127382174759535, - "sentence_nr": 2 + "score": 0.5103516764863386, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.20721924345714232, - "sentence_nr": 2 + "score": 0.5357110024227318, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.36475932190367044, - "sentence_nr": 2 + "score": 0.6365941772753647, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.11386607947762988, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.33564583347921473, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.570135897056151, - "sentence_nr": 2 + "score": 0.6960917409740967, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6801332690579707, - "sentence_nr": 2 + "score": 0.8209757784637755, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.46442643702863534, - "sentence_nr": 2 + "score": 0.14790264259417688, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5519480629125156, - "sentence_nr": 2 + "score": 0.27159767590045303, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.6268941789647348, - "sentence_nr": 2 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6958291103494518, - "sentence_nr": 2 + "score": 0.6849386986272349, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.21305368975019265, - "sentence_nr": 2 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4371748197696026, - "sentence_nr": 2 + "score": 0.6931369519059803, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.22837680015088951, - "sentence_nr": 2 + "score": 0.25552199116069907, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.44164180234500505, - "sentence_nr": 2 + "score": 0.3799133205289109, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4151474543103342, - "sentence_nr": 2 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.638952468710771, - "sentence_nr": 2 + "score": 0.3682311523733465, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.20876900081884944, - "sentence_nr": 2 + "score": 0.11739521786077453, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3981381071356935, - "sentence_nr": 2 + "score": 0.22090491782919655, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.11634129390828839, - "sentence_nr": 2 + "score": 0.1892240568795935, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.31530902302000635, - "sentence_nr": 2 + "score": 0.280413108453108, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.19544795798162903, - "sentence_nr": 2 + "score": 0.08635800047213174, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3835451743665027, - "sentence_nr": 2 + "score": 0.218109371254876, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.37917766663411384, - "sentence_nr": 2 + "score": 0.139800134566647, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5365794450039074, - "sentence_nr": 2 + "score": 0.2510112235832054, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.359355103997122, - "sentence_nr": 2 + "score": 0.0925329498915617, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5589602235417395, - "sentence_nr": 2 + "score": 0.2110486160692096, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4267520229161, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5518115366540288, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3221305290185444, - "sentence_nr": 2 + "score": 0.2887308472548599, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4866081657424789, - "sentence_nr": 2 + "score": 0.41654484827391225, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.35551034193127495, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5627284645723449, - "sentence_nr": 2 + "score": 0.12453389344594705, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.5039752490702457, - "sentence_nr": 2 + "score": 0.141543757252386, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.613669501327356, - "sentence_nr": 2 + "score": 0.2594145364221844, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6260375038358343, - "sentence_nr": 2 + "score": 0.4185938787651429, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7803415401430737, - "sentence_nr": 2 + "score": 0.432962604241542, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.5088535943352446, - "sentence_nr": 2 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.625202596789752, - "sentence_nr": 2 + "score": 0.512762518189388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.562048819850726, - "sentence_nr": 2 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.7192054483864224, - "sentence_nr": 2 + "score": 0.6931369519059803, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3146726146646545, - "sentence_nr": 2 + "score": 0.581972638479957, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.4709531555683, - "sentence_nr": 2 + "score": 0.6970914528585833, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.3941975148525721, - "sentence_nr": 2 + "score": 0.44120063733294235, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5191046479503385, - "sentence_nr": 2 + "score": 0.5296624608564717, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.262633940062176, - "sentence_nr": 2 + "score": 0.5642761727828352, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.41923206553744197, - "sentence_nr": 2 + "score": 0.6181373706707737, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.8107492451395732, + "score": 0.4440750605884706, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.900032747778274, + "score": 0.5402588602256685, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, + "score": 0.6458552885189878, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, + "score": 0.7468283944111381, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.7505336182671021, + "score": 0.4272870063962341, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.8401910628269498, + "score": 0.5170917334956868, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.13725861056573663, + "score": 0.43310177167002284, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, + "score": 0.534533410927948, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.13453927150397377, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.10522974272748564, + "score": 0.26481979271706185, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.22055493694673897, + "score": 0.3212854967972961, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.3931965048763613, + "score": 0.47171327621770304, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, + "score": 0.6256538561604215, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, + "score": 0.6943310521668014, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.6885326214539055, + "score": 0.4577275269488853, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.8229812189228393, + "score": 0.6747054474171109, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, + "score": 0.25383339228798274, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, + "score": 0.45896379476820603, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.412295470431275, + "score": 0.3508739523842563, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.705800771033924, + "score": 0.5533976153694653, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3480442076026084, + "score": 0.23705266435224473, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6142483232997242, + "score": 0.44716007458096513, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2861853478258715, + "score": 0.2615858282579583, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6401604432917332, + "score": 0.35447530946908884, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.14172292406325543, + "score": 0.25530635525095574, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.4762857001428092, + "score": 0.4224404198283467, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.17401517708317762, + "score": 0.44392090655418587, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.45006261596496794, + "score": 0.5678926447384061, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.151240443751577, + "score": 0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.4224869587588239, + "score": 0.0, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.21850594525107195, + "score": 0.40891568776497583, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.4049269026117245, + "score": 0.46522329223142805, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, + "score": 0.11436433361427001, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.3614856639698008, + "score": 0.23221971735799607, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.2255489037266197, + "score": 0.18580985894574314, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.3954925749722234, + "score": 0.3347249292100999, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.0, + "score": 0.6244631487487835, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.2466674257522263, + "score": 0.6931369519059803, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2929807168354841, + "score": 0.27668736912821895, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5975595069845072, + "score": 0.4414406760568898, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.6689604664235209, + "score": 0.17200767571780612, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.8010329764520807, + "score": 0.3723150838362789, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4678134833959513, + "score": 0.2465659486053858, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5051480556620123, + "score": 0.5689069160047179, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4113125177363443, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.42808075762838727, + "score": 0.30391153783979835, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.32685141385924577, + "score": 0.15138514598766048, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.3758692873615971, + "score": 0.3237497764315872, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.27341185048222727, + "score": 0.1544458227548897, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.6411651849711889, + "score": 0.3343587266874694, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.6237774736059616, + "score": 0.23817261442630488, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.8500131524897436, + "score": 0.448286611717823, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.573764722928549, + "score": 0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.817979859532479, + "score": 0.0, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.20679845323803403, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.47636494608150104, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.8482942955247808, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.9256238040654331, + "score": 0.0, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.09147827112247602, "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.3258762519783793, "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.19107912313367556, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.49367395128894914, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.6159319815107203, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.28685201698226354, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.7215691881328408, - "sentence_nr": 4 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.735100789804592, - "sentence_nr": 4 + "score": 0.2391308148553106, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4765874091118851, - "sentence_nr": 4 + "score": 0.18180608220159192, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.45911557772276623, - "sentence_nr": 4 + "score": 0.27307753334479423, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.45022125383821326, - "sentence_nr": 4 + "score": 0.15604242268653643, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.46874267375238576, - "sentence_nr": 4 + "score": 0.2255928425212252, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, - "sentence_nr": 4 + "score": 0.14965975078050625, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.43458947791319813, - "sentence_nr": 4 + "score": 0.22213502776474325, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3160946016179871, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.3013901676230198, - "sentence_nr": 4 + "score": 0.407876439044591, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.38106012955734714, - "sentence_nr": 4 + "score": 0.025108530586642898, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, - "sentence_nr": 4 + "score": 0.17466240109087192, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, - "sentence_nr": 4 + "score": 0.2719194508460068, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3820562306791339, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5488313413379253, - "sentence_nr": 4 + "score": 0.12316365460790003, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 4 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.540550443602966, - "sentence_nr": 4 + "score": 0.35862918415512257, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.23189835231884592, - "sentence_nr": 4 + "score": 0.4185938787651429, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.44157797833899437, - "sentence_nr": 4 + "score": 0.5087816327099641, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2294068720558097, - "sentence_nr": 4 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.40458364050078693, - "sentence_nr": 4 + "score": 0.78479833664205, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3254455687469726, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.32588643749980295, - "sentence_nr": 4 + "score": 0.4474512036484817, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.31430120091187586, - "sentence_nr": 4 + "score": 0.6230832293767097, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5013155459452984, - "sentence_nr": 4 + "score": 0.702540870003671, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.45237912327122276, - "sentence_nr": 4 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.3267294026204632, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6299071573751139, - "sentence_nr": 4 + "score": 0.4510525482602028, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.23736810439041953, - "sentence_nr": 4 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4004852416401387, - "sentence_nr": 4 + "score": 0.3088155734423375, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3722001929300059, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5252698638532942, - "sentence_nr": 4 + "score": 0.27718461611705486, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5605065818946205, - "sentence_nr": 4 + "score": 0.5365920629514802, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.586853267829013, - "sentence_nr": 4 + "score": 0.6274039030337838, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.36300296341860155, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5816676674074003, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.1258907882951215, - "sentence_nr": 4 + "score": 0.4578226095312774, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.34143648068854054, - "sentence_nr": 4 + "score": 0.5406295999835291, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.28765408533715414, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4046608868073569, - "sentence_nr": 4 + "score": 0.002054231717337716, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.1819722649161304, - "sentence_nr": 4 + "score": 0.12286996020967837, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.44254730215235283, - "sentence_nr": 4 + "score": 0.31567668741706395, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.28295596283263513, - "sentence_nr": 4 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6067794553589253, - "sentence_nr": 4 + "score": 0.6834390596430621, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.13305199541830684, - "sentence_nr": 4 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.43244987270004115, - "sentence_nr": 4 + "score": 0.5820265218174012, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3377385620641691, - "sentence_nr": 4 + "score": 0.23887527917609022, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5256128450453542, - "sentence_nr": 4 + "score": 0.4120359948636439, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.1175771442804648, - "sentence_nr": 4 + "score": 0.36210097004176117, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.3103572690939351, - "sentence_nr": 4 + "score": 0.408098151133905, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.12546912767038895, - "sentence_nr": 4 + "score": 0.3165014630070639, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2651343523961406, - "sentence_nr": 4 + "score": 0.42516173623967946, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.12407216162020399, - "sentence_nr": 4 + "score": 0.353203510510529, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2664864612493293, - "sentence_nr": 4 + "score": 0.4910213297498164, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.15820362165931962, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.2249046365436241, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.4753167451887016, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.6372909532389948, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.38317923930200504, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.47975624978837655, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.37589902061551017, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.42554151277542873, - "sentence_nr": 5 + "score": 0.2054194471318506, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.25678404806291744, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.37045149029437513, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.4135171000263379, - "sentence_nr": 5 + "score": 0.5617848264135781, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.7050151549073953, - "sentence_nr": 5 + "score": 0.706027620990306, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.42988105429544615, - "sentence_nr": 5 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.7577244658187771, - "sentence_nr": 5 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.5366411241731205, - "sentence_nr": 5 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.825566494253596, - "sentence_nr": 5 + "score": 0.728208634600343, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.6976333495952621, - "sentence_nr": 5 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.8331572107884448, - "sentence_nr": 5 + "score": 0.6502428441722727, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4165530720734658, - "sentence_nr": 5 + "score": 0.1243018504102695, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.7027805129995731, - "sentence_nr": 5 + "score": 0.32950116238735283, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.4027788021844849, - "sentence_nr": 5 + "score": 0.23660362391696813, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6872835607174038, - "sentence_nr": 5 + "score": 0.34152697838249696, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.498704623570665, - "sentence_nr": 5 + "score": 0.369345079296433, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6478746389895599, - "sentence_nr": 5 + "score": 0.5103516764863386, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.41307323705325416, - "sentence_nr": 5 + "score": 0.5357110024227318, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5785653391533346, - "sentence_nr": 5 + "score": 0.6365941772753647, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.5248587176134882, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6664855309004869, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.35210829264331733, - "sentence_nr": 5 + "score": 0.36763082847636347, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5239651686730163, - "sentence_nr": 5 + "score": 0.45637140510576385, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.17729842264695017, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4103582047611184, - "sentence_nr": 5 + "score": 0.16935976352352106, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.34895836374229405, - "sentence_nr": 5 + "score": 0.31268514922728713, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4767378358574124, - "sentence_nr": 5 + "score": 0.41990725085948355, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.833078701050083, - "sentence_nr": 5 + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.9482515348146272, - "sentence_nr": 5 + "score": 0.6869404628233521, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.8958039312312598, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.9382091007325469, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.5805399561362194, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4810464260105228, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.4500531895417844, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.43027065541050147, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.4933292241270431, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5225247297523148, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.5064127215831256, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6516332048338376, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.8363600587440573, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5212982931053122, - "sentence_nr": 5 + "score": 0.9912737182609732, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.35319015092357736, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5822934956325967, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4592978565863154, - "sentence_nr": 5 + "score": 1.0, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3855522725905196, - "sentence_nr": 5 + "score": 0.6018154975998465, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.587260566914102, - "sentence_nr": 5 + "score": 0.7669980679050217, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.4426623526629488, - "sentence_nr": 5 + "score": 0.8482942955247808, + "sentence_nr": 3 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "pa", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6368371029698285, - "sentence_nr": 5 + "score": 0.9256238040654331, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", "score": 1.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.3450219162509876, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3993348853061597, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5561195823338172, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5362935676066722, - "sentence_nr": 6 + "score": 0.2246029757863831, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5803515898273521, - "sentence_nr": 6 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5422220468910552, - "sentence_nr": 6 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2562150245540302, - "sentence_nr": 6 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.47046477830594896, - "sentence_nr": 6 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.33438299066966715, - "sentence_nr": 6 + "score": 0.37709297891717664, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5409759573191787, - "sentence_nr": 6 + "score": 0.6881502501430368, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.37854068916316835, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5743796566387722, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5521710658453207, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7317828775912516, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.45506803308128024, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6477506541284608, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.592313615748771, - "sentence_nr": 6 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.7382416555842614, - "sentence_nr": 6 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.37184214350816, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.7949498209605872, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4018202851356865, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.6003256951549871, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7542976177437886, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.18207052811092134, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.4504432021668592, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.37717457428685847, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5554130492458337, - "sentence_nr": 6 + "score": 0.5581982021478125, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 6 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.31598923484911084, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6425503166524515, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8078891929749037, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.7629273292796576, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8510385544954956, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.8725129388059689, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.9495292423959529, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.2147607499133801, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.3976144917079093, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.49349163706233623, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.694445271037971, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.3037643089519314, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.5183662698462751, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.8253498772794055, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.8529564805429163, - "sentence_nr": 6 + "score": 0.43795381992037963, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.7944837206494969, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.8784531740275225, - "sentence_nr": 6 + "score": 0.5881561248602009, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5924993690004501, - "sentence_nr": 6 + "score": 0.46670957224939175, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.5828833474188783, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.7908226509294533, - "sentence_nr": 6 + "score": 0.37544324742239676, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.38694317759010316, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.5953878513137957, - "sentence_nr": 6 + "score": 0.49546288984677567, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.44711013370113256, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.7319347493436125, - "sentence_nr": 7 + "score": 0.41602211217571683, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.49023502313124495, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.7638414724136195, - "sentence_nr": 7 + "score": 0.49713060327965375, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.4424906782646928, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.705507971295129, - "sentence_nr": 7 + "score": 0.440129802760994, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.21326369102393236, - "sentence_nr": 7 + "score": 0.16195570128532405, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.24781828193168487, - "sentence_nr": 7 + "score": 0.581645267684411, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.07860105393900486, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.09678377693633947, - "sentence_nr": 7 + "score": 0.429292711066547, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.11601141307045003, - "sentence_nr": 7 + "score": 0.1667955161379731, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.21671187566850864, - "sentence_nr": 7 + "score": 0.5848202846227532, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.2577716972449781, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5171901208397282, - "sentence_nr": 7 + "score": 0.41734150775835166, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.35015224715252113, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5701648579139658, - "sentence_nr": 7 + "score": 0.41213231348812146, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.3349252032650068, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5908087431574293, - "sentence_nr": 7 + "score": 0.40435987083533204, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3274016883618531, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5570399656004248, - "sentence_nr": 7 + "score": 0.34256683873776383, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3449058130015412, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5365619830343804, - "sentence_nr": 7 + "score": 0.41477028165511615, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.29688845677442144, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "es", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5494319015457763, - "sentence_nr": 7 + "score": 0.40854152133685306, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.207314191412716, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.4360555836773355, - "sentence_nr": 7 + "score": 0.4164061298971701, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.25376032254696296, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5334329403985332, - "sentence_nr": 7 + "score": 0.5709936728721758, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.20039141607873007, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.36123312088832493, - "sentence_nr": 7 + "score": 0.40435987083533204, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.3235473265529593, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5441122251341168, - "sentence_nr": 7 + "score": 0.40562290854898025, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.32707695373369694, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5166643606783462, - "sentence_nr": 7 + "score": 0.33546955366063214, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.3069937936246452, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5024648105961349, - "sentence_nr": 7 + "score": 0.40319099863003527, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.26513488970168847, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6029932145447834, - "sentence_nr": 7 + "score": 0.4628457025650974, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3927237741677927, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.7451438087039315, - "sentence_nr": 7 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.5570357635362685, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.8116469942298856, - "sentence_nr": 7 + "score": 0.39858613265631837, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3860973950960897, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.6271680934322363, - "sentence_nr": 7 + "score": 0.3818534926571001, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3826576187198625, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.6071841372061269, - "sentence_nr": 7 + "score": 0.3644112480028862, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.3447241447679157, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5531085140985558, - "sentence_nr": 7 + "score": 0.39618802899930716, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.27710310401156996, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5216248191624099, - "sentence_nr": 7 + "score": 0.393379300802006, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.31128635710849173, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.6304411194127884, - "sentence_nr": 7 + "score": 0.3892064098781075, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.13308561809919006, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5312476702183977, - "sentence_nr": 7 + "score": 0.3066682918799934, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.20475739007221866, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.3934874462686164, - "sentence_nr": 7 + "score": 0.3010381621698183, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2879556779114461, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.4554184077174173, - "sentence_nr": 7 + "score": 0.28783297914763095, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.09578921953028982, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.40472887922389433, - "sentence_nr": 7 + "score": 0.3958941272081701, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.22816849039973935, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5295534280606148, - "sentence_nr": 8 + "score": 0.3113686002796155, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.348007986647201, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6148736550683231, - "sentence_nr": 8 + "score": 0.5042211795038526, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.31222258402876674, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5549937870516303, - "sentence_nr": 8 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.10721126066665879, - "sentence_nr": 8 + "score": 0.511876122662448, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.23683075175361493, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.2631328190836655, - "sentence_nr": 8 + "score": 0.511876122662448, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.16455392433653304, - "sentence_nr": 8 + "score": 0.5948724602646328, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5707860320039717, - "sentence_nr": 8 + "score": 0.5049375875723539, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.461597801606675, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6280777654467244, - "sentence_nr": 8 + "score": 0.5582360999449585, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.4224298950114519, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.60823085524287, - "sentence_nr": 8 + "score": 0.48375513642780327, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4141871474340027, - "sentence_nr": 8 + "score": 0.4915933923809756, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.36769040719718776, - "sentence_nr": 8 + "score": 0.41469341972645324, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.4064141882459388, - "sentence_nr": 8 + "score": 0.39451521279220947, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.3096036988813059, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.5894510883198948, - "sentence_nr": 8 + "score": 0.47825370157575003, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3286711939680359, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5944310794747374, - "sentence_nr": 8 + "score": 0.5516607622642397, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.13547277341758465, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", - "metric": "chrf", - "score": 0.4830189619506113, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", - "metric": "bleu", - "score": 0.23841754841770157, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.34481325534410395, - "sentence_nr": 8 + "score": 0.47160616105623426, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2822535302220024, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.3874773378787974, - "sentence_nr": 8 + "score": 0.5256353512715748, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.31747697264511426, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.40797778663955364, - "sentence_nr": 8 + "score": 0.3765697091436241, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.27447938256311044, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.6793717376740783, - "sentence_nr": 8 + "score": 0.615291848344044, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.46935933364934335, - "sentence_nr": 8 + "score": 0.31573558123189943, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.773055573548356, - "sentence_nr": 8 + "score": 0.6989238098201116, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.43368945552925614, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.640995178057518, - "sentence_nr": 8 + "score": 0.6245952145297528, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.20050320605789015, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4046291070099031, - "sentence_nr": 8 + "score": 0.44995700110278536, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.3160213610127146, - "sentence_nr": 8 + "score": 0.2680165156355779, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5165614670038283, - "sentence_nr": 8 + "score": 0.5989264158576341, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.30758744700466467, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.4684197705189288, - "sentence_nr": 8 + "score": 0.4425650919372919, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4161791450287817, - "sentence_nr": 8 + "score": 0.32594818888335836, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7054426787013603, - "sentence_nr": 8 + "score": 0.6263180162489238, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.2961516536011624, - "sentence_nr": 8 + "score": 0.32594818888335836, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.49803924348035766, - "sentence_nr": 8 + "score": 0.6263180162489238, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.3459667618766101, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6170810606402402, - "sentence_nr": 8 + "score": 0.4576529535952892, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 + "score": 0.5309982646782259, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.46832763312452297, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.8176110134774669, - "sentence_nr": 8 + "score": 0.4726395749383864, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.40052428191473877, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.43103580001357805, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6690742226623104, - "sentence_nr": 9 + "score": 0.33762297226992255, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.6031612036218008, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.736286703381354, - "sentence_nr": 9 + "score": 0.39336600752225864, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.39432344823662835, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5943452555220106, - "sentence_nr": 9 + "score": 0.4101715667811344, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.10772332006118607, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.17652714369664665, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.11254397891886614, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.20623288988983426, - "sentence_nr": 9 + "score": 0.45834841871997833, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.1100081929352474, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", - "metric": "chrf", - "score": 0.18967061672400035, - "sentence_nr": 9 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.31754227193241025, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.27190910124573536, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5173567851798608, - "sentence_nr": 9 + "score": 0.3974726419025883, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.43106863786661676, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.24493390281390082, - "sentence_nr": 9 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.48113625107113883, - "sentence_nr": 9 + "score": 0.6374693500772332, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.19476681308252697, - "sentence_nr": 9 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.42030407727741037, - "sentence_nr": 9 + "score": 0.6151179643430991, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.429512074830509, - "sentence_nr": 9 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6066779955199886, - "sentence_nr": 9 + "score": 0.686947433675709, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3459789902390003, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "translation_to", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5620330456296532, - "sentence_nr": 9 + "score": 0.4746119151171374, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.37825713491091884, - "sentence_nr": 9 + "score": 0.2658483576665877, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation_to", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.5584414289480568, - "sentence_nr": 9 + "score": 0.6410540990527072, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.09596136927307748, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.40849147213099996, - "sentence_nr": 9 + "score": 0.5639241776831634, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.1845747513433909, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "translation_to", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.44379971518505973, - "sentence_nr": 9 + "score": 0.5281061979991509, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.18212463619188357, - "sentence_nr": 9 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation_to", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.469592540371137, - "sentence_nr": 9 + "score": 0.6668099404219522, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.22972631482860506, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "translation_to", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.436102988762466, - "sentence_nr": 9 + "score": 0.5554602680850725, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.2517176762753373, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "translation_to", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.45137344500317134, - "sentence_nr": 9 + "score": 0.41291750111233794, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.3128384316903283, - "sentence_nr": 9 + "score": 0.17181529671327242, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation_to", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.48016279207050283, - "sentence_nr": 9 + "score": 0.6053635787005981, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.45026965676007474, - "sentence_nr": 9 + "score": 0.2658483576665877, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "translation_to", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.6668256174353906, - "sentence_nr": 9 + "score": 0.6798749495422826, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.43200638115383627, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6892273787708799, - "sentence_nr": 9 + "score": 0.46426595961938383, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.341195158470265, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.6539473951166187, - "sentence_nr": 9 + "score": 0.41238100267720657, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.41238100267720657, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4806367958084579, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.4710260495003035, - "sentence_nr": 9 + "score": 0.6562641136790542, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5013632657267051, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4224991954993499, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5191362758854317, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5379068753129642, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3348758882377771, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4601349893675622, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5066311799500233, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2887138086538547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.40475700826319555, - "sentence_nr": 9 + "score": 0.6342291345998248, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.4349871720911447, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.310186302993101, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5434540129901786, - "sentence_nr": 9 + "score": 1.0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.5244380103905697, - "sentence_nr": 9 + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.2828367156737383, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7513336773729535, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7341375356694393, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.2404315522172745, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.49155714102395526, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3477250470582593, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7188419868243952, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.40276720463657734, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6529271690805427, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6392900613840917, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6392900613840917, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.4386229919587297, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5379348324975908, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7703766110349561, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.30188353873287377, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6086565367747951, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.22391522968021457, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6087618281135659, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4621757041594117, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.22067731046885494, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5635661737033422, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.8025775976044891, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7689532399280165, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7338978299765546, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.24011079455637607, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.19920494035049138, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.614209720001149, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5896613549548209, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7528914749586836, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5300714512917181, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7461630750708693, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.33359103227594633, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.701102363286568, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5271017464925504, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7749613594649343, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4335364472118335, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6878319610579101, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.480771131185851, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7032048786770096, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.5300714512917181, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7461630750708693, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4801289744823913, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6766690087429765, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.30421485886156485, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.566236392445952, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.24706467963183681, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.32965129549221617, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.623436907204599, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.32078739729528816, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5817366082116868, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.44332438338421004, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5764325110247531, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5878575558111695, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3665134361137304, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6118771029352303, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6090575371936678, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6090575371936678, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.06088829927112382, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4100134571476398, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5856608401367807, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.17098323692758396, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5216877937894046, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3527295712700594, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6062826429226292, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3665134361137304, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6118771029352303, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.6471892368478446, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8142499721936278, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.7012294787544179, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8478115719875968, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.40202477345336673, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7469480084357536, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2799331151961311, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.40157733283424196, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7133166401137868, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7494665344743727, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.42612283570374254, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7185121839177114, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5199388279318895, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.23141570376732995, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5938624587877649, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7830281347135482, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3390387389794623, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.6170420596680538, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.6279894552667558, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.551397074868541, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5403400891349619, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5460240376042262, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.24343304284910333, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.6275577931282961, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.6431872581462166, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6589376390020449, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.4216890913810254, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6885217194158456, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6589376390020449, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.29176300840900793, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6143650111703199, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43021236941942204, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7142896582178452, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.24090844358935917, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5468852870478801, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43021236941942204, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.7142896582178452, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6963801389253689, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4101479464529936, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7041976254287654, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.2919394073770869, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5957961314949175, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.17537670874647399, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4800889669735933, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6991726442472661, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.22845493240080628, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.584996891148118, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6991726442472661, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.23272696712467975, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5794868721814046, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6673259967761724, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16331948281960493, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.350650198151987, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.8056920633274978, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.8391519966182309, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.22640935662631664, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6263363844543545, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.595092211343687, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7971172820981081, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4207937380724192, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6985308026285912, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.8020845125558708, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5700185304500285, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.595092211343687, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7945212279546889, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.43011383006801057, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7140577175386648, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3843363395779093, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.651158213392685, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.860002370506267, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7719180936906627, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6590438071804039, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7745649676018984, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6061131723054572, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.34636800712900173, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5167955767158704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.5397693417183738, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.425143650778693, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6674242019044293, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.421151249507493, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6938674571170766, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.41843795218458035, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6316283876832989, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.17905278399134197, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37257295447029826, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.12620429887108936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.35580703793872603, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.12872220631084524, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.33602633953270183, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.03037224815656603, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.10203846572325131, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.33381153680096753, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1685643537060726, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.36926449644166065, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.014935758919429663, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.08106107745254391, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.044304867337633724, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.20806974344498103, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.23889245981479848, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4543632408556633, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1418524086391329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.38295770773758747, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.030860166165309233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1100250143829584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.21255327712152144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.43272151570555034, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.05918530850500025, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1438459189500836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.30693371625402605, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0979038733644086, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.30211704738953993, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.2288990188897003, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.48933901443699584, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.009624974244068071, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.07318255686027669, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.043420474648595074, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2884095690753619, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.24725159675471015, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5112120334550363, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.17382347640129553, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4061580777885601, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.10361854845420869, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.32774802711076473, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.15186969315425305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3458120002305796, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.19074380068002203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.40566585096277824, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.20031726728306523, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.20485833586704885, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.468735805943922, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.192481383169461, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3799051443349615, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.01252735726099625, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.273148644463442, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.16260451093454215, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3759026836547276, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.09431297723472011, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3616856339096348, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.189717083187238, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.10266747466754884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3364703638684802, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22381487678101888, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5249370100068887, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.28912109037408523, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.2144604484498437, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.48894052224175993, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.21001173689943997, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.423493931076046, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.006232910970143225, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.06317168666869727, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.06938388878349923, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3541078046399395, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.20485833586704885, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.48732945706336717, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14557808399334188, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.36598346755702993, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.06897533888461813, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.2776666563000344, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.1665765483402476, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4017968725013381, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.10666682719585797, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.33462901494141756, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.04151505758906764, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.12189363728567917, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.37595660827287636, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18926971577178767, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4931453714148122, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.004663531624960091, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.07262533604330305, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.037401300306846526, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.27395881217705964, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.26000287375180825, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.47354107832122266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16780109158842918, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3968694014697679, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1579497466001673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.5092928545844059, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16286876096900815, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3422914837190449, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.20198948917565754, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.34858221035657466, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.06888992790640074, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2874483621307283, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1631196072688366, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3502730667074754, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.21286836557101563, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.45055232014427626, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.031126201157905466, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.015970144454664378, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.06929847827527827, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.30185194035792856, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1723692524265489, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.349893200245233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11697642623186386, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.37117753637984835, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.17621963873521423, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09916146090364127, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3121110160693956, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22669486951066523, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4484451941575473, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.011560595536104562, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.07368089078790738, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.41452613113710224, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17892846390928677, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.47088195615067674, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.012370537823050053, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.06660321132654005, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.09831093939330879, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.33203866499974327, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22013459885748218, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.43986532876547135, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13805615693046389, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.40787998733941394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17643078314788999, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.40757584786696294, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1989414239237112, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3791567776918788, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.09916009482330297, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3032928217006101, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.09453698369211004, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1995980198896431, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4244503391142409, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.18216362398065106, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36524832602306334, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.08319287955437346, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.08383676689911676, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.2855329690010324, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20126232208711145, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.39698346457743144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.19148282873929853, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4707949702068854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.08183353655679478, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.25007633393249695, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0981642545874085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.31793222329793575, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.09478705591775652, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.33293232395887284, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.11976209355757551, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.27004759126600675, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.09142555538569784, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.31371707771405133, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.05438497632520132, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.26123506271154656, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1490232164900303, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.43745835724045856, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.005606294971348417, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.06662245090541388, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.12752236829255797, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.13462044240543036, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.39535559458710795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.12157241570357182, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4080990097991491, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.10415298161056984, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.33452632923050557, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.21204239268527586, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3846197304420823, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14057105892389254, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3028381427383384, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.004763623056487517, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.07485928007606017, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.11689600237805012, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.38258301195690664, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.19809535837880818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.447539350421338, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.004718557257042585, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.08019304349523304, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.05614653993259943, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.26485323792360876, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.23158048156321728, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.47580042760181485, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.598931508663349, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7353063745802827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1327526847508867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37850602486495205, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.18405035438430847, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4142901090120915, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.24239458593560292, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.32069132319909655, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.22478613858269392, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.44348101018104913, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.4026159305424288, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5712560131047175, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.183687049781416, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.351911486970854, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5181825846579515, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.6225705543415939, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7106977638931217, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.4262221594184117, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5886657414856064, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.25861130592298187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.39452644092432093, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.20379250618355427, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.41085414309816914, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.2990226215771518, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4323734152924571, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3012789660952507, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17248715680799764, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.40043565243219187, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.13835317113453516, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.16343842313572918, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3986641525285075, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.417372155782838, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.507980317618041, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5792139686527714, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.25798723088167685, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5244854229988815, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3315037521841549, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.468197879470805, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.460474309246715, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.6213537794704693, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5676965183365866, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.003172770121174655, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.46092611919700416, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6365915338629015, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3344305108778801, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.37017501464955627, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22218130727359342, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.39929356245904674, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.3194331635465395, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5031092445628172, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.334422418242443, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.47577086062579566, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.4482907809719588, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5498272118133005, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.02467424260792568, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.21902340561392236, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.43906671679239717, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.5104779149627351, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6198275970742451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5234484809182233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6658297773613274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.30007504691018483, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5153810823423555, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14074957769288798, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3750035199199742, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.26538706048179084, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4982627378595717, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.232738415750697, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.49618971681248764, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.3488611533620711, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5550499651473632, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.31883477089875656, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5510450101159524, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.038236956722392024, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.31998097041178836, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.4220833561341287, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5956810507017879, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3815250264738168, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6516314751979607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.189902924205034, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4072184389907138, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.18710260593933364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.44334313717706003, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.15161074985415177, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3796830006266126, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.07757069009917116, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3253161209971999, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.22168992033645996, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.523689661176845, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.2722704374402053, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.596004654894533, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.01008902035184167, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.10279947040838337, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3569840483632983, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4653583721345133, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6142658860525915, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.40003810431098236, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5899097408105687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2097387761551816, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.49663301508497226, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.27067168022307464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5439625482235064, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18679710353734788, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3876457319870774, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.2826204057042236, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5043062352893725, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.29588994069727786, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5527117669081858, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.46732353406180216, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6059276585345114, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.06266083709457643, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.36565527196849945, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4882803186347697, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.33661284377001893, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5885351264299764, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.38047531731529327, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.49485723102957346, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4672309378181727, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.29886658673327365, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21812881407613688, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3598346059855135, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1367498402979849, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3943841419148219, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.5676006714726635, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.6880701448812352, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2974074484950165, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5121581247515657, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.010162846529607748, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21947959999379651, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3226457008913864, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4265506545827786, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5487572224993423, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4489235959690452, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5934678825154104, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.28800869328515505, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.49348678623542436, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2429163097293302, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5044329486461447, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.24586918158076287, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4658595745396681, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.23073085454808062, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.44142087654422146, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4406612884550454, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5676112112992767, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4476950425126913, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5932980209045412, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.1483315516064897, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.25249051585915977, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.38558450790399557, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4631700687380434, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.59196914119751, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0067104198717751464, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9025232868361638, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9169897590736298, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9709835434146469, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9951728990866464, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.8935248372106969, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9404428602061264, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.18559542135951204, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3804842882867387, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.13087682931309413, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.19462952976787054, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.013538497707846785, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.16678872216161894, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.38156158663679846, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.21940429389247643, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.4343280866601455, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3300025916068812, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5052501972629104, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1824401863423467, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.36709433185688595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.3377854698776805, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.521201229892482, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.2992694690475121, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.521871374038439, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.15122189206102096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.26750110507308866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.1324578891826276, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.08163977068875294, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.14134641571854575, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.22948919855739472, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1475503033983142, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.22104108935973044, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.16434349396840395, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.28582614857210975, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.24911274612875411, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3603818786794888, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.23831215045289575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17543744527808774, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.28201016956553354, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17083255863912036, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.2797958336163538, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21685485833927476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3714219747170047, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.366137273378509, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12656494026948834, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3156355830822428, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30327872414714485, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.49804213541579834, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13237645860785527, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3818322535970043, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154235663145316, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3906877817743504, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12487405142186064, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.32817291858267583, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.16701570871784516, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4021286881032558, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.29383139922210444, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.15799783604363904, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3949243937510492, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.40100810859537644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.112289032173749, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.17726100052085036, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.14276716121505195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3191375424862687, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.33752742535974617, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.31758120882708796, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.33753843688529356, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3593717322097392, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.010176705289341573, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3124983184732695, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3227044640287027, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3710595252626966, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18235247300784824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.40779523977234755, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.013915288440632284, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.39962545473912425, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.39112369376374106, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2202248274013358, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.48474965676300186, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.45813938111627356, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.1593344703029041, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.22494952618128455, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4760660341798742, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3963858306295727, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12666372160329223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2650373529479294, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1649662542496744, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3466546857451185, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.01536966738773372, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4034278533385552, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.13829446068705525, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.32059338352121075, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16521691795932783, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4134512022176617, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3435867188688158, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12366644075037489, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.37651342775995167, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2962222000049211, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1971903602140518, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.36269646528997446, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3473154676483541, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2922087191170089, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2244748716483542, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.007281906895508523, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18294117097472648, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4383387744769579, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17092467746295725, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4340281226634826, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22860414459682069, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.47331131010100724, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17200673466668953, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.39948318545775324, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.10553225565626573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.1763116500850642, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.17730543118229922, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4201842844735916, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3744383822869251, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1740044679403827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36375152376157177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13269353024089545, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.28998089836851504, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.01357525601063516, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20298407172594946, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.427376330935813, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.382987159925022, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.17558199612672082, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.41334979014850587, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20947801521367798, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.37699245483283905, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24318848592140954, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.501343318078065, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13784906211485343, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3161105981607342, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.28433291815307693, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4589827303637465, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.322788951728102, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.40263021320001785, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.1405026510197826, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.24785258181936404, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15626231814206226, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2918712789926548, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15325316503089068, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2756316951639811, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.3210853623565359, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.009559007108143848, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.05937666456658802, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.28306950244125495, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.14063630555225284, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.24531520458611372, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3264287329357334, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.41662443172249786, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2855471341725443, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.40969820391967565, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16925466459550803, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.35912398848424326, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.21547697432588886, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.18039960295364865, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.21397099133614067, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3568171392601981, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14134641571854575, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3078571099929154, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.15658994837053716, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3084004707364603, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.20215771603666896, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.34483322672745376, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16165057948216605, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.35172210628524053, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.012458960343878354, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.20053583653512705, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3585550644386862, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.22665851162885023, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4402646729409968, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6358921902612438, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8041899227402122, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6299285159340671, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7993134129243716, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.710159574003633, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8462481747979111, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5487830136896633, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.77238965036654, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5745954681260859, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7920051188244848, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6012475603804444, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7990339788905771, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6660677740125452, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8293798371335214, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6012475603804444, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8102198011293434, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5718247506430171, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7570613392550647, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6241924127610678, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8031006153647919, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6372502110149713, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8187019874664503, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5745954681260859, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8001969096241068, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.35059076445515835, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40219803477483124, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.41316127706749806, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4430321339435623, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3993284843242707, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224738565076288, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2908087026261561, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3411361400094189, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3572514590810421, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40312319760122833, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2996868226086902, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3355531727847081, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4199243020508202, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4310330650643179, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.38146085172952343, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40030269579783606, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2613520653232399, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.292974388325607, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.38876512474558916, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.41342876789412997, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.36631135849378577, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3947683748805251, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.8780634320789833, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.926946700115022, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7964573357809173, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8458636471716781, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9452996322890763, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9463396364218181, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6537803976048806, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7742226743967544, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.738238064391125, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8637738769684485, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40673971192998765, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6897190926100627, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3707525915417785, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481906761834414, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4405434565828979, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6872423435487918, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.34070519401434163, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6376396416993303, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35601247064914876, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528728847159075, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3535276144718208, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6399338911163, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.29793763405666984, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5878658443031616, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2737856702715042, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6091441790112126, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2060740184460064, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5566122985381202, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3764145740138264, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.660406350984819, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3552824817180132, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6650963330720984, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.703373719677874, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7784050705257474, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.469958733898233, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5843756060033074, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6034601376302852, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7074074363255227, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5200692650497809, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6586847274336591, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5724622291345857, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6818279156433621, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5439803529976158, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.657598922173703, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5447800851151646, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6845859707632784, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5073374020380702, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6713451965832894, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.63457045351243, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.760139991277541, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2747017431249852, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4456826256200505, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4834220366915352, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.678862671476654, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5941142117182071, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6930129129388155, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489926819498492, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5715668842319502, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2786169604662155, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5267252236203236, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35446322216812387, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5208748527454148, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2679728611808951, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.466691372759197, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2676232320051144, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5440246804235981, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2328598163544389, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.46604753989124215, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3249989390135794, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5532261012182782, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.04043358226234485, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.178130317890244, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31400830186120793, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5436299115609682, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.180038135256147, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.42760668286140896, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2702404890575711, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.505948742808373, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3127320650917403, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5820474024058695, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3410244689880313, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816669416914216, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4234343012313773, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6625289905598352, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3885765192359091, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6554470157301392, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38108864298853723, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6485553379227472, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3765213224289163, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6469521424555786, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36247466608675993, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6011484151165629, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4079926989572759, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6592699047005666, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30494536158123264, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5857538582551342, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26075652499067425, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5605305670545515, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.13339786348528015, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.46778689835182324, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2602768294269028, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5310567541651178, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32577646359654405, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6034691061493307, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4324680011853555, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5877600878871951, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4493940083619696, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6230960824462234, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4116575552858724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5665759692366567, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3533147318401534, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972951640947346, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.44219732271776674, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6193429426274062, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.41852674506584964, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6035836275599532, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.433056028408153, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068335862669254, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37696437834356655, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5617832488367239, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1810501938660849, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4154005351684647, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5018386916018573, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6673891538739279, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.43320553917029947, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6050369991278077, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6947677373756656, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7941300666655116, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6412098671661826, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7665040244283648, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6045639360711837, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7576570567798335, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5438238038060724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7060850657954441, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6638859619095425, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7874224590682172, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6543739381048754, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7768522458527362, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5886489119980793, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7611944709376643, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5162974106233954, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.748545216109632, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6561309661336588, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7849652413082676, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.24311976929452217, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5332455436874994, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5821415139431849, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7329539842616807, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6665468808142623, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8018370160729217, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3861375213265022, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5122109329134508, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32539921259497445, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5133457276293165, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.375079512706724, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5371301483272257, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.48456463733283883, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5906105668854662, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4845227999608418, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5968050469845498, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4494703452336724, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5696298539086213, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4295014616287586, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5957510678657648, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.47727404239076743, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6081867525552255, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.01656048993031311, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.11323797713183678, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4124136266900752, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5714981155807188, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.461887670717865, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.594188645494074, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3880515884750121, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6587916715823183, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5142726846179982, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7344716263345912, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6066498620510337, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7812137754227463, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4342750764549485, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7115011221714777, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.28822910320599077, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6087031937056202, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5011893046413795, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7089203664957927, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3406014428030703, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6514548680180557, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4738611152748619, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7293997939434749, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4492327786840591, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6917786880624969, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5105553787243322, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.44571331402556874, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.67235059873138, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4241047637225085, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6844709246396142, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4487746167679644, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4476730201191672, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2836623400057614, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.29147337237183046, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2775905064108025, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3165767280260291, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.28912432952036243, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.31119603942667584, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4094748015187699, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4288513205758089, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.35430370029300495, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3864890531682498, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.37405604379521823, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.409758558051675, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4082186610925126, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4042514356445265, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3835611536417376, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.41360439536029553, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19333361726926898, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28056620588920506, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3541652369790141, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.38739546241623046, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5554441727233942, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5410106254032345, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2113054108348111, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44238229987470284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.26207903587847736, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.50073123223194, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16098073041469485, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.39710375075643284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11465623153412556, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4497512968651573, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16950698451288215, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48668984177868246, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23516650478671175, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4885052730214997, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23477037244978113, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5165217514090542, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.21585895003952446, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48140875917864023, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2711981710401392, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5555651822168547, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.168777027092081, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4368481165562445, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.22415590998535484, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.49981791926190994, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3231139066663432, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6056704743332197, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4556160153884204, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6661994452325181, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3216756020053242, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6141241026166391, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.43369048469848437, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6586872889176818, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4098419224543478, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6358736384460296, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4527112325797497, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6708989870027865, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.403282335120862, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6319223068216205, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.44234482870142466, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6617260327319175, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3870043562676652, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.631536050216449, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.465541200947692, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6735988737803571, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37462930793644134, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6297969107438809, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45236333724230443, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6557435747309683, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4197376433963966, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6581729857740523, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.37570809340937233, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6339141734561076, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.559332422592187, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.733291190094771, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.48457382450313924, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7144409873446065, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4881942815467274, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6662053431593723, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3977038258772401, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6202897864314184, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5024073848733999, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6805608953669952, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4198435178617755, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6615330486958915, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4423392581565186, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.663370348519268, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.42298863290550076, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6260112466527037, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.422714343026006, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6266965858252854, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3887113653056583, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6222111159250625, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3446592076818278, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819912583909785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23270938096152352, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4490269267329941, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.11634468327243708, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1766119944524977, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3986479587107995, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3167585643537871, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5076869840147092, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3292454551002283, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.25751023494151143, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4697665795408892, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3212983212315964, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5599573621112933, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2560040742784669, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09236883467211593, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3472719365557752, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.392653200684027, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6103547064240303, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.529527758323629, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6540432510655854, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.49704232910799745, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6453248294274054, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.49704232910799745, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6453248294274054, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5494410974163585, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6853937472090788, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.44114781827798216, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6241365710582877, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4286794450695727, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6365023289177463, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4744991305294048, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6720481841701565, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4946489712934811, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6635756951391838, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32345422777393923, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5652905380017423, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.473424955479643, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6791725069180572, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4946489712934811, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6642621312047408, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3479698393875884, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5760833125751785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24373253714463095, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49482039214573803, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.312050635062637, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5390444512132623, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24229889794871173, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4853505495636382, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3542266508664836, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5643413028542406, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18282456123768265, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.47540661243586124, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2921982022041547, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5264166199754001, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3142825719425009, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.584353897647861, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.17473028966988555, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.400425072418037, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2902817248447081, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5778883542136447, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2988697040013311, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5442522660489195, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.500703635659656, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6501904887399698, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4876463179677598, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6113405963585182, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5199813503697857, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6584629522606407, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3995439803178399, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6021193793256325, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.40656183899584336, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5890799945028116, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.528547004876945, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6459593469343872, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.45002572171222577, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.620458099259989, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27875207406965286, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5095968928696253, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5576102993622991, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6640761861237344, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.512463054128702, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6527479377010996, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.43330223254789785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5564499529933307, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1466607445607986, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.36552963821230766, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20527494029659898, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.43586475049009993, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3047577636054668, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.48318512703629857, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2993081268625724, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47777429598730525, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12340057804403023, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3331532512757645, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3196191720459511, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4758634857690128, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.06692436199443168, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.23947877713211682, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3095674062940522, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.49847201920427264, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.08748671768279999, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.01250047619586174, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.12383271014582256, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.38609988647757243, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5480418778026874, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7017829861193574, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7743327021667388, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6961795371760597, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7859480663394858, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5643442092080923, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7005543453411931, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5432312750246535, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6677259864784132, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6837528314895732, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7968789890147058, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4391684160269219, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6371098202414471, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7555875294328935, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8049022687045564, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6443411340522405, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7335999563315522, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2285369650225378, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.4750387664265888, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5975003598259766, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7683913390959731, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7108527311307847, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8209448175222175, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14118350058219528, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20431837779877604, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1811004938014804, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2649993136544717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14089011087858522, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21944603811527294, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.12501819027374758, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2360941227140328, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35939098278145853, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1273192735797341, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22231961416584312, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3360376952328008, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35297640449956286, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13680836462007476, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24537888283181183, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.21687218788036394, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35659125027777805, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1017839169529136, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14549060082020032, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22054620758680943, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2085590894856562, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3200949564949597, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.20721924345714232, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.36475932190367044, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11386607947762988, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.33564583347921473, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16862356321891248, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3637462812267946, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.1485432117087218, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23649053182388327, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4127382174759535, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2392792151449317, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.388678103641788, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34101364633474157, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5758572581135913, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2026639468552004, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4471011187469559, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11622323415479685, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.10826694406224016, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.180048782148418, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3772586334343914, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28939737284723716, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44550999966826343, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.46442643702863534, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5519480629125156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6268941789647348, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6958291103494518, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4554740717077828, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5498766350188072, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35817810808590844, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5012707040525209, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.570135897056151, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6801332690579707, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.349335635815966, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4827709277987172, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4692880637764782, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5773610754678101, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.34182319563232233, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5052410644804232, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5796814083647206, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6364369549208913, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2985280444159845, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072627289039213, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.49402195020645817, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.583821485566765, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6297960258710876, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7117676662366008, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22837680015088951, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44164180234500505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4151474543103342, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.638952468710771, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4355097603079957, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6511365998081735, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2468185992183292, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.46792167630295967, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.21305368975019265, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4371748197696026, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.08919951949408464, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.2986174009048306, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.38791552573256816, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5723637874192081, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4305675865000082, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6312508299648723, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.011973456545827533, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3002149853465536, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378189160780977, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.42866719142206977, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6045654191304047, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11634129390828839, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31530902302000635, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19544795798162903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3835451743665027, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19889333501994313, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3885583772632557, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.24480102898506534, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.20876900081884944, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3981381071356935, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.1582263258709324, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3530704022752377, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.521530381948501, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21574854574751035, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.41940323708656974, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.058854097785805734, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.17240019222052141, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.23425891587078498, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3268258845598709, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4863358380144881, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.359355103997122, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5589602235417395, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4267520229161, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5518115366540288, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4248870612387681, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5641041633033193, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.40429429626811253, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.575799986766, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.37917766663411384, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365794450039074, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.23329145933277767, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44291475401588093, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30626379803308257, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5077543267123376, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.39963516628793516, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5372822043426468, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.38861707449775285, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5432656354167995, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.17706333085447226, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4047932836379997, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.30904104300309865, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.48677056338263186, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5565087025816967, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7009254382359046, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.35551034193127495, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5627284645723449, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5039752490702457, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.613669501327356, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.36932295883897953, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5524455184773474, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.080331199191236, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.23021641289829473, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3221305290185444, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4866081657424789, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10563809356628297, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2323275601638909, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37818447598700816, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5516941276443429, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.34591973979258805, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5197016245837053, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10020997712284248, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.16327778043310373, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24470192769722524, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4583472827584427, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4247248638956501, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5970793788386907, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088535943352446, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.625202596789752, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.562048819850726, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7192054483864224, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5550041554031738, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6554946147279708, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4406896260480816, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.571328063702761, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6260375038358343, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7803415401430737, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961648173595504, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5022745285039809, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.37446819995007063, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5965995710194948, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.41110950985436373, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6710923400142267, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.47237086893932345, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6521003933528818, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.31867018346252723, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5261433842307197, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.709255033821849, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5512181178347816, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7607059998582948, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3941975148525721, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5191046479503385, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.262633940062176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.41923206553744197, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3850172427136058, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5264633431241114, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.15363234192450648, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3146726146646545, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4709531555683, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.13978782442553714, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3223419048219805, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5205977846006183, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1764046491640527, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3464061249457313, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.18856799944599728, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20466701735848536, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3025868321081519, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.27417618121875437, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.42108960466757744, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7505336182671021, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8401910628269498, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9289416300153619, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4450050658086207, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7558874882119336, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8107492451395732, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.900032747778274, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7406375008540003, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9160988509714175, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3132252321342574, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.35025412310639736, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6825372617659788, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13453927150397377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10522974272748564, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.22055493694673897, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3931965048763613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13755274871304535, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10397715306705207, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13899941210887606, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10947303419437356, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13725861056573663, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1350501875730652, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09408024740752835, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.054674609450212665, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09895358918308976, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11538184104597694, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09204268041910899, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1350501875730652, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8229812189228393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7267072830982378, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8396959977515368, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6374950652411382, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6643984252563968, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5821747317554493, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7539119883011114, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8328652216139806, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.251696695878184, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5710821658681214, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7498810286408993, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7886148242134857, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.760856626273165, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8852144067617798, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3480442076026084, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6142483232997242, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2852636439147137, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6732018003142922, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2104783778565715, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6117499551501043, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.18814785746917081, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5692328972915052, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.589811312024197, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.31072931460421827, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4306285422638574, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6879589052239306, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.17401517708317762, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.45006261596496794, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.151240443751577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224869587588239, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.151240443751577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224869587588239, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.26860011657329247, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14172292406325543, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4762857001428092, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4800955244005148, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.16234676720992364, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48137970077362496, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.16853790965501372, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5242065098084487, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.07810235385630719, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.10401577613691954, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32252336426814965, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.15094813209726435, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.41808466373264913, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3614856639698008, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2255489037266197, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3954925749722234, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2927057121559396, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4330945753016968, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18038302998635977, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.320678468026793, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21850594525107195, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4049269026117245, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21233470585998818, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3630016390465325, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22325877055095214, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38760873730223866, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2741229265391949, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3721657350281369, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33805023952655533, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.23398197530631124, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.272143800067929, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4262772266504184, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2929807168354841, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5975595069845072, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.14908960803395838, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4761746966391582, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2466674257522263, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5677534942306638, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.684329671666446, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666935927206881, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7886059879769752, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3854501214118697, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.595779023757305, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.1999934463074552, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.30520457148036917, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6141797522526763, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8112468273360185, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4113125177363443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42808075762838727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32685141385924577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3758692873615971, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.383916695249631, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3804672236690253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.433708341935832, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4678134833959513, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5051480556620123, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4028998029112093, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43422338821405304, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.46360731056064436, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5726015901952585, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3734832062562986, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43771936994910393, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2041405149858879, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2728627798814474, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.36592034784584504, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4836940239497908, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5186946866114049, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6237774736059616, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8500131524897436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.573764722928549, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.817979859532479, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6224956012824276, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.21449459478473423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528501353073614, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27341185048222727, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6411651849711889, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4041187386794465, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6888233111124319, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5645815242299279, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8151453923340255, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5972046851135996, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.24570408832734913, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5699365673055954, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7743810851655712, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8921783635360989, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8363600587440573, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9912737182609732, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.6018154975998465, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7669980679050217, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4765874091118851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.45911557772276623, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.45022125383821326, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.46874267375238576, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.6350593429017282, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6022395694696409, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3407065041529668, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3482814151315599, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.7215691881328408, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.735100789804592, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.700487718300918, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.7205373993220106, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5894567062209923, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6051783687131701, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5614660831213585, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5781117871636209, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5907010930652489, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995581839975431, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.33500599401126563, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.34371117385240735, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.630923553986829, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.6458808155334796, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4687776643329939, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4435402570986094, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3013901676230198, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.38106012955734714, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.35187745073108273, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.2850647115160651, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.43458947791319813, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4033902612785559, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3101159279982649, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3909330178955319, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.217295409663537, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.12173115521158184, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3904544509639755, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3411846484329862, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.540550443602966, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.36314748337164254, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5392658386159207, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5519360558961294, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.23610158425430544, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.46869487580371916, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5499993547125768, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27062395495883934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4407436716645838, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3711335186021823, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5595427509161435, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35459684529390034, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5499993547125768, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2294068720558097, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40458364050078693, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32588643749980295, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32003170276441123, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23088247483586974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4093450185788297, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23189835231884592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44157797833899437, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.11917756990194882, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.37850093315889116, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.3330732444230803, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2550184675066243, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4392529322675216, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23721317187079113, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4319465813689286, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.29417113956364643, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4653698220842079, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23088247483586974, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.42828303349678104, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23251355381714656, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.43190915325898727, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.45237912327122276, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6299071573751139, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23736810439041953, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004852416401387, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2916261378761629, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4106520926894174, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.282764733088686, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3743678965131091, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31430120091187586, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5013155459452984, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16451929399933107, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2957279302594959, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11401282249739858, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3117911565455793, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34019506273883837, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48708558391259515, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.33184166448858593, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1676136890247661, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38914692664434314, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3579553000756425, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5605065818946205, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.586853267829013, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36300296341860155, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816676674074003, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3525399760372503, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5798116969849163, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927053212677373, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5108598154804425, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3722001929300059, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5252698638532942, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3514475288270508, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5361569875660316, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.27914759735007616, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3958350231734361, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.20974733068050955, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.41455868084196934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.16086531618356015, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2797876941198672, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3277803741755935, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.4267708983045122, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.275187053569825, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.40511706867577885, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28765408533715414, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046608868073569, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1819722649161304, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44254730215235283, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.25291831689404154, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4482360279074225, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.279600269133294, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48172049854477195, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1258907882951215, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34143648068854054, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.11986809949741643, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34629467658248214, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32666181171942305, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5670457942911707, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.29175929784144866, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45299010750030405, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.24463910693302512, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2908660541001102, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.17432585713050458, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.435271111238395, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13305199541830684, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.43244987270004115, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3377385620641691, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5256128450453542, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.44776047557667586, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5801193947715436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2875583820017638, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6087635830564418, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.28295596283263513, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6067794553589253, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.20863283213455547, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5109257435313587, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2810551683573811, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477048453606161, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.473265686519562, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7021422985630228, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.11810019511256618, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.3708545152745943, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3423375720396189, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5343801172775681, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4190284595730208, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6790829001019099, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12546912767038895, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2651343523961406, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12407216162020399, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2664864612493293, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11261597894135422, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.25234827342962907, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12757855945289526, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3385577201847465, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1175771442804648, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3103572690939351, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12629279972753293, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2933944065312711, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12363251371327445, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2692822154793075, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.24470651147480013, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.09410612421964877, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.13696035837771334, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335388002918436, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.24296155543954379, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2887138086538547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6342291345998248, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.37589902061551017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.42554151277542873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.19272923456045185, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.314589204347422, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562402498959597, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462132320098601, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30613574556266654, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.36162356523761796, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1277700534498365, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20846991452438368, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.25985341959039815, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3462132320098601, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3645334083305707, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4348353905978472, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.42988105429544615, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7577244658187771, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5366411241731205, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.825566494253596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8665162960307256, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4360038791211645, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7669087484597642, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4135171000263379, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7050151549073953, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3345794609803645, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7523344918083558, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.46997395980026974, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8114935753258365, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.690216773228096, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4547722460981925, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.793631811653261, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44338575968779337, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6736450219247083, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8665162960307256, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4165530720734658, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7027805129995731, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4027788021844849, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6872835607174038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40245827940445855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664090181705107, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5552412314880962, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40245827940445855, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664090181705107, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5440766840557734, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7530101164980872, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.482878209362615, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7054264546871626, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22447836580911282, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.635962708232662, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3964122180109575, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.584540734626554, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6537813760269277, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7617489761353242, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5440766840557734, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7530101164980872, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.41307323705325416, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5785653391533346, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.32797138117025904, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48645628248697975, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2280299254440877, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4447177675003817, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4549681528678131, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6164314607426773, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.05173101600908794, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.062313574266204104, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.35012358768277246, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816664251371266, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6412236038065892, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7394688674718397, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103582047611184, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34895836374229405, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4767378358574124, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4010889714538991, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5642546048162433, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.30145280436636923, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4729753929525169, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35210829264331733, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239651686730163, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38732841080078323, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38584042605633057, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35548377438423956, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5150536106864393, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.35098096867859657, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38561859819475125, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21992062963866632, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.42619491111236635, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8958039312312598, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6433799261824519, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.81037697367602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5403356450597102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7639130574395125, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5815699184831468, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8158797976578578, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8097013849965253, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8896806148658662, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6161420984415483, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6945809713247855, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4500531895417844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43027065541050147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4933292241270431, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5225247297523148, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.23487811400114963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4062284746604391, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3763743474188506, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4017565065239436, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5805399561362194, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4810464260105228, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.21510618470971102, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.41380245501613677, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489214645008508, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4519496200669607, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.15415064977510756, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.48192435154139673, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49539605131242165, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44728880966754114, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4825434542324755, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5617848264135781, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570886750436929, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49539605131242165, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44728880966754114, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5095895501997145, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4647137781420131, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5062835959915031, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.649790991083579, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3855522725905196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.587260566914102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6368371029698285, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3666340989897011, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5782960278998768, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32892676518285585, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5800761309604682, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4592978565863154, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.29456425448249246, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5691358329649412, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4714472446464193, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376192011637994, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47199515498282607, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.26349889713915725, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12514328743841557, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.34961836061490087, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20280903279060938, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.49374400062508916, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5561195823338172, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5362935676066722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5803515898273521, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5422220468910552, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4204739940979302, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173824078732066, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4177866849157374, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5243375045345786, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3450219162509876, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3993348853061597, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.30978068501889056, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28295274449167956, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.33498389276277546, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3296536654279081, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.42670493571995677, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4251985835808586, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2368693821608258, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.320909989176825, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3223833286593516, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.39475158383309167, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.46146548771819573, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4019452398054806, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3973758238312869, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3937751818988156, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33438299066966715, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409759573191787, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5743796566387722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5768306472334509, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.32134504358579785, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5076725973953424, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562150245540302, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47046477830594896, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27182849679730653, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5497265770945076, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3509258729305825, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5379703355059909, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19135220621724439, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4724042181215377, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.38936263771250235, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5443518219250745, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.28555753499459907, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3416445560351976, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4049402235047407, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5871644977560334, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3060614307377341, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4885853123228743, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45506803308128024, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477506541284608, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7382416555842614, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7568286018427376, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4226108216696222, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6331414171574684, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.602867050301643, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7367363357155757, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45506803308128024, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6608670586710113, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7542976177437886, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.526357446896968, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6666823117022298, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7571125338649978, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3864572432237816, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5849342936087653, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5406438522344627, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6520694800788391, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.6977240390484037, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.818984467219358, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7019499719108448, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8450280883390384, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.7397087417978795, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.8865031414920428, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5364361872901348, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7164775580022767, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.37717457428685847, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5554130492458337, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31598923484911084, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18953162992336403, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45876745950873354, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2383770504614087, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.466645869611307, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18207052811092134, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4504432021668592, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.14291173574075158, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45184360988354105, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34419514726440925, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2128497674847141, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.438591227628555, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.27743662258385243, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21081851067789198, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4042801758173556, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2603965252496297, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5017583605709452, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7629273292796576, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8510385544954956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.90941532255964, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6888074582865503, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8055061207769505, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6888074582865503, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8151715541788959, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6978429290017016, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7717858931341154, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49349163706233623, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.694445271037971, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3037643089519314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183662698462751, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18376711147874328, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3981272326046884, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.340960560695735, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5217663812589132, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2147607499133801, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3976144917079093, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3941175366175992, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5506555496793699, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3339087646492816, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5995623358499859, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4122974402951816, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170911690364487, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.2643854378698732, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4066689638009577, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5267604642487788, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18485450668488082, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.46452791098932883, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.8253498772794055, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8529564805429163, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6941268297866866, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7679844670813416, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7072172847953276, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7914639887327892, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5642761727828352, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7668993520558344, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.175538121835486, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.44197441533246407, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7072172847953276, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7914639887327892, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5828833474188783, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7908226509294533, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.38694317759010316, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5953878513137957, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2620499195763038, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.48937240022909234, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22128776529156546, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4999323991212311, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5924993690004501, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2745762486209681, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5730023382770898, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3070898761263382, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5791648909423264, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.31600229153053044, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5374439094267343, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.27733310601709266, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4703077247331959, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7203673717155472, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3029928206533524, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5600962993297164, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.49023502313124495, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7638414724136195, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4424906782646928, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.705507971295129, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41452787844405115, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6973605663974715, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.41032302768839235, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6634154486532953, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.44711013370113256, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7319347493436125, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4275810014748856, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481070648129139, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.46409619603227925, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7474126325188408, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0422060018445322, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.27278456488226854, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.42803425515420807, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7260183442795153, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.23962966980870534, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5138361143222901, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3597862823053843, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7212767938301806, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.73702431000915, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8608238485042174, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.07860105393900486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09678377693633947, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11601141307045003, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21671187566850864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2390076354901812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.33570154125476054, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1371661844308428, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.23455679137513727, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.21326369102393236, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24781828193168487, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.12394460940540938, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.26662620996190534, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14891504773093184, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2618919111168516, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1406879778177777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24227488458492952, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.04114212836378985, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1070604518443882, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.26411327741267115, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2898946819245943, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.24248913939867353, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.26831514794764233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35015224715252113, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5701648579139658, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3349252032650068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5908087431574293, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3258812297722265, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5753985304712377, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2883113322808919, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5835478395499368, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2577716972449781, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5171901208397282, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.04631732527976412, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.21558480215297515, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23287896954139942, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5168980964497457, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24643585808835486, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5459613462641708, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.004597701149425286, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23325505861671614, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.503948422566616, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.29496488310853664, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6102579121305785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3449058130015412, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365619830343804, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.29688845677442144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494319015457763, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3257602417321556, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5666596539835803, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.22511140285349446, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.553839023223762, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3274016883618531, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570399656004248, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2211880505010663, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4573855767208229, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.399477857457097, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5617218895807364, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.10125638619893, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3191349966700777, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5329750656706205, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.09175663647957763, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.32499940569388225, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.162496560019558, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4637542439867255, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3764579689992314, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5818293561882879, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376032254696296, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5334329403985332, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20039141607873007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.36123312088832493, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0564437248458207, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.15026037463138217, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4303467795130825, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.207314191412716, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4360555836773355, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.08070632004040007, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.35911678207067443, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4398690431123469, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6046405925677363, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.30594422683254774, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5439400651386468, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.24968557018529272, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5511430757077329, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.036093834539820895, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1939545119098376, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.48320036215224016, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4686201168430013, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6206226468359836, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32707695373369694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5166643606783462, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3069937936246452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5024648105961349, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3575909322256676, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409483829147745, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2228729825024992, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4466759653076362, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3235473265529593, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441122251341168, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2781578586520005, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3796663901127053, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288143137394372, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5596092732231619, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.4122335241726334, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6323888082640657, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.12858902882463447, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3148709023566568, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17140863043800483, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.39681418211766745, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3722876193273297, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5622245182354383, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927237741677927, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7451438087039315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5570357635362685, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116469942298856, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508597296865219, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6833592152043626, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.21259470439331316, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5863866793721222, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26513488970168847, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6029932145447834, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4432782054917686, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7181569025811343, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5112867162620864, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7497537018148864, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.32547291366749675, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6738469931497133, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4667782254569818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7754094279644977, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.10177931989613292, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.3335479382455017, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459167762620119, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7155724078484401, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8588886238396082, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9454872546266168, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3826576187198625, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6071841372061269, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3447241447679157, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5531085140985558, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3178743908080705, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5513949312034092, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1883251048230039, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.45303225382772006, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3860973950960897, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6271680934322363, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24967756802190116, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.559682285505658, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.38048895490051765, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6349497388372479, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4324371049196428, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6305851137521162, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.01937817581496422, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1805414152287055, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4894585255537274, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.391005181589246, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6383641960193629, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.31128635710849173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6304411194127884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13308561809919006, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5312476702183977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2579124920342433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5545120254366757, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.1595487507830045, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.45111566089364774, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27710310401156996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216248191624099, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.303998162324503, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5931856951819833, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39631066492420963, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6670602127484115, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2625805454451497, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5886806140244891, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.06797010899515823, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.27154181329396565, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.19568007857684672, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5228407307909605, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.49680276687617775, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7190025833795584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2879556779114461, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4554184077174173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.09578921953028982, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40472887922389433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.22965669823067916, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.46247819390492995, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.08920952468433085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32241875701400735, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20475739007221866, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3934874462686164, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1719646079342664, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.308102700736633, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.11684343186914438, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40293579310759836, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.28255079601170635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4828223682720399, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2023651649328507, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1689706894436884, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.32609144958957464, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3881647386960232, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5417015630917802, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.348007986647201, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6148736550683231, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.31222258402876674, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5549937870516303, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2706573913259733, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5619563043714905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.21331098311931576, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.47660259733052845, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5295534280606148, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2320305803246989, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5719371199531044, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5196627001050362, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.18627639656696823, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.44356601067804086, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.27048170758554296, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5452157067944216, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3324437360240581, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.3472164938104332, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.33464494273746426, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6019539804372768, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.23683075175361493, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2631328190836655, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.16455392433653304, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.13673885815184886, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.052821402483564636, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10721126066665879, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20388486867467934, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.14973178994918127, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.1337840368142243, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2143764616947716, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.014262006975939606, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11557977235371186, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3291598889023262, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.31026575785653826, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.461597801606675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6280777654467244, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4224298950114519, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.60823085524287, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30451258861070496, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4983778740634126, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707860320039717, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.45886678012586496, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6234514801756209, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3916177035633811, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6068458202737596, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.15465401249808575, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42501995363729067, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.30004556274899286, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.561482333900969, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4501609222100726, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6301404717605862, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36769040719718776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4064141882459388, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.34722897369611144, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103553163121394, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4141871474340027, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36586001924521905, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.44328515185259987, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.2870169689559038, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.37150797394258683, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.006130367300589213, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.29038853710161877, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.35427389686155986, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3286711939680359, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5944310794747374, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4830189619506113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22970092088416938, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5537467826528029, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4685134392551311, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3096036988813059, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5894510883198948, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14957644445778928, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4378856092523028, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22481074167380632, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.49840634234674935, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.26751157705127454, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494472552960327, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.0066610108556241394, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22536453058221606, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4365811373563711, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5354135417523515, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6499871908570826, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3874773378787974, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31747697264511426, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.40797778663955364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3785761836985817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23841754841770157, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34481325534410395, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3818556455365969, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.24796413807329218, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3530186228211094, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09821019441701705, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.274825378700542, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2807763229912453, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.38395145132718883, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.07218766113019179, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.24018250025773352, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.2096419313570871, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3673119644292626, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.46935933364934335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.773055573548356, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.640995178057518, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3681829215408091, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6273930299436508, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4389321784429702, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.646847036932526, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3595137194874952, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5619162673780028, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2927181624015055, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43383878173729606, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.721993849834018, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.007378883018336222, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.16440791304482247, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4027271257521195, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7244660400837248, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3160213610127146, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5165614670038283, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.30758744700466467, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4684197705189288, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.37169237058440824, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5383668331525606, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.18655267161524258, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3640275543948514, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20050320605789015, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046291070099031, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.14579837024705408, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3459916112351503, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2761603007895394, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49207696507318593, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.20630721151497294, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.31518520840312125, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5072004558983904, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.004516711833785005, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.16951909200513385, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3380125247643079, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3163330109126403, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4797767411663947, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961516536011624, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.49803924348035766, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459667618766101, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170810606402402, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.23623790626704147, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441149448679464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35936994872479583, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6492026440953677, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4161791450287817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7054426787013603, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.577852219465442, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.39688965270008814, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.646373332434726, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4272870063962341, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6682855797405902, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.008777992747819234, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.12288887055424895, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.4148619356639114, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5640009831507545, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.793311073470687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0067104198717751464, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9025232868361638, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9169897590736298, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9709835434146469, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9951728990866464, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8935248372106969, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9404428602061264, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6031612036218008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.736286703381354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39432344823662835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5943452555220106, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.34437686643287496, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6090402109312658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39205580893266934, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6772940233934857, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.43103580001357805, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6690742226623104, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.478854281434795, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6734455797843703, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2981426768485538, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5882799317365235, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.22739562220830442, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.49805301036023364, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4263005628892719, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6688425476017256, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4579102348988084, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6417119032346416, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6252078221435556, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7406162627381982, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.509851600045062, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6689059150119564, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11254397891886614, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20623288988983426, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1100081929352474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.18967061672400035, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.23609036869909603, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1498435848533153, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21051700087939107, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.17652714369664665, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1864036495127383, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.28188465375440136, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1445047538382198, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2737322242154943, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10686832559533661, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20609270360853799, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1957899789117337, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.32253417440653254, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.137248043368656, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.22329074990170197, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14969363386531168, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.27820986095394096, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21051971584146587, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24493390281390082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48113625107113883, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19476681308252697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42030407727741037, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33600502687041833, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162346121569341, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.10336049249219333, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3088863284587533, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27190910124573536, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173567851798608, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.09851325694216304, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3616605984753398, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.34854547753540127, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5565027260893921, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11215313654295675, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.36001328873605765, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19593487880196195, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4136765523891332, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.08839512340686698, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.21177781620127928, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4460741740050364, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19202937871014814, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4392268366970299, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459789902390003, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5620330456296532, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37825713491091884, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5584414289480568, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3695375029926146, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.556875129479421, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.47923168144435746, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6534660189132082, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.429512074830509, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6066779955199886, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4257605183794877, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6026940597371309, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40518022025671885, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707666164180741, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27460305577138294, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5251472574042976, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4257605183794877, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6072620760408021, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.24287220388451114, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4829182994799567, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.27309322054464596, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5162255850430824, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.370828716498988, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5662656160148991, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1845747513433909, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44379971518505973, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.18212463619188357, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.469592540371137, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20734616999079872, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5036833880605232, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2817686971402115, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5170853673805775, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.09596136927307748, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40849147213099996, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.216062485604554, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4780977009860418, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23502778906204924, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5533644883224328, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23288432092807593, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4777685664632553, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3288562544630599, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5875530351959068, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.024449792954766115, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25748397762867226, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.49702079004924316, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.33663600853613573, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5875950506541368, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2517176762753373, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45137344500317134, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128384316903283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48016279207050283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3508847643803501, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.529198044527105, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2674628639054191, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4784292149775752, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22972631482860506, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.436102988762466, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.1587543502252646, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4114443619817223, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.267457541157426, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4673846703066711, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21808070471467408, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3966492622645894, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.26116607863611285, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.09196922936475649, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.35950194744727476, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2661882195219029, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3867524502755999, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43200638115383627, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6892273787708799, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.341195158470265, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6539473951166187, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.47372467075851415, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.734800469477975, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5582838437615822, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7625459507115938, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45026965676007474, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6668256174353906, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.310668922100995, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.58212864821275, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5585674160229753, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.791505922278621, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5592126620745396, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6961094171330644, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.29504037076486817, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6837809127705262, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.44680913024590146, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.648473971864945, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8247818102038394, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6040638744786117, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7781313195018753, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.40475700826319555, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4349871720911447, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.47497024539412314, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3805666011451541, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4710260495003035, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4677317890018283, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.354353831625583, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.10235881838919027, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42794399630326124, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.09649622940465846, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.29275810079464665, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.33453241597890554, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5244380103905697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6914581279144536, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4472834999328078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6457130269652316, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.403469748891042, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5836273992135024, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4521209970489246, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6307076431103672, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.310186302993101, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5434540129901786, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5201565256464291, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6663170490872967, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5950978682255068, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7209575532500453, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.32679491753274487, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5592874366443522, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.36634140441362645, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5925773491774018, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2672991324984635, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5378982230702222, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4880149105083363, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7177464929662396, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6095141355358055, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7260733102028687, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.504154287515855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6074467585243234, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18771816026273827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37594160796244835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2828480467326008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4330386622117487, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.26314173809974317, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.41943156806161835, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17207258849758605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3052503498954155, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1377448219106278, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.35651447515721807, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3653634812607, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239315135469935, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.30019266689543556, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.520168227007293, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.022925118914031796, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.10793991565723801, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3418311350990793, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.30359559780163287, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4685200908441382, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.6914581279144536, - "sentence_nr": 9 + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", - "metric": "bleu", - "score": 0.4472834999328078, - "sentence_nr": 9 + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation_to", - "metric": "chrf", - "score": 0.6457130269652316, - "sentence_nr": 9 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.17207258849758605, - "sentence_nr": 9 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.3052503498954155, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.504154287515855, - "sentence_nr": 9 + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.6074467585243234, - "sentence_nr": 9 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", - "metric": "bleu", - "score": 0.18771816026273827, - "sentence_nr": 9 + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation_to", - "metric": "chrf", - "score": 0.37594160796244835, - "sentence_nr": 9 + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "es", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "pt", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 4 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "es", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 5 }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 5 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "fr", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "bn", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 6 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 6 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", - "task": "classification", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ur", - "task": "classification", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ur", - "task": "classification", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", - "task": "classification", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", - "task": "classification", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", - "task": "classification", + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pa", - "task": "classification", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", @@ -20945,7 +62710,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -20953,1067 +62718,1043 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", - "task": "mmlu", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "fr", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "mmlu", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "google/gemma-3-27b-it", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "qwen/qwq-32b", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "pt", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "pt", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -22021,7 +63762,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -22029,87 +63770,87 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "es", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "ar", + "model": "anthropic/claude-3.5-sonnet", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -22117,7 +63858,7 @@ }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "ar", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, @@ -22125,62 +63866,70 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "bn", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", @@ -22188,7 +63937,7 @@ "sentence_nr": 9 }, { - "model": "meta-llama/llama-4-maverick", + "model": "amazon/nova-micro-v1", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", @@ -22196,7 +63945,7 @@ "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "anthropic/claude-3.5-sonnet", "bcp_47": "pt", "task": "mmlu", "metric": "accuracy",