diff --git "a/all_results.json" "b/all_results.json" --- "a/all_results.json" +++ "b/all_results.json" @@ -116309,3255 +116309,11176 @@ } } }, - "gemma-7b": { + "gemma-2b": { "model_size": "7B", - "model_link": "https://huggingface.co/google/gemma-7b", + "model_link": "https://huggingface.co/google/gemma-2b", "zero_shot": { "cross_mmlu": { "prompt_1": { - "overall_acc": 0.5180952380952382, + "overall_acc": 0.35714285714285715, "language_acc": { - "Vietnamese": 0.4533333333333333, - "English": 0.62, - "Malay": 0.4066666666666667, - "Chinese": 0.49333333333333335, - "Filipino": 0.5933333333333334, - "Indonesian": 0.5, - "Spanish": 0.56 + "Vietnamese": 0.36, + "English": 0.36, + "Malay": 0.35333333333333333, + "Chinese": 0.34, + "Filipino": 0.38666666666666666, + "Indonesian": 0.36, + "Spanish": 0.34 }, - "consistency_score_2": 0.5625396825396825, - "consistency_score_3": 0.38247619047619047, - "consistency_score_4": 0.2801904761904762, - "consistency_score_5": 0.21396825396825397, - "consistency_score_6": 0.16761904761904764, - "consistency_score_7": 0.13333333333333333, + "consistency_score_2": 0.5339682539682539, + "consistency_score_3": 0.3533333333333334, + "consistency_score_4": 0.2579047619047619, + "consistency_score_5": 0.20063492063492067, + "consistency_score_6": 0.1638095238095238, + "consistency_score_7": 0.14, "detailed_consistency_score": { "2_combine": { - "Vietnamese,English": 0.4666666666666667, - "Vietnamese,Malay": 0.5333333333333333, - "Vietnamese,Chinese": 0.44666666666666666, - "Vietnamese,Filipino": 0.5, - "Vietnamese,Indonesian": 0.49333333333333335, - "Vietnamese,Spanish": 0.49333333333333335, - "English,Malay": 0.52, - "English,Chinese": 0.6066666666666667, - "English,Filipino": 0.6466666666666666, - "English,Indonesian": 0.6333333333333333, - "English,Spanish": 0.6466666666666666, - "Malay,Chinese": 0.4866666666666667, - "Malay,Filipino": 0.5533333333333333, - "Malay,Indonesian": 0.54, - "Malay,Spanish": 0.5266666666666666, - "Chinese,Filipino": 0.5866666666666667, - "Chinese,Indonesian": 0.56, - "Chinese,Spanish": 0.6066666666666667, - "Filipino,Indonesian": 0.62, - "Filipino,Spanish": 0.6933333333333334, - "Indonesian,Spanish": 0.6533333333333333 + "Vietnamese,English": 0.42, + "Vietnamese,Malay": 0.48, + "Vietnamese,Chinese": 0.4266666666666667, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Spanish": 0.5066666666666667, + "English,Malay": 0.5333333333333333, + "English,Chinese": 0.52, + "English,Filipino": 0.54, + "English,Indonesian": 0.5066666666666667, + "English,Spanish": 0.6066666666666667, + "Malay,Chinese": 0.5333333333333333, + "Malay,Filipino": 0.64, + "Malay,Indonesian": 0.6133333333333333, + "Malay,Spanish": 0.5533333333333333, + "Chinese,Filipino": 0.4533333333333333, + "Chinese,Indonesian": 0.5066666666666667, + "Chinese,Spanish": 0.54, + "Filipino,Indonesian": 0.6066666666666667, + "Filipino,Spanish": 0.64, + "Indonesian,Spanish": 0.5066666666666667 }, "3_combine": { - "Vietnamese,English,Malay": 0.3333333333333333, - "Vietnamese,English,Chinese": 0.3, - "Vietnamese,English,Filipino": 0.3466666666666667, - "Vietnamese,English,Indonesian": 0.34, - "Vietnamese,English,Spanish": 0.3466666666666667, - "Vietnamese,Malay,Chinese": 0.3, - "Vietnamese,Malay,Filipino": 0.3466666666666667, - "Vietnamese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,English,Malay": 0.28, + "Vietnamese,English,Chinese": 0.26, + "Vietnamese,English,Filipino": 0.29333333333333333, + "Vietnamese,English,Indonesian": 0.29333333333333333, + "Vietnamese,English,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.37333333333333335, "Vietnamese,Malay,Spanish": 0.3333333333333333, - "Vietnamese,Chinese,Filipino": 0.32, - "Vietnamese,Chinese,Indonesian": 0.29333333333333333, + "Vietnamese,Chinese,Filipino": 0.2866666666666667, + "Vietnamese,Chinese,Indonesian": 0.32, "Vietnamese,Chinese,Spanish": 0.30666666666666664, - "Vietnamese,Filipino,Indonesian": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian": 0.38, "Vietnamese,Filipino,Spanish": 0.38666666666666666, - "Vietnamese,Indonesian,Spanish": 0.36, - "English,Malay,Chinese": 0.3466666666666667, + "Vietnamese,Indonesian,Spanish": 0.34, + "English,Malay,Chinese": 0.37333333333333335, "English,Malay,Filipino": 0.38666666666666666, - "English,Malay,Indonesian": 0.38666666666666666, - "English,Malay,Spanish": 0.38666666666666666, - "English,Chinese,Filipino": 0.44, - "English,Chinese,Indonesian": 0.44666666666666666, - "English,Chinese,Spanish": 0.4533333333333333, - "English,Filipino,Indonesian": 0.47333333333333333, - "English,Filipino,Spanish": 0.5133333333333333, - "English,Indonesian,Spanish": 0.49333333333333335, - "Malay,Chinese,Filipino": 0.35333333333333333, - "Malay,Chinese,Indonesian": 0.34, - "Malay,Chinese,Spanish": 0.34, - "Malay,Filipino,Indonesian": 0.38666666666666666, - "Malay,Filipino,Spanish": 0.41333333333333333, - "Malay,Indonesian,Spanish": 0.38666666666666666, - "Chinese,Filipino,Indonesian": 0.42, - "Chinese,Filipino,Spanish": 0.47333333333333333, - "Chinese,Indonesian,Spanish": 0.43333333333333335, - "Filipino,Indonesian,Spanish": 0.5066666666666667 + "English,Malay,Indonesian": 0.37333333333333335, + "English,Malay,Spanish": 0.3933333333333333, + "English,Chinese,Filipino": 0.31333333333333335, + "English,Chinese,Indonesian": 0.3, + "English,Chinese,Spanish": 0.38666666666666666, + "English,Filipino,Indonesian": 0.36666666666666664, + "English,Filipino,Spanish": 0.4266666666666667, + "English,Indonesian,Spanish": 0.34, + "Malay,Chinese,Filipino": 0.36666666666666664, + "Malay,Chinese,Indonesian": 0.36666666666666664, + "Malay,Chinese,Spanish": 0.37333333333333335, + "Malay,Filipino,Indonesian": 0.4666666666666667, + "Malay,Filipino,Spanish": 0.44666666666666666, + "Malay,Indonesian,Spanish": 0.3933333333333333, + "Chinese,Filipino,Indonesian": 0.3333333333333333, + "Chinese,Filipino,Spanish": 0.36666666666666664, + "Chinese,Indonesian,Spanish": 0.3466666666666667, + "Filipino,Indonesian,Spanish": 0.42 }, "4_combine": { - "Vietnamese,English,Malay,Chinese": 0.21333333333333335, - "Vietnamese,English,Malay,Filipino": 0.26666666666666666, - "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, - "Vietnamese,English,Malay,Spanish": 0.26, - "Vietnamese,English,Chinese,Filipino": 0.24666666666666667, - "Vietnamese,English,Chinese,Indonesian": 0.23333333333333334, - "Vietnamese,English,Chinese,Spanish": 0.24, - "Vietnamese,English,Filipino,Indonesian": 0.26666666666666666, - "Vietnamese,English,Filipino,Spanish": 0.29333333333333333, - "Vietnamese,English,Indonesian,Spanish": 0.28, - "Vietnamese,Malay,Chinese,Filipino": 0.24, - "Vietnamese,Malay,Chinese,Indonesian": 0.22, - "Vietnamese,Malay,Chinese,Spanish": 0.21333333333333335, - "Vietnamese,Malay,Filipino,Indonesian": 0.28, - "Vietnamese,Malay,Filipino,Spanish": 0.28, - "Vietnamese,Malay,Indonesian,Spanish": 0.2733333333333333, - "Vietnamese,Chinese,Filipino,Indonesian": 0.22666666666666666, - "Vietnamese,Chinese,Filipino,Spanish": 0.26666666666666666, - "Vietnamese,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Chinese": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino": 0.22, + "Vietnamese,English,Malay,Indonesian": 0.23333333333333334, + "Vietnamese,English,Malay,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,English,Chinese,Indonesian": 0.2, + "Vietnamese,English,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,English,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,English,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian,Spanish": 0.24, "Vietnamese,Filipino,Indonesian,Spanish": 0.3, - "English,Malay,Chinese,Filipino": 0.2733333333333333, - "English,Malay,Chinese,Indonesian": 0.2866666666666667, - "English,Malay,Chinese,Spanish": 0.2733333333333333, - "English,Malay,Filipino,Indonesian": 0.30666666666666664, - "English,Malay,Filipino,Spanish": 0.32, - "English,Malay,Indonesian,Spanish": 0.32, - "English,Chinese,Filipino,Indonesian": 0.34, - "English,Chinese,Filipino,Spanish": 0.36666666666666664, - "English,Chinese,Indonesian,Spanish": 0.34, - "English,Filipino,Indonesian,Spanish": 0.3933333333333333, - "Malay,Chinese,Filipino,Indonesian": 0.26, - "Malay,Chinese,Filipino,Spanish": 0.28, - "Malay,Chinese,Indonesian,Spanish": 0.26666666666666666, - "Malay,Filipino,Indonesian,Spanish": 0.32, - "Chinese,Filipino,Indonesian,Spanish": 0.36 + "English,Malay,Chinese,Filipino": 0.26, + "English,Malay,Chinese,Indonesian": 0.26, + "English,Malay,Chinese,Spanish": 0.29333333333333333, + "English,Malay,Filipino,Indonesian": 0.31333333333333335, + "English,Malay,Filipino,Spanish": 0.31333333333333335, + "English,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Chinese,Filipino,Spanish": 0.28, + "English,Chinese,Indonesian,Spanish": 0.24666666666666667, + "English,Filipino,Indonesian,Spanish": 0.29333333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.3, + "Malay,Chinese,Filipino,Spanish": 0.30666666666666664, + "Malay,Chinese,Indonesian,Spanish": 0.2866666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.34, + "Chinese,Filipino,Indonesian,Spanish": 0.28 }, "5_combine": { - "Vietnamese,English,Malay,Chinese,Filipino": 0.18666666666666668, - "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, - "Vietnamese,English,Malay,Chinese,Spanish": 0.17333333333333334, - "Vietnamese,English,Malay,Filipino,Indonesian": 0.22666666666666666, - "Vietnamese,English,Malay,Filipino,Spanish": 0.22666666666666666, - "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, - "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, - "Vietnamese,English,Chinese,Filipino,Spanish": 0.21333333333333335, - "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18666666666666668, - "Vietnamese,English,Filipino,Indonesian,Spanish": 0.23333333333333334, - "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, - "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, - "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Malay,Chinese,Filipino": 0.16, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.17333333333333334, + "Vietnamese,English,Malay,Chinese,Spanish": 0.16, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.2, "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, - "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.2, - "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, - "English,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, - "English,Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, - "English,Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, - "English,Chinese,Filipino,Indonesian,Spanish": 0.28, - "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22 + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "English,Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "English,Malay,Chinese,Filipino,Spanish": 0.23333333333333334, + "English,Malay,Chinese,Indonesian,Spanish": 0.22, + "English,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 }, "6_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16, - "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, - "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.15333333333333332, - "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2, - "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16, - "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16, - "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.14666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 }, "7_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.13333333333333333 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14 } }, - "AC3_2": 0.5394035028318483, - "AC3_3": 0.44007412774099774, - "AC3_4": 0.3636927201825941, - "AC3_5": 0.3028587948208935, - "AC3_6": 0.25329100525406706, - "AC3_7": 0.2120857699479498 + "AC3_2": 0.4280116035959201, + "AC3_3": 0.355227881987535, + "AC3_4": 0.2995177630800899, + "AC3_5": 0.2569314577960376, + "AC3_6": 0.22460172364453324, + "AC3_7": 0.20114942524689522 }, "prompt_2": { - "overall_acc": 0.5285714285714286, + "overall_acc": 0.3695238095238095, "language_acc": { - "Vietnamese": 0.54, - "English": 0.6733333333333333, - "Malay": 0.38666666666666666, - "Chinese": 0.48, - "Filipino": 0.5933333333333334, - "Indonesian": 0.48, - "Spanish": 0.5466666666666666 + "Vietnamese": 0.37333333333333335, + "English": 0.4666666666666667, + "Malay": 0.3333333333333333, + "Chinese": 0.3466666666666667, + "Filipino": 0.32666666666666666, + "Indonesian": 0.34, + "Spanish": 0.4 }, - "consistency_score_2": 0.5320634920634921, - "consistency_score_3": 0.3546666666666668, - "consistency_score_4": 0.259047619047619, - "consistency_score_5": 0.19873015873015876, - "consistency_score_6": 0.15714285714285717, - "consistency_score_7": 0.12666666666666668, + "consistency_score_2": 0.48920634920634914, + "consistency_score_3": 0.30133333333333334, + "consistency_score_4": 0.20552380952380955, + "consistency_score_5": 0.14761904761904762, + "consistency_score_6": 0.10857142857142858, + "consistency_score_7": 0.08, "detailed_consistency_score": { "2_combine": { - "Vietnamese,English": 0.56, - "Vietnamese,Malay": 0.43333333333333335, - "Vietnamese,Chinese": 0.5066666666666667, - "Vietnamese,Filipino": 0.6, - "Vietnamese,Indonesian": 0.5333333333333333, - "Vietnamese,Spanish": 0.54, - "English,Malay": 0.4533333333333333, - "English,Chinese": 0.49333333333333335, - "English,Filipino": 0.62, - "English,Indonesian": 0.58, - "English,Spanish": 0.62, - "Malay,Chinese": 0.5266666666666666, - "Malay,Filipino": 0.49333333333333335, - "Malay,Indonesian": 0.5266666666666666, - "Malay,Spanish": 0.47333333333333333, - "Chinese,Filipino": 0.49333333333333335, - "Chinese,Indonesian": 0.47333333333333333, - "Chinese,Spanish": 0.43333333333333335, - "Filipino,Indonesian": 0.6333333333333333, - "Filipino,Spanish": 0.6066666666666667, - "Indonesian,Spanish": 0.5733333333333334 + "Vietnamese,English": 0.41333333333333333, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Chinese": 0.4, + "Vietnamese,Filipino": 0.4866666666666667, + "Vietnamese,Indonesian": 0.4266666666666667, + "Vietnamese,Spanish": 0.4533333333333333, + "English,Malay": 0.52, + "English,Chinese": 0.4533333333333333, + "English,Filipino": 0.5133333333333333, + "English,Indonesian": 0.4533333333333333, + "English,Spanish": 0.5866666666666667, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.5866666666666667, + "Malay,Indonesian": 0.6066666666666667, + "Malay,Spanish": 0.5133333333333333, + "Chinese,Filipino": 0.4533333333333333, + "Chinese,Indonesian": 0.4266666666666667, + "Chinese,Spanish": 0.5066666666666667, + "Filipino,Indonesian": 0.49333333333333335, + "Filipino,Spanish": 0.5066666666666667, + "Indonesian,Spanish": 0.48 }, "3_combine": { - "Vietnamese,English,Malay": 0.29333333333333333, - "Vietnamese,English,Chinese": 0.34, - "Vietnamese,English,Filipino": 0.43333333333333335, - "Vietnamese,English,Indonesian": 0.4, - "Vietnamese,English,Spanish": 0.42, - "Vietnamese,Malay,Chinese": 0.3, - "Vietnamese,Malay,Filipino": 0.3333333333333333, - "Vietnamese,Malay,Indonesian": 0.3, + "Vietnamese,English,Malay": 0.2866666666666667, + "Vietnamese,English,Chinese": 0.21333333333333335, + "Vietnamese,English,Filipino": 0.2866666666666667, + "Vietnamese,English,Indonesian": 0.26666666666666666, + "Vietnamese,English,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese": 0.26666666666666666, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.32, "Vietnamese,Malay,Spanish": 0.31333333333333335, - "Vietnamese,Chinese,Filipino": 0.4, - "Vietnamese,Chinese,Indonesian": 0.32666666666666666, - "Vietnamese,Chinese,Spanish": 0.31333333333333335, - "Vietnamese,Filipino,Indonesian": 0.43333333333333335, - "Vietnamese,Filipino,Spanish": 0.4066666666666667, - "Vietnamese,Indonesian,Spanish": 0.38, - "English,Malay,Chinese": 0.3, - "English,Malay,Filipino": 0.32666666666666666, - "English,Malay,Indonesian": 0.32, - "English,Malay,Spanish": 0.32666666666666666, - "English,Chinese,Filipino": 0.36666666666666664, - "English,Chinese,Indonesian": 0.32, - "English,Chinese,Spanish": 0.32666666666666666, - "English,Filipino,Indonesian": 0.46, - "English,Filipino,Spanish": 0.46, - "English,Indonesian,Spanish": 0.43333333333333335, - "Malay,Chinese,Filipino": 0.31333333333333335, - "Malay,Chinese,Indonesian": 0.30666666666666664, - "Malay,Chinese,Spanish": 0.2866666666666667, - "Malay,Filipino,Indonesian": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.26, + "Vietnamese,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian": 0.28, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Indonesian,Spanish": 0.2733333333333333, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.35333333333333333, + "English,Malay,Indonesian": 0.34, + "English,Malay,Spanish": 0.3466666666666667, + "English,Chinese,Filipino": 0.26666666666666666, + "English,Chinese,Indonesian": 0.26, + "English,Chinese,Spanish": 0.30666666666666664, + "English,Filipino,Indonesian": 0.2866666666666667, + "English,Filipino,Spanish": 0.35333333333333333, + "English,Indonesian,Spanish": 0.32666666666666666, + "Malay,Chinese,Filipino": 0.32, + "Malay,Chinese,Indonesian": 0.32, + "Malay,Chinese,Spanish": 0.3333333333333333, + "Malay,Filipino,Indonesian": 0.37333333333333335, "Malay,Filipino,Spanish": 0.35333333333333333, - "Malay,Indonesian,Spanish": 0.3333333333333333, - "Chinese,Filipino,Indonesian": 0.36666666666666664, - "Chinese,Filipino,Spanish": 0.3466666666666667, - "Chinese,Indonesian,Spanish": 0.29333333333333333, - "Filipino,Indonesian,Spanish": 0.43333333333333335 + "Malay,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Filipino,Indonesian": 0.2733333333333333, + "Chinese,Filipino,Spanish": 0.29333333333333333, + "Chinese,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,Spanish": 0.31333333333333335 }, "4_combine": { - "Vietnamese,English,Malay,Chinese": 0.20666666666666667, - "Vietnamese,English,Malay,Filipino": 0.25333333333333335, - "Vietnamese,English,Malay,Indonesian": 0.23333333333333334, - "Vietnamese,English,Malay,Spanish": 0.24, - "Vietnamese,English,Chinese,Filipino": 0.30666666666666664, - "Vietnamese,English,Chinese,Indonesian": 0.25333333333333335, - "Vietnamese,English,Chinese,Spanish": 0.26, - "Vietnamese,English,Filipino,Indonesian": 0.3466666666666667, - "Vietnamese,English,Filipino,Spanish": 0.3333333333333333, - "Vietnamese,English,Indonesian,Spanish": 0.32, - "Vietnamese,Malay,Chinese,Filipino": 0.25333333333333335, - "Vietnamese,Malay,Chinese,Indonesian": 0.2, - "Vietnamese,Malay,Chinese,Spanish": 0.20666666666666667, - "Vietnamese,Malay,Filipino,Indonesian": 0.24, - "Vietnamese,Malay,Filipino,Spanish": 0.26666666666666666, - "Vietnamese,Malay,Indonesian,Spanish": 0.23333333333333334, - "Vietnamese,Chinese,Filipino,Indonesian": 0.30666666666666664, - "Vietnamese,Chinese,Filipino,Spanish": 0.2733333333333333, - "Vietnamese,Chinese,Indonesian,Spanish": 0.22666666666666666, - "Vietnamese,Filipino,Indonesian,Spanish": 0.32, - "English,Malay,Chinese,Filipino": 0.24, - "English,Malay,Chinese,Indonesian": 0.2, + "Vietnamese,English,Malay,Chinese": 0.16666666666666666, + "Vietnamese,English,Malay,Filipino": 0.21333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.22, + "Vietnamese,English,Malay,Spanish": 0.21333333333333335, + "Vietnamese,English,Chinese,Filipino": 0.16, + "Vietnamese,English,Chinese,Indonesian": 0.15333333333333332, + "Vietnamese,English,Chinese,Spanish": 0.16666666666666666, + "Vietnamese,English,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,English,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino": 0.20666666666666667, + "English,Malay,Chinese,Indonesian": 0.20666666666666667, "English,Malay,Chinese,Spanish": 0.22, - "English,Malay,Filipino,Indonesian": 0.25333333333333335, - "English,Malay,Filipino,Spanish": 0.28, - "English,Malay,Indonesian,Spanish": 0.25333333333333335, - "English,Chinese,Filipino,Indonesian": 0.2866666666666667, - "English,Chinese,Filipino,Spanish": 0.28, - "English,Chinese,Indonesian,Spanish": 0.24, - "English,Filipino,Indonesian,Spanish": 0.36, - "Malay,Chinese,Filipino,Indonesian": 0.23333333333333334, - "Malay,Chinese,Filipino,Spanish": 0.24, - "Malay,Chinese,Indonesian,Spanish": 0.2, - "Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, - "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + "English,Malay,Filipino,Indonesian": 0.23333333333333334, + "English,Malay,Filipino,Spanish": 0.25333333333333335, + "English,Malay,Indonesian,Spanish": 0.24666666666666667, + "English,Chinese,Filipino,Indonesian": 0.16666666666666666, + "English,Chinese,Filipino,Spanish": 0.19333333333333333, + "English,Chinese,Indonesian,Spanish": 0.20666666666666667, + "English,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Malay,Chinese,Filipino,Spanish": 0.23333333333333334, + "Malay,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.24, + "Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 }, "5_combine": { - "Vietnamese,English,Malay,Chinese,Filipino": 0.2, - "Vietnamese,English,Malay,Chinese,Indonesian": 0.15333333333333332, - "Vietnamese,English,Malay,Chinese,Spanish": 0.16666666666666666, - "Vietnamese,English,Malay,Filipino,Indonesian": 0.2, - "Vietnamese,English,Malay,Filipino,Spanish": 0.22, - "Vietnamese,English,Malay,Indonesian,Spanish": 0.19333333333333333, - "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24666666666666667, - "Vietnamese,English,Chinese,Filipino,Spanish": 0.23333333333333334, - "Vietnamese,English,Chinese,Indonesian,Spanish": 0.2, - "Vietnamese,English,Filipino,Indonesian,Spanish": 0.28, - "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, - "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Chinese,Filipino": 0.12666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.13333333333333333, + "Vietnamese,English,Malay,Chinese,Spanish": 0.13333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.16, + "Vietnamese,English,Malay,Filipino,Spanish": 0.16, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.10666666666666667, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.12, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.13333333333333333, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.13333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.14666666666666667, "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, - "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, - "English,Malay,Chinese,Filipino,Indonesian": 0.18, - "English,Malay,Chinese,Filipino,Spanish": 0.2, - "English,Malay,Chinese,Indonesian,Spanish": 0.16, - "English,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, - "English,Chinese,Filipino,Indonesian,Spanish": 0.22, - "Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.13333333333333333, + "English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "English,Malay,Chinese,Filipino,Spanish": 0.16, + "English,Malay,Chinese,Indonesian,Spanish": 0.17333333333333334, + "English,Malay,Filipino,Indonesian,Spanish": 0.18, + "English,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18 }, "6_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, - "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, - "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.12666666666666668, - "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, - "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, - "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14, - "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.09333333333333334, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.1, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.09333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.10666666666666667, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 }, "7_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.08 } }, - "AC3_2": 0.5303117115846695, - "AC3_3": 0.4244985981835537, - "AC3_4": 0.3476939021842192, - "AC3_5": 0.2888570181058222, - "AC3_6": 0.24226190472657494, - "AC3_7": 0.20436046508509034 + "AC3_2": 0.42102490973004, + "AC3_3": 0.33196289981801613, + "AC3_4": 0.2641379201105361, + "AC3_5": 0.21096202749581938, + "AC3_6": 0.1678315309836778, + "AC3_7": 0.13152542369955472 }, "prompt_3": { - "overall_acc": 0.5114285714285715, + "overall_acc": 0.36095238095238097, "language_acc": { - "Vietnamese": 0.4866666666666667, - "English": 0.6, - "Malay": 0.4066666666666667, - "Chinese": 0.44, - "Filipino": 0.58, - "Indonesian": 0.48, - "Spanish": 0.5866666666666667 + "Vietnamese": 0.36, + "English": 0.46, + "Malay": 0.32666666666666666, + "Chinese": 0.34, + "Filipino": 0.32, + "Indonesian": 0.36, + "Spanish": 0.36 }, - "consistency_score_2": 0.5193650793650794, - "consistency_score_3": 0.33352380952380956, - "consistency_score_4": 0.24076190476190473, - "consistency_score_5": 0.18666666666666668, - "consistency_score_6": 0.15142857142857144, - "consistency_score_7": 0.12666666666666668, + "consistency_score_2": 0.4714285714285714, + "consistency_score_3": 0.2857142857142857, + "consistency_score_4": 0.1952380952380952, + "consistency_score_5": 0.14285714285714285, + "consistency_score_6": 0.10952380952380951, + "consistency_score_7": 0.08666666666666667, "detailed_consistency_score": { "2_combine": { - "Vietnamese,English": 0.44, - "Vietnamese,Malay": 0.5866666666666667, - "Vietnamese,Chinese": 0.52, - "Vietnamese,Filipino": 0.5466666666666666, - "Vietnamese,Indonesian": 0.5533333333333333, - "Vietnamese,Spanish": 0.5, - "English,Malay": 0.47333333333333333, - "English,Chinese": 0.4666666666666667, - "English,Filipino": 0.6133333333333333, - "English,Indonesian": 0.4866666666666667, - "English,Spanish": 0.6133333333333333, - "Malay,Chinese": 0.47333333333333333, - "Malay,Filipino": 0.48, - "Malay,Indonesian": 0.5466666666666666, - "Malay,Spanish": 0.48, - "Chinese,Filipino": 0.44666666666666666, - "Chinese,Indonesian": 0.4533333333333333, + "Vietnamese,English": 0.41333333333333333, + "Vietnamese,Malay": 0.54, + "Vietnamese,Chinese": 0.36, + "Vietnamese,Filipino": 0.5133333333333333, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Spanish": 0.4, + "English,Malay": 0.3933333333333333, + "English,Chinese": 0.4533333333333333, + "English,Filipino": 0.4666666666666667, + "English,Indonesian": 0.47333333333333333, + "English,Spanish": 0.5866666666666667, + "Malay,Chinese": 0.46, + "Malay,Filipino": 0.56, + "Malay,Indonesian": 0.66, + "Malay,Spanish": 0.38, + "Chinese,Filipino": 0.4266666666666667, + "Chinese,Indonesian": 0.4266666666666667, "Chinese,Spanish": 0.47333333333333333, - "Filipino,Indonesian": 0.58, - "Filipino,Spanish": 0.6333333333333333, - "Indonesian,Spanish": 0.54 + "Filipino,Indonesian": 0.54, + "Filipino,Spanish": 0.4666666666666667, + "Indonesian,Spanish": 0.4266666666666667 }, "3_combine": { - "Vietnamese,English,Malay": 0.30666666666666664, - "Vietnamese,English,Chinese": 0.3, - "Vietnamese,English,Filipino": 0.35333333333333333, - "Vietnamese,English,Indonesian": 0.29333333333333333, - "Vietnamese,English,Spanish": 0.32, - "Vietnamese,Malay,Chinese": 0.3466666666666667, + "Vietnamese,English,Malay": 0.25333333333333335, + "Vietnamese,English,Chinese": 0.22, + "Vietnamese,English,Filipino": 0.2733333333333333, + "Vietnamese,English,Indonesian": 0.26, + "Vietnamese,English,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese": 0.25333333333333335, "Vietnamese,Malay,Filipino": 0.3466666666666667, - "Vietnamese,Malay,Indonesian": 0.38, - "Vietnamese,Malay,Spanish": 0.3333333333333333, - "Vietnamese,Chinese,Filipino": 0.32666666666666666, - "Vietnamese,Chinese,Indonesian": 0.32, - "Vietnamese,Chinese,Spanish": 0.30666666666666664, - "Vietnamese,Filipino,Indonesian": 0.38, - "Vietnamese,Filipino,Spanish": 0.3933333333333333, - "Vietnamese,Indonesian,Spanish": 0.3333333333333333, - "English,Malay,Chinese": 0.2733333333333333, - "English,Malay,Filipino": 0.34, + "Vietnamese,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Malay,Spanish": 0.24666666666666667, + "Vietnamese,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Chinese,Indonesian": 0.24, + "Vietnamese,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian": 0.34, + "Vietnamese,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Spanish": 0.26, + "English,Malay,Chinese": 0.24, + "English,Malay,Filipino": 0.26666666666666666, "English,Malay,Indonesian": 0.30666666666666664, - "English,Malay,Spanish": 0.3333333333333333, - "English,Chinese,Filipino": 0.32, - "English,Chinese,Indonesian": 0.26666666666666666, - "English,Chinese,Spanish": 0.3333333333333333, - "English,Filipino,Indonesian": 0.38666666666666666, - "English,Filipino,Spanish": 0.4666666666666667, - "English,Indonesian,Spanish": 0.38, - "Malay,Chinese,Filipino": 0.28, - "Malay,Chinese,Indonesian": 0.3, - "Malay,Chinese,Spanish": 0.28, - "Malay,Filipino,Indonesian": 0.3333333333333333, - "Malay,Filipino,Spanish": 0.36, - "Malay,Indonesian,Spanish": 0.32666666666666666, - "Chinese,Filipino,Indonesian": 0.32, - "Chinese,Filipino,Spanish": 0.32666666666666666, - "Chinese,Indonesian,Spanish": 0.3, - "Filipino,Indonesian,Spanish": 0.4 + "English,Malay,Spanish": 0.25333333333333335, + "English,Chinese,Filipino": 0.24666666666666667, + "English,Chinese,Indonesian": 0.25333333333333335, + "English,Chinese,Spanish": 0.32666666666666666, + "English,Filipino,Indonesian": 0.31333333333333335, + "English,Filipino,Spanish": 0.34, + "English,Indonesian,Spanish": 0.3, + "Malay,Chinese,Filipino": 0.30666666666666664, + "Malay,Chinese,Indonesian": 0.35333333333333333, + "Malay,Chinese,Spanish": 0.26, + "Malay,Filipino,Indonesian": 0.4266666666666667, + "Malay,Filipino,Spanish": 0.26666666666666666, + "Malay,Indonesian,Spanish": 0.30666666666666664, + "Chinese,Filipino,Indonesian": 0.29333333333333333, + "Chinese,Filipino,Spanish": 0.26666666666666666, + "Chinese,Indonesian,Spanish": 0.26, + "Filipino,Indonesian,Spanish": 0.3 }, "4_combine": { - "Vietnamese,English,Malay,Chinese": 0.22, - "Vietnamese,English,Malay,Filipino": 0.25333333333333335, - "Vietnamese,English,Malay,Indonesian": 0.24, - "Vietnamese,English,Malay,Spanish": 0.23333333333333334, - "Vietnamese,English,Chinese,Filipino": 0.24666666666666667, - "Vietnamese,English,Chinese,Indonesian": 0.2, - "Vietnamese,English,Chinese,Spanish": 0.24, - "Vietnamese,English,Filipino,Indonesian": 0.25333333333333335, - "Vietnamese,English,Filipino,Spanish": 0.28, - "Vietnamese,English,Indonesian,Spanish": 0.22666666666666666, - "Vietnamese,Malay,Chinese,Filipino": 0.23333333333333334, - "Vietnamese,Malay,Chinese,Indonesian": 0.23333333333333334, - "Vietnamese,Malay,Chinese,Spanish": 0.22, - "Vietnamese,Malay,Filipino,Indonesian": 0.26666666666666666, - "Vietnamese,Malay,Filipino,Spanish": 0.28, - "Vietnamese,Malay,Indonesian,Spanish": 0.26, - "Vietnamese,Chinese,Filipino,Indonesian": 0.24, - "Vietnamese,Chinese,Filipino,Spanish": 0.24, - "Vietnamese,Chinese,Indonesian,Spanish": 0.20666666666666667, - "Vietnamese,Filipino,Indonesian,Spanish": 0.26, - "English,Malay,Chinese,Filipino": 0.22666666666666666, - "English,Malay,Chinese,Indonesian": 0.2, - "English,Malay,Chinese,Spanish": 0.22, - "English,Malay,Filipino,Indonesian": 0.26, - "English,Malay,Filipino,Spanish": 0.28, - "English,Malay,Indonesian,Spanish": 0.24666666666666667, - "English,Chinese,Filipino,Indonesian": 0.22, - "English,Chinese,Filipino,Spanish": 0.26666666666666666, - "English,Chinese,Indonesian,Spanish": 0.22, - "English,Filipino,Indonesian,Spanish": 0.31333333333333335, - "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, - "Malay,Chinese,Filipino,Spanish": 0.22, - "Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, - "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, - "Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 + "Vietnamese,English,Malay,Chinese": 0.14666666666666667, + "Vietnamese,English,Malay,Filipino": 0.19333333333333333, + "Vietnamese,English,Malay,Indonesian": 0.21333333333333335, + "Vietnamese,English,Malay,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino": 0.15333333333333332, + "Vietnamese,English,Chinese,Indonesian": 0.14666666666666667, + "Vietnamese,English,Chinese,Spanish": 0.18, + "Vietnamese,English,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Chinese,Filipino,Indonesian": 0.18, + "Vietnamese,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,Chinese,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino": 0.16666666666666666, + "English,Malay,Chinese,Indonesian": 0.18666666666666668, + "English,Malay,Chinese,Spanish": 0.18, + "English,Malay,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Filipino,Spanish": 0.19333333333333333, + "English,Malay,Indonesian,Spanish": 0.20666666666666667, + "English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "English,Chinese,Filipino,Spanish": 0.2, + "English,Chinese,Indonesian,Spanish": 0.18666666666666668, + "English,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Malay,Chinese,Filipino,Indonesian": 0.25333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.18, + "Malay,Chinese,Indonesian,Spanish": 0.22, + "Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.18 }, "5_combine": { - "Vietnamese,English,Malay,Chinese,Filipino": 0.19333333333333333, - "Vietnamese,English,Malay,Chinese,Indonesian": 0.16666666666666666, - "Vietnamese,English,Malay,Chinese,Spanish": 0.18, - "Vietnamese,English,Malay,Filipino,Indonesian": 0.20666666666666667, - "Vietnamese,English,Malay,Filipino,Spanish": 0.21333333333333335, - "Vietnamese,English,Malay,Indonesian,Spanish": 0.19333333333333333, - "Vietnamese,English,Chinese,Filipino,Indonesian": 0.17333333333333334, - "Vietnamese,English,Chinese,Filipino,Spanish": 0.20666666666666667, - "Vietnamese,English,Chinese,Indonesian,Spanish": 0.16, - "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, - "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18, - "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.18666666666666668, - "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.17333333333333334, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, - "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, - "English,Malay,Chinese,Filipino,Indonesian": 0.17333333333333334, - "English,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, - "English,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, - "English,Malay,Filipino,Indonesian,Spanish": 0.22, - "English,Chinese,Filipino,Indonesian,Spanish": 0.18, - "Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + "Vietnamese,English,Malay,Chinese,Filipino": 0.10666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.12, + "Vietnamese,English,Malay,Chinese,Spanish": 0.13333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Spanish": 0.14666666666666667, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.12, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.13333333333333333, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.12666666666666668, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668, + "English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "English,Malay,Chinese,Filipino,Spanish": 0.13333333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.16, + "English,Chinese,Filipino,Indonesian,Spanish": 0.14, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332 }, "6_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, - "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, - "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.14, - "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, - "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.14, - "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, - "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.09333333333333334, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.1, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.11333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.13333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.1, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.11333333333333333, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.11333333333333333 }, "7_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.08666666666666667 } }, - "AC3_2": 0.5153662721133185, - "AC3_3": 0.4037472626084062, - "AC3_4": 0.3273971710305254, - "AC3_5": 0.27350613911498206, - "AC3_6": 0.23366995070366434, - "AC3_7": 0.2030447760875825 + "AC3_2": 0.40885910423332594, + "AC3_3": 0.31895644850948174, + "AC3_4": 0.25340834959566494, + "AC3_5": 0.2046988927489998, + "AC3_6": 0.16805475222955843, + "AC3_7": 0.13977304961416417 }, "prompt_4": { - "overall_acc": 0.54, + "overall_acc": 0.3761904761904762, "language_acc": { - "Vietnamese": 0.5066666666666667, - "English": 0.6066666666666667, - "Malay": 0.4666666666666667, - "Chinese": 0.48, - "Filipino": 0.6066666666666667, - "Indonesian": 0.52, - "Spanish": 0.5933333333333334 + "Vietnamese": 0.41333333333333333, + "English": 0.41333333333333333, + "Malay": 0.3466666666666667, + "Chinese": 0.36666666666666664, + "Filipino": 0.3333333333333333, + "Indonesian": 0.38666666666666666, + "Spanish": 0.37333333333333335 }, - "consistency_score_2": 0.5647619047619047, - "consistency_score_3": 0.3931428571428571, - "consistency_score_4": 0.2948571428571429, - "consistency_score_5": 0.22984126984126982, - "consistency_score_6": 0.18476190476190474, - "consistency_score_7": 0.15333333333333332, + "consistency_score_2": 0.5361904761904762, + "consistency_score_3": 0.35790476190476195, + "consistency_score_4": 0.26761904761904765, + "consistency_score_5": 0.21523809523809526, + "consistency_score_6": 0.18190476190476187, + "consistency_score_7": 0.16, "detailed_consistency_score": { "2_combine": { - "Vietnamese,English": 0.5, - "Vietnamese,Malay": 0.5533333333333333, - "Vietnamese,Chinese": 0.48, - "Vietnamese,Filipino": 0.56, - "Vietnamese,Indonesian": 0.5, - "Vietnamese,Spanish": 0.48, - "English,Malay": 0.5266666666666666, - "English,Chinese": 0.5533333333333333, - "English,Filipino": 0.6733333333333333, - "English,Indonesian": 0.6, - "English,Spanish": 0.6866666666666666, - "Malay,Chinese": 0.4866666666666667, - "Malay,Filipino": 0.5266666666666666, - "Malay,Indonesian": 0.54, - "Malay,Spanish": 0.52, - "Chinese,Filipino": 0.6533333333333333, - "Chinese,Indonesian": 0.54, - "Chinese,Spanish": 0.5866666666666667, - "Filipino,Indonesian": 0.6466666666666666, - "Filipino,Spanish": 0.6466666666666666, - "Indonesian,Spanish": 0.6 + "Vietnamese,English": 0.4866666666666667, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Chinese": 0.5133333333333333, + "Vietnamese,Filipino": 0.48, + "Vietnamese,Indonesian": 0.62, + "Vietnamese,Spanish": 0.5466666666666666, + "English,Malay": 0.4866666666666667, + "English,Chinese": 0.56, + "English,Filipino": 0.5466666666666666, + "English,Indonesian": 0.46, + "English,Spanish": 0.6266666666666667, + "Malay,Chinese": 0.5133333333333333, + "Malay,Filipino": 0.56, + "Malay,Indonesian": 0.68, + "Malay,Spanish": 0.5266666666666666, + "Chinese,Filipino": 0.4666666666666667, + "Chinese,Indonesian": 0.48, + "Chinese,Spanish": 0.58, + "Filipino,Indonesian": 0.5533333333333333, + "Filipino,Spanish": 0.5333333333333333, + "Indonesian,Spanish": 0.44666666666666666 }, "3_combine": { - "Vietnamese,English,Malay": 0.35333333333333333, - "Vietnamese,English,Chinese": 0.32666666666666666, - "Vietnamese,English,Filipino": 0.4, - "Vietnamese,English,Indonesian": 0.35333333333333333, + "Vietnamese,English,Malay": 0.34, + "Vietnamese,English,Chinese": 0.3466666666666667, + "Vietnamese,English,Filipino": 0.32, + "Vietnamese,English,Indonesian": 0.32666666666666666, "Vietnamese,English,Spanish": 0.38, - "Vietnamese,Malay,Chinese": 0.31333333333333335, - "Vietnamese,Malay,Filipino": 0.37333333333333335, - "Vietnamese,Malay,Indonesian": 0.3466666666666667, - "Vietnamese,Malay,Spanish": 0.34, - "Vietnamese,Chinese,Filipino": 0.3933333333333333, - "Vietnamese,Chinese,Indonesian": 0.30666666666666664, - "Vietnamese,Chinese,Spanish": 0.32666666666666666, - "Vietnamese,Filipino,Indonesian": 0.41333333333333333, - "Vietnamese,Filipino,Spanish": 0.38666666666666666, - "Vietnamese,Indonesian,Spanish": 0.3466666666666667, + "Vietnamese,Malay,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Filipino": 0.36666666666666664, + "Vietnamese,Malay,Indonesian": 0.48, + "Vietnamese,Malay,Spanish": 0.4, + "Vietnamese,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,Chinese,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,Spanish": 0.35333333333333333, + "Vietnamese,Filipino,Indonesian": 0.36, + "Vietnamese,Filipino,Spanish": 0.35333333333333333, + "Vietnamese,Indonesian,Spanish": 0.36, "English,Malay,Chinese": 0.34, - "English,Malay,Filipino": 0.4066666666666667, - "English,Malay,Indonesian": 0.36, - "English,Malay,Spanish": 0.4066666666666667, - "English,Chinese,Filipino": 0.49333333333333335, - "English,Chinese,Indonesian": 0.3933333333333333, - "English,Chinese,Spanish": 0.4533333333333333, - "English,Filipino,Indonesian": 0.49333333333333335, - "English,Filipino,Spanish": 0.5266666666666666, - "English,Indonesian,Spanish": 0.47333333333333333, - "Malay,Chinese,Filipino": 0.37333333333333335, - "Malay,Chinese,Indonesian": 0.32666666666666666, - "Malay,Chinese,Spanish": 0.3466666666666667, - "Malay,Filipino,Indonesian": 0.3933333333333333, - "Malay,Filipino,Spanish": 0.3933333333333333, - "Malay,Indonesian,Spanish": 0.36, - "Chinese,Filipino,Indonesian": 0.4666666666666667, - "Chinese,Filipino,Spanish": 0.49333333333333335, - "Chinese,Indonesian,Spanish": 0.41333333333333333, - "Filipino,Indonesian,Spanish": 0.4866666666666667 + "English,Malay,Filipino": 0.36, + "English,Malay,Indonesian": 0.35333333333333333, + "English,Malay,Spanish": 0.37333333333333335, + "English,Chinese,Filipino": 0.3466666666666667, + "English,Chinese,Indonesian": 0.30666666666666664, + "English,Chinese,Spanish": 0.4266666666666667, + "English,Filipino,Indonesian": 0.32666666666666666, + "English,Filipino,Spanish": 0.38666666666666666, + "English,Indonesian,Spanish": 0.32666666666666666, + "Malay,Chinese,Filipino": 0.32, + "Malay,Chinese,Indonesian": 0.38666666666666666, + "Malay,Chinese,Spanish": 0.36, + "Malay,Filipino,Indonesian": 0.43333333333333335, + "Malay,Filipino,Spanish": 0.38666666666666666, + "Malay,Indonesian,Spanish": 0.38, + "Chinese,Filipino,Indonesian": 0.3, + "Chinese,Filipino,Spanish": 0.3466666666666667, + "Chinese,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,Spanish": 0.3333333333333333 }, "4_combine": { - "Vietnamese,English,Malay,Chinese": 0.24, - "Vietnamese,English,Malay,Filipino": 0.2866666666666667, - "Vietnamese,English,Malay,Indonesian": 0.25333333333333335, - "Vietnamese,English,Malay,Spanish": 0.29333333333333333, - "Vietnamese,English,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,English,Malay,Chinese": 0.26, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,English,Malay,Spanish": 0.28, + "Vietnamese,English,Chinese,Filipino": 0.25333333333333335, "Vietnamese,English,Chinese,Indonesian": 0.24, - "Vietnamese,English,Chinese,Spanish": 0.2733333333333333, - "Vietnamese,English,Filipino,Indonesian": 0.31333333333333335, - "Vietnamese,English,Filipino,Spanish": 0.32666666666666666, - "Vietnamese,English,Indonesian,Spanish": 0.2866666666666667, - "Vietnamese,Malay,Chinese,Filipino": 0.26666666666666666, - "Vietnamese,Malay,Chinese,Indonesian": 0.20666666666666667, - "Vietnamese,Malay,Chinese,Spanish": 0.23333333333333334, - "Vietnamese,Malay,Filipino,Indonesian": 0.2866666666666667, - "Vietnamese,Malay,Filipino,Spanish": 0.29333333333333333, - "Vietnamese,Malay,Indonesian,Spanish": 0.25333333333333335, - "Vietnamese,Chinese,Filipino,Indonesian": 0.3, - "Vietnamese,Chinese,Filipino,Spanish": 0.30666666666666664, - "Vietnamese,Chinese,Indonesian,Spanish": 0.24666666666666667, - "Vietnamese,Filipino,Indonesian,Spanish": 0.32, - "English,Malay,Chinese,Filipino": 0.30666666666666664, - "English,Malay,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,English,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,English,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Indonesian": 0.3, + "Vietnamese,Malay,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Filipino,Spanish": 0.3, + "Vietnamese,Malay,Indonesian,Spanish": 0.32, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.26, + "Vietnamese,Chinese,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.25333333333333335, + "English,Malay,Chinese,Indonesian": 0.24666666666666667, "English,Malay,Chinese,Spanish": 0.29333333333333333, - "English,Malay,Filipino,Indonesian": 0.30666666666666664, - "English,Malay,Filipino,Spanish": 0.3466666666666667, - "English,Malay,Indonesian,Spanish": 0.2866666666666667, - "English,Chinese,Filipino,Indonesian": 0.37333333333333335, - "English,Chinese,Filipino,Spanish": 0.42, - "English,Chinese,Indonesian,Spanish": 0.32666666666666666, - "English,Filipino,Indonesian,Spanish": 0.4, - "Malay,Chinese,Filipino,Indonesian": 0.28, - "Malay,Chinese,Filipino,Spanish": 0.3, - "Malay,Chinese,Indonesian,Spanish": 0.24, - "Malay,Filipino,Indonesian,Spanish": 0.3, - "Chinese,Filipino,Indonesian,Spanish": 0.37333333333333335 + "English,Malay,Filipino,Indonesian": 0.2733333333333333, + "English,Malay,Filipino,Spanish": 0.3, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Chinese,Filipino,Indonesian": 0.23333333333333334, + "English,Chinese,Filipino,Spanish": 0.28, + "English,Chinese,Indonesian,Spanish": 0.24, + "English,Filipino,Indonesian,Spanish": 0.26, + "Malay,Chinese,Filipino,Indonesian": 0.24666666666666667, + "Malay,Chinese,Filipino,Spanish": 0.2733333333333333, + "Malay,Chinese,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.29333333333333333, + "Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 }, "5_combine": { - "Vietnamese,English,Malay,Chinese,Filipino": 0.22, - "Vietnamese,English,Malay,Chinese,Indonesian": 0.16666666666666666, - "Vietnamese,English,Malay,Chinese,Spanish": 0.21333333333333335, - "Vietnamese,English,Malay,Filipino,Indonesian": 0.22, - "Vietnamese,English,Malay,Filipino,Spanish": 0.26, - "Vietnamese,English,Malay,Indonesian,Spanish": 0.22, - "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24, - "Vietnamese,English,Chinese,Filipino,Spanish": 0.26666666666666666, - "Vietnamese,English,Chinese,Indonesian,Spanish": 0.20666666666666667, - "Vietnamese,English,Filipino,Indonesian,Spanish": 0.26, - "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, - "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22, - "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, - "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.24666666666666667, - "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, - "English,Malay,Chinese,Filipino,Spanish": 0.2733333333333333, - "English,Malay,Chinese,Indonesian,Spanish": 0.2, - "English,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, - "English,Chinese,Filipino,Indonesian,Spanish": 0.31333333333333335, - "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22 + "Vietnamese,English,Malay,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.2, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "English,Malay,Chinese,Filipino,Indonesian": 0.19333333333333333, + "English,Malay,Chinese,Filipino,Spanish": 0.23333333333333334, + "English,Malay,Chinese,Indonesian,Spanish": 0.22, + "English,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667 }, "6_combine": { "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16666666666666666, - "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.20666666666666667, - "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.15333333333333332, - "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2, - "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, - "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, - "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18 }, "7_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16 } }, - "AC3_2": 0.5521034482258871, - "AC3_3": 0.45501530919802347, - "AC3_4": 0.3814373716175554, - "AC3_5": 0.3224412370715179, - "AC3_6": 0.2753219447714733, - "AC3_7": 0.23884615381170488 + "AC3_2": 0.4421612485846026, + "AC3_3": 0.36681987787525405, + "AC3_4": 0.3127500703938001, + "AC3_5": 0.27381335782503197, + "AC3_6": 0.24522996907681355, + "AC3_7": 0.2245115452512012 }, "prompt_5": { - "overall_acc": 0.5838095238095239, + "overall_acc": 0.36, "language_acc": { - "Vietnamese": 0.5333333333333333, - "English": 0.64, - "Malay": 0.5866666666666667, - "Chinese": 0.5733333333333334, - "Filipino": 0.6, - "Indonesian": 0.5733333333333334, - "Spanish": 0.58 + "Vietnamese": 0.37333333333333335, + "English": 0.3933333333333333, + "Malay": 0.37333333333333335, + "Chinese": 0.34, + "Filipino": 0.31333333333333335, + "Indonesian": 0.36, + "Spanish": 0.36666666666666664 }, - "consistency_score_2": 0.654285714285714, - "consistency_score_3": 0.504, - "consistency_score_4": 0.4072380952380952, - "consistency_score_5": 0.33682539682539675, - "consistency_score_6": 0.28285714285714286, - "consistency_score_7": 0.24, + "consistency_score_2": 0.5231746031746033, + "consistency_score_3": 0.34476190476190477, + "consistency_score_4": 0.25390476190476197, + "consistency_score_5": 0.19936507936507936, + "consistency_score_6": 0.1638095238095238, + "consistency_score_7": 0.14, "detailed_consistency_score": { "2_combine": { - "Vietnamese,English": 0.54, - "Vietnamese,Malay": 0.5733333333333334, - "Vietnamese,Chinese": 0.58, - "Vietnamese,Filipino": 0.5933333333333334, - "Vietnamese,Indonesian": 0.5666666666666667, - "Vietnamese,Spanish": 0.58, - "English,Malay": 0.7133333333333334, - "English,Chinese": 0.64, - "English,Filipino": 0.6733333333333333, - "English,Indonesian": 0.7, - "English,Spanish": 0.7666666666666667, - "Malay,Chinese": 0.6533333333333333, - "Malay,Filipino": 0.64, - "Malay,Indonesian": 0.7333333333333333, - "Malay,Spanish": 0.7333333333333333, - "Chinese,Filipino": 0.6533333333333333, - "Chinese,Indonesian": 0.6533333333333333, - "Chinese,Spanish": 0.6866666666666666, - "Filipino,Indonesian": 0.7, - "Filipino,Spanish": 0.66, - "Indonesian,Spanish": 0.7 + "Vietnamese,English": 0.41333333333333333, + "Vietnamese,Malay": 0.5933333333333334, + "Vietnamese,Chinese": 0.38666666666666666, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Spanish": 0.4866666666666667, + "English,Malay": 0.5333333333333333, + "English,Chinese": 0.62, + "English,Filipino": 0.49333333333333335, + "English,Indonesian": 0.4, + "English,Spanish": 0.5933333333333334, + "Malay,Chinese": 0.5, + "Malay,Filipino": 0.6133333333333333, + "Malay,Indonesian": 0.6666666666666666, + "Malay,Spanish": 0.56, + "Chinese,Filipino": 0.46, + "Chinese,Indonesian": 0.44666666666666666, + "Chinese,Spanish": 0.5733333333333334, + "Filipino,Indonesian": 0.6133333333333333, + "Filipino,Spanish": 0.5133333333333333, + "Indonesian,Spanish": 0.47333333333333333 }, "3_combine": { - "Vietnamese,English,Malay": 0.4533333333333333, - "Vietnamese,English,Chinese": 0.41333333333333333, - "Vietnamese,English,Filipino": 0.43333333333333335, - "Vietnamese,English,Indonesian": 0.4266666666666667, - "Vietnamese,English,Spanish": 0.4533333333333333, - "Vietnamese,Malay,Chinese": 0.4533333333333333, - "Vietnamese,Malay,Filipino": 0.44, - "Vietnamese,Malay,Indonesian": 0.46, - "Vietnamese,Malay,Spanish": 0.4666666666666667, - "Vietnamese,Chinese,Filipino": 0.4533333333333333, - "Vietnamese,Chinese,Indonesian": 0.43333333333333335, - "Vietnamese,Chinese,Spanish": 0.46, - "Vietnamese,Filipino,Indonesian": 0.4533333333333333, - "Vietnamese,Filipino,Spanish": 0.44666666666666666, - "Vietnamese,Indonesian,Spanish": 0.44, - "English,Malay,Chinese": 0.52, - "English,Malay,Filipino": 0.5333333333333333, - "English,Malay,Indonesian": 0.5866666666666667, - "English,Malay,Spanish": 0.62, - "English,Chinese,Filipino": 0.5066666666666667, - "English,Chinese,Indonesian": 0.5133333333333333, - "English,Chinese,Spanish": 0.5533333333333333, - "English,Filipino,Indonesian": 0.56, - "English,Filipino,Spanish": 0.58, - "English,Indonesian,Spanish": 0.5866666666666667, - "Malay,Chinese,Filipino": 0.5, - "Malay,Chinese,Indonesian": 0.54, - "Malay,Chinese,Spanish": 0.56, - "Malay,Filipino,Indonesian": 0.54, - "Malay,Filipino,Spanish": 0.5333333333333333, - "Malay,Indonesian,Spanish": 0.5933333333333334, - "Chinese,Filipino,Indonesian": 0.5266666666666666, - "Chinese,Filipino,Spanish": 0.52, - "Chinese,Indonesian,Spanish": 0.5333333333333333, - "Filipino,Indonesian,Spanish": 0.5466666666666666 + "Vietnamese,English,Malay": 0.32666666666666666, + "Vietnamese,English,Chinese": 0.2866666666666667, + "Vietnamese,English,Filipino": 0.2733333333333333, + "Vietnamese,English,Indonesian": 0.24666666666666667, + "Vietnamese,English,Spanish": 0.32, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Filipino": 0.4, + "Vietnamese,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Malay,Spanish": 0.38666666666666666, + "Vietnamese,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Chinese,Indonesian": 0.2866666666666667, + "Vietnamese,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,Filipino,Indonesian": 0.36, + "Vietnamese,Filipino,Spanish": 0.31333333333333335, + "Vietnamese,Indonesian,Spanish": 0.32666666666666666, + "English,Malay,Chinese": 0.38, + "English,Malay,Filipino": 0.37333333333333335, + "English,Malay,Indonesian": 0.35333333333333333, + "English,Malay,Spanish": 0.4, + "English,Chinese,Filipino": 0.3466666666666667, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.43333333333333335, + "English,Filipino,Indonesian": 0.30666666666666664, + "English,Filipino,Spanish": 0.34, + "English,Indonesian,Spanish": 0.3, + "Malay,Chinese,Filipino": 0.3333333333333333, + "Malay,Chinese,Indonesian": 0.36666666666666664, + "Malay,Chinese,Spanish": 0.37333333333333335, + "Malay,Filipino,Indonesian": 0.47333333333333333, + "Malay,Filipino,Spanish": 0.42, + "Malay,Indonesian,Spanish": 0.42, + "Chinese,Filipino,Indonesian": 0.32, + "Chinese,Filipino,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Spanish": 0.31333333333333335, + "Filipino,Indonesian,Spanish": 0.35333333333333333 }, "4_combine": { - "Vietnamese,English,Malay,Chinese": 0.36, - "Vietnamese,English,Malay,Filipino": 0.36, - "Vietnamese,English,Malay,Indonesian": 0.38, - "Vietnamese,English,Malay,Spanish": 0.3933333333333333, - "Vietnamese,English,Chinese,Filipino": 0.36666666666666664, - "Vietnamese,English,Chinese,Indonesian": 0.34, - "Vietnamese,English,Chinese,Spanish": 0.36666666666666664, - "Vietnamese,English,Filipino,Indonesian": 0.36, - "Vietnamese,English,Filipino,Spanish": 0.38666666666666666, - "Vietnamese,English,Indonesian,Spanish": 0.36, - "Vietnamese,Malay,Chinese,Filipino": 0.38, - "Vietnamese,Malay,Chinese,Indonesian": 0.37333333333333335, - "Vietnamese,Malay,Chinese,Spanish": 0.38666666666666666, - "Vietnamese,Malay,Filipino,Indonesian": 0.36666666666666664, - "Vietnamese,Malay,Filipino,Spanish": 0.36666666666666664, - "Vietnamese,Malay,Indonesian,Spanish": 0.37333333333333335, - "Vietnamese,Chinese,Filipino,Indonesian": 0.36666666666666664, - "Vietnamese,Chinese,Filipino,Spanish": 0.38, - "Vietnamese,Chinese,Indonesian,Spanish": 0.36666666666666664, - "Vietnamese,Filipino,Indonesian,Spanish": 0.36666666666666664, - "English,Malay,Chinese,Filipino": 0.42, - "English,Malay,Chinese,Indonesian": 0.44666666666666666, - "English,Malay,Chinese,Spanish": 0.48, - "English,Malay,Filipino,Indonesian": 0.4666666666666667, - "English,Malay,Filipino,Spanish": 0.49333333333333335, - "English,Malay,Indonesian,Spanish": 0.5266666666666666, - "English,Chinese,Filipino,Indonesian": 0.4266666666666667, - "English,Chinese,Filipino,Spanish": 0.44666666666666666, - "English,Chinese,Indonesian,Spanish": 0.44666666666666666, - "English,Filipino,Indonesian,Spanish": 0.4866666666666667, - "Malay,Chinese,Filipino,Indonesian": 0.4266666666666667, - "Malay,Chinese,Filipino,Spanish": 0.43333333333333335, - "Malay,Chinese,Indonesian,Spanish": 0.4666666666666667, - "Malay,Filipino,Indonesian,Spanish": 0.46, - "Chinese,Filipino,Indonesian,Spanish": 0.4266666666666667 + "Vietnamese,English,Malay,Chinese": 0.24, + "Vietnamese,English,Malay,Filipino": 0.23333333333333334, + "Vietnamese,English,Malay,Indonesian": 0.22666666666666666, + "Vietnamese,English,Malay,Spanish": 0.26666666666666666, + "Vietnamese,English,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,English,Chinese,Indonesian": 0.2, + "Vietnamese,English,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Filipino,Spanish": 0.22, + "Vietnamese,English,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Indonesian": 0.32, + "Vietnamese,Malay,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.29333333333333333, + "Vietnamese,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,Chinese,Indonesian,Spanish": 0.22, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24, + "English,Malay,Chinese,Filipino": 0.2733333333333333, + "English,Malay,Chinese,Indonesian": 0.28, + "English,Malay,Chinese,Spanish": 0.32, + "English,Malay,Filipino,Indonesian": 0.26666666666666666, + "English,Malay,Filipino,Spanish": 0.3, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Chinese,Filipino,Indonesian": 0.26, + "English,Chinese,Filipino,Spanish": 0.26, + "English,Chinese,Indonesian,Spanish": 0.26666666666666666, + "English,Filipino,Indonesian,Spanish": 0.24, + "Malay,Chinese,Filipino,Indonesian": 0.28, + "Malay,Chinese,Filipino,Spanish": 0.28, + "Malay,Chinese,Indonesian,Spanish": 0.29333333333333333, + "Malay,Filipino,Indonesian,Spanish": 0.3333333333333333, + "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 }, "5_combine": { - "Vietnamese,English,Malay,Chinese,Filipino": 0.31333333333333335, - "Vietnamese,English,Malay,Chinese,Indonesian": 0.30666666666666664, - "Vietnamese,English,Malay,Chinese,Spanish": 0.32666666666666666, - "Vietnamese,English,Malay,Filipino,Indonesian": 0.31333333333333335, - "Vietnamese,English,Malay,Filipino,Spanish": 0.3333333333333333, - "Vietnamese,English,Malay,Indonesian,Spanish": 0.32666666666666666, - "Vietnamese,English,Chinese,Filipino,Indonesian": 0.3, - "Vietnamese,English,Chinese,Filipino,Spanish": 0.32666666666666666, - "Vietnamese,English,Chinese,Indonesian,Spanish": 0.3, - "Vietnamese,English,Filipino,Indonesian,Spanish": 0.32, - "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.31333333333333335, - "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.32, - "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.31333333333333335, - "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.30666666666666664, - "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.30666666666666664, - "English,Malay,Chinese,Filipino,Indonesian": 0.36666666666666664, - "English,Malay,Chinese,Filipino,Spanish": 0.3933333333333333, - "English,Malay,Chinese,Indonesian,Spanish": 0.41333333333333333, - "English,Malay,Filipino,Indonesian,Spanish": 0.43333333333333335, - "English,Chinese,Filipino,Indonesian,Spanish": 0.37333333333333335, - "Malay,Chinese,Filipino,Indonesian,Spanish": 0.36666666666666664 + "Vietnamese,English,Malay,Chinese,Filipino": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Spanish": 0.2, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Filipino,Spanish": 0.24, + "English,Malay,Chinese,Indonesian,Spanish": 0.24666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "English,Chinese,Filipino,Indonesian,Spanish": 0.22, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.24 }, "6_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.26666666666666666, - "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.2866666666666667, - "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.2733333333333333, - "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2866666666666667, - "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.26666666666666666, - "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.26, - "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.34 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667 }, "7_combine": { - "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.24 + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14 } }, - "AC3_2": 0.6170417581919202, - "AC3_3": 0.5409770617608103, - "AC3_4": 0.47979425784309665, - "AC3_5": 0.4271875204790501, - "AC3_6": 0.3810800627503778, - "AC3_7": 0.3401618496696913 + "AC3_2": 0.42651329973603463, + "AC3_3": 0.3522162161662396, + "AC3_4": 0.2977846726167121, + "AC3_5": 0.2566174800903323, + "AC3_6": 0.2251636363206506, + "AC3_7": 0.20159999995968 } }, "cross_logiqa": { "prompt_1": { - "overall_acc": 0.46185064935064934, + "overall_acc": 0.2987012987012987, "language_acc": { - "Spanish": 0.4772727272727273, - "Chinese": 0.4431818181818182, - "Vietnamese": 0.4943181818181818, - "Indonesian": 0.42045454545454547, - "Malay": 0.4375, - "Filipino": 0.4772727272727273, - "English": 0.48295454545454547 + "Spanish": 0.26136363636363635, + "Chinese": 0.26136363636363635, + "Vietnamese": 0.3465909090909091, + "Indonesian": 0.2784090909090909, + "Malay": 0.3409090909090909, + "Filipino": 0.2784090909090909, + "English": 0.32386363636363635 }, - "consistency_score_2": 0.5641233766233766, - "consistency_score_3": 0.3926948051948052, - "consistency_score_4": 0.2967532467532467, - "consistency_score_5": 0.2343073593073593, - "consistency_score_6": 0.19074675324675325, - "consistency_score_7": 0.1590909090909091, + "consistency_score_2": 0.4545454545454544, + "consistency_score_3": 0.25227272727272726, + "consistency_score_4": 0.15097402597402598, + "consistency_score_5": 0.09442640692640693, + "consistency_score_6": 0.06087662337662337, + "consistency_score_7": 0.03977272727272727, "detailed_consistency_score": { "2_combine": { - "Spanish,Chinese": 0.4715909090909091, - "Spanish,Vietnamese": 0.5738636363636364, - "Spanish,Indonesian": 0.5795454545454546, - "Spanish,Malay": 0.5965909090909091, - "Spanish,Filipino": 0.625, - "Spanish,English": 0.6079545454545454, - "Chinese,Vietnamese": 0.45454545454545453, - "Chinese,Indonesian": 0.4602272727272727, - "Chinese,Malay": 0.5227272727272727, - "Chinese,Filipino": 0.5170454545454546, - "Chinese,English": 0.45454545454545453, - "Vietnamese,Indonesian": 0.5965909090909091, - "Vietnamese,Malay": 0.5113636363636364, - "Vietnamese,Filipino": 0.5965909090909091, - "Vietnamese,English": 0.5852272727272727, + "Spanish,Chinese": 0.3806818181818182, + "Spanish,Vietnamese": 0.2897727272727273, + "Spanish,Indonesian": 0.32954545454545453, + "Spanish,Malay": 0.3465909090909091, + "Spanish,Filipino": 0.4147727272727273, + "Spanish,English": 0.5397727272727273, + "Chinese,Vietnamese": 0.3806818181818182, + "Chinese,Indonesian": 0.4375, + "Chinese,Malay": 0.4772727272727273, + "Chinese,Filipino": 0.4715909090909091, + "Chinese,English": 0.4772727272727273, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Malay": 0.4602272727272727, + "Vietnamese,Filipino": 0.5738636363636364, + "Vietnamese,English": 0.3465909090909091, "Indonesian,Malay": 0.6136363636363636, - "Indonesian,Filipino": 0.6420454545454546, - "Indonesian,English": 0.5795454545454546, - "Malay,Filipino": 0.6534090909090909, - "Malay,English": 0.5795454545454546, - "Filipino,English": 0.625 + "Indonesian,Filipino": 0.5852272727272727, + "Indonesian,English": 0.4147727272727273, + "Malay,Filipino": 0.6079545454545454, + "Malay,English": 0.4772727272727273, + "Filipino,English": 0.4147727272727273 }, "3_combine": { - "Spanish,Chinese,Vietnamese": 0.30113636363636365, - "Spanish,Chinese,Indonesian": 0.3181818181818182, - "Spanish,Chinese,Malay": 0.3465909090909091, - "Spanish,Chinese,Filipino": 0.35795454545454547, - "Spanish,Chinese,English": 0.32386363636363635, - "Spanish,Vietnamese,Indonesian": 0.42613636363636365, - "Spanish,Vietnamese,Malay": 0.38636363636363635, - "Spanish,Vietnamese,Filipino": 0.4431818181818182, - "Spanish,Vietnamese,English": 0.4147727272727273, - "Spanish,Indonesian,Malay": 0.44886363636363635, - "Spanish,Indonesian,Filipino": 0.4715909090909091, - "Spanish,Indonesian,English": 0.42045454545454547, - "Spanish,Malay,Filipino": 0.4659090909090909, - "Spanish,Malay,English": 0.42613636363636365, - "Spanish,Filipino,English": 0.45454545454545453, - "Chinese,Vietnamese,Indonesian": 0.30113636363636365, - "Chinese,Vietnamese,Malay": 0.3181818181818182, - "Chinese,Vietnamese,Filipino": 0.3465909090909091, - "Chinese,Vietnamese,English": 0.3125, - "Chinese,Indonesian,Malay": 0.3522727272727273, - "Chinese,Indonesian,Filipino": 0.3522727272727273, - "Chinese,Indonesian,English": 0.3068181818181818, - "Chinese,Malay,Filipino": 0.39204545454545453, - "Chinese,Malay,English": 0.3465909090909091, - "Chinese,Filipino,English": 0.3409090909090909, - "Vietnamese,Indonesian,Malay": 0.4090909090909091, - "Vietnamese,Indonesian,Filipino": 0.4602272727272727, - "Vietnamese,Indonesian,English": 0.4090909090909091, - "Vietnamese,Malay,Filipino": 0.42045454545454547, - "Vietnamese,Malay,English": 0.3806818181818182, - "Vietnamese,Filipino,English": 0.4375, - "Indonesian,Malay,Filipino": 0.4943181818181818, - "Indonesian,Malay,English": 0.42613636363636365, - "Indonesian,Filipino,English": 0.4659090909090909, - "Malay,Filipino,English": 0.4659090909090909 + "Spanish,Chinese,Vietnamese": 0.16477272727272727, + "Spanish,Chinese,Indonesian": 0.16477272727272727, + "Spanish,Chinese,Malay": 0.19886363636363635, + "Spanish,Chinese,Filipino": 0.21022727272727273, + "Spanish,Chinese,English": 0.26704545454545453, + "Spanish,Vietnamese,Indonesian": 0.1534090909090909, + "Spanish,Vietnamese,Malay": 0.14204545454545456, + "Spanish,Vietnamese,Filipino": 0.19886363636363635, + "Spanish,Vietnamese,English": 0.18181818181818182, + "Spanish,Indonesian,Malay": 0.2159090909090909, + "Spanish,Indonesian,Filipino": 0.22727272727272727, + "Spanish,Indonesian,English": 0.21022727272727273, + "Spanish,Malay,Filipino": 0.2556818181818182, + "Spanish,Malay,English": 0.23863636363636365, + "Spanish,Filipino,English": 0.25, + "Chinese,Vietnamese,Indonesian": 0.23295454545454544, + "Chinese,Vietnamese,Malay": 0.23295454545454544, + "Chinese,Vietnamese,Filipino": 0.29545454545454547, + "Chinese,Vietnamese,English": 0.21022727272727273, + "Chinese,Indonesian,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino": 0.3181818181818182, + "Chinese,Indonesian,English": 0.24431818181818182, + "Chinese,Malay,Filipino": 0.3409090909090909, + "Chinese,Malay,English": 0.2840909090909091, + "Chinese,Filipino,English": 0.26136363636363635, + "Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,English": 0.20454545454545456, + "Vietnamese,Malay,Filipino": 0.3522727272727273, + "Vietnamese,Malay,English": 0.20454545454545456, + "Vietnamese,Filipino,English": 0.22727272727272727, + "Indonesian,Malay,Filipino": 0.4375, + "Indonesian,Malay,English": 0.3068181818181818, + "Indonesian,Filipino,English": 0.26136363636363635, + "Malay,Filipino,English": 0.2897727272727273 }, "4_combine": { - "Spanish,Chinese,Vietnamese,Indonesian": 0.23295454545454544, - "Spanish,Chinese,Vietnamese,Malay": 0.24431818181818182, - "Spanish,Chinese,Vietnamese,Filipino": 0.26136363636363635, - "Spanish,Chinese,Vietnamese,English": 0.2215909090909091, - "Spanish,Chinese,Indonesian,Malay": 0.2784090909090909, - "Spanish,Chinese,Indonesian,Filipino": 0.2840909090909091, - "Spanish,Chinese,Indonesian,English": 0.23863636363636365, - "Spanish,Chinese,Malay,Filipino": 0.30113636363636365, - "Spanish,Chinese,Malay,English": 0.26136363636363635, - "Spanish,Chinese,Filipino,English": 0.26136363636363635, - "Spanish,Vietnamese,Indonesian,Malay": 0.3465909090909091, - "Spanish,Vietnamese,Indonesian,Filipino": 0.3693181818181818, - "Spanish,Vietnamese,Indonesian,English": 0.32386363636363635, - "Spanish,Vietnamese,Malay,Filipino": 0.32954545454545453, - "Spanish,Vietnamese,Malay,English": 0.2897727272727273, - "Spanish,Vietnamese,Filipino,English": 0.3352272727272727, - "Spanish,Indonesian,Malay,Filipino": 0.38636363636363635, - "Spanish,Indonesian,Malay,English": 0.3352272727272727, - "Spanish,Indonesian,Filipino,English": 0.35795454545454547, - "Spanish,Malay,Filipino,English": 0.3522727272727273, - "Chinese,Vietnamese,Indonesian,Malay": 0.2556818181818182, - "Chinese,Vietnamese,Indonesian,Filipino": 0.26704545454545453, - "Chinese,Vietnamese,Indonesian,English": 0.2159090909090909, - "Chinese,Vietnamese,Malay,Filipino": 0.2784090909090909, - "Chinese,Vietnamese,Malay,English": 0.24431818181818182, - "Chinese,Vietnamese,Filipino,English": 0.2556818181818182, - "Chinese,Indonesian,Malay,Filipino": 0.3125, - "Chinese,Indonesian,Malay,English": 0.2727272727272727, - "Chinese,Indonesian,Filipino,English": 0.2556818181818182, - "Chinese,Malay,Filipino,English": 0.3068181818181818, - "Vietnamese,Indonesian,Malay,Filipino": 0.35795454545454547, - "Vietnamese,Indonesian,Malay,English": 0.3068181818181818, - "Vietnamese,Indonesian,Filipino,English": 0.3465909090909091, - "Vietnamese,Malay,Filipino,English": 0.32954545454545453, - "Indonesian,Malay,Filipino,English": 0.3693181818181818 + "Spanish,Chinese,Vietnamese,Indonesian": 0.09659090909090909, + "Spanish,Chinese,Vietnamese,Malay": 0.09090909090909091, + "Spanish,Chinese,Vietnamese,Filipino": 0.125, + "Spanish,Chinese,Vietnamese,English": 0.11363636363636363, + "Spanish,Chinese,Indonesian,Malay": 0.125, + "Spanish,Chinese,Indonesian,Filipino": 0.125, + "Spanish,Chinese,Indonesian,English": 0.11931818181818182, + "Spanish,Chinese,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Malay,English": 0.14204545454545456, + "Spanish,Chinese,Filipino,English": 0.1534090909090909, + "Spanish,Vietnamese,Indonesian,Malay": 0.10795454545454546, + "Spanish,Vietnamese,Indonesian,Filipino": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,English": 0.09090909090909091, + "Spanish,Vietnamese,Malay,Filipino": 0.13068181818181818, + "Spanish,Vietnamese,Malay,English": 0.07954545454545454, + "Spanish,Vietnamese,Filipino,English": 0.125, + "Spanish,Indonesian,Malay,Filipino": 0.18181818181818182, + "Spanish,Indonesian,Malay,English": 0.14772727272727273, + "Spanish,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,English": 0.13068181818181818, + "Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Filipino,English": 0.1534090909090909, + "Chinese,Indonesian,Malay,Filipino": 0.2556818181818182, + "Chinese,Indonesian,Malay,English": 0.19318181818181818, + "Chinese,Indonesian,Filipino,English": 0.17045454545454544, + "Chinese,Malay,Filipino,English": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,English": 0.1590909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.1534090909090909, + "Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Indonesian,Malay,Filipino,English": 0.2159090909090909 }, "5_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.2215909090909091, - "Spanish,Chinese,Vietnamese,Indonesian,English": 0.17613636363636365, - "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, - "Spanish,Chinese,Vietnamese,Malay,English": 0.18181818181818182, - "Spanish,Chinese,Vietnamese,Filipino,English": 0.1875, - "Spanish,Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, - "Spanish,Chinese,Indonesian,Malay,English": 0.2215909090909091, - "Spanish,Chinese,Indonesian,Filipino,English": 0.21022727272727273, - "Spanish,Chinese,Malay,Filipino,English": 0.23863636363636365, - "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.3068181818181818, - "Spanish,Vietnamese,Indonesian,Malay,English": 0.26136363636363635, - "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2840909090909091, - "Spanish,Vietnamese,Malay,Filipino,English": 0.25, - "Spanish,Indonesian,Malay,Filipino,English": 0.29545454545454547, - "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, - "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, - "Chinese,Vietnamese,Indonesian,Filipino,English": 0.19886363636363635, - "Chinese,Vietnamese,Malay,Filipino,English": 0.2215909090909091, - "Chinese,Indonesian,Malay,Filipino,English": 0.24431818181818182, - "Vietnamese,Indonesian,Malay,Filipino,English": 0.2784090909090909 + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.0625, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.07954545454545454, + "Spanish,Chinese,Vietnamese,Malay,English": 0.056818181818181816, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.08522727272727272, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Spanish,Chinese,Indonesian,Malay,English": 0.09090909090909091, + "Spanish,Chinese,Indonesian,Filipino,English": 0.08522727272727272, + "Spanish,Chinese,Malay,Filipino,English": 0.10227272727272728, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.10227272727272728, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.0625, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.07386363636363637, + "Spanish,Vietnamese,Malay,Filipino,English": 0.06818181818181818, + "Spanish,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.14772727272727273, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.10795454545454546, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.10227272727272728, + "Chinese,Vietnamese,Malay,Filipino,English": 0.10795454545454546, + "Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.125 }, "6_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, - "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, - "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.16477272727272727, - "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.20454545454545456, - "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.23295454545454544, - "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1875 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.06818181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.045454545454545456, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.05113636363636364, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.045454545454545456, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.07386363636363637, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.056818181818181816, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.08522727272727272 }, "7_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.03977272727272727 } }, - "AC3_2": 0.5078895589674293, - "AC3_3": 0.42447443793635364, - "AC3_4": 0.36133660898852776, - "AC3_5": 0.3108920810572946, - "AC3_6": 0.2699873198526651, - "AC3_7": 0.23666072485790582 + "AC3_2": 0.36050156735025934, + "AC3_3": 0.27353082975696563, + "AC3_4": 0.20057199114110585, + "AC3_5": 0.14349174565758566, + "AC3_6": 0.10114039456599165, + "AC3_7": 0.07019838672440966 }, "prompt_2": { - "overall_acc": 0.4837662337662338, + "overall_acc": 0.3051948051948052, "language_acc": { - "Spanish": 0.4943181818181818, - "Chinese": 0.4147727272727273, - "Vietnamese": 0.5, - "Indonesian": 0.4715909090909091, - "Malay": 0.4772727272727273, - "Filipino": 0.4715909090909091, - "English": 0.5568181818181818 + "Spanish": 0.3181818181818182, + "Chinese": 0.29545454545454547, + "Vietnamese": 0.2784090909090909, + "Indonesian": 0.29545454545454547, + "Malay": 0.3125, + "Filipino": 0.3125, + "English": 0.32386363636363635 }, - "consistency_score_2": 0.5446428571428573, - "consistency_score_3": 0.3696428571428572, - "consistency_score_4": 0.26801948051948055, - "consistency_score_5": 0.19994588744588743, - "consistency_score_6": 0.15097402597402595, - "consistency_score_7": 0.11363636363636363, + "consistency_score_2": 0.3977272727272727, + "consistency_score_3": 0.19431818181818178, + "consistency_score_4": 0.10292207792207793, + "consistency_score_5": 0.055194805194805185, + "consistency_score_6": 0.027597402597402596, + "consistency_score_7": 0.011363636363636364, "detailed_consistency_score": { "2_combine": { - "Spanish,Chinese": 0.4090909090909091, - "Spanish,Vietnamese": 0.5681818181818182, - "Spanish,Indonesian": 0.5454545454545454, - "Spanish,Malay": 0.6079545454545454, - "Spanish,Filipino": 0.6136363636363636, - "Spanish,English": 0.6363636363636364, - "Chinese,Vietnamese": 0.4147727272727273, - "Chinese,Indonesian": 0.42045454545454547, - "Chinese,Malay": 0.375, + "Spanish,Chinese": 0.3806818181818182, + "Spanish,Vietnamese": 0.25, + "Spanish,Indonesian": 0.4090909090909091, + "Spanish,Malay": 0.5284090909090909, + "Spanish,Filipino": 0.4090909090909091, + "Spanish,English": 0.5227272727272727, + "Chinese,Vietnamese": 0.22727272727272727, + "Chinese,Indonesian": 0.4090909090909091, + "Chinese,Malay": 0.39204545454545453, "Chinese,Filipino": 0.3977272727272727, - "Chinese,English": 0.4375, - "Vietnamese,Indonesian": 0.5625, - "Vietnamese,Malay": 0.5454545454545454, - "Vietnamese,Filipino": 0.5625, - "Vietnamese,English": 0.6193181818181818, - "Indonesian,Malay": 0.6534090909090909, - "Indonesian,Filipino": 0.5795454545454546, - "Indonesian,English": 0.6363636363636364, - "Malay,Filipino": 0.5909090909090909, - "Malay,English": 0.6306818181818182, - "Filipino,English": 0.6306818181818182 + "Chinese,English": 0.39204545454545453, + "Vietnamese,Indonesian": 0.32386363636363635, + "Vietnamese,Malay": 0.29545454545454547, + "Vietnamese,Filipino": 0.2727272727272727, + "Vietnamese,English": 0.26704545454545453, + "Indonesian,Malay": 0.5227272727272727, + "Indonesian,Filipino": 0.4943181818181818, + "Indonesian,English": 0.45454545454545453, + "Malay,Filipino": 0.48295454545454547, + "Malay,English": 0.4715909090909091, + "Filipino,English": 0.44886363636363635 }, "3_combine": { - "Spanish,Chinese,Vietnamese": 0.2840909090909091, - "Spanish,Chinese,Indonesian": 0.26136363636363635, - "Spanish,Chinese,Malay": 0.2784090909090909, - "Spanish,Chinese,Filipino": 0.2727272727272727, - "Spanish,Chinese,English": 0.32386363636363635, - "Spanish,Vietnamese,Indonesian": 0.3977272727272727, - "Spanish,Vietnamese,Malay": 0.4147727272727273, - "Spanish,Vietnamese,Filipino": 0.4034090909090909, - "Spanish,Vietnamese,English": 0.4431818181818182, - "Spanish,Indonesian,Malay": 0.4375, - "Spanish,Indonesian,Filipino": 0.4034090909090909, - "Spanish,Indonesian,English": 0.44886363636363635, - "Spanish,Malay,Filipino": 0.4318181818181818, - "Spanish,Malay,English": 0.4715909090909091, - "Spanish,Filipino,English": 0.4715909090909091, - "Chinese,Vietnamese,Indonesian": 0.2897727272727273, - "Chinese,Vietnamese,Malay": 0.26136363636363635, - "Chinese,Vietnamese,Filipino": 0.2556818181818182, - "Chinese,Vietnamese,English": 0.2897727272727273, - "Chinese,Indonesian,Malay": 0.29545454545454547, - "Chinese,Indonesian,Filipino": 0.26704545454545453, - "Chinese,Indonesian,English": 0.30113636363636365, - "Chinese,Malay,Filipino": 0.25, - "Chinese,Malay,English": 0.2897727272727273, - "Chinese,Filipino,English": 0.2897727272727273, - "Vietnamese,Indonesian,Malay": 0.42045454545454547, - "Vietnamese,Indonesian,Filipino": 0.39204545454545453, - "Vietnamese,Indonesian,English": 0.44886363636363635, - "Vietnamese,Malay,Filipino": 0.3977272727272727, - "Vietnamese,Malay,English": 0.44886363636363635, - "Vietnamese,Filipino,English": 0.4375, - "Indonesian,Malay,Filipino": 0.44886363636363635, - "Indonesian,Malay,English": 0.4943181818181818, - "Indonesian,Filipino,English": 0.44886363636363635, - "Malay,Filipino,English": 0.4659090909090909 + "Spanish,Chinese,Vietnamese": 0.07954545454545454, + "Spanish,Chinese,Indonesian": 0.18181818181818182, + "Spanish,Chinese,Malay": 0.24431818181818182, + "Spanish,Chinese,Filipino": 0.19886363636363635, + "Spanish,Chinese,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian": 0.10795454545454546, + "Spanish,Vietnamese,Malay": 0.1590909090909091, + "Spanish,Vietnamese,Filipino": 0.10795454545454546, + "Spanish,Vietnamese,English": 0.13068181818181818, + "Spanish,Indonesian,Malay": 0.2840909090909091, + "Spanish,Indonesian,Filipino": 0.22727272727272727, + "Spanish,Indonesian,English": 0.24431818181818182, + "Spanish,Malay,Filipino": 0.2727272727272727, + "Spanish,Malay,English": 0.32386363636363635, + "Spanish,Filipino,English": 0.25, + "Chinese,Vietnamese,Indonesian": 0.125, + "Chinese,Vietnamese,Malay": 0.10795454545454546, + "Chinese,Vietnamese,Filipino": 0.10227272727272728, + "Chinese,Vietnamese,English": 0.07386363636363637, + "Chinese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Indonesian,Filipino": 0.23295454545454544, + "Chinese,Indonesian,English": 0.22727272727272727, + "Chinese,Malay,Filipino": 0.23863636363636365, + "Chinese,Malay,English": 0.22727272727272727, + "Chinese,Filipino,English": 0.20454545454545456, + "Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "Vietnamese,Indonesian,English": 0.14204545454545456, + "Vietnamese,Malay,Filipino": 0.14204545454545456, + "Vietnamese,Malay,English": 0.14204545454545456, + "Vietnamese,Filipino,English": 0.11363636363636363, + "Indonesian,Malay,Filipino": 0.29545454545454547, + "Indonesian,Malay,English": 0.30113636363636365, + "Indonesian,Filipino,English": 0.2556818181818182, + "Malay,Filipino,English": 0.25 }, "4_combine": { - "Spanish,Chinese,Vietnamese,Indonesian": 0.2159090909090909, - "Spanish,Chinese,Vietnamese,Malay": 0.20454545454545456, - "Spanish,Chinese,Vietnamese,Filipino": 0.19318181818181818, - "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, - "Spanish,Chinese,Indonesian,Malay": 0.21022727272727273, - "Spanish,Chinese,Indonesian,Filipino": 0.18181818181818182, - "Spanish,Chinese,Indonesian,English": 0.23295454545454544, - "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, - "Spanish,Chinese,Malay,English": 0.23863636363636365, - "Spanish,Chinese,Filipino,English": 0.2215909090909091, - "Spanish,Vietnamese,Indonesian,Malay": 0.32386363636363635, - "Spanish,Vietnamese,Indonesian,Filipino": 0.29545454545454547, - "Spanish,Vietnamese,Indonesian,English": 0.3409090909090909, - "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, - "Spanish,Vietnamese,Malay,English": 0.3465909090909091, - "Spanish,Vietnamese,Filipino,English": 0.3409090909090909, - "Spanish,Indonesian,Malay,Filipino": 0.3352272727272727, - "Spanish,Indonesian,Malay,English": 0.3806818181818182, - "Spanish,Indonesian,Filipino,English": 0.3465909090909091, - "Spanish,Malay,Filipino,English": 0.36363636363636365, - "Chinese,Vietnamese,Indonesian,Malay": 0.2159090909090909, - "Chinese,Vietnamese,Indonesian,Filipino": 0.20454545454545456, - "Chinese,Vietnamese,Indonesian,English": 0.22727272727272727, - "Chinese,Vietnamese,Malay,Filipino": 0.1875, - "Chinese,Vietnamese,Malay,English": 0.2159090909090909, - "Chinese,Vietnamese,Filipino,English": 0.20454545454545456, - "Chinese,Indonesian,Malay,Filipino": 0.21022727272727273, - "Chinese,Indonesian,Malay,English": 0.23863636363636365, - "Chinese,Indonesian,Filipino,English": 0.2159090909090909, - "Chinese,Malay,Filipino,English": 0.21022727272727273, - "Vietnamese,Indonesian,Malay,Filipino": 0.3181818181818182, - "Vietnamese,Indonesian,Malay,English": 0.36363636363636365, - "Vietnamese,Indonesian,Filipino,English": 0.32386363636363635, - "Vietnamese,Malay,Filipino,English": 0.3409090909090909, - "Indonesian,Malay,Filipino,English": 0.3806818181818182 + "Spanish,Chinese,Vietnamese,Indonesian": 0.045454545454545456, + "Spanish,Chinese,Vietnamese,Malay": 0.0625, + "Spanish,Chinese,Vietnamese,Filipino": 0.045454545454545456, + "Spanish,Chinese,Vietnamese,English": 0.03977272727272727, + "Spanish,Chinese,Indonesian,Malay": 0.14204545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.11931818181818182, + "Spanish,Chinese,Indonesian,English": 0.13068181818181818, + "Spanish,Chinese,Malay,Filipino": 0.14772727272727273, + "Spanish,Chinese,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Malay": 0.08522727272727272, + "Spanish,Vietnamese,Indonesian,Filipino": 0.0625, + "Spanish,Vietnamese,Indonesian,English": 0.056818181818181816, + "Spanish,Vietnamese,Malay,Filipino": 0.07386363636363637, + "Spanish,Vietnamese,Malay,English": 0.09659090909090909, + "Spanish,Vietnamese,Filipino,English": 0.05113636363636364, + "Spanish,Indonesian,Malay,Filipino": 0.17613636363636365, + "Spanish,Indonesian,Malay,English": 0.1875, + "Spanish,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Malay,Filipino,English": 0.16477272727272727, + "Chinese,Vietnamese,Indonesian,Malay": 0.07954545454545454, + "Chinese,Vietnamese,Indonesian,Filipino": 0.08522727272727272, + "Chinese,Vietnamese,Indonesian,English": 0.05113636363636364, + "Chinese,Vietnamese,Malay,Filipino": 0.06818181818181818, + "Chinese,Vietnamese,Malay,English": 0.05113636363636364, + "Chinese,Vietnamese,Filipino,English": 0.03977272727272727, + "Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Chinese,Indonesian,Malay,English": 0.1590909090909091, + "Chinese,Indonesian,Filipino,English": 0.13636363636363635, + "Chinese,Malay,Filipino,English": 0.13068181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Vietnamese,Indonesian,Malay,English": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,English": 0.07386363636363637, + "Vietnamese,Malay,Filipino,English": 0.0625, + "Indonesian,Malay,Filipino,English": 0.1875 }, "5_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.17045454545454544, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.14772727272727273, - "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1875, - "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1534090909090909, - "Spanish,Chinese,Vietnamese,Malay,English": 0.18181818181818182, - "Spanish,Chinese,Vietnamese,Filipino,English": 0.16477272727272727, - "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, - "Spanish,Chinese,Indonesian,Malay,English": 0.19318181818181818, - "Spanish,Chinese,Indonesian,Filipino,English": 0.16477272727272727, - "Spanish,Chinese,Malay,Filipino,English": 0.17045454545454544, - "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2556818181818182, - "Spanish,Vietnamese,Indonesian,Malay,English": 0.2897727272727273, - "Spanish,Vietnamese,Indonesian,Filipino,English": 0.26136363636363635, - "Spanish,Vietnamese,Malay,Filipino,English": 0.2784090909090909, - "Spanish,Indonesian,Malay,Filipino,English": 0.30113636363636365, - "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, - "Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, - "Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, - "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, - "Chinese,Indonesian,Malay,Filipino,English": 0.18181818181818182, - "Vietnamese,Indonesian,Malay,Filipino,English": 0.2784090909090909 + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.03977272727272727, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.022727272727272728, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Malay,English": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.017045454545454544, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.09659090909090909, + "Spanish,Chinese,Indonesian,Malay,English": 0.10795454545454546, + "Spanish,Chinese,Indonesian,Filipino,English": 0.08522727272727272, + "Spanish,Chinese,Malay,Filipino,English": 0.09659090909090909, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.056818181818181816, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.05113636363636364, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.022727272727272728, + "Spanish,Vietnamese,Malay,Filipino,English": 0.028409090909090908, + "Spanish,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.0625, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.03977272727272727, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.03409090909090909, + "Chinese,Vietnamese,Malay,Filipino,English": 0.028409090909090908, + "Chinese,Indonesian,Malay,Filipino,English": 0.10227272727272728, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.05113636363636364 }, "6_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.125, - "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.1534090909090909, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, - "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.13636363636363635, - "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, - "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.23295454545454544, - "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.028409090909090908, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.022727272727272728, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.011363636363636364, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.011363636363636364, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.06818181818181818, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.022727272727272728, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.028409090909090908 }, "7_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.011363636363636364 } }, - "AC3_2": 0.5124027511058862, - "AC3_3": 0.41907388782828214, - "AC3_4": 0.3449354575002427, - "AC3_5": 0.2829467722207821, - "AC3_6": 0.230129205796275, - "AC3_7": 0.18404150194547764 + "AC3_2": 0.3453705647209693, + "AC3_3": 0.23745088186223393, + "AC3_4": 0.15393278158002802, + "AC3_5": 0.09348309345715401, + "AC3_6": 0.0506176749927089, + "AC3_7": 0.02191142190450015 }, "prompt_3": { - "overall_acc": 0.45860389610389607, + "overall_acc": 0.3003246753246754, "language_acc": { - "Spanish": 0.48863636363636365, - "Chinese": 0.39204545454545453, - "Vietnamese": 0.4318181818181818, - "Indonesian": 0.4943181818181818, - "Malay": 0.4602272727272727, - "Filipino": 0.4318181818181818, - "English": 0.5113636363636364 + "Spanish": 0.3068181818181818, + "Chinese": 0.2840909090909091, + "Vietnamese": 0.2897727272727273, + "Indonesian": 0.2840909090909091, + "Malay": 0.3068181818181818, + "Filipino": 0.30113636363636365, + "English": 0.32954545454545453 }, - "consistency_score_2": 0.5462662337662338, - "consistency_score_3": 0.3678571428571428, - "consistency_score_4": 0.2683441558441559, - "consistency_score_5": 0.20535714285714285, - "consistency_score_6": 0.16396103896103895, - "consistency_score_7": 0.13636363636363635, + "consistency_score_2": 0.35064935064935066, + "consistency_score_3": 0.14870129870129878, + "consistency_score_4": 0.06866883116883118, + "consistency_score_5": 0.032467532467532464, + "consistency_score_6": 0.01461038961038961, + "consistency_score_7": 0.005681818181818182, "detailed_consistency_score": { "2_combine": { - "Spanish,Chinese": 0.44886363636363635, - "Spanish,Vietnamese": 0.5340909090909091, - "Spanish,Indonesian": 0.5284090909090909, - "Spanish,Malay": 0.5852272727272727, - "Spanish,Filipino": 0.6420454545454546, - "Spanish,English": 0.6534090909090909, - "Chinese,Vietnamese": 0.44886363636363635, - "Chinese,Indonesian": 0.44886363636363635, - "Chinese,Malay": 0.4147727272727273, - "Chinese,Filipino": 0.44886363636363635, - "Chinese,English": 0.4602272727272727, - "Vietnamese,Indonesian": 0.5227272727272727, - "Vietnamese,Malay": 0.5284090909090909, - "Vietnamese,Filipino": 0.5681818181818182, - "Vietnamese,English": 0.5795454545454546, - "Indonesian,Malay": 0.5909090909090909, - "Indonesian,Filipino": 0.5795454545454546, - "Indonesian,English": 0.5852272727272727, - "Malay,Filipino": 0.625, - "Malay,English": 0.6306818181818182, - "Filipino,English": 0.6477272727272727 + "Spanish,Chinese": 0.32386363636363635, + "Spanish,Vietnamese": 0.24431818181818182, + "Spanish,Indonesian": 0.29545454545454547, + "Spanish,Malay": 0.3693181818181818, + "Spanish,Filipino": 0.3181818181818182, + "Spanish,English": 0.4943181818181818, + "Chinese,Vietnamese": 0.26704545454545453, + "Chinese,Indonesian": 0.32954545454545453, + "Chinese,Malay": 0.3125, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,English": 0.375, + "Vietnamese,Indonesian": 0.45454545454545453, + "Vietnamese,Malay": 0.375, + "Vietnamese,Filipino": 0.375, + "Vietnamese,English": 0.26704545454545453, + "Indonesian,Malay": 0.4375, + "Indonesian,Filipino": 0.3693181818181818, + "Indonesian,English": 0.29545454545454547, + "Malay,Filipino": 0.35795454545454547, + "Malay,English": 0.39204545454545453, + "Filipino,English": 0.375 }, "3_combine": { - "Spanish,Chinese,Vietnamese": 0.29545454545454547, - "Spanish,Chinese,Indonesian": 0.2727272727272727, - "Spanish,Chinese,Malay": 0.2897727272727273, - "Spanish,Chinese,Filipino": 0.3181818181818182, - "Spanish,Chinese,English": 0.3352272727272727, - "Spanish,Vietnamese,Indonesian": 0.35795454545454547, - "Spanish,Vietnamese,Malay": 0.3693181818181818, - "Spanish,Vietnamese,Filipino": 0.4034090909090909, - "Spanish,Vietnamese,English": 0.4147727272727273, - "Spanish,Indonesian,Malay": 0.3977272727272727, - "Spanish,Indonesian,Filipino": 0.4034090909090909, - "Spanish,Indonesian,English": 0.42613636363636365, - "Spanish,Malay,Filipino": 0.4602272727272727, - "Spanish,Malay,English": 0.4659090909090909, - "Spanish,Filipino,English": 0.4943181818181818, - "Chinese,Vietnamese,Indonesian": 0.2840909090909091, - "Chinese,Vietnamese,Malay": 0.26704545454545453, - "Chinese,Vietnamese,Filipino": 0.2840909090909091, - "Chinese,Vietnamese,English": 0.3068181818181818, - "Chinese,Indonesian,Malay": 0.2727272727272727, - "Chinese,Indonesian,Filipino": 0.2840909090909091, - "Chinese,Indonesian,English": 0.3068181818181818, - "Chinese,Malay,Filipino": 0.30113636363636365, - "Chinese,Malay,English": 0.3125, - "Chinese,Filipino,English": 0.3409090909090909, - "Vietnamese,Indonesian,Malay": 0.38636363636363635, - "Vietnamese,Indonesian,Filipino": 0.38636363636363635, - "Vietnamese,Indonesian,English": 0.39204545454545453, - "Vietnamese,Malay,Filipino": 0.42045454545454547, - "Vietnamese,Malay,English": 0.4090909090909091, - "Vietnamese,Filipino,English": 0.42613636363636365, - "Indonesian,Malay,Filipino": 0.42613636363636365, - "Indonesian,Malay,English": 0.4375, - "Indonesian,Filipino,English": 0.4375, - "Malay,Filipino,English": 0.48863636363636365 + "Spanish,Chinese,Vietnamese": 0.056818181818181816, + "Spanish,Chinese,Indonesian": 0.13068181818181818, + "Spanish,Chinese,Malay": 0.13636363636363635, + "Spanish,Chinese,Filipino": 0.13636363636363635, + "Spanish,Chinese,English": 0.20454545454545456, + "Spanish,Vietnamese,Indonesian": 0.11931818181818182, + "Spanish,Vietnamese,Malay": 0.10227272727272728, + "Spanish,Vietnamese,Filipino": 0.11931818181818182, + "Spanish,Vietnamese,English": 0.125, + "Spanish,Indonesian,Malay": 0.16477272727272727, + "Spanish,Indonesian,Filipino": 0.13068181818181818, + "Spanish,Indonesian,English": 0.13636363636363635, + "Spanish,Malay,Filipino": 0.13068181818181818, + "Spanish,Malay,English": 0.2215909090909091, + "Spanish,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian": 0.14772727272727273, + "Chinese,Vietnamese,Malay": 0.11363636363636363, + "Chinese,Vietnamese,Filipino": 0.13068181818181818, + "Chinese,Vietnamese,English": 0.08522727272727272, + "Chinese,Indonesian,Malay": 0.16477272727272727, + "Chinese,Indonesian,Filipino": 0.13068181818181818, + "Chinese,Indonesian,English": 0.13636363636363635, + "Chinese,Malay,Filipino": 0.13068181818181818, + "Chinese,Malay,English": 0.17613636363636365, + "Chinese,Filipino,English": 0.14204545454545456, + "Vietnamese,Indonesian,Malay": 0.24431818181818182, + "Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Vietnamese,Indonesian,English": 0.13636363636363635, + "Vietnamese,Malay,Filipino": 0.19318181818181818, + "Vietnamese,Malay,English": 0.125, + "Vietnamese,Filipino,English": 0.13636363636363635, + "Indonesian,Malay,Filipino": 0.19886363636363635, + "Indonesian,Malay,English": 0.16477272727272727, + "Indonesian,Filipino,English": 0.1534090909090909, + "Malay,Filipino,English": 0.16477272727272727 }, "4_combine": { - "Spanish,Chinese,Vietnamese,Indonesian": 0.21022727272727273, - "Spanish,Chinese,Vietnamese,Malay": 0.21022727272727273, - "Spanish,Chinese,Vietnamese,Filipino": 0.22727272727272727, - "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, - "Spanish,Chinese,Indonesian,Malay": 0.19318181818181818, - "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, - "Spanish,Chinese,Indonesian,English": 0.22727272727272727, - "Spanish,Chinese,Malay,Filipino": 0.24431818181818182, - "Spanish,Chinese,Malay,English": 0.26136363636363635, - "Spanish,Chinese,Filipino,English": 0.2727272727272727, - "Spanish,Vietnamese,Indonesian,Malay": 0.2897727272727273, - "Spanish,Vietnamese,Indonesian,Filipino": 0.2784090909090909, - "Spanish,Vietnamese,Indonesian,English": 0.29545454545454547, - "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, - "Spanish,Vietnamese,Malay,English": 0.3181818181818182, - "Spanish,Vietnamese,Filipino,English": 0.32954545454545453, - "Spanish,Indonesian,Malay,Filipino": 0.32386363636363635, - "Spanish,Indonesian,Malay,English": 0.3352272727272727, - "Spanish,Indonesian,Filipino,English": 0.3409090909090909, - "Spanish,Malay,Filipino,English": 0.3977272727272727, - "Chinese,Vietnamese,Indonesian,Malay": 0.19886363636363635, - "Chinese,Vietnamese,Indonesian,Filipino": 0.2159090909090909, - "Chinese,Vietnamese,Indonesian,English": 0.23295454545454544, - "Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, - "Chinese,Vietnamese,Malay,English": 0.22727272727272727, - "Chinese,Vietnamese,Filipino,English": 0.24431818181818182, - "Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, - "Chinese,Indonesian,Malay,English": 0.2159090909090909, - "Chinese,Indonesian,Filipino,English": 0.23863636363636365, - "Chinese,Malay,Filipino,English": 0.2727272727272727, - "Vietnamese,Indonesian,Malay,Filipino": 0.3068181818181818, - "Vietnamese,Indonesian,Malay,English": 0.3125, - "Vietnamese,Indonesian,Filipino,English": 0.3068181818181818, - "Vietnamese,Malay,Filipino,English": 0.3409090909090909, - "Indonesian,Malay,Filipino,English": 0.3409090909090909 + "Spanish,Chinese,Vietnamese,Indonesian": 0.03409090909090909, + "Spanish,Chinese,Vietnamese,Malay": 0.017045454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.028409090909090908, + "Spanish,Chinese,Vietnamese,English": 0.017045454545454544, + "Spanish,Chinese,Indonesian,Malay": 0.07386363636363637, + "Spanish,Chinese,Indonesian,Filipino": 0.0625, + "Spanish,Chinese,Indonesian,English": 0.07954545454545454, + "Spanish,Chinese,Malay,Filipino": 0.05113636363636364, + "Spanish,Chinese,Malay,English": 0.09659090909090909, + "Spanish,Chinese,Filipino,English": 0.09090909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.07386363636363637, + "Spanish,Vietnamese,Indonesian,Filipino": 0.07954545454545454, + "Spanish,Vietnamese,Indonesian,English": 0.03977272727272727, + "Spanish,Vietnamese,Malay,Filipino": 0.06818181818181818, + "Spanish,Vietnamese,Malay,English": 0.056818181818181816, + "Spanish,Vietnamese,Filipino,English": 0.06818181818181818, + "Spanish,Indonesian,Malay,Filipino": 0.07386363636363637, + "Spanish,Indonesian,Malay,English": 0.08522727272727272, + "Spanish,Indonesian,Filipino,English": 0.07954545454545454, + "Spanish,Malay,Filipino,English": 0.08522727272727272, + "Chinese,Vietnamese,Indonesian,Malay": 0.08522727272727272, + "Chinese,Vietnamese,Indonesian,Filipino": 0.07386363636363637, + "Chinese,Vietnamese,Indonesian,English": 0.05113636363636364, + "Chinese,Vietnamese,Malay,Filipino": 0.0625, + "Chinese,Vietnamese,Malay,English": 0.05113636363636364, + "Chinese,Vietnamese,Filipino,English": 0.045454545454545456, + "Chinese,Indonesian,Malay,Filipino": 0.07386363636363637, + "Chinese,Indonesian,Malay,English": 0.07954545454545454, + "Chinese,Indonesian,Filipino,English": 0.06818181818181818, + "Chinese,Malay,Filipino,English": 0.06818181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Vietnamese,Indonesian,Malay,English": 0.08522727272727272, + "Vietnamese,Indonesian,Filipino,English": 0.08522727272727272, + "Vietnamese,Malay,Filipino,English": 0.08522727272727272, + "Indonesian,Malay,Filipino,English": 0.09090909090909091 }, "5_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1590909090909091, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, - "Spanish,Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, - "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1875, - "Spanish,Chinese,Vietnamese,Malay,English": 0.19886363636363635, - "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, - "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, - "Spanish,Chinese,Indonesian,Malay,English": 0.17613636363636365, - "Spanish,Chinese,Indonesian,Filipino,English": 0.18181818181818182, - "Spanish,Chinese,Malay,Filipino,English": 0.23863636363636365, - "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, - "Spanish,Vietnamese,Indonesian,Malay,English": 0.25, - "Spanish,Vietnamese,Indonesian,Filipino,English": 0.23863636363636365, - "Spanish,Vietnamese,Malay,Filipino,English": 0.2784090909090909, - "Spanish,Indonesian,Malay,Filipino,English": 0.2840909090909091, - "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.17045454545454544, - "Chinese,Vietnamese,Indonesian,Malay,English": 0.17613636363636365, - "Chinese,Vietnamese,Indonesian,Filipino,English": 0.19318181818181818, - "Chinese,Vietnamese,Malay,Filipino,English": 0.20454545454545456, - "Chinese,Indonesian,Malay,Filipino,English": 0.1875, - "Vietnamese,Indonesian,Malay,Filipino,English": 0.25 + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.017045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.022727272727272728, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.011363636363636364, + "Spanish,Chinese,Vietnamese,Malay,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.011363636363636364, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.028409090909090908, + "Spanish,Chinese,Indonesian,Malay,English": 0.045454545454545456, + "Spanish,Chinese,Indonesian,Filipino,English": 0.045454545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.03977272727272727, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.05113636363636364, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.03409090909090909, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.03409090909090909, + "Spanish,Vietnamese,Malay,Filipino,English": 0.03977272727272727, + "Spanish,Indonesian,Malay,Filipino,English": 0.045454545454545456, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.045454545454545456, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.03409090909090909, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.028409090909090908, + "Chinese,Vietnamese,Malay,Filipino,English": 0.03409090909090909, + "Chinese,Indonesian,Malay,Filipino,English": 0.03977272727272727, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.0625 }, "6_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, - "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.1534090909090909, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.14772727272727273, - "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.18181818181818182, - "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.1590909090909091, - "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.21022727272727273, - "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.011363636363636364, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.005681818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.005681818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.022727272727272728, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.028409090909090908, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.022727272727272728 }, "7_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.005681818181818182 } }, - "AC3_2": 0.49861134417965686, - "AC3_3": 0.4082484495956346, - "AC3_4": 0.33857625732713653, - "AC3_5": 0.28368407260833933, - "AC3_6": 0.24155928813065697, - "AC3_7": 0.2102195212346658 + "AC3_2": 0.3235417948137895, + "AC3_3": 0.19891352316423525, + "AC3_4": 0.11177944357933094, + "AC3_5": 0.05859993663110856, + "AC3_6": 0.027865176052204437, + "AC3_7": 0.011152640459340715 }, "prompt_4": { - "overall_acc": 0.476461038961039, + "overall_acc": 0.3319805194805195, "language_acc": { - "Spanish": 0.4772727272727273, - "Chinese": 0.5056818181818182, - "Vietnamese": 0.4943181818181818, - "Indonesian": 0.4375, - "Malay": 0.44886363636363635, - "Filipino": 0.4659090909090909, - "English": 0.5056818181818182 + "Spanish": 0.32386363636363635, + "Chinese": 0.3352272727272727, + "Vietnamese": 0.3693181818181818, + "Indonesian": 0.32386363636363635, + "Malay": 0.3181818181818182, + "Filipino": 0.32954545454545453, + "English": 0.32386363636363635 }, - "consistency_score_2": 0.5430194805194805, - "consistency_score_3": 0.37142857142857155, - "consistency_score_4": 0.27938311688311684, - "consistency_score_5": 0.22023809523809523, - "consistency_score_6": 0.17857142857142858, - "consistency_score_7": 0.14772727272727273, + "consistency_score_2": 0.50487012987013, + "consistency_score_3": 0.31233766233766236, + "consistency_score_4": 0.21298701298701297, + "consistency_score_5": 0.15503246753246755, + "consistency_score_6": 0.1176948051948052, + "consistency_score_7": 0.09090909090909091, "detailed_consistency_score": { "2_combine": { - "Spanish,Chinese": 0.48863636363636365, - "Spanish,Vietnamese": 0.5397727272727273, - "Spanish,Indonesian": 0.5454545454545454, - "Spanish,Malay": 0.5909090909090909, - "Spanish,Filipino": 0.6193181818181818, - "Spanish,English": 0.6306818181818182, - "Chinese,Vietnamese": 0.4034090909090909, - "Chinese,Indonesian": 0.4659090909090909, - "Chinese,Malay": 0.44886363636363635, + "Spanish,Chinese": 0.5170454545454546, + "Spanish,Vietnamese": 0.36363636363636365, + "Spanish,Indonesian": 0.39204545454545453, + "Spanish,Malay": 0.38636363636363635, + "Spanish,Filipino": 0.44886363636363635, + "Spanish,English": 0.5625, + "Chinese,Vietnamese": 0.42613636363636365, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Malay": 0.5056818181818182, "Chinese,Filipino": 0.4659090909090909, - "Chinese,English": 0.48863636363636365, - "Vietnamese,Indonesian": 0.5227272727272727, - "Vietnamese,Malay": 0.5397727272727273, - "Vietnamese,Filipino": 0.5852272727272727, - "Vietnamese,English": 0.5397727272727273, - "Indonesian,Malay": 0.6193181818181818, - "Indonesian,Filipino": 0.5681818181818182, - "Indonesian,English": 0.5795454545454546, - "Malay,Filipino": 0.5625, - "Malay,English": 0.5965909090909091, - "Filipino,English": 0.6022727272727273 + "Chinese,English": 0.5454545454545454, + "Vietnamese,Indonesian": 0.6363636363636364, + "Vietnamese,Malay": 0.5909090909090909, + "Vietnamese,Filipino": 0.6306818181818182, + "Vietnamese,English": 0.3977272727272727, + "Indonesian,Malay": 0.6534090909090909, + "Indonesian,Filipino": 0.6193181818181818, + "Indonesian,English": 0.4602272727272727, + "Malay,Filipino": 0.6306818181818182, + "Malay,English": 0.4431818181818182, + "Filipino,English": 0.4659090909090909 }, "3_combine": { - "Spanish,Chinese,Vietnamese": 0.30113636363636365, - "Spanish,Chinese,Indonesian": 0.3125, - "Spanish,Chinese,Malay": 0.32386363636363635, - "Spanish,Chinese,Filipino": 0.3465909090909091, + "Spanish,Chinese,Vietnamese": 0.2556818181818182, + "Spanish,Chinese,Indonesian": 0.26704545454545453, + "Spanish,Chinese,Malay": 0.29545454545454547, + "Spanish,Chinese,Filipino": 0.2784090909090909, "Spanish,Chinese,English": 0.3522727272727273, - "Spanish,Vietnamese,Indonesian": 0.3693181818181818, - "Spanish,Vietnamese,Malay": 0.3977272727272727, - "Spanish,Vietnamese,Filipino": 0.4090909090909091, - "Spanish,Vietnamese,English": 0.4090909090909091, - "Spanish,Indonesian,Malay": 0.4431818181818182, - "Spanish,Indonesian,Filipino": 0.4147727272727273, - "Spanish,Indonesian,English": 0.42045454545454547, - "Spanish,Malay,Filipino": 0.42613636363636365, - "Spanish,Malay,English": 0.45454545454545453, - "Spanish,Filipino,English": 0.4715909090909091, - "Chinese,Vietnamese,Indonesian": 0.29545454545454547, - "Chinese,Vietnamese,Malay": 0.2840909090909091, - "Chinese,Vietnamese,Filipino": 0.2897727272727273, - "Chinese,Vietnamese,English": 0.2840909090909091, - "Chinese,Indonesian,Malay": 0.32954545454545453, - "Chinese,Indonesian,Filipino": 0.30113636363636365, - "Chinese,Indonesian,English": 0.3181818181818182, - "Chinese,Malay,Filipino": 0.3068181818181818, - "Chinese,Malay,English": 0.32954545454545453, - "Chinese,Filipino,English": 0.32954545454545453, - "Vietnamese,Indonesian,Malay": 0.4034090909090909, - "Vietnamese,Indonesian,Filipino": 0.38636363636363635, - "Vietnamese,Indonesian,English": 0.375, - "Vietnamese,Malay,Filipino": 0.3977272727272727, - "Vietnamese,Malay,English": 0.39204545454545453, - "Vietnamese,Filipino,English": 0.4034090909090909, - "Indonesian,Malay,Filipino": 0.4147727272727273, - "Indonesian,Malay,English": 0.44886363636363635, - "Indonesian,Filipino,English": 0.42613636363636365, - "Malay,Filipino,English": 0.4318181818181818 + "Spanish,Vietnamese,Indonesian": 0.26136363636363635, + "Spanish,Vietnamese,Malay": 0.2215909090909091, + "Spanish,Vietnamese,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,English": 0.23863636363636365, + "Spanish,Indonesian,Malay": 0.2727272727272727, + "Spanish,Indonesian,Filipino": 0.29545454545454547, + "Spanish,Indonesian,English": 0.2784090909090909, + "Spanish,Malay,Filipino": 0.2784090909090909, + "Spanish,Malay,English": 0.26704545454545453, + "Spanish,Filipino,English": 0.29545454545454547, + "Chinese,Vietnamese,Indonesian": 0.32386363636363635, + "Chinese,Vietnamese,Malay": 0.3125, + "Chinese,Vietnamese,Filipino": 0.3125, + "Chinese,Vietnamese,English": 0.2556818181818182, + "Chinese,Indonesian,Malay": 0.35795454545454547, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,English": 0.2897727272727273, + "Chinese,Malay,Filipino": 0.3409090909090909, + "Chinese,Malay,English": 0.3068181818181818, + "Chinese,Filipino,English": 0.30113636363636365, + "Vietnamese,Indonesian,Malay": 0.4659090909090909, + "Vietnamese,Indonesian,Filipino": 0.4659090909090909, + "Vietnamese,Indonesian,English": 0.3068181818181818, + "Vietnamese,Malay,Filipino": 0.4602272727272727, + "Vietnamese,Malay,English": 0.2556818181818182, + "Vietnamese,Filipino,English": 0.29545454545454547, + "Indonesian,Malay,Filipino": 0.4772727272727273, + "Indonesian,Malay,English": 0.3181818181818182, + "Indonesian,Filipino,English": 0.3181818181818182, + "Malay,Filipino,English": 0.3068181818181818 }, "4_combine": { - "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, - "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, - "Spanish,Chinese,Vietnamese,Filipino": 0.23863636363636365, - "Spanish,Chinese,Vietnamese,English": 0.24431818181818182, - "Spanish,Chinese,Indonesian,Malay": 0.2556818181818182, - "Spanish,Chinese,Indonesian,Filipino": 0.23295454545454544, - "Spanish,Chinese,Indonesian,English": 0.25, - "Spanish,Chinese,Malay,Filipino": 0.2556818181818182, - "Spanish,Chinese,Malay,English": 0.26704545454545453, - "Spanish,Chinese,Filipino,English": 0.26704545454545453, - "Spanish,Vietnamese,Indonesian,Malay": 0.3181818181818182, - "Spanish,Vietnamese,Indonesian,Filipino": 0.3125, - "Spanish,Vietnamese,Indonesian,English": 0.3068181818181818, - "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, - "Spanish,Vietnamese,Malay,English": 0.32954545454545453, - "Spanish,Vietnamese,Filipino,English": 0.3465909090909091, - "Spanish,Indonesian,Malay,Filipino": 0.3465909090909091, - "Spanish,Indonesian,Malay,English": 0.36363636363636365, - "Spanish,Indonesian,Filipino,English": 0.3465909090909091, - "Spanish,Malay,Filipino,English": 0.3693181818181818, - "Chinese,Vietnamese,Indonesian,Malay": 0.23295454545454544, - "Chinese,Vietnamese,Indonesian,Filipino": 0.2215909090909091, - "Chinese,Vietnamese,Indonesian,English": 0.2215909090909091, - "Chinese,Vietnamese,Malay,Filipino": 0.2159090909090909, - "Chinese,Vietnamese,Malay,English": 0.23863636363636365, - "Chinese,Vietnamese,Filipino,English": 0.2215909090909091, - "Chinese,Indonesian,Malay,Filipino": 0.23295454545454544, - "Chinese,Indonesian,Malay,English": 0.2556818181818182, - "Chinese,Indonesian,Filipino,English": 0.23863636363636365, - "Chinese,Malay,Filipino,English": 0.26136363636363635, - "Vietnamese,Indonesian,Malay,Filipino": 0.3125, - "Vietnamese,Indonesian,Malay,English": 0.32386363636363635, - "Vietnamese,Indonesian,Filipino,English": 0.3068181818181818, - "Vietnamese,Malay,Filipino,English": 0.32386363636363635, - "Indonesian,Malay,Filipino,English": 0.3465909090909091 + "Spanish,Chinese,Vietnamese,Indonesian": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Malay": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.19886363636363635, + "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Filipino,English": 0.19886363636363635, + "Spanish,Vietnamese,Indonesian,Malay": 0.1875, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Spanish,Vietnamese,Indonesian,English": 0.18181818181818182, + "Spanish,Vietnamese,Malay,Filipino": 0.18181818181818182, + "Spanish,Vietnamese,Malay,English": 0.1590909090909091, + "Spanish,Vietnamese,Filipino,English": 0.19318181818181818, + "Spanish,Indonesian,Malay,Filipino": 0.2159090909090909, + "Spanish,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Indonesian,Filipino,English": 0.19318181818181818, + "Spanish,Malay,Filipino,English": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Malay": 0.2784090909090909, + "Chinese,Vietnamese,Indonesian,Filipino": 0.26136363636363635, + "Chinese,Vietnamese,Indonesian,English": 0.20454545454545456, + "Chinese,Vietnamese,Malay,Filipino": 0.24431818181818182, + "Chinese,Vietnamese,Malay,English": 0.19318181818181818, + "Chinese,Vietnamese,Filipino,English": 0.21022727272727273, + "Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Chinese,Indonesian,Malay,English": 0.22727272727272727, + "Chinese,Indonesian,Filipino,English": 0.21022727272727273, + "Chinese,Malay,Filipino,English": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.3693181818181818, + "Vietnamese,Indonesian,Malay,English": 0.2215909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.23863636363636365, + "Vietnamese,Malay,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino,English": 0.26136363636363635 }, "5_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.18181818181818182, - "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1875, - "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, - "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, - "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, - "Spanish,Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, - "Spanish,Chinese,Indonesian,Malay,English": 0.2159090909090909, - "Spanish,Chinese,Indonesian,Filipino,English": 0.19886363636363635, - "Spanish,Chinese,Malay,Filipino,English": 0.22727272727272727, - "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.26704545454545453, - "Spanish,Vietnamese,Indonesian,Malay,English": 0.2727272727272727, - "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2727272727272727, - "Spanish,Vietnamese,Malay,Filipino,English": 0.2897727272727273, - "Spanish,Indonesian,Malay,Filipino,English": 0.3068181818181818, - "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.17613636363636365, - "Chinese,Vietnamese,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,English": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.14772727272727273, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.14772727272727273, + "Spanish,Chinese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.13636363636363635, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.13068181818181818, + "Spanish,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.17613636363636365, "Chinese,Vietnamese,Indonesian,Filipino,English": 0.17613636363636365, - "Chinese,Vietnamese,Malay,Filipino,English": 0.19318181818181818, - "Chinese,Indonesian,Malay,Filipino,English": 0.19886363636363635, - "Vietnamese,Indonesian,Malay,Filipino,English": 0.26704545454545453 + "Chinese,Vietnamese,Malay,Filipino,English": 0.16477272727272727, + "Chinese,Indonesian,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.19318181818181818 }, "6_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, - "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.1590909090909091, - "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.18181818181818182, - "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.18181818181818182, - "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.24431818181818182, - "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.10227272727272728, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.10795454545454546, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.10795454545454546, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.14772727272727273 }, "7_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.14772727272727273 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09090909090909091 } }, - "AC3_2": 0.5075675717096639, - "AC3_3": 0.41743934788982806, - "AC3_4": 0.3522291443056106, - "AC3_5": 0.3012343966280525, - "AC3_6": 0.2597804920828404, - "AC3_7": 0.225529022306935 + "AC3_2": 0.40056621356393457, + "AC3_3": 0.3218596721498547, + "AC3_4": 0.2594926669517532, + "AC3_5": 0.2113609306925313, + "AC3_6": 0.17378041629592222, + "AC3_7": 0.14273250738205706 }, "prompt_5": { - "overall_acc": 0.4675324675324676, + "overall_acc": 0.29626623376623373, "language_acc": { - "Spanish": 0.45454545454545453, - "Chinese": 0.44886363636363635, - "Vietnamese": 0.4602272727272727, - "Indonesian": 0.4715909090909091, - "Malay": 0.4659090909090909, - "Filipino": 0.4659090909090909, - "English": 0.5056818181818182 + "Spanish": 0.30113636363636365, + "Chinese": 0.30113636363636365, + "Vietnamese": 0.30113636363636365, + "Indonesian": 0.26136363636363635, + "Malay": 0.2840909090909091, + "Filipino": 0.29545454545454547, + "English": 0.32954545454545453 }, - "consistency_score_2": 0.5625, - "consistency_score_3": 0.3925324675324674, - "consistency_score_4": 0.2983766233766234, - "consistency_score_5": 0.23566017316017318, - "consistency_score_6": 0.18912337662337664, - "consistency_score_7": 0.1534090909090909, + "consistency_score_2": 0.49648268398268397, + "consistency_score_3": 0.30000000000000004, + "consistency_score_4": 0.20227272727272722, + "consistency_score_5": 0.14853896103896105, + "consistency_score_6": 0.11850649350649352, + "consistency_score_7": 0.10227272727272728, "detailed_consistency_score": { "2_combine": { - "Spanish,Chinese": 0.4715909090909091, - "Spanish,Vietnamese": 0.5568181818181818, - "Spanish,Indonesian": 0.5852272727272727, - "Spanish,Malay": 0.6136363636363636, - "Spanish,Filipino": 0.5795454545454546, - "Spanish,English": 0.6534090909090909, - "Chinese,Vietnamese": 0.4034090909090909, + "Spanish,Chinese": 0.48863636363636365, + "Spanish,Vietnamese": 0.4659090909090909, + "Spanish,Indonesian": 0.42613636363636365, + "Spanish,Malay": 0.4431818181818182, + "Spanish,Filipino": 0.5113636363636364, + "Spanish,English": 0.5511363636363636, + "Chinese,Vietnamese": 0.375, "Chinese,Indonesian": 0.45454545454545453, - "Chinese,Malay": 0.4431818181818182, - "Chinese,Filipino": 0.4602272727272727, - "Chinese,English": 0.5340909090909091, - "Vietnamese,Indonesian": 0.5965909090909091, - "Vietnamese,Malay": 0.5340909090909091, - "Vietnamese,Filipino": 0.5454545454545454, - "Vietnamese,English": 0.5795454545454546, - "Indonesian,Malay": 0.6590909090909091, - "Indonesian,Filipino": 0.625, - "Indonesian,English": 0.6590909090909091, - "Malay,Filipino": 0.5852272727272727, - "Malay,English": 0.625, - "Filipino,English": 0.6477272727272727 + "Chinese,Malay": 0.4318181818181818, + "Chinese,Filipino": 0.4659090909090909, + "Chinese,English": 0.5397727272727273, + "Vietnamese,Indonesian": 0.5681818181818182, + "Vietnamese,Malay": 0.6590909090909091, + "Vietnamese,Filipino": 0.6590909090909091, + "Vietnamese,English": 0.3068181818181818, + "Indonesian,Malay": 0.6704545454545454, + "Indonesian,Filipino": 0.5965909090909091, + "Indonesian,English": 0.4147727272727273, + "Malay,Filipino": 0.6818181818181818, + "Malay,English": 0.3409090909090909, + "Filipino,English": 0.375 }, "3_combine": { - "Spanish,Chinese,Vietnamese": 0.2784090909090909, - "Spanish,Chinese,Indonesian": 0.3125, - "Spanish,Chinese,Malay": 0.3181818181818182, - "Spanish,Chinese,Filipino": 0.32386363636363635, - "Spanish,Chinese,English": 0.3693181818181818, - "Spanish,Vietnamese,Indonesian": 0.4090909090909091, - "Spanish,Vietnamese,Malay": 0.4147727272727273, - "Spanish,Vietnamese,Filipino": 0.3977272727272727, - "Spanish,Vietnamese,English": 0.4375, - "Spanish,Indonesian,Malay": 0.45454545454545453, - "Spanish,Indonesian,Filipino": 0.4375, - "Spanish,Indonesian,English": 0.48295454545454547, - "Spanish,Malay,Filipino": 0.4318181818181818, - "Spanish,Malay,English": 0.48863636363636365, - "Spanish,Filipino,English": 0.4772727272727273, - "Chinese,Vietnamese,Indonesian": 0.29545454545454547, - "Chinese,Vietnamese,Malay": 0.2727272727272727, - "Chinese,Vietnamese,Filipino": 0.2784090909090909, - "Chinese,Vietnamese,English": 0.3181818181818182, - "Chinese,Indonesian,Malay": 0.3352272727272727, - "Chinese,Indonesian,Filipino": 0.32954545454545453, - "Chinese,Indonesian,English": 0.3693181818181818, - "Chinese,Malay,Filipino": 0.3068181818181818, - "Chinese,Malay,English": 0.3465909090909091, - "Chinese,Filipino,English": 0.3693181818181818, - "Vietnamese,Indonesian,Malay": 0.4375, - "Vietnamese,Indonesian,Filipino": 0.42045454545454547, - "Vietnamese,Indonesian,English": 0.4431818181818182, - "Vietnamese,Malay,Filipino": 0.38636363636363635, - "Vietnamese,Malay,English": 0.4147727272727273, - "Vietnamese,Filipino,English": 0.42613636363636365, - "Indonesian,Malay,Filipino": 0.4715909090909091, - "Indonesian,Malay,English": 0.5056818181818182, - "Indonesian,Filipino,English": 0.5, - "Malay,Filipino,English": 0.4772727272727273 + "Spanish,Chinese,Vietnamese": 0.25, + "Spanish,Chinese,Indonesian": 0.2727272727272727, + "Spanish,Chinese,Malay": 0.25, + "Spanish,Chinese,Filipino": 0.2840909090909091, + "Spanish,Chinese,English": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian": 0.2784090909090909, + "Spanish,Vietnamese,Malay": 0.3125, + "Spanish,Vietnamese,Filipino": 0.35795454545454547, + "Spanish,Vietnamese,English": 0.2215909090909091, + "Spanish,Indonesian,Malay": 0.32954545454545453, + "Spanish,Indonesian,Filipino": 0.3181818181818182, + "Spanish,Indonesian,English": 0.26136363636363635, + "Spanish,Malay,Filipino": 0.36363636363636365, + "Spanish,Malay,English": 0.23295454545454544, + "Spanish,Filipino,English": 0.26136363636363635, + "Chinese,Vietnamese,Indonesian": 0.2897727272727273, + "Chinese,Vietnamese,Malay": 0.2784090909090909, + "Chinese,Vietnamese,Filipino": 0.30113636363636365, + "Chinese,Vietnamese,English": 0.20454545454545456, + "Chinese,Indonesian,Malay": 0.3409090909090909, + "Chinese,Indonesian,Filipino": 0.32386363636363635, + "Chinese,Indonesian,English": 0.29545454545454547, + "Chinese,Malay,Filipino": 0.3352272727272727, + "Chinese,Malay,English": 0.25, + "Chinese,Filipino,English": 0.2556818181818182, + "Vietnamese,Indonesian,Malay": 0.4772727272727273, + "Vietnamese,Indonesian,Filipino": 0.4318181818181818, + "Vietnamese,Indonesian,English": 0.2159090909090909, + "Vietnamese,Malay,Filipino": 0.5170454545454546, + "Vietnamese,Malay,English": 0.19886363636363635, + "Vietnamese,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino": 0.48863636363636365, + "Indonesian,Malay,English": 0.2556818181818182, + "Indonesian,Filipino,English": 0.25, + "Malay,Filipino,English": 0.25 }, "4_combine": { - "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, - "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, - "Spanish,Chinese,Vietnamese,Filipino": 0.2159090909090909, - "Spanish,Chinese,Vietnamese,English": 0.25, - "Spanish,Chinese,Indonesian,Malay": 0.25, - "Spanish,Chinese,Indonesian,Filipino": 0.24431818181818182, - "Spanish,Chinese,Indonesian,English": 0.2840909090909091, - "Spanish,Chinese,Malay,Filipino": 0.25, - "Spanish,Chinese,Malay,English": 0.2784090909090909, - "Spanish,Chinese,Filipino,English": 0.2840909090909091, - "Spanish,Vietnamese,Indonesian,Malay": 0.3409090909090909, - "Spanish,Vietnamese,Indonesian,Filipino": 0.3352272727272727, - "Spanish,Vietnamese,Indonesian,English": 0.3522727272727273, - "Spanish,Vietnamese,Malay,Filipino": 0.32386363636363635, - "Spanish,Vietnamese,Malay,English": 0.3522727272727273, - "Spanish,Vietnamese,Filipino,English": 0.3409090909090909, - "Spanish,Indonesian,Malay,Filipino": 0.35795454545454547, - "Spanish,Indonesian,Malay,English": 0.3977272727272727, - "Spanish,Indonesian,Filipino,English": 0.39204545454545453, - "Spanish,Malay,Filipino,English": 0.38636363636363635, - "Chinese,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Indonesian": 0.1875, + "Spanish,Chinese,Vietnamese,Malay": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Filipino": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.20454545454545456, + "Spanish,Chinese,Malay,Filipino": 0.19886363636363635, + "Spanish,Chinese,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Filipino,English": 0.18181818181818182, + "Spanish,Vietnamese,Indonesian,Malay": 0.24431818181818182, + "Spanish,Vietnamese,Indonesian,Filipino": 0.25, + "Spanish,Vietnamese,Indonesian,English": 0.1534090909090909, + "Spanish,Vietnamese,Malay,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,Malay,English": 0.14204545454545456, + "Spanish,Vietnamese,Filipino,English": 0.16477272727272727, + "Spanish,Indonesian,Malay,Filipino": 0.2727272727272727, + "Spanish,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Indonesian,Filipino,English": 0.17613636363636365, + "Spanish,Malay,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay": 0.24431818181818182, "Chinese,Vietnamese,Indonesian,Filipino": 0.23863636363636365, - "Chinese,Vietnamese,Indonesian,English": 0.26136363636363635, - "Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, - "Chinese,Vietnamese,Malay,English": 0.24431818181818182, - "Chinese,Vietnamese,Filipino,English": 0.24431818181818182, - "Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, - "Chinese,Indonesian,Malay,English": 0.2897727272727273, - "Chinese,Indonesian,Filipino,English": 0.30113636363636365, - "Chinese,Malay,Filipino,English": 0.2784090909090909, - "Vietnamese,Indonesian,Malay,Filipino": 0.32954545454545453, - "Vietnamese,Indonesian,Malay,English": 0.35795454545454547, - "Vietnamese,Indonesian,Filipino,English": 0.35795454545454547, - "Vietnamese,Malay,Filipino,English": 0.32954545454545453, - "Indonesian,Malay,Filipino,English": 0.4034090909090909 + "Chinese,Vietnamese,Indonesian,English": 0.17613636363636365, + "Chinese,Vietnamese,Malay,Filipino": 0.24431818181818182, + "Chinese,Vietnamese,Malay,English": 0.14772727272727273, + "Chinese,Vietnamese,Filipino,English": 0.1534090909090909, + "Chinese,Indonesian,Malay,Filipino": 0.2727272727272727, + "Chinese,Indonesian,Malay,English": 0.20454545454545456, + "Chinese,Indonesian,Filipino,English": 0.19318181818181818, + "Chinese,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino": 0.3806818181818182, + "Vietnamese,Indonesian,Malay,English": 0.16477272727272727, + "Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Vietnamese,Malay,Filipino,English": 0.17613636363636365, + "Indonesian,Malay,Filipino,English": 0.19318181818181818 }, "5_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1875, - "Spanish,Chinese,Vietnamese,Indonesian,English": 0.21022727272727273, - "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1875, - "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, - "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, - "Spanish,Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, - "Spanish,Chinese,Indonesian,Malay,English": 0.23295454545454544, - "Spanish,Chinese,Indonesian,Filipino,English": 0.23863636363636365, - "Spanish,Chinese,Malay,Filipino,English": 0.23295454545454544, - "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2784090909090909, - "Spanish,Vietnamese,Indonesian,Malay,English": 0.30113636363636365, - "Spanish,Vietnamese,Indonesian,Filipino,English": 0.29545454545454547, - "Spanish,Vietnamese,Malay,Filipino,English": 0.2840909090909091, - "Spanish,Indonesian,Malay,Filipino,English": 0.32954545454545453, - "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.19318181818181818, - "Chinese,Vietnamese,Indonesian,Malay,English": 0.2159090909090909, - "Chinese,Vietnamese,Indonesian,Filipino,English": 0.2215909090909091, - "Chinese,Vietnamese,Malay,Filipino,English": 0.19886363636363635, - "Chinese,Indonesian,Malay,Filipino,English": 0.24431818181818182, - "Vietnamese,Indonesian,Malay,Filipino,English": 0.2897727272727273 + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Malay,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.125, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.13068181818181818, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Vietnamese,Malay,Filipino,English": 0.125, + "Spanish,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.13636363636363635, + "Chinese,Vietnamese,Malay,Filipino,English": 0.125, + "Chinese,Indonesian,Malay,Filipino,English": 0.1534090909090909, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.14204545454545456 }, "6_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, - "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, - "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.18181818181818182, - "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.17045454545454544, - "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.19886363636363635, - "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.25, - "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.18181818181818182 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.14204545454545456, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.11363636363636363, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.10227272727272728, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.11931818181818182, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11931818181818182 }, "7_combine": { - "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909 + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.10227272727272728 } }, - "AC3_2": 0.5106382978227654, - "AC3_3": 0.42676236555759284, - "AC3_4": 0.3642749788946497, - "AC3_5": 0.31336727907877565, - "AC3_6": 0.2693079478825107, - "AC3_7": 0.2310160427435445 + "AC3_2": 0.3710911528276186, + "AC3_3": 0.29812142657673757, + "AC3_4": 0.2404088096376954, + "AC3_5": 0.19787124367529896, + "AC3_6": 0.16929499068274584, + "AC3_7": 0.15205517492944545 } }, "sg_eval": { "prompt_1": { - "accuracy": 0.5825242718446602 + "accuracy": 0.4368932038834951 }, "prompt_2": { - "accuracy": 0.5728155339805825 + "accuracy": 0.3300970873786408 }, "prompt_3": { - "accuracy": 0.6019417475728155 + "accuracy": 0.3106796116504854 }, "prompt_4": { - "accuracy": 0.33980582524271846 + "accuracy": 0.30097087378640774 }, "prompt_5": { - "accuracy": 0.5825242718446602 + "accuracy": 0.4368932038834951 } }, "cn_eval": { "prompt_1": { - "accuracy": 0.38095238095238093 + "accuracy": 0.2 }, "prompt_2": { - "accuracy": 0.3619047619047619 + "accuracy": 0.23809523809523808 }, "prompt_3": { - "accuracy": 0.3904761904761905 + "accuracy": 0.2857142857142857 }, "prompt_4": { - "accuracy": 0.26666666666666666 + "accuracy": 0.24761904761904763 }, "prompt_5": { - "accuracy": 0.3142857142857143 + "accuracy": 0.22857142857142856 } }, "us_eval": { "prompt_1": { - "accuracy": 0.6822429906542056 + "accuracy": 0.3644859813084112 }, "prompt_2": { - "accuracy": 0.6915887850467289 + "accuracy": 0.308411214953271 }, "prompt_3": { - "accuracy": 0.7289719626168224 + "accuracy": 0.29906542056074764 }, "prompt_4": { - "accuracy": 0.4953271028037383 + "accuracy": 0.2897196261682243 }, "prompt_5": { - "accuracy": 0.6635514018691588 + "accuracy": 0.3177570093457944 } }, "ph_eval": { "prompt_1": { - "accuracy": 0.52, + "accuracy": 0.21, "category_acc": { - "brand": 0.5, - "demographics": 0.4, + "brand": 0.1, + "demographics": 0.0, "biology": 0.3, - "history": 0.6, - "literature": 0.5, - "politics": 0.7, - "culture": 0.6, - "film": 0.5, - "law": 0.5, - "geography": 0.5 + "history": 0.2, + "literature": 0.2, + "politics": 0.3, + "culture": 0.2, + "film": 0.3, + "law": 0.2, + "geography": 0.2 } }, "prompt_2": { - "accuracy": 0.5, + "accuracy": 0.27, "category_acc": { - "brand": 0.4, + "brand": 0.2, "demographics": 0.0, "biology": 0.3, - "history": 0.6, - "literature": 0.4, - "politics": 0.6, - "culture": 0.6, - "film": 0.7, - "law": 0.6, - "geography": 0.5 + "history": 0.26666666666666666, + "literature": 0.1, + "politics": 0.4, + "culture": 0.4, + "film": 0.4, + "law": 0.2, + "geography": 0.3 } }, "prompt_3": { - "accuracy": 0.54, + "accuracy": 0.3, "category_acc": { - "brand": 0.5, + "brand": 0.0, "demographics": 0.2, - "biology": 0.3, - "history": 0.5333333333333333, - "literature": 0.5, - "politics": 0.8, - "culture": 0.6, - "film": 0.6, - "law": 0.5, - "geography": 0.7 + "biology": 0.4, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.5, + "culture": 0.5, + "film": 0.3, + "law": 0.2, + "geography": 0.4 } }, "prompt_4": { - "accuracy": 0.41, + "accuracy": 0.25, "category_acc": { - "brand": 0.4, - "demographics": 0.6, - "biology": 0.2, - "history": 0.3333333333333333, - "literature": 0.2, - "politics": 0.9, - "culture": 0.3, + "brand": 0.1, + "demographics": 0.0, + "biology": 0.1, + "history": 0.26666666666666666, + "literature": 0.0, + "politics": 0.6, + "culture": 0.4, "film": 0.5, - "law": 0.5, - "geography": 0.3 + "law": 0.2, + "geography": 0.2 } }, "prompt_5": { - "accuracy": 0.52, + "accuracy": 0.27, "category_acc": { - "brand": 0.5, + "brand": 0.1, "demographics": 0.0, - "biology": 0.3, - "history": 0.4666666666666667, - "literature": 0.5, - "politics": 0.9, - "culture": 0.6, - "film": 0.6, - "law": 0.6, - "geography": 0.5 + "biology": 0.2, + "history": 0.26666666666666666, + "literature": 0.2, + "politics": 0.3, + "culture": 0.5, + "film": 0.4, + "law": 0.3, + "geography": 0.3 } } }, "sing2eng": { "prompt_1": { - "bleu_score": 0.06750971426706272 + "bleu_score": 0.13444666259992166 }, "prompt_2": { - "bleu_score": 0.07721750285072881 + "bleu_score": 0.10881707423553691 }, "prompt_3": { - "bleu_score": 0.06821978311551388 + "bleu_score": 0.07602585561725173 }, "prompt_4": { - "bleu_score": 0.06804397274407875 + "bleu_score": 0.10747774441127018 }, "prompt_5": { - "bleu_score": 0.05429875271696506 + "bleu_score": 0.04960135353094971 } }, "flores_ind2eng": { "prompt_1": { - "bleu_score": 0.0982752747830463 + "bleu_score": 0.24466587100169057 }, "prompt_2": { - "bleu_score": 0.09693382479933621 + "bleu_score": 0.08501608363069053 }, "prompt_3": { - "bleu_score": 0.09211183542188045 + "bleu_score": 0.12303144338815364 }, "prompt_4": { - "bleu_score": 0.09162870818146696 + "bleu_score": 0.06867174863637551 }, "prompt_5": { - "bleu_score": 0.1809729930555669 + "bleu_score": 0.08939676338672865 } }, "flores_vie2eng": { "prompt_1": { - "bleu_score": 0.07481805117090375 + "bleu_score": 0.1667920055400222 }, "prompt_2": { - "bleu_score": 0.12408586583177651 + "bleu_score": 0.10244283792447458 }, "prompt_3": { - "bleu_score": 0.11323612407691637 + "bleu_score": 0.18346059882564225 }, "prompt_4": { - "bleu_score": 0.07103730915658504 + "bleu_score": 0.053284045320169914 }, "prompt_5": { - "bleu_score": 0.09878667276041729 + "bleu_score": 0.08388357106350783 } }, "flores_zho2eng": { "prompt_1": { - "bleu_score": 0.06792021752065504 + "bleu_score": 0.07362845589698015 }, "prompt_2": { - "bleu_score": 0.08249521942696134 + "bleu_score": 0.06935691300850896 }, "prompt_3": { - "bleu_score": 0.08084386708497976 + "bleu_score": 0.07149693910765932 }, "prompt_4": { - "bleu_score": 0.06431676202990873 + "bleu_score": 0.05131044525837725 }, "prompt_5": { - "bleu_score": 0.08100137822980161 + "bleu_score": 0.056164865879979924 } }, "flores_zsm2eng": { "prompt_1": { - "bleu_score": 0.10560765530945605 + "bleu_score": 0.11021772578152905 }, "prompt_2": { - "bleu_score": 0.13240900994121152 + "bleu_score": 0.09813002522565224 }, "prompt_3": { - "bleu_score": 0.1296920316364985 + "bleu_score": 0.1178949163145004 }, "prompt_4": { - "bleu_score": 0.09176703052478538 + "bleu_score": 0.06753740480974892 }, "prompt_5": { - "bleu_score": 0.16824113101991417 + "bleu_score": 0.08004736830282513 } }, "mmlu": { "prompt_1": { - "accuracy": 0.5974329054842473 + "accuracy": 0.367561260210035 }, "prompt_2": { - "accuracy": 0.588098016336056 + "accuracy": 0.3652275379229872 }, "prompt_3": { - "accuracy": 0.6102683780630105 + "accuracy": 0.35122520420070014 }, "prompt_4": { - "accuracy": 0.5857642940490082 + "accuracy": 0.3372228704784131 }, "prompt_5": { - "accuracy": 0.6149358226371062 + "accuracy": 0.35822637106184363 } }, "mmlu_full": { "prompt_1": { - "accuracy": 0.5864855202002145, + "accuracy": 0.35645334286735786, "category_acc": { - "high_school_european_history": 0.75, - "business_ethics": 0.5656565656565656, - "clinical_knowledge": 0.6325757575757576, - "medical_genetics": 0.6464646464646465, - "high_school_us_history": 0.7635467980295566, - "high_school_physics": 0.35333333333333333, - "high_school_world_history": 0.7838983050847458, - "virology": 0.509090909090909, - "high_school_microeconomics": 0.6286919831223629, - "econometrics": 0.4424778761061947, - "college_computer_science": 0.42424242424242425, - "high_school_biology": 0.7605177993527508, + "high_school_european_history": 0.3780487804878049, + "business_ethics": 0.37373737373737376, + "clinical_knowledge": 0.35984848484848486, + "medical_genetics": 0.40404040404040403, + "high_school_us_history": 0.3842364532019704, + "high_school_physics": 0.24, + "high_school_world_history": 0.3983050847457627, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.2869198312236287, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.20202020202020202, + "high_school_biology": 0.37216828478964403, "abstract_algebra": 0.32323232323232326, - "professional_accounting": 0.49110320284697506, - "philosophy": 0.632258064516129, - "professional_medicine": 0.6346863468634686, - "nutrition": 0.6950819672131148, - "global_facts": 0.37373737373737376, - "machine_learning": 0.4774774774774775, - "security_studies": 0.6639344262295082, - "public_relations": 0.5688073394495413, - "professional_psychology": 0.6268412438625205, - "prehistory": 0.7461300309597523, - "anatomy": 0.582089552238806, - "human_sexuality": 0.6846153846153846, - "college_medicine": 0.6162790697674418, - "high_school_government_and_politics": 0.7916666666666666, - "college_chemistry": 0.41414141414141414, - "logical_fallacies": 0.6666666666666666, - "high_school_geography": 0.7614213197969543, - "elementary_mathematics": 0.3978779840848806, - "human_aging": 0.6396396396396397, - "college_mathematics": 0.3333333333333333, - "high_school_psychology": 0.8014705882352942, - "formal_logic": 0.368, - "high_school_statistics": 0.5255813953488372, - "international_law": 0.7, - "high_school_mathematics": 0.3048327137546468, - "high_school_computer_science": 0.6363636363636364, - "conceptual_physics": 0.5299145299145299, - "miscellaneous": 0.7480818414322251, - "high_school_chemistry": 0.504950495049505, - "marketing": 0.8025751072961373, - "professional_law": 0.47162426614481406, - "management": 0.7254901960784313, - "college_physics": 0.3069306930693069, - "jurisprudence": 0.7102803738317757, - "world_religions": 0.8117647058823529, - "sociology": 0.75, - "us_foreign_policy": 0.7777777777777778, - "high_school_macroeconomics": 0.5681233933161953, - "computer_security": 0.6161616161616161, - "moral_scenarios": 0.24161073825503357, - "moral_disputes": 0.6405797101449275, - "electrical_engineering": 0.5625, - "astronomy": 0.6887417218543046, - "college_biology": 0.7482517482517482 + "professional_accounting": 0.2918149466192171, + "philosophy": 0.38387096774193546, + "professional_medicine": 0.22878228782287824, + "nutrition": 0.43278688524590164, + "global_facts": 0.32323232323232326, + "machine_learning": 0.36936936936936937, + "security_studies": 0.44672131147540983, + "public_relations": 0.3486238532110092, + "professional_psychology": 0.36661211129296234, + "prehistory": 0.4117647058823529, + "anatomy": 0.39552238805970147, + "human_sexuality": 0.3923076923076923, + "college_medicine": 0.28488372093023256, + "high_school_government_and_politics": 0.4322916666666667, + "college_chemistry": 0.24242424242424243, + "logical_fallacies": 0.37037037037037035, + "high_school_geography": 0.3604060913705584, + "elementary_mathematics": 0.2864721485411141, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.39338235294117646, + "formal_logic": 0.2, + "high_school_statistics": 0.22790697674418606, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.26765799256505574, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.45012787723785164, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.5622317596566524, + "professional_law": 0.3333333333333333, + "management": 0.3333333333333333, + "college_physics": 0.21782178217821782, + "jurisprudence": 0.42990654205607476, + "world_religions": 0.5058823529411764, + "sociology": 0.385, + "us_foreign_policy": 0.5454545454545454, + "high_school_macroeconomics": 0.35218508997429304, + "computer_security": 0.5050505050505051, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.3681159420289855, + "electrical_engineering": 0.4166666666666667, + "astronomy": 0.3443708609271523, + "college_biology": 0.3706293706293706 } }, "prompt_2": { - "accuracy": 0.594780121558813, + "accuracy": 0.36539149088308903, "category_acc": { - "high_school_european_history": 0.774390243902439, - "business_ethics": 0.6060606060606061, - "clinical_knowledge": 0.6628787878787878, - "medical_genetics": 0.6767676767676768, - "high_school_us_history": 0.7832512315270936, - "high_school_physics": 0.36, - "high_school_world_history": 0.8220338983050848, - "virology": 0.509090909090909, - "high_school_microeconomics": 0.6286919831223629, - "econometrics": 0.39823008849557523, - "college_computer_science": 0.48484848484848486, - "high_school_biology": 0.7669902912621359, - "abstract_algebra": 0.29292929292929293, - "professional_accounting": 0.4875444839857651, - "philosophy": 0.6870967741935484, - "professional_medicine": 0.6531365313653137, - "nutrition": 0.7016393442622951, - "global_facts": 0.3939393939393939, - "machine_learning": 0.44144144144144143, - "security_studies": 0.680327868852459, - "public_relations": 0.5963302752293578, - "professional_psychology": 0.6579378068739771, - "prehistory": 0.7275541795665634, - "anatomy": 0.582089552238806, - "human_sexuality": 0.7153846153846154, - "college_medicine": 0.5988372093023255, - "high_school_government_and_politics": 0.8125, - "college_chemistry": 0.42424242424242425, - "logical_fallacies": 0.7160493827160493, - "high_school_geography": 0.7563451776649747, - "elementary_mathematics": 0.41114058355437666, - "human_aging": 0.6666666666666666, - "college_mathematics": 0.3333333333333333, - "high_school_psychology": 0.8069852941176471, - "formal_logic": 0.432, - "high_school_statistics": 0.5023255813953489, - "international_law": 0.7, - "high_school_mathematics": 0.3345724907063197, - "high_school_computer_science": 0.6262626262626263, - "conceptual_physics": 0.5128205128205128, - "miscellaneous": 0.7391304347826086, - "high_school_chemistry": 0.5297029702970297, - "marketing": 0.8025751072961373, - "professional_law": 0.4644487932159165, - "management": 0.7941176470588235, - "college_physics": 0.3465346534653465, - "jurisprudence": 0.7476635514018691, - "world_religions": 0.7823529411764706, - "sociology": 0.8, - "us_foreign_policy": 0.797979797979798, - "high_school_macroeconomics": 0.5732647814910026, - "computer_security": 0.6565656565656566, - "moral_scenarios": 0.24161073825503357, - "moral_disputes": 0.6231884057971014, - "electrical_engineering": 0.5555555555555556, - "astronomy": 0.6887417218543046, - "college_biology": 0.7552447552447552 + "high_school_european_history": 0.4024390243902439, + "business_ethics": 0.40404040404040403, + "clinical_knowledge": 0.3712121212121212, + "medical_genetics": 0.41414141414141414, + "high_school_us_history": 0.4039408866995074, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.4110169491525424, + "virology": 0.3515151515151515, + "high_school_microeconomics": 0.31223628691983124, + "econometrics": 0.2920353982300885, + "college_computer_science": 0.20202020202020202, + "high_school_biology": 0.4110032362459547, + "abstract_algebra": 0.2727272727272727, + "professional_accounting": 0.2846975088967972, + "philosophy": 0.3903225806451613, + "professional_medicine": 0.23616236162361623, + "nutrition": 0.39344262295081966, + "global_facts": 0.31313131313131315, + "machine_learning": 0.2972972972972973, + "security_studies": 0.44672131147540983, + "public_relations": 0.3853211009174312, + "professional_psychology": 0.37479541734860883, + "prehistory": 0.4179566563467492, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.36923076923076925, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.4114583333333333, + "college_chemistry": 0.20202020202020202, + "logical_fallacies": 0.38271604938271603, + "high_school_geography": 0.39086294416243655, + "elementary_mathematics": 0.28116710875331563, + "human_aging": 0.4369369369369369, + "college_mathematics": 0.2828282828282828, + "high_school_psychology": 0.4264705882352941, + "formal_logic": 0.24, + "high_school_statistics": 0.2558139534883721, + "international_law": 0.43333333333333335, + "high_school_mathematics": 0.26022304832713755, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.3547008547008547, + "miscellaneous": 0.49104859335038364, + "high_school_chemistry": 0.32673267326732675, + "marketing": 0.5836909871244635, + "professional_law": 0.32289628180039137, + "management": 0.38235294117647056, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.38317757009345793, + "world_religions": 0.5058823529411764, + "sociology": 0.49, + "us_foreign_policy": 0.5959595959595959, + "high_school_macroeconomics": 0.3444730077120823, + "computer_security": 0.46464646464646464, + "moral_scenarios": 0.25279642058165547, + "moral_disputes": 0.4028985507246377, + "electrical_engineering": 0.4861111111111111, + "astronomy": 0.33774834437086093, + "college_biology": 0.34965034965034963 } }, "prompt_3": { - "accuracy": 0.6030747229174115, + "accuracy": 0.35774043618162316, "category_acc": { - "high_school_european_history": 0.7987804878048781, - "business_ethics": 0.6262626262626263, - "clinical_knowledge": 0.6742424242424242, - "medical_genetics": 0.7070707070707071, - "high_school_us_history": 0.7783251231527094, - "high_school_physics": 0.3333333333333333, - "high_school_world_history": 0.809322033898305, - "virology": 0.5515151515151515, - "high_school_microeconomics": 0.6286919831223629, - "econometrics": 0.4424778761061947, - "college_computer_science": 0.48484848484848486, - "high_school_biology": 0.7637540453074434, - "abstract_algebra": 0.29292929292929293, - "professional_accounting": 0.5160142348754448, - "philosophy": 0.6741935483870968, - "professional_medicine": 0.6457564575645757, - "nutrition": 0.6918032786885245, - "global_facts": 0.40404040404040403, - "machine_learning": 0.36936936936936937, - "security_studies": 0.6885245901639344, - "public_relations": 0.6422018348623854, - "professional_psychology": 0.6481178396072013, - "prehistory": 0.7120743034055728, - "anatomy": 0.5895522388059702, - "human_sexuality": 0.7076923076923077, - "college_medicine": 0.6046511627906976, - "high_school_government_and_politics": 0.828125, - "college_chemistry": 0.4444444444444444, - "logical_fallacies": 0.7283950617283951, - "high_school_geography": 0.7918781725888325, - "elementary_mathematics": 0.4509283819628647, - "human_aging": 0.6891891891891891, - "college_mathematics": 0.35353535353535354, - "high_school_psychology": 0.8106617647058824, - "formal_logic": 0.384, - "high_school_statistics": 0.5581395348837209, - "international_law": 0.7083333333333334, - "high_school_mathematics": 0.35687732342007433, - "high_school_computer_science": 0.6565656565656566, - "conceptual_physics": 0.5470085470085471, - "miscellaneous": 0.7442455242966752, - "high_school_chemistry": 0.5495049504950495, - "marketing": 0.8240343347639485, - "professional_law": 0.4794520547945205, - "management": 0.7843137254901961, - "college_physics": 0.36633663366336633, - "jurisprudence": 0.719626168224299, - "world_religions": 0.7823529411764706, - "sociology": 0.77, - "us_foreign_policy": 0.8686868686868687, - "high_school_macroeconomics": 0.6066838046272494, - "computer_security": 0.6565656565656566, - "moral_scenarios": 0.24272930648769575, - "moral_disputes": 0.6260869565217392, - "electrical_engineering": 0.5625, - "astronomy": 0.6821192052980133, - "college_biology": 0.7552447552447552 + "high_school_european_history": 0.4451219512195122, + "business_ethics": 0.3333333333333333, + "clinical_knowledge": 0.3446969696969697, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.4088669950738916, + "high_school_physics": 0.26, + "high_school_world_history": 0.3728813559322034, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3291139240506329, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.23232323232323232, + "high_school_biology": 0.3851132686084142, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.3238434163701068, + "philosophy": 0.3935483870967742, + "professional_medicine": 0.2767527675276753, + "nutrition": 0.40327868852459015, + "global_facts": 0.3333333333333333, + "machine_learning": 0.36036036036036034, + "security_studies": 0.45491803278688525, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.36824877250409166, + "prehistory": 0.4148606811145511, + "anatomy": 0.4253731343283582, + "human_sexuality": 0.36153846153846153, + "college_medicine": 0.3488372093023256, + "high_school_government_and_politics": 0.3802083333333333, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.4074074074074074, + "high_school_geography": 0.3096446700507614, + "elementary_mathematics": 0.30238726790450926, + "human_aging": 0.4099099099099099, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.39705882352941174, + "formal_logic": 0.2, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.48333333333333334, + "high_school_mathematics": 0.2862453531598513, + "high_school_computer_science": 0.40404040404040403, + "conceptual_physics": 0.3974358974358974, + "miscellaneous": 0.45524296675191817, + "high_school_chemistry": 0.3613861386138614, + "marketing": 0.48497854077253216, + "professional_law": 0.31637312459230266, + "management": 0.3431372549019608, + "college_physics": 0.297029702970297, + "jurisprudence": 0.3925233644859813, + "world_religions": 0.49411764705882355, + "sociology": 0.455, + "us_foreign_policy": 0.45454545454545453, + "high_school_macroeconomics": 0.3444730077120823, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.2371364653243848, + "moral_disputes": 0.37681159420289856, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.33112582781456956, + "college_biology": 0.36363636363636365 } }, "prompt_4": { - "accuracy": 0.5804790847336432, - "category_acc": { - "high_school_european_history": 0.7804878048780488, - "business_ethics": 0.5858585858585859, - "clinical_knowledge": 0.625, - "medical_genetics": 0.6868686868686869, - "high_school_us_history": 0.7684729064039408, - "high_school_physics": 0.3466666666666667, - "high_school_world_history": 0.8008474576271186, - "virology": 0.5212121212121212, - "high_school_microeconomics": 0.6160337552742616, - "econometrics": 0.4690265486725664, - "college_computer_science": 0.41414141414141414, - "high_school_biology": 0.7702265372168284, - "abstract_algebra": 0.31313131313131315, - "professional_accounting": 0.4875444839857651, - "philosophy": 0.667741935483871, - "professional_medicine": 0.6199261992619927, - "nutrition": 0.6622950819672131, - "global_facts": 0.32323232323232326, - "machine_learning": 0.45045045045045046, - "security_studies": 0.6639344262295082, - "public_relations": 0.6238532110091743, - "professional_psychology": 0.6268412438625205, - "prehistory": 0.718266253869969, - "anatomy": 0.5522388059701493, - "human_sexuality": 0.6461538461538462, - "college_medicine": 0.622093023255814, - "high_school_government_and_politics": 0.7760416666666666, - "college_chemistry": 0.42424242424242425, - "logical_fallacies": 0.6604938271604939, - "high_school_geography": 0.7208121827411168, - "elementary_mathematics": 0.38992042440318303, - "human_aging": 0.6306306306306306, - "college_mathematics": 0.35353535353535354, - "high_school_psychology": 0.7867647058823529, - "formal_logic": 0.4, - "high_school_statistics": 0.4883720930232558, - "international_law": 0.6583333333333333, - "high_school_mathematics": 0.31970260223048325, - "high_school_computer_science": 0.6060606060606061, - "conceptual_physics": 0.5555555555555556, - "miscellaneous": 0.6508951406649617, - "high_school_chemistry": 0.5445544554455446, - "marketing": 0.8369098712446352, - "professional_law": 0.47423352902804955, - "management": 0.6862745098039216, - "college_physics": 0.37623762376237624, - "jurisprudence": 0.7757009345794392, - "world_religions": 0.7823529411764706, - "sociology": 0.735, - "us_foreign_policy": 0.797979797979798, - "high_school_macroeconomics": 0.570694087403599, - "computer_security": 0.6262626262626263, - "moral_scenarios": 0.23937360178970918, - "moral_disputes": 0.6231884057971014, - "electrical_engineering": 0.6111111111111112, - "astronomy": 0.6754966887417219, - "college_biology": 0.7482517482517482 - } - }, - "prompt_5": { - "accuracy": 0.5931355023239184, + "accuracy": 0.34944583482302466, "category_acc": { - "high_school_european_history": 0.75, - "business_ethics": 0.6060606060606061, - "clinical_knowledge": 0.6515151515151515, - "medical_genetics": 0.6666666666666666, - "high_school_us_history": 0.7783251231527094, - "high_school_physics": 0.32666666666666666, - "high_school_world_history": 0.8050847457627118, - "virology": 0.509090909090909, - "high_school_microeconomics": 0.6371308016877637, - "econometrics": 0.4690265486725664, - "college_computer_science": 0.48484848484848486, - "high_school_biology": 0.7508090614886731, - "abstract_algebra": 0.29292929292929293, - "professional_accounting": 0.5160142348754448, - "philosophy": 0.6580645161290323, - "professional_medicine": 0.6051660516605166, - "nutrition": 0.6852459016393443, - "global_facts": 0.3838383838383838, - "machine_learning": 0.43243243243243246, - "security_studies": 0.6475409836065574, - "public_relations": 0.5871559633027523, - "professional_psychology": 0.6268412438625205, - "prehistory": 0.7275541795665634, - "anatomy": 0.5671641791044776, - "human_sexuality": 0.7076923076923077, - "college_medicine": 0.6162790697674418, - "high_school_government_and_politics": 0.8072916666666666, - "college_chemistry": 0.42424242424242425, - "logical_fallacies": 0.6296296296296297, - "high_school_geography": 0.8020304568527918, - "elementary_mathematics": 0.4376657824933687, - "human_aging": 0.6621621621621622, - "college_mathematics": 0.32323232323232326, - "high_school_psychology": 0.7977941176470589, - "formal_logic": 0.368, - "high_school_statistics": 0.5162790697674419, - "international_law": 0.6916666666666667, - "high_school_mathematics": 0.30111524163568776, - "high_school_computer_science": 0.6161616161616161, - "conceptual_physics": 0.5811965811965812, - "miscellaneous": 0.7851662404092071, - "high_school_chemistry": 0.5099009900990099, - "marketing": 0.8454935622317596, - "professional_law": 0.4657534246575342, - "management": 0.7156862745098039, - "college_physics": 0.3465346534653465, - "jurisprudence": 0.7383177570093458, - "world_religions": 0.8176470588235294, - "sociology": 0.72, - "us_foreign_policy": 0.8080808080808081, - "high_school_macroeconomics": 0.5604113110539846, - "computer_security": 0.6666666666666666, + "high_school_european_history": 0.40853658536585363, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.375, + "medical_genetics": 0.3434343434343434, + "high_school_us_history": 0.4039408866995074, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.4152542372881356, + "virology": 0.3696969696969697, + "high_school_microeconomics": 0.31223628691983124, + "econometrics": 0.3274336283185841, + "college_computer_science": 0.18181818181818182, + "high_school_biology": 0.4045307443365696, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.28113879003558717, + "philosophy": 0.41935483870967744, + "professional_medicine": 0.23985239852398524, + "nutrition": 0.39344262295081966, + "global_facts": 0.30303030303030304, + "machine_learning": 0.3333333333333333, + "security_studies": 0.4672131147540984, + "public_relations": 0.3669724770642202, + "professional_psychology": 0.353518821603928, + "prehistory": 0.38699690402476783, + "anatomy": 0.373134328358209, + "human_sexuality": 0.3230769230769231, + "college_medicine": 0.3081395348837209, + "high_school_government_and_politics": 0.40625, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.3888888888888889, + "high_school_geography": 0.29441624365482233, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.38738738738738737, + "college_mathematics": 0.25252525252525254, + "high_school_psychology": 0.38786764705882354, + "formal_logic": 0.24, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.475, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.33760683760683763, + "miscellaneous": 0.4117647058823529, + "high_school_chemistry": 0.39603960396039606, + "marketing": 0.4892703862660944, + "professional_law": 0.3242009132420091, + "management": 0.3431372549019608, + "college_physics": 0.2079207920792079, + "jurisprudence": 0.40186915887850466, + "world_religions": 0.48823529411764705, + "sociology": 0.385, + "us_foreign_policy": 0.494949494949495, + "high_school_macroeconomics": 0.3213367609254499, + "computer_security": 0.48484848484848486, "moral_scenarios": 0.24272930648769575, - "moral_disputes": 0.6579710144927536, - "electrical_engineering": 0.5833333333333334, - "astronomy": 0.7019867549668874, - "college_biology": 0.7622377622377622 + "moral_disputes": 0.3072463768115942, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.3509933774834437, + "college_biology": 0.3986013986013986 + } + }, + "prompt_5": { + "accuracy": 0.36396138720057203, + "category_acc": { + "high_school_european_history": 0.43902439024390244, + "business_ethics": 0.32323232323232326, + "clinical_knowledge": 0.39015151515151514, + "medical_genetics": 0.36363636363636365, + "high_school_us_history": 0.3891625615763547, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.4491525423728814, + "virology": 0.3575757575757576, + "high_school_microeconomics": 0.3291139240506329, + "econometrics": 0.26548672566371684, + "college_computer_science": 0.20202020202020202, + "high_school_biology": 0.39158576051779936, + "abstract_algebra": 0.3333333333333333, + "professional_accounting": 0.2918149466192171, + "philosophy": 0.4129032258064516, + "professional_medicine": 0.25461254612546125, + "nutrition": 0.4131147540983607, + "global_facts": 0.29292929292929293, + "machine_learning": 0.32432432432432434, + "security_studies": 0.4713114754098361, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.3567921440261866, + "prehistory": 0.4117647058823529, + "anatomy": 0.4626865671641791, + "human_sexuality": 0.38461538461538464, + "college_medicine": 0.3023255813953488, + "high_school_government_and_politics": 0.4270833333333333, + "college_chemistry": 0.25252525252525254, + "logical_fallacies": 0.37037037037037035, + "high_school_geography": 0.3553299492385787, + "elementary_mathematics": 0.29973474801061006, + "human_aging": 0.3918918918918919, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.4338235294117647, + "formal_logic": 0.24, + "high_school_statistics": 0.2744186046511628, + "international_law": 0.475, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.45454545454545453, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.4616368286445013, + "high_school_chemistry": 0.38613861386138615, + "marketing": 0.5407725321888412, + "professional_law": 0.319634703196347, + "management": 0.37254901960784315, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.3644859813084112, + "world_religions": 0.5588235294117647, + "sociology": 0.43, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.3264781491002571, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.3652173913043478, + "electrical_engineering": 0.4305555555555556, + "astronomy": 0.40397350993377484, + "college_biology": 0.34265734265734266 } } }, "c_eval": { "prompt_1": { - "accuracy": 0.4138187221396731 + "accuracy": 0.29420505200594355 }, "prompt_2": { - "accuracy": 0.4309063893016345 + "accuracy": 0.2674591381872214 }, "prompt_3": { - "accuracy": 0.4063893016344725 + "accuracy": 0.287518573551263 }, "prompt_4": { - "accuracy": 0.4026745913818722 + "accuracy": 0.3031203566121842 }, "prompt_5": { - "accuracy": 0.3179791976225854 + "accuracy": 0.2511144130757801 } }, "c_eval_full": { - "prompt_1": { - "accuracy": 0.39975093399750933, + "prompt_1": -1, + "prompt_2": { + "accuracy": 0.2671232876712329, "category_acc": { - "computer_network": 0.5, - "operating_system": 0.4583333333333333, - "computer_architecture": 0.34615384615384615, - "college_programming": 0.5476190476190477, - "college_physics": 0.5, - "college_chemistry": 0.41379310344827586, - "advanced_mathematics": 0.4583333333333333, - "probability_and_statistics": 0.08695652173913043, - "discrete_mathematics": 0.09523809523809523, - "electrical_engineer": 0.2619047619047619, - "metrology_engineer": 0.4482758620689655, + "computer_network": 0.4166666666666667, + "operating_system": 0.3333333333333333, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2619047619047619, + "college_physics": 0.2916666666666667, + "college_chemistry": 0.13793103448275862, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.21739130434782608, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.14285714285714285, + "metrology_engineer": 0.27586206896551724, "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.2916666666666667, - "high_school_chemistry": 0.5, - "high_school_biology": 0.4583333333333333, - "middle_school_mathematics": 0.375, - "middle_school_biology": 0.46153846153846156, - "middle_school_physics": 0.625, - "middle_school_chemistry": 0.36, - "veterinary_medicine": 0.5714285714285714, - "college_economics": 0.38333333333333336, - "business_administration": 0.2894736842105263, - "marxism": 0.5833333333333334, - "mao_zedong_thought": 0.6551724137931034, - "education_science": 0.5294117647058824, - "teacher_qualification": 0.5510204081632653, - "high_school_politics": 0.2916666666666667, - "high_school_geography": 0.375, - "middle_school_politics": 0.38461538461538464, - "middle_school_geography": 0.29411764705882354, - "modern_chinese_history": 0.35714285714285715, - "ideological_and_moral_cultivation": 0.4166666666666667, - "logic": 0.14814814814814814, + "high_school_physics": 0.25, + "high_school_chemistry": 0.3333333333333333, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.2, + "veterinary_medicine": 0.21428571428571427, + "college_economics": 0.16666666666666666, + "business_administration": 0.23684210526315788, + "marxism": 0.375, + "mao_zedong_thought": 0.2413793103448276, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.4166666666666667, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.125, + "logic": 0.2222222222222222, "law": 0.27586206896551724, - "chinese_language_and_literature": 0.4642857142857143, - "art_studies": 0.34210526315789475, - "professional_tour_guide": 0.3235294117647059, + "chinese_language_and_literature": 0.39285714285714285, + "art_studies": 0.18421052631578946, + "professional_tour_guide": 0.23529411764705882, "legal_professional": 0.2857142857142857, - "high_school_chinese": 0.2916666666666667, - "high_school_history": 0.56, - "middle_school_history": 0.4444444444444444, - "civil_servant": 0.3269230769230769, - "sports_science": 0.5416666666666666, - "plant_protection": 0.6296296296296297, - "basic_medicine": 0.4583333333333333, - "clinical_medicine": 0.37037037037037035, - "urban_and_rural_planner": 0.43137254901960786, - "accountant": 0.3148148148148148, - "fire_engineer": 0.4166666666666667, - "environmental_impact_assessment_engineer": 0.3611111111111111, - "tax_accountant": 0.2962962962962963, - "physician": 0.46296296296296297 + "high_school_chinese": 0.375, + "high_school_history": 0.12, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.34615384615384615, + "sports_science": 0.2916666666666667, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.4166666666666667, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.2549019607843137, + "accountant": 0.2777777777777778, + "fire_engineer": 0.19444444444444445, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.24074074074074073, + "physician": 0.3148148148148148 } }, - "prompt_2": { - "accuracy": 0.4277708592777086, + "prompt_3": { + "accuracy": 0.27895392278953923, "category_acc": { - "computer_network": 0.4583333333333333, - "operating_system": 0.5833333333333334, - "computer_architecture": 0.34615384615384615, - "college_programming": 0.5, - "college_physics": 0.5, - "college_chemistry": 0.41379310344827586, - "advanced_mathematics": 0.4583333333333333, - "probability_and_statistics": 0.13043478260869565, - "discrete_mathematics": 0.23809523809523808, - "electrical_engineer": 0.21428571428571427, - "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.4166666666666667, - "high_school_chemistry": 0.5, - "high_school_biology": 0.4166666666666667, - "middle_school_mathematics": 0.375, - "middle_school_biology": 0.6153846153846154, - "middle_school_physics": 0.5416666666666666, - "middle_school_chemistry": 0.52, - "veterinary_medicine": 0.5714285714285714, - "college_economics": 0.48333333333333334, - "business_administration": 0.34210526315789475, - "marxism": 0.5833333333333334, - "mao_zedong_thought": 0.5172413793103449, - "education_science": 0.5882352941176471, - "teacher_qualification": 0.5306122448979592, - "high_school_politics": 0.2916666666666667, - "high_school_geography": 0.4583333333333333, - "middle_school_politics": 0.5769230769230769, - "middle_school_geography": 0.29411764705882354, - "modern_chinese_history": 0.32142857142857145, - "ideological_and_moral_cultivation": 0.5, - "logic": 0.2962962962962963, - "law": 0.3448275862068966, - "chinese_language_and_literature": 0.35714285714285715, - "art_studies": 0.39473684210526316, - "professional_tour_guide": 0.3235294117647059, - "legal_professional": 0.35714285714285715, - "high_school_chinese": 0.20833333333333334, - "high_school_history": 0.52, - "middle_school_history": 0.5185185185185185, - "civil_servant": 0.3269230769230769, - "sports_science": 0.5833333333333334, - "plant_protection": 0.5555555555555556, - "basic_medicine": 0.3333333333333333, - "clinical_medicine": 0.48148148148148145, - "urban_and_rural_planner": 0.5294117647058824, - "accountant": 0.37037037037037035, - "fire_engineer": 0.4166666666666667, - "environmental_impact_assessment_engineer": 0.4166666666666667, - "tax_accountant": 0.4074074074074074, - "physician": 0.5 - } - }, - "prompt_3": { - "accuracy": 0.4122042341220423, - "category_acc": { - "computer_network": 0.5, - "operating_system": 0.4583333333333333, - "computer_architecture": 0.46153846153846156, - "college_programming": 0.5238095238095238, - "college_physics": 0.5, - "college_chemistry": 0.41379310344827586, - "advanced_mathematics": 0.4583333333333333, - "probability_and_statistics": 0.17391304347826086, - "discrete_mathematics": 0.14285714285714285, - "electrical_engineer": 0.19047619047619047, - "metrology_engineer": 0.41379310344827586, - "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.375, - "high_school_biology": 0.3333333333333333, - "middle_school_mathematics": 0.375, - "middle_school_biology": 0.6153846153846154, - "middle_school_physics": 0.5, - "middle_school_chemistry": 0.28, - "veterinary_medicine": 0.5357142857142857, - "college_economics": 0.45, + "computer_network": 0.3333333333333333, + "operating_system": 0.2916666666666667, + "computer_architecture": 0.2692307692307692, + "college_programming": 0.2857142857142857, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.3333333333333333, + "probability_and_statistics": 0.30434782608695654, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.3793103448275862, + "high_school_mathematics": 0.08695652173913043, + "high_school_physics": 0.25, + "high_school_chemistry": 0.20833333333333334, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.16666666666666666, + "middle_school_biology": 0.4230769230769231, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.12, + "veterinary_medicine": 0.25, + "college_economics": 0.31666666666666665, "business_administration": 0.34210526315789475, - "marxism": 0.3333333333333333, - "mao_zedong_thought": 0.5517241379310345, - "education_science": 0.5294117647058824, - "teacher_qualification": 0.5306122448979592, - "high_school_politics": 0.25, - "high_school_geography": 0.5833333333333334, - "middle_school_politics": 0.34615384615384615, - "middle_school_geography": 0.4117647058823529, - "modern_chinese_history": 0.39285714285714285, - "ideological_and_moral_cultivation": 0.5833333333333334, + "marxism": 0.20833333333333334, + "mao_zedong_thought": 0.3103448275862069, + "education_science": 0.3235294117647059, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.5416666666666666, + "high_school_geography": 0.16666666666666666, + "middle_school_politics": 0.11538461538461539, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.21428571428571427, + "ideological_and_moral_cultivation": 0.2916666666666667, "logic": 0.25925925925925924, - "law": 0.3103448275862069, - "chinese_language_and_literature": 0.5, - "art_studies": 0.4473684210526316, - "professional_tour_guide": 0.4117647058823529, - "legal_professional": 0.39285714285714285, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.2894736842105263, + "professional_tour_guide": 0.29411764705882354, + "legal_professional": 0.21428571428571427, "high_school_chinese": 0.20833333333333334, - "high_school_history": 0.52, - "middle_school_history": 0.5555555555555556, - "civil_servant": 0.3269230769230769, - "sports_science": 0.5, - "plant_protection": 0.5925925925925926, - "basic_medicine": 0.4583333333333333, - "clinical_medicine": 0.3333333333333333, - "urban_and_rural_planner": 0.5490196078431373, + "high_school_history": 0.32, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.28846153846153844, + "sports_science": 0.375, + "plant_protection": 0.25925925925925924, + "basic_medicine": 0.2916666666666667, + "clinical_medicine": 0.2962962962962963, + "urban_and_rural_planner": 0.27450980392156865, "accountant": 0.3333333333333333, - "fire_engineer": 0.4444444444444444, - "environmental_impact_assessment_engineer": 0.3333333333333333, - "tax_accountant": 0.3333333333333333, - "physician": 0.42592592592592593 + "fire_engineer": 0.16666666666666666, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.16666666666666666, + "physician": 0.2222222222222222 } }, "prompt_4": { - "accuracy": 0.4202988792029888, + "accuracy": 0.28019925280199254, "category_acc": { - "computer_network": 0.4166666666666667, - "operating_system": 0.5416666666666666, - "computer_architecture": 0.46153846153846156, - "college_programming": 0.5238095238095238, - "college_physics": 0.5, - "college_chemistry": 0.3793103448275862, - "advanced_mathematics": 0.4583333333333333, + "computer_network": 0.375, + "operating_system": 0.5, + "computer_architecture": 0.4230769230769231, + "college_programming": 0.23809523809523808, + "college_physics": 0.4166666666666667, + "college_chemistry": 0.2413793103448276, + "advanced_mathematics": 0.4166666666666667, "probability_and_statistics": 0.17391304347826086, - "discrete_mathematics": 0.14285714285714285, - "electrical_engineer": 0.23809523809523808, - "metrology_engineer": 0.4482758620689655, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.1724137931034483, "high_school_mathematics": 0.17391304347826086, - "high_school_physics": 0.3333333333333333, - "high_school_chemistry": 0.4166666666666667, - "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.375, - "middle_school_biology": 0.5, - "middle_school_physics": 0.5416666666666666, - "middle_school_chemistry": 0.52, - "veterinary_medicine": 0.6071428571428571, - "college_economics": 0.4, - "business_administration": 0.34210526315789475, - "marxism": 0.4583333333333333, - "mao_zedong_thought": 0.4827586206896552, - "education_science": 0.5, - "teacher_qualification": 0.5102040816326531, - "high_school_politics": 0.20833333333333334, - "high_school_geography": 0.5416666666666666, - "middle_school_politics": 0.5769230769230769, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.2916666666666667, + "high_school_biology": 0.25, + "middle_school_mathematics": 0.041666666666666664, + "middle_school_biology": 0.19230769230769232, + "middle_school_physics": 0.25, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.2857142857142857, + "college_economics": 0.3, + "business_administration": 0.23684210526315788, + "marxism": 0.2916666666666667, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.4117647058823529, + "teacher_qualification": 0.32653061224489793, + "high_school_politics": 0.25, + "high_school_geography": 0.20833333333333334, + "middle_school_politics": 0.2692307692307692, "middle_school_geography": 0.47058823529411764, - "modern_chinese_history": 0.42857142857142855, - "ideological_and_moral_cultivation": 0.5416666666666666, - "logic": 0.2222222222222222, - "law": 0.3448275862068966, - "chinese_language_and_literature": 0.5, - "art_studies": 0.47368421052631576, - "professional_tour_guide": 0.38235294117647056, - "legal_professional": 0.39285714285714285, - "high_school_chinese": 0.2916666666666667, - "high_school_history": 0.56, - "middle_school_history": 0.48148148148148145, - "civil_servant": 0.40384615384615385, - "sports_science": 0.4583333333333333, - "plant_protection": 0.5185185185185185, - "basic_medicine": 0.4583333333333333, - "clinical_medicine": 0.4074074074074074, - "urban_and_rural_planner": 0.45098039215686275, - "accountant": 0.37037037037037035, - "fire_engineer": 0.5, - "environmental_impact_assessment_engineer": 0.3611111111111111, - "tax_accountant": 0.3148148148148148, - "physician": 0.46296296296296297 + "modern_chinese_history": 0.10714285714285714, + "ideological_and_moral_cultivation": 0.20833333333333334, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.3157894736842105, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.44, + "middle_school_history": 0.25925925925925924, + "civil_servant": 0.28846153846153844, + "sports_science": 0.20833333333333334, + "plant_protection": 0.3333333333333333, + "basic_medicine": 0.375, + "clinical_medicine": 0.25925925925925924, + "urban_and_rural_planner": 0.3137254901960784, + "accountant": 0.2777777777777778, + "fire_engineer": 0.25, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.14814814814814814, + "physician": 0.25925925925925924 } }, "prompt_5": { - "accuracy": 0.3150684931506849, + "accuracy": 0.25965130759651306, "category_acc": { - "computer_network": 0.3333333333333333, + "computer_network": 0.16666666666666666, "operating_system": 0.25, - "computer_architecture": 0.5384615384615384, - "college_programming": 0.47619047619047616, - "college_physics": 0.25, - "college_chemistry": 0.3448275862068966, - "advanced_mathematics": 0.4166666666666667, - "probability_and_statistics": 0.17391304347826086, - "discrete_mathematics": 0.19047619047619047, - "electrical_engineer": 0.2619047619047619, - "metrology_engineer": 0.27586206896551724, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.21428571428571427, + "college_physics": 0.3333333333333333, + "college_chemistry": 0.1724137931034483, + "advanced_mathematics": 0.375, + "probability_and_statistics": 0.2608695652173913, + "discrete_mathematics": 0.2857142857142857, + "electrical_engineer": 0.2857142857142857, + "metrology_engineer": 0.3103448275862069, "high_school_mathematics": 0.21739130434782608, - "high_school_physics": 0.20833333333333334, - "high_school_chemistry": 0.375, + "high_school_physics": 0.125, + "high_school_chemistry": 0.125, "high_school_biology": 0.2916666666666667, - "middle_school_mathematics": 0.3333333333333333, - "middle_school_biology": 0.38461538461538464, - "middle_school_physics": 0.20833333333333334, - "middle_school_chemistry": 0.32, - "veterinary_medicine": 0.32142857142857145, - "college_economics": 0.35, - "business_administration": 0.15789473684210525, - "marxism": 0.4583333333333333, - "mao_zedong_thought": 0.5172413793103449, - "education_science": 0.2647058823529412, - "teacher_qualification": 0.3469387755102041, - "high_school_politics": 0.16666666666666666, - "high_school_geography": 0.375, - "middle_school_politics": 0.3076923076923077, - "middle_school_geography": 0.17647058823529413, - "modern_chinese_history": 0.17857142857142858, - "ideological_and_moral_cultivation": 0.375, - "logic": 0.14814814814814814, - "law": 0.3103448275862069, - "chinese_language_and_literature": 0.2857142857142857, - "art_studies": 0.4473684210526316, - "professional_tour_guide": 0.35294117647058826, - "legal_professional": 0.17857142857142858, - "high_school_chinese": 0.25, - "high_school_history": 0.44, - "middle_school_history": 0.37037037037037035, - "civil_servant": 0.3076923076923077, + "middle_school_mathematics": 0.20833333333333334, + "middle_school_biology": 0.3076923076923077, + "middle_school_physics": 0.3333333333333333, + "middle_school_chemistry": 0.16, + "veterinary_medicine": 0.25, + "college_economics": 0.36666666666666664, + "business_administration": 0.34210526315789475, + "marxism": 0.375, + "mao_zedong_thought": 0.3793103448275862, + "education_science": 0.35294117647058826, + "teacher_qualification": 0.20408163265306123, + "high_school_politics": 0.375, + "high_school_geography": 0.125, + "middle_school_politics": 0.19230769230769232, + "middle_school_geography": 0.23529411764705882, + "modern_chinese_history": 0.25, + "ideological_and_moral_cultivation": 0.125, + "logic": 0.25925925925925924, + "law": 0.20689655172413793, + "chinese_language_and_literature": 0.25, + "art_studies": 0.21052631578947367, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.21428571428571427, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.4, + "middle_school_history": 0.3333333333333333, + "civil_servant": 0.2692307692307692, "sports_science": 0.3333333333333333, - "plant_protection": 0.48148148148148145, - "basic_medicine": 0.375, - "clinical_medicine": 0.14814814814814814, - "urban_and_rural_planner": 0.35294117647058826, - "accountant": 0.25925925925925924, - "fire_engineer": 0.3333333333333333, + "plant_protection": 0.2222222222222222, + "basic_medicine": 0.20833333333333334, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.1568627450980392, + "accountant": 0.35185185185185186, + "fire_engineer": 0.2222222222222222, "environmental_impact_assessment_engineer": 0.3055555555555556, - "tax_accountant": 0.2777777777777778, - "physician": 0.37037037037037035 + "tax_accountant": 0.1111111111111111, + "physician": 0.16666666666666666 } } }, "cmmlu": { "prompt_1": { - "accuracy": 0.48028673835125446 + "accuracy": 0.23297491039426524 }, "prompt_2": { - "accuracy": 0.5089605734767025 + "accuracy": 0.25448028673835127 }, "prompt_3": { - "accuracy": 0.4767025089605735 + "accuracy": 0.25448028673835127 }, "prompt_4": { - "accuracy": 0.4731182795698925 + "accuracy": 0.3118279569892473 }, "prompt_5": { - "accuracy": 0.3906810035842294 + "accuracy": 0.2974910394265233 } }, "cmmlu_full": { "prompt_1": { - "accuracy": 0.3975133828354343, + "accuracy": 0.2617855292695562, "category_acc": { - "agronomy": 0.3609467455621302, - "anatomy": 0.30405405405405406, - "ancient_chinese": 0.22560975609756098, - "arts": 0.46875, - "astronomy": 0.30303030303030304, - "business_ethics": 0.3923444976076555, - "chinese_civil_service_exam": 0.3625, - "chinese_driving_rule": 0.5648854961832062, - "chinese_food_culture": 0.3088235294117647, - "chinese_foreign_policy": 0.45794392523364486, - "chinese_history": 0.4117647058823529, - "chinese_literature": 0.3333333333333333, - "chinese_teacher_qualification": 0.5027932960893855, - "clinical_knowledge": 0.38396624472573837, - "college_actuarial_science": 0.24528301886792453, - "college_education": 0.45794392523364486, - "college_engineering_hydrology": 0.49056603773584906, - "college_law": 0.2962962962962963, - "college_mathematics": 0.29523809523809524, - "college_medical_statistics": 0.41509433962264153, - "college_medicine": 0.42857142857142855, - "computer_science": 0.4264705882352941, - "computer_security": 0.5146198830409356, - "conceptual_physics": 0.3333333333333333, - "construction_project_management": 0.3597122302158273, - "economics": 0.42138364779874216, - "education": 0.4171779141104294, - "electrical_engineering": 0.46511627906976744, - "elementary_chinese": 0.2857142857142857, - "elementary_commonsense": 0.3838383838383838, - "elementary_information_and_technology": 0.5672268907563025, - "elementary_mathematics": 0.33043478260869563, - "ethnology": 0.34074074074074073, - "food_science": 0.44755244755244755, - "genetics": 0.42045454545454547, - "global_facts": 0.37583892617449666, - "high_school_biology": 0.3076923076923077, + "agronomy": 0.2485207100591716, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.21951219512195122, + "arts": 0.25, + "astronomy": 0.24242424242424243, + "business_ethics": 0.2535885167464115, + "chinese_civil_service_exam": 0.2125, + "chinese_driving_rule": 0.2595419847328244, + "chinese_food_culture": 0.23529411764705882, + "chinese_foreign_policy": 0.2897196261682243, + "chinese_history": 0.2631578947368421, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.20675105485232068, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.29906542056074764, + "college_engineering_hydrology": 0.2358490566037736, + "college_law": 0.2037037037037037, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.2830188679245283, + "college_medicine": 0.2857142857142857, + "computer_science": 0.30392156862745096, + "computer_security": 0.21052631578947367, + "conceptual_physics": 0.2789115646258503, + "construction_project_management": 0.33093525179856115, + "economics": 0.2641509433962264, + "education": 0.2883435582822086, + "electrical_engineering": 0.29069767441860467, + "elementary_chinese": 0.21428571428571427, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.23109243697478993, + "elementary_mathematics": 0.2565217391304348, + "ethnology": 0.26666666666666666, + "food_science": 0.2867132867132867, + "genetics": 0.2556818181818182, + "global_facts": 0.28859060402684567, + "high_school_biology": 0.26627218934911245, "high_school_chemistry": 0.22727272727272727, - "high_school_geography": 0.3389830508474576, - "high_school_mathematics": 0.2926829268292683, - "high_school_physics": 0.35454545454545455, - "high_school_politics": 0.3706293706293706, - "human_sexuality": 0.4365079365079365, - "international_law": 0.3675675675675676, - "journalism": 0.3953488372093023, - "jurisprudence": 0.36009732360097324, - "legal_and_moral_basis": 0.5654205607476636, - "logical": 0.5203252032520326, - "machine_learning": 0.4344262295081967, - "management": 0.4, - "marketing": 0.40555555555555556, - "marxist_theory": 0.41798941798941797, - "modern_chinese": 0.3706896551724138, - "nutrition": 0.43448275862068964, - "philosophy": 0.41904761904761906, - "professional_accounting": 0.4342857142857143, - "professional_law": 0.33649289099526064, - "professional_medicine": 0.34308510638297873, - "professional_psychology": 0.41379310344827586, - "public_relations": 0.5172413793103449, - "security_study": 0.4888888888888889, - "sociology": 0.47345132743362833, - "sports_science": 0.3878787878787879, - "traditional_chinese_medicine": 0.35135135135135137, - "virology": 0.47337278106508873, - "world_history": 0.40993788819875776, - "world_religions": 0.46875 + "high_school_geography": 0.211864406779661, + "high_school_mathematics": 0.20121951219512196, + "high_school_physics": 0.3090909090909091, + "high_school_politics": 0.23076923076923078, + "human_sexuality": 0.2777777777777778, + "international_law": 0.2972972972972973, + "journalism": 0.31976744186046513, + "jurisprudence": 0.2360097323600973, + "legal_and_moral_basis": 0.3037383177570093, + "logical": 0.2682926829268293, + "machine_learning": 0.2786885245901639, + "management": 0.23333333333333334, + "marketing": 0.25, + "marxist_theory": 0.2751322751322751, + "modern_chinese": 0.19827586206896552, + "nutrition": 0.25517241379310346, + "philosophy": 0.3333333333333333, + "professional_accounting": 0.25142857142857145, + "professional_law": 0.23696682464454977, + "professional_medicine": 0.27925531914893614, + "professional_psychology": 0.27155172413793105, + "public_relations": 0.29310344827586204, + "security_study": 0.2814814814814815, + "sociology": 0.252212389380531, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2648648648648649, + "virology": 0.28994082840236685, + "world_history": 0.2795031055900621, + "world_religions": 0.25625 } }, "prompt_2": { - "accuracy": 0.4317043688482127, + "accuracy": 0.2583318943187705, "category_acc": { - "agronomy": 0.4556213017751479, - "anatomy": 0.2905405405405405, - "ancient_chinese": 0.1951219512195122, - "arts": 0.5, - "astronomy": 0.4, - "business_ethics": 0.4449760765550239, - "chinese_civil_service_exam": 0.35625, - "chinese_driving_rule": 0.5114503816793893, - "chinese_food_culture": 0.36764705882352944, - "chinese_foreign_policy": 0.5046728971962616, - "chinese_history": 0.4458204334365325, - "chinese_literature": 0.3382352941176471, - "chinese_teacher_qualification": 0.5586592178770949, - "clinical_knowledge": 0.43037974683544306, - "college_actuarial_science": 0.25471698113207547, - "college_education": 0.514018691588785, - "college_engineering_hydrology": 0.4339622641509434, - "college_law": 0.37962962962962965, - "college_mathematics": 0.26666666666666666, - "college_medical_statistics": 0.4056603773584906, - "college_medicine": 0.4358974358974359, - "computer_science": 0.45098039215686275, - "computer_security": 0.6198830409356725, - "conceptual_physics": 0.38095238095238093, - "construction_project_management": 0.38848920863309355, - "economics": 0.5220125786163522, - "education": 0.4601226993865031, - "electrical_engineering": 0.48255813953488375, - "elementary_chinese": 0.3055555555555556, - "elementary_commonsense": 0.3484848484848485, - "elementary_information_and_technology": 0.6722689075630253, - "elementary_mathematics": 0.34347826086956523, - "ethnology": 0.362962962962963, - "food_science": 0.4965034965034965, - "genetics": 0.4715909090909091, - "global_facts": 0.4429530201342282, - "high_school_biology": 0.3668639053254438, - "high_school_chemistry": 0.2727272727272727, - "high_school_geography": 0.4067796610169492, - "high_school_mathematics": 0.34146341463414637, - "high_school_physics": 0.32727272727272727, - "high_school_politics": 0.44755244755244755, - "human_sexuality": 0.42857142857142855, - "international_law": 0.3783783783783784, - "journalism": 0.45930232558139533, - "jurisprudence": 0.38686131386861317, - "legal_and_moral_basis": 0.677570093457944, - "logical": 0.44715447154471544, - "machine_learning": 0.4344262295081967, - "management": 0.49523809523809526, - "marketing": 0.48333333333333334, - "marxist_theory": 0.4656084656084656, - "modern_chinese": 0.35344827586206895, - "nutrition": 0.5241379310344828, - "philosophy": 0.49523809523809526, - "professional_accounting": 0.5085714285714286, - "professional_law": 0.3222748815165877, - "professional_medicine": 0.3829787234042553, - "professional_psychology": 0.4525862068965517, - "public_relations": 0.5287356321839081, - "security_study": 0.5259259259259259, - "sociology": 0.4557522123893805, - "sports_science": 0.4, - "traditional_chinese_medicine": 0.34594594594594597, - "virology": 0.5207100591715976, - "world_history": 0.45962732919254656, - "world_religions": 0.46875 + "agronomy": 0.2603550295857988, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.21951219512195122, + "arts": 0.2375, + "astronomy": 0.24242424242424243, + "business_ethics": 0.3014354066985646, + "chinese_civil_service_exam": 0.2625, + "chinese_driving_rule": 0.31297709923664124, + "chinese_food_culture": 0.25, + "chinese_foreign_policy": 0.2336448598130841, + "chinese_history": 0.25386996904024767, + "chinese_literature": 0.23529411764705882, + "chinese_teacher_qualification": 0.24022346368715083, + "clinical_knowledge": 0.27848101265822783, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.1588785046728972, + "college_engineering_hydrology": 0.29245283018867924, + "college_law": 0.25925925925925924, + "college_mathematics": 0.22857142857142856, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.2490842490842491, + "computer_science": 0.27941176470588236, + "computer_security": 0.2573099415204678, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2446043165467626, + "economics": 0.3081761006289308, + "education": 0.22699386503067484, + "electrical_engineering": 0.27906976744186046, + "elementary_chinese": 0.23015873015873015, + "elementary_commonsense": 0.2878787878787879, + "elementary_information_and_technology": 0.23949579831932774, + "elementary_mathematics": 0.23043478260869565, + "ethnology": 0.24444444444444444, + "food_science": 0.3076923076923077, + "genetics": 0.26136363636363635, + "global_facts": 0.3087248322147651, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.24242424242424243, + "high_school_geography": 0.15254237288135594, + "high_school_mathematics": 0.25, + "high_school_physics": 0.22727272727272727, + "high_school_politics": 0.3076923076923077, + "human_sexuality": 0.2698412698412698, + "international_law": 0.2810810810810811, + "journalism": 0.25, + "jurisprudence": 0.22871046228710462, + "legal_and_moral_basis": 0.24766355140186916, + "logical": 0.2601626016260163, + "machine_learning": 0.319672131147541, + "management": 0.2571428571428571, + "marketing": 0.2722222222222222, + "marxist_theory": 0.2962962962962963, + "modern_chinese": 0.25, + "nutrition": 0.25517241379310346, + "philosophy": 0.2857142857142857, + "professional_accounting": 0.2857142857142857, + "professional_law": 0.2559241706161137, + "professional_medicine": 0.2712765957446808, + "professional_psychology": 0.2543103448275862, + "public_relations": 0.22988505747126436, + "security_study": 0.21481481481481482, + "sociology": 0.2168141592920354, + "sports_science": 0.30303030303030304, + "traditional_chinese_medicine": 0.21081081081081082, + "virology": 0.26627218934911245, + "world_history": 0.32298136645962733, + "world_religions": 0.25625 } }, "prompt_3": { - "accuracy": 0.4194439647729235, + "accuracy": 0.26817475392850976, "category_acc": { - "agronomy": 0.4556213017751479, - "anatomy": 0.2635135135135135, - "ancient_chinese": 0.21341463414634146, - "arts": 0.54375, - "astronomy": 0.3575757575757576, - "business_ethics": 0.4258373205741627, - "chinese_civil_service_exam": 0.33125, - "chinese_driving_rule": 0.549618320610687, - "chinese_food_culture": 0.375, - "chinese_foreign_policy": 0.5607476635514018, - "chinese_history": 0.43343653250773995, - "chinese_literature": 0.31862745098039214, - "chinese_teacher_qualification": 0.4972067039106145, - "clinical_knowledge": 0.3755274261603376, - "college_actuarial_science": 0.22641509433962265, - "college_education": 0.514018691588785, - "college_engineering_hydrology": 0.41509433962264153, - "college_law": 0.3611111111111111, - "college_mathematics": 0.3047619047619048, - "college_medical_statistics": 0.4339622641509434, - "college_medicine": 0.41025641025641024, - "computer_science": 0.45098039215686275, - "computer_security": 0.5321637426900585, - "conceptual_physics": 0.36054421768707484, - "construction_project_management": 0.3597122302158273, - "economics": 0.4968553459119497, - "education": 0.49693251533742333, - "electrical_engineering": 0.47093023255813954, - "elementary_chinese": 0.2976190476190476, - "elementary_commonsense": 0.4292929292929293, - "elementary_information_and_technology": 0.5840336134453782, - "elementary_mathematics": 0.36086956521739133, - "ethnology": 0.3925925925925926, - "food_science": 0.4755244755244755, - "genetics": 0.4715909090909091, - "global_facts": 0.47651006711409394, + "agronomy": 0.27218934911242604, + "anatomy": 0.25675675675675674, + "ancient_chinese": 0.27439024390243905, + "arts": 0.25, + "astronomy": 0.22424242424242424, + "business_ethics": 0.2727272727272727, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.3969465648854962, + "chinese_food_culture": 0.27941176470588236, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.26625386996904027, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.26256983240223464, + "clinical_knowledge": 0.20675105485232068, + "college_actuarial_science": 0.27358490566037735, + "college_education": 0.2523364485981308, + "college_engineering_hydrology": 0.2358490566037736, + "college_law": 0.2962962962962963, + "college_mathematics": 0.21904761904761905, + "college_medical_statistics": 0.2641509433962264, + "college_medicine": 0.28205128205128205, + "computer_science": 0.29901960784313725, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.3129251700680272, + "construction_project_management": 0.2949640287769784, + "economics": 0.3270440251572327, + "education": 0.20245398773006135, + "electrical_engineering": 0.28488372093023256, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.2474747474747475, + "elementary_information_and_technology": 0.2773109243697479, + "elementary_mathematics": 0.24347826086956523, + "ethnology": 0.2740740740740741, + "food_science": 0.34265734265734266, + "genetics": 0.21022727272727273, + "global_facts": 0.2684563758389262, "high_school_biology": 0.27218934911242604, "high_school_chemistry": 0.2196969696969697, - "high_school_geography": 0.4067796610169492, - "high_school_mathematics": 0.2926829268292683, - "high_school_physics": 0.2727272727272727, - "high_school_politics": 0.32867132867132864, - "human_sexuality": 0.40476190476190477, - "international_law": 0.3675675675675676, - "journalism": 0.46511627906976744, - "jurisprudence": 0.3722627737226277, - "legal_and_moral_basis": 0.6728971962616822, - "logical": 0.43902439024390244, - "machine_learning": 0.4098360655737705, - "management": 0.49523809523809526, - "marketing": 0.5111111111111111, - "marxist_theory": 0.4708994708994709, - "modern_chinese": 0.3017241379310345, - "nutrition": 0.42758620689655175, - "philosophy": 0.5333333333333333, - "professional_accounting": 0.5028571428571429, - "professional_law": 0.35071090047393366, - "professional_medicine": 0.35106382978723405, - "professional_psychology": 0.4482758620689655, - "public_relations": 0.4885057471264368, - "security_study": 0.5185185185185185, - "sociology": 0.47345132743362833, - "sports_science": 0.4121212121212121, - "traditional_chinese_medicine": 0.3783783783783784, - "virology": 0.5088757396449705, - "world_history": 0.43478260869565216, - "world_religions": 0.48125 + "high_school_geography": 0.2627118644067797, + "high_school_mathematics": 0.24390243902439024, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.3146853146853147, + "human_sexuality": 0.30158730158730157, + "international_law": 0.2918918918918919, + "journalism": 0.2558139534883721, + "jurisprudence": 0.22384428223844283, + "legal_and_moral_basis": 0.2523364485981308, + "logical": 0.2682926829268293, + "machine_learning": 0.29508196721311475, + "management": 0.2714285714285714, + "marketing": 0.23333333333333334, + "marxist_theory": 0.328042328042328, + "modern_chinese": 0.23275862068965517, + "nutrition": 0.2896551724137931, + "philosophy": 0.3142857142857143, + "professional_accounting": 0.3142857142857143, + "professional_law": 0.25118483412322273, + "professional_medicine": 0.30851063829787234, + "professional_psychology": 0.28879310344827586, + "public_relations": 0.27586206896551724, + "security_study": 0.1925925925925926, + "sociology": 0.2743362831858407, + "sports_science": 0.26666666666666666, + "traditional_chinese_medicine": 0.2756756756756757, + "virology": 0.26627218934911245, + "world_history": 0.2546583850931677, + "world_religions": 0.2875 } }, "prompt_4": { - "accuracy": 0.4064064928337075, + "accuracy": 0.27689518217924364, "category_acc": { - "agronomy": 0.4319526627218935, - "anatomy": 0.2702702702702703, - "ancient_chinese": 0.21341463414634146, - "arts": 0.55625, - "astronomy": 0.3393939393939394, - "business_ethics": 0.3923444976076555, - "chinese_civil_service_exam": 0.34375, - "chinese_driving_rule": 0.5419847328244275, - "chinese_food_culture": 0.29411764705882354, - "chinese_foreign_policy": 0.4953271028037383, - "chinese_history": 0.3684210526315789, - "chinese_literature": 0.29901960784313725, - "chinese_teacher_qualification": 0.4972067039106145, - "clinical_knowledge": 0.35864978902953587, - "college_actuarial_science": 0.24528301886792453, - "college_education": 0.5046728971962616, - "college_engineering_hydrology": 0.4716981132075472, - "college_law": 0.3425925925925926, - "college_mathematics": 0.3142857142857143, - "college_medical_statistics": 0.4056603773584906, - "college_medicine": 0.40293040293040294, - "computer_science": 0.44607843137254904, - "computer_security": 0.4853801169590643, - "conceptual_physics": 0.41496598639455784, - "construction_project_management": 0.37410071942446044, - "economics": 0.5220125786163522, - "education": 0.39263803680981596, - "electrical_engineering": 0.47093023255813954, - "elementary_chinese": 0.3134920634920635, - "elementary_commonsense": 0.398989898989899, - "elementary_information_and_technology": 0.6260504201680672, - "elementary_mathematics": 0.33043478260869563, - "ethnology": 0.37777777777777777, - "food_science": 0.5034965034965035, - "genetics": 0.44886363636363635, - "global_facts": 0.4228187919463087, - "high_school_biology": 0.3076923076923077, - "high_school_chemistry": 0.25, - "high_school_geography": 0.3474576271186441, - "high_school_mathematics": 0.31097560975609756, - "high_school_physics": 0.2727272727272727, - "high_school_politics": 0.3706293706293706, - "human_sexuality": 0.47619047619047616, - "international_law": 0.31351351351351353, - "journalism": 0.4418604651162791, - "jurisprudence": 0.340632603406326, - "legal_and_moral_basis": 0.5654205607476636, - "logical": 0.4878048780487805, - "machine_learning": 0.45901639344262296, - "management": 0.45714285714285713, - "marketing": 0.43333333333333335, - "marxist_theory": 0.4656084656084656, - "modern_chinese": 0.3706896551724138, - "nutrition": 0.496551724137931, - "philosophy": 0.41904761904761906, - "professional_accounting": 0.4514285714285714, - "professional_law": 0.35071090047393366, - "professional_medicine": 0.3377659574468085, - "professional_psychology": 0.44396551724137934, - "public_relations": 0.47126436781609193, - "security_study": 0.562962962962963, - "sociology": 0.3805309734513274, - "sports_science": 0.4484848484848485, - "traditional_chinese_medicine": 0.32432432432432434, - "virology": 0.5029585798816568, - "world_history": 0.43478260869565216, - "world_religions": 0.46875 + "agronomy": 0.22485207100591717, + "anatomy": 0.28378378378378377, + "ancient_chinese": 0.23780487804878048, + "arts": 0.23125, + "astronomy": 0.24242424242424243, + "business_ethics": 0.3062200956937799, + "chinese_civil_service_exam": 0.24375, + "chinese_driving_rule": 0.3053435114503817, + "chinese_food_culture": 0.22794117647058823, + "chinese_foreign_policy": 0.2523364485981308, + "chinese_history": 0.30030959752321984, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.25139664804469275, + "clinical_knowledge": 0.2320675105485232, + "college_actuarial_science": 0.2358490566037736, + "college_education": 0.308411214953271, + "college_engineering_hydrology": 0.2641509433962264, + "college_law": 0.2777777777777778, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.330188679245283, + "college_medicine": 0.28205128205128205, + "computer_science": 0.31862745098039214, + "computer_security": 0.23391812865497075, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.26618705035971224, + "economics": 0.3270440251572327, + "education": 0.3558282208588957, + "electrical_engineering": 0.32558139534883723, + "elementary_chinese": 0.2222222222222222, + "elementary_commonsense": 0.2676767676767677, + "elementary_information_and_technology": 0.3025210084033613, + "elementary_mathematics": 0.27391304347826084, + "ethnology": 0.2518518518518518, + "food_science": 0.3006993006993007, + "genetics": 0.2727272727272727, + "global_facts": 0.31543624161073824, + "high_school_biology": 0.26627218934911245, + "high_school_chemistry": 0.21212121212121213, + "high_school_geography": 0.2542372881355932, + "high_school_mathematics": 0.21341463414634146, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3006993006993007, + "human_sexuality": 0.25396825396825395, + "international_law": 0.2756756756756757, + "journalism": 0.29069767441860467, + "jurisprudence": 0.24330900243309003, + "legal_and_moral_basis": 0.37850467289719625, + "logical": 0.2764227642276423, + "machine_learning": 0.26229508196721313, + "management": 0.2857142857142857, + "marketing": 0.32222222222222224, + "marxist_theory": 0.291005291005291, + "modern_chinese": 0.28448275862068967, + "nutrition": 0.3103448275862069, + "philosophy": 0.34285714285714286, + "professional_accounting": 0.28, + "professional_law": 0.27488151658767773, + "professional_medicine": 0.2632978723404255, + "professional_psychology": 0.2974137931034483, + "public_relations": 0.3390804597701149, + "security_study": 0.2740740740740741, + "sociology": 0.252212389380531, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.2702702702702703, + "virology": 0.2958579881656805, + "world_history": 0.2670807453416149, + "world_religions": 0.23125 } }, "prompt_5": { - "accuracy": 0.32844068381972025, + "accuracy": 0.2729235019858401, "category_acc": { - "agronomy": 0.28402366863905326, - "anatomy": 0.2635135135135135, - "ancient_chinese": 0.2926829268292683, - "arts": 0.4375, - "astronomy": 0.2727272727272727, - "business_ethics": 0.3253588516746411, - "chinese_civil_service_exam": 0.325, - "chinese_driving_rule": 0.3893129770992366, - "chinese_food_culture": 0.3602941176470588, - "chinese_foreign_policy": 0.32710280373831774, - "chinese_history": 0.34055727554179566, - "chinese_literature": 0.3137254901960784, - "chinese_teacher_qualification": 0.3575418994413408, - "clinical_knowledge": 0.31223628691983124, - "college_actuarial_science": 0.2169811320754717, - "college_education": 0.3925233644859813, - "college_engineering_hydrology": 0.3018867924528302, - "college_law": 0.28703703703703703, - "college_mathematics": 0.23809523809523808, - "college_medical_statistics": 0.37735849056603776, - "college_medicine": 0.3772893772893773, - "computer_science": 0.38235294117647056, - "computer_security": 0.3333333333333333, - "conceptual_physics": 0.3401360544217687, - "construction_project_management": 0.28776978417266186, - "economics": 0.31446540880503143, - "education": 0.3496932515337423, - "electrical_engineering": 0.36627906976744184, - "elementary_chinese": 0.3134920634920635, - "elementary_commonsense": 0.3434343434343434, - "elementary_information_and_technology": 0.3319327731092437, - "elementary_mathematics": 0.3173913043478261, - "ethnology": 0.362962962962963, - "food_science": 0.34965034965034963, - "genetics": 0.30113636363636365, - "global_facts": 0.348993288590604, - "high_school_biology": 0.2485207100591716, - "high_school_chemistry": 0.23484848484848486, - "high_school_geography": 0.3389830508474576, - "high_school_mathematics": 0.25609756097560976, - "high_school_physics": 0.2636363636363636, - "high_school_politics": 0.2867132867132867, - "human_sexuality": 0.35714285714285715, - "international_law": 0.2864864864864865, - "journalism": 0.4127906976744186, - "jurisprudence": 0.30656934306569344, - "legal_and_moral_basis": 0.4485981308411215, - "logical": 0.34146341463414637, - "machine_learning": 0.319672131147541, - "management": 0.3523809523809524, - "marketing": 0.34444444444444444, - "marxist_theory": 0.3544973544973545, - "modern_chinese": 0.27586206896551724, - "nutrition": 0.3103448275862069, + "agronomy": 0.26627218934911245, + "anatomy": 0.22972972972972974, + "ancient_chinese": 0.23170731707317074, + "arts": 0.24375, + "astronomy": 0.23636363636363636, + "business_ethics": 0.2822966507177033, + "chinese_civil_service_exam": 0.225, + "chinese_driving_rule": 0.3435114503816794, + "chinese_food_culture": 0.22058823529411764, + "chinese_foreign_policy": 0.24299065420560748, + "chinese_history": 0.29102167182662536, + "chinese_literature": 0.29411764705882354, + "chinese_teacher_qualification": 0.2681564245810056, + "clinical_knowledge": 0.23628691983122363, + "college_actuarial_science": 0.2830188679245283, + "college_education": 0.21495327102803738, + "college_engineering_hydrology": 0.2169811320754717, + "college_law": 0.21296296296296297, + "college_mathematics": 0.24761904761904763, + "college_medical_statistics": 0.25471698113207547, + "college_medicine": 0.326007326007326, + "computer_science": 0.3284313725490196, + "computer_security": 0.22807017543859648, + "conceptual_physics": 0.2925170068027211, + "construction_project_management": 0.2949640287769784, + "economics": 0.2578616352201258, + "education": 0.24539877300613497, + "electrical_engineering": 0.2616279069767442, + "elementary_chinese": 0.24603174603174602, + "elementary_commonsense": 0.23232323232323232, + "elementary_information_and_technology": 0.22268907563025211, + "elementary_mathematics": 0.29130434782608694, + "ethnology": 0.23703703703703705, + "food_science": 0.27972027972027974, + "genetics": 0.2897727272727273, + "global_facts": 0.28187919463087246, + "high_school_biology": 0.3136094674556213, + "high_school_chemistry": 0.26515151515151514, + "high_school_geography": 0.2711864406779661, + "high_school_mathematics": 0.23780487804878048, + "high_school_physics": 0.2545454545454545, + "high_school_politics": 0.2937062937062937, + "human_sexuality": 0.2619047619047619, + "international_law": 0.34054054054054056, + "journalism": 0.27906976744186046, + "jurisprudence": 0.26763990267639903, + "legal_and_moral_basis": 0.35514018691588783, + "logical": 0.3008130081300813, + "machine_learning": 0.28688524590163933, + "management": 0.21428571428571427, + "marketing": 0.28888888888888886, + "marxist_theory": 0.30687830687830686, + "modern_chinese": 0.22413793103448276, + "nutrition": 0.27586206896551724, "philosophy": 0.3904761904761905, - "professional_accounting": 0.36, - "professional_law": 0.2796208530805687, - "professional_medicine": 0.2553191489361702, - "professional_psychology": 0.35344827586206895, - "public_relations": 0.3850574712643678, - "security_study": 0.3333333333333333, - "sociology": 0.3672566371681416, - "sports_science": 0.2787878787878788, - "traditional_chinese_medicine": 0.3081081081081081, - "virology": 0.378698224852071, - "world_history": 0.2981366459627329, - "world_religions": 0.40625 + "professional_accounting": 0.28, + "professional_law": 0.27014218009478674, + "professional_medicine": 0.28191489361702127, + "professional_psychology": 0.3017241379310345, + "public_relations": 0.26436781609195403, + "security_study": 0.3111111111111111, + "sociology": 0.26991150442477874, + "sports_science": 0.28484848484848485, + "traditional_chinese_medicine": 0.25405405405405407, + "virology": 0.3254437869822485, + "world_history": 0.2919254658385093, + "world_religions": 0.2625 } } }, "zbench": { - "prompt_1": { - "accuracy": 0.2727272727272727 - }, - "prompt_2": { - "accuracy": 0.21212121212121213 - }, - "prompt_3": { - "accuracy": 0.24242424242424243 - }, - "prompt_4": { - "accuracy": 0.12121212121212122 - }, - "prompt_5": { - "accuracy": 0.2727272727272727 - } + "prompt_1": { + "accuracy": 0.24242424242424243 + }, + "prompt_2": { + "accuracy": 0.18181818181818182 + }, + "prompt_3": { + "accuracy": 0.15151515151515152 + }, + "prompt_4": { + "accuracy": 0.24242424242424243 + }, + "prompt_5": { + "accuracy": 0.21212121212121213 + } + }, + "ind_emotion": { + "prompt_1": { + "accuracy": 0.23863636363636365 + }, + "prompt_2": { + "accuracy": 0.14545454545454545 + }, + "prompt_3": { + "accuracy": 0.17272727272727273 + }, + "prompt_4": { + "accuracy": 0.24772727272727274 + }, + "prompt_5": { + "accuracy": 0.2545454545454545 + } + }, + "ocnli": { + "prompt_1": { + "accuracy": 0.3149152542372881 + }, + "prompt_2": { + "accuracy": 0.31864406779661014 + }, + "prompt_3": { + "accuracy": 0.30847457627118646 + }, + "prompt_4": { + "accuracy": 0.30135593220338985 + }, + "prompt_5": { + "accuracy": 0.31559322033898307 + } + }, + "c3": { + "prompt_1": { + "accuracy": 0.3582647718773373 + }, + "prompt_2": { + "accuracy": 0.36200448765893795 + }, + "prompt_3": { + "accuracy": 0.3556469708302169 + }, + "prompt_4": { + "accuracy": 0.3582647718773373 + }, + "prompt_5": { + "accuracy": 0.3631264023934181 + } + }, + "dream": { + "prompt_1": { + "accuracy": 0.49044585987261147 + }, + "prompt_2": { + "accuracy": 0.4679078882900539 + }, + "prompt_3": { + "accuracy": 0.48799608035276826 + }, + "prompt_4": { + "accuracy": 0.4977951984321411 + }, + "prompt_5": { + "accuracy": 0.5036746692797648 + } + }, + "samsum": { + "prompt_1": { + "rouge1": 0.23810951058317034, + "rouge2": 0.0857002142135992, + "rougeL": 0.1885274416117672, + "avg_rouge": 0.17077905546951225 + }, + "prompt_2": { + "rouge1": 0.250523637973491, + "rouge2": 0.089277880693473, + "rougeL": 0.19747352513811894, + "avg_rouge": 0.17909168126836095 + }, + "prompt_3": { + "rouge1": 0.20650395584271117, + "rouge2": 0.06638490581079505, + "rougeL": 0.1627505483788398, + "avg_rouge": 0.14521313667744867 + }, + "prompt_4": { + "rouge1": 0.2732959822452368, + "rouge2": 0.10311655456011995, + "rougeL": 0.21324113219553317, + "avg_rouge": 0.19655122300029668 + }, + "prompt_5": { + "rouge1": 0.2701425342096417, + "rouge2": 0.10132507177481839, + "rougeL": 0.21413842412577347, + "avg_rouge": 0.1952020100367445 + } + }, + "dialogsum": { + "prompt_1": { + "rouge1": 0.21296063981820385, + "rouge2": 0.059261153885824085, + "rougeL": 0.16162505046260914, + "avg_rouge": 0.14461561472221238 + }, + "prompt_2": { + "rouge1": 0.21047103992181337, + "rouge2": 0.057955935982241955, + "rougeL": 0.15820991924545352, + "avg_rouge": 0.14221229838316962 + }, + "prompt_3": { + "rouge1": 0.20411808050595426, + "rouge2": 0.056710375076034165, + "rougeL": 0.15459172140442987, + "avg_rouge": 0.1384733923288061 + }, + "prompt_4": { + "rouge1": 0.2228297744624097, + "rouge2": 0.06374052414669532, + "rougeL": 0.1659809274302705, + "avg_rouge": 0.15085040867979183 + }, + "prompt_5": { + "rouge1": 0.21136870425728, + "rouge2": 0.05689872790476053, + "rougeL": 0.15741589496925315, + "avg_rouge": 0.1418944423770979 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.6743119266055045 + }, + "prompt_2": { + "accuracy": 0.5424311926605505 + }, + "prompt_3": { + "accuracy": 0.6100917431192661 + }, + "prompt_4": { + "accuracy": 0.5286697247706422 + }, + "prompt_5": { + "accuracy": 0.841743119266055 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.6414189837008629 + }, + "prompt_2": { + "accuracy": 0.675934803451582 + }, + "prompt_3": { + "accuracy": 0.6596356663470757 + }, + "prompt_4": { + "accuracy": 0.5110258868648131 + }, + "prompt_5": { + "accuracy": 0.6222435282837967 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.414 + }, + "prompt_2": { + "accuracy": 0.471 + }, + "prompt_3": { + "accuracy": 0.5035 + }, + "prompt_4": { + "accuracy": 0.4315 + }, + "prompt_5": { + "accuracy": 0.4255 + } + }, + "mnli": { + "prompt_1": { + "accuracy": 0.3235 + }, + "prompt_2": { + "accuracy": 0.325 + }, + "prompt_3": { + "accuracy": 0.322 + }, + "prompt_4": { + "accuracy": 0.333 + }, + "prompt_5": { + "accuracy": 0.33 + } + }, + "qnli": { + "prompt_1": { + "accuracy": 0.511 + }, + "prompt_2": { + "accuracy": 0.542 + }, + "prompt_3": { + "accuracy": 0.516 + }, + "prompt_4": { + "accuracy": 0.5275 + }, + "prompt_5": { + "accuracy": 0.5215 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.4507042253521127 + }, + "prompt_2": { + "accuracy": 0.647887323943662 + }, + "prompt_3": { + "accuracy": 0.4647887323943662 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.43661971830985913 + } + }, + "rte": { + "prompt_1": { + "accuracy": 0.5090252707581228 + }, + "prompt_2": { + "accuracy": 0.5342960288808665 + }, + "prompt_3": { + "accuracy": 0.5306859205776173 + }, + "prompt_4": { + "accuracy": 0.5306859205776173 + }, + "prompt_5": { + "accuracy": 0.5090252707581228 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.49754901960784315 + }, + "prompt_2": { + "accuracy": 0.4632352941176471 + }, + "prompt_3": { + "accuracy": 0.4485294117647059 + }, + "prompt_4": { + "accuracy": 0.4852941176470588 + }, + "prompt_5": { + "accuracy": 0.5490196078431373 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.29574737966486414, + "category_acc": { + "History": 0.2791164658634538, + "Geography": 0.23265306122448978, + "Lampungic": 0.29931972789115646, + "Social science": 0.34557595993322204, + "Balinese": 0.3078556263269639, + "Makassarese": 0.3010752688172043, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.2364963503649635, + "Biology": 0.27218934911242604, + "Science": 0.3065015479876161, + "Christian religion": 0.3034825870646766, + "Art": 0.30282861896838603, + "Islam religion": 0.31436699857752487, + "Hindu religion": 0.30666666666666664, + "Madurese": 0.3220338983050847, + "Sport": 0.33783783783783783, + "Indonesian language": 0.3103985056039851, + "Physics": 0.25252525252525254, + "Minangkabau culture": 0.2914572864321608, + "Dayak language": 0.30275229357798167, + "Sociology": 0.2842741935483871, + "Economy": 0.23770491803278687, + "Sundanese": 0.30337078651685395, + "Javanese": 0.2862903225806452, + "Civic education": 0.32474964234620884 + } + }, + "prompt_2": { + "accuracy": 0.27705454302690435, + "category_acc": { + "History": 0.25100401606425704, + "Geography": 0.21836734693877552, + "Lampungic": 0.30612244897959184, + "Social science": 0.34056761268781305, + "Balinese": 0.29723991507430997, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.2916666666666667, + "Chemistry": 0.2291970802919708, + "Biology": 0.24378698224852072, + "Science": 0.27450980392156865, + "Christian religion": 0.2835820895522388, + "Art": 0.2778702163061564, + "Islam religion": 0.28733997155049784, + "Hindu religion": 0.28, + "Madurese": 0.2847457627118644, + "Sport": 0.3783783783783784, + "Indonesian language": 0.29950186799501866, + "Physics": 0.21616161616161617, + "Minangkabau culture": 0.25125628140703515, + "Dayak language": 0.22935779816513763, + "Sociology": 0.2318548387096774, + "Economy": 0.22131147540983606, + "Sundanese": 0.29472774416594644, + "Javanese": 0.2772177419354839, + "Civic education": 0.296137339055794 + } + }, + "prompt_3": { + "accuracy": 0.2833967554576407, + "category_acc": { + "History": 0.28714859437751006, + "Geography": 0.24897959183673468, + "Lampungic": 0.30612244897959184, + "Social science": 0.335559265442404, + "Balinese": 0.28874734607218683, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.22335766423357664, + "Biology": 0.26153846153846155, + "Science": 0.2714138286893705, + "Christian religion": 0.24378109452736318, + "Art": 0.26788685524126454, + "Islam religion": 0.28733997155049784, + "Hindu religion": 0.24666666666666667, + "Madurese": 0.2847457627118644, + "Sport": 0.32432432432432434, + "Indonesian language": 0.3107098381070984, + "Physics": 0.22828282828282828, + "Minangkabau culture": 0.2562814070351759, + "Dayak language": 0.22935779816513763, + "Sociology": 0.24798387096774194, + "Economy": 0.21721311475409835, + "Sundanese": 0.30423509075194466, + "Javanese": 0.2782258064516129, + "Civic education": 0.32474964234620884 + } + }, + "prompt_4": { + "accuracy": 0.2908071299819748, + "category_acc": { + "History": 0.30120481927710846, + "Geography": 0.25918367346938775, + "Lampungic": 0.29931972789115646, + "Social science": 0.35225375626043404, + "Balinese": 0.3227176220806794, + "Makassarese": 0.3064516129032258, + "Banjarese": 0.3263888888888889, + "Chemistry": 0.23065693430656933, + "Biology": 0.26153846153846155, + "Science": 0.27450980392156865, + "Christian religion": 0.34328358208955223, + "Art": 0.2961730449251248, + "Islam religion": 0.3129445234708393, + "Hindu religion": 0.31333333333333335, + "Madurese": 0.28135593220338984, + "Sport": 0.3310810810810811, + "Indonesian language": 0.3010585305105853, + "Physics": 0.24646464646464647, + "Minangkabau culture": 0.2864321608040201, + "Dayak language": 0.25688073394495414, + "Sociology": 0.28225806451612906, + "Economy": 0.24180327868852458, + "Sundanese": 0.29213483146067415, + "Javanese": 0.28125, + "Civic education": 0.3261802575107296 + } + }, + "prompt_5": { + "accuracy": 0.2861339208224848, + "category_acc": { + "History": 0.24899598393574296, + "Geography": 0.23469387755102042, + "Lampungic": 0.3197278911564626, + "Social science": 0.35392320534223703, + "Balinese": 0.3057324840764331, + "Makassarese": 0.3064516129032258, + "Banjarese": 0.2708333333333333, + "Chemistry": 0.24087591240875914, + "Biology": 0.26745562130177514, + "Science": 0.2961816305469556, + "Christian religion": 0.3383084577114428, + "Art": 0.2911813643926789, + "Islam religion": 0.28733997155049784, + "Hindu religion": 0.3333333333333333, + "Madurese": 0.2847457627118644, + "Sport": 0.33783783783783783, + "Indonesian language": 0.3116438356164384, + "Physics": 0.24848484848484848, + "Minangkabau culture": 0.27638190954773867, + "Dayak language": 0.27522935779816515, + "Sociology": 0.2620967741935484, + "Economy": 0.20491803278688525, + "Sundanese": 0.2929991356957649, + "Javanese": 0.26814516129032256, + "Civic education": 0.28183118741058655 + } + } + } + }, + "five_shot": { + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + } + } + }, + "gemma-2b-it": { + "model_size": "7B", + "model_link": "https://huggingface.co/google/gemma-2b-it", + "zero_shot": { + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.37142857142857144, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "Malay": 0.36666666666666664, + "Filipino": 0.31333333333333335, + "Indonesian": 0.35333333333333333, + "Chinese": 0.38666666666666666, + "Spanish": 0.36, + "English": 0.44666666666666666 + }, + "consistency_score_2": 0.5914285714285714, + "consistency_score_3": 0.4316190476190477, + "consistency_score_4": 0.3413333333333333, + "consistency_score_5": 0.28095238095238095, + "consistency_score_6": 0.2361904761904762, + "consistency_score_7": 0.2, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.5666666666666667, + "Vietnamese,Filipino": 0.5333333333333333, + "Vietnamese,Indonesian": 0.5733333333333334, + "Vietnamese,Chinese": 0.6066666666666667, + "Vietnamese,Spanish": 0.5866666666666667, + "Vietnamese,English": 0.5733333333333334, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6533333333333333, + "Malay,Chinese": 0.5933333333333334, + "Malay,Spanish": 0.62, + "Malay,English": 0.56, + "Filipino,Indonesian": 0.5666666666666667, + "Filipino,Chinese": 0.52, + "Filipino,Spanish": 0.5333333333333333, + "Filipino,English": 0.52, + "Indonesian,Chinese": 0.64, + "Indonesian,Spanish": 0.68, + "Indonesian,English": 0.6333333333333333, + "Chinese,Spanish": 0.6333333333333333, + "Chinese,English": 0.6533333333333333, + "Spanish,English": 0.7133333333333334 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian": 0.44, + "Vietnamese,Malay,Chinese": 0.44, + "Vietnamese,Malay,Spanish": 0.42, + "Vietnamese,Malay,English": 0.41333333333333333, + "Vietnamese,Filipino,Indonesian": 0.37333333333333335, + "Vietnamese,Filipino,Chinese": 0.36666666666666664, + "Vietnamese,Filipino,Spanish": 0.37333333333333335, + "Vietnamese,Filipino,English": 0.35333333333333333, + "Vietnamese,Indonesian,Chinese": 0.4533333333333333, + "Vietnamese,Indonesian,Spanish": 0.4533333333333333, + "Vietnamese,Indonesian,English": 0.44, + "Vietnamese,Chinese,Spanish": 0.46, + "Vietnamese,Chinese,English": 0.4666666666666667, + "Vietnamese,Spanish,English": 0.48, + "Malay,Filipino,Indonesian": 0.38666666666666666, + "Malay,Filipino,Chinese": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.36, + "Malay,Filipino,English": 0.32, + "Malay,Indonesian,Chinese": 0.4666666666666667, + "Malay,Indonesian,Spanish": 0.52, + "Malay,Indonesian,English": 0.46, + "Malay,Chinese,Spanish": 0.48, + "Malay,Chinese,English": 0.44666666666666666, + "Malay,Spanish,English": 0.4866666666666667, + "Filipino,Indonesian,Chinese": 0.42, + "Filipino,Indonesian,Spanish": 0.44, + "Filipino,Indonesian,English": 0.4066666666666667, + "Filipino,Chinese,Spanish": 0.38666666666666666, + "Filipino,Chinese,English": 0.4, + "Filipino,Spanish,English": 0.42, + "Indonesian,Chinese,Spanish": 0.5133333333333333, + "Indonesian,Chinese,English": 0.49333333333333335, + "Indonesian,Spanish,English": 0.5533333333333333, + "Chinese,Spanish,English": 0.5266666666666666 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.28, + "Vietnamese,Malay,Filipino,English": 0.26, + "Vietnamese,Malay,Indonesian,Chinese": 0.36, + "Vietnamese,Malay,Indonesian,Spanish": 0.37333333333333335, + "Vietnamese,Malay,Indonesian,English": 0.35333333333333333, + "Vietnamese,Malay,Chinese,Spanish": 0.36, + "Vietnamese,Malay,Chinese,English": 0.36, + "Vietnamese,Malay,Spanish,English": 0.36, + "Vietnamese,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish": 0.32, + "Vietnamese,Filipino,Indonesian,English": 0.3, + "Vietnamese,Filipino,Chinese,Spanish": 0.3, + "Vietnamese,Filipino,Chinese,English": 0.30666666666666664, + "Vietnamese,Filipino,Spanish,English": 0.30666666666666664, + "Vietnamese,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese,English": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish,English": 0.4, + "Vietnamese,Chinese,Spanish,English": 0.4066666666666667, + "Malay,Filipino,Indonesian,Chinese": 0.31333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.34, + "Malay,Filipino,Indonesian,English": 0.29333333333333333, + "Malay,Filipino,Chinese,Spanish": 0.3, + "Malay,Filipino,Chinese,English": 0.28, + "Malay,Filipino,Spanish,English": 0.29333333333333333, + "Malay,Indonesian,Chinese,Spanish": 0.41333333333333333, + "Malay,Indonesian,Chinese,English": 0.37333333333333335, + "Malay,Indonesian,Spanish,English": 0.4266666666666667, + "Malay,Chinese,Spanish,English": 0.4, + "Filipino,Indonesian,Chinese,Spanish": 0.3466666666666667, + "Filipino,Indonesian,Chinese,English": 0.34, + "Filipino,Indonesian,Spanish,English": 0.36, + "Filipino,Chinese,Spanish,English": 0.3333333333333333, + "Indonesian,Chinese,Spanish,English": 0.44 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.24, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish,English": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.31333333333333335, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.32, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.35333333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.28, + "Malay,Filipino,Indonesian,Chinese,English": 0.25333333333333335, + "Malay,Filipino,Indonesian,Spanish,English": 0.28, + "Malay,Filipino,Chinese,Spanish,English": 0.25333333333333335, + "Malay,Indonesian,Chinese,Spanish,English": 0.35333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.3 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.3, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.24, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.24 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.2 + } + }, + "AC3_2": 0.4562950402239117, + "AC3_3": 0.3992680942847889, + "AC3_4": 0.3557455905433548, + "AC3_5": 0.319916579721556, + "AC3_6": 0.2887595162982002, + "AC3_7": 0.2599999999545 + }, + "prompt_2": { + "overall_acc": 0.3676190476190476, + "language_acc": { + "Vietnamese": 0.35333333333333333, + "Malay": 0.36666666666666664, + "Filipino": 0.3333333333333333, + "Indonesian": 0.34, + "Chinese": 0.38, + "Spanish": 0.36666666666666664, + "English": 0.43333333333333335 + }, + "consistency_score_2": 0.5628571428571428, + "consistency_score_3": 0.389142857142857, + "consistency_score_4": 0.29504761904761906, + "consistency_score_5": 0.2361904761904762, + "consistency_score_6": 0.1961904761904762, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.6066666666666667, + "Vietnamese,Filipino": 0.5066666666666667, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese": 0.5866666666666667, + "Vietnamese,Spanish": 0.48, + "Vietnamese,English": 0.6333333333333333, + "Malay,Filipino": 0.5066666666666667, + "Malay,Indonesian": 0.64, + "Malay,Chinese": 0.6066666666666667, + "Malay,Spanish": 0.5666666666666667, + "Malay,English": 0.5933333333333334, + "Filipino,Indonesian": 0.54, + "Filipino,Chinese": 0.5466666666666666, + "Filipino,Spanish": 0.48, + "Filipino,English": 0.5533333333333333, + "Indonesian,Chinese": 0.5733333333333334, + "Indonesian,Spanish": 0.5866666666666667, + "Indonesian,English": 0.52, + "Chinese,Spanish": 0.5533333333333333, + "Chinese,English": 0.6, + "Spanish,English": 0.6266666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.41333333333333333, + "Vietnamese,Malay,Chinese": 0.44666666666666666, + "Vietnamese,Malay,Spanish": 0.37333333333333335, + "Vietnamese,Malay,English": 0.4533333333333333, + "Vietnamese,Filipino,Indonesian": 0.3333333333333333, + "Vietnamese,Filipino,Chinese": 0.36, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Filipino,English": 0.38666666666666666, + "Vietnamese,Indonesian,Chinese": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish": 0.34, + "Vietnamese,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,Chinese,English": 0.4533333333333333, + "Vietnamese,Spanish,English": 0.42, + "Malay,Filipino,Indonesian": 0.38, + "Malay,Filipino,Chinese": 0.3933333333333333, + "Malay,Filipino,Spanish": 0.32666666666666666, + "Malay,Filipino,English": 0.37333333333333335, + "Malay,Indonesian,Chinese": 0.44666666666666666, + "Malay,Indonesian,Spanish": 0.44666666666666666, + "Malay,Indonesian,English": 0.41333333333333333, + "Malay,Chinese,Spanish": 0.41333333333333333, + "Malay,Chinese,English": 0.43333333333333335, + "Malay,Spanish,English": 0.44666666666666666, + "Filipino,Indonesian,Chinese": 0.37333333333333335, + "Filipino,Indonesian,Spanish": 0.3466666666666667, + "Filipino,Indonesian,English": 0.3333333333333333, + "Filipino,Chinese,Spanish": 0.3466666666666667, + "Filipino,Chinese,English": 0.37333333333333335, + "Filipino,Spanish,English": 0.36, + "Indonesian,Chinese,Spanish": 0.3933333333333333, + "Indonesian,Chinese,English": 0.38666666666666666, + "Indonesian,Spanish,English": 0.4266666666666667, + "Chinese,Spanish,English": 0.44 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Malay,Filipino,Spanish": 0.26, + "Vietnamese,Malay,Filipino,English": 0.32, + "Vietnamese,Malay,Indonesian,Chinese": 0.32, + "Vietnamese,Malay,Indonesian,Spanish": 0.3, + "Vietnamese,Malay,Indonesian,English": 0.32, + "Vietnamese,Malay,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Chinese,English": 0.35333333333333333, + "Vietnamese,Malay,Spanish,English": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian,Chinese": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,English": 0.26, + "Vietnamese,Filipino,Chinese,Spanish": 0.26, + "Vietnamese,Filipino,Chinese,English": 0.3, + "Vietnamese,Filipino,Spanish,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.3, + "Vietnamese,Indonesian,Spanish,English": 0.3, + "Vietnamese,Chinese,Spanish,English": 0.3333333333333333, + "Malay,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,English": 0.28, + "Malay,Filipino,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Chinese,English": 0.29333333333333333, + "Malay,Filipino,Spanish,English": 0.2733333333333333, + "Malay,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Malay,Indonesian,Chinese,English": 0.32, + "Malay,Indonesian,Spanish,English": 0.35333333333333333, + "Malay,Chinese,Spanish,English": 0.3466666666666667, + "Filipino,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Filipino,Indonesian,Chinese,English": 0.26, + "Filipino,Indonesian,Spanish,English": 0.26666666666666666, + "Filipino,Chinese,Spanish,English": 0.28, + "Indonesian,Chinese,Spanish,English": 0.32 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,English": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Spanish,English": 0.24, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.26, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.2, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.20666666666666667, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.23333333333333334, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.24666666666666667, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Malay,Filipino,Indonesian,Chinese,English": 0.22666666666666666, + "Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish,English": 0.2733333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.18, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.16666666666666666 + } + }, + "AC3_2": 0.4447550811044165, + "AC3_3": 0.37807486241453286, + "AC3_4": 0.32735953128828565, + "AC3_5": 0.28760102143351657, + "AC3_6": 0.25584298579760834, + "AC3_7": 0.2293523469564789 + }, + "prompt_3": { + "overall_acc": 0.3771428571428571, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "Malay": 0.36666666666666664, + "Filipino": 0.30666666666666664, + "Indonesian": 0.37333333333333335, + "Chinese": 0.38, + "Spanish": 0.4066666666666667, + "English": 0.43333333333333335 + }, + "consistency_score_2": 0.5596825396825397, + "consistency_score_3": 0.38380952380952377, + "consistency_score_4": 0.2866666666666666, + "consistency_score_5": 0.2241269841269841, + "consistency_score_6": 0.18, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.58, + "Vietnamese,Filipino": 0.4666666666666667, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Chinese": 0.54, + "Vietnamese,Spanish": 0.5466666666666666, + "Vietnamese,English": 0.56, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6333333333333333, + "Malay,Chinese": 0.5466666666666666, + "Malay,Spanish": 0.5666666666666667, + "Malay,English": 0.58, + "Filipino,Indonesian": 0.5133333333333333, + "Filipino,Chinese": 0.5133333333333333, + "Filipino,Spanish": 0.48, + "Filipino,English": 0.54, + "Indonesian,Chinese": 0.62, + "Indonesian,Spanish": 0.62, + "Indonesian,English": 0.6066666666666667, + "Chinese,Spanish": 0.58, + "Chinese,English": 0.6, + "Spanish,English": 0.6866666666666666 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian": 0.3933333333333333, + "Vietnamese,Malay,Chinese": 0.38, + "Vietnamese,Malay,Spanish": 0.38666666666666666, + "Vietnamese,Malay,English": 0.3933333333333333, + "Vietnamese,Filipino,Indonesian": 0.3, + "Vietnamese,Filipino,Chinese": 0.30666666666666664, + "Vietnamese,Filipino,Spanish": 0.32, + "Vietnamese,Filipino,English": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese": 0.36, + "Vietnamese,Indonesian,Spanish": 0.38, + "Vietnamese,Indonesian,English": 0.38, + "Vietnamese,Chinese,Spanish": 0.37333333333333335, + "Vietnamese,Chinese,English": 0.38666666666666666, + "Vietnamese,Spanish,English": 0.44, + "Malay,Filipino,Indonesian": 0.36666666666666664, + "Malay,Filipino,Chinese": 0.32666666666666666, + "Malay,Filipino,Spanish": 0.32666666666666666, + "Malay,Filipino,English": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.4266666666666667, + "Malay,Indonesian,Spanish": 0.4533333333333333, + "Malay,Indonesian,English": 0.44666666666666666, + "Malay,Chinese,Spanish": 0.4066666666666667, + "Malay,Chinese,English": 0.3933333333333333, + "Malay,Spanish,English": 0.46, + "Filipino,Indonesian,Chinese": 0.36666666666666664, + "Filipino,Indonesian,Spanish": 0.36, + "Filipino,Indonesian,English": 0.37333333333333335, + "Filipino,Chinese,Spanish": 0.34, + "Filipino,Chinese,English": 0.36, + "Filipino,Spanish,English": 0.38, + "Indonesian,Chinese,Spanish": 0.43333333333333335, + "Indonesian,Chinese,English": 0.44, + "Indonesian,Spanish,English": 0.5, + "Chinese,Spanish,English": 0.48 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese": 0.22, + "Vietnamese,Malay,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Filipino,English": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Chinese": 0.2866666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,English": 0.3, + "Vietnamese,Malay,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese,English": 0.28, + "Vietnamese,Malay,Spanish,English": 0.3333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Filipino,Chinese,Spanish": 0.24, + "Vietnamese,Filipino,Chinese,English": 0.24, + "Vietnamese,Filipino,Spanish,English": 0.26666666666666666, + "Vietnamese,Indonesian,Chinese,Spanish": 0.2866666666666667, + "Vietnamese,Indonesian,Chinese,English": 0.3, + "Vietnamese,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Chinese,Spanish,English": 0.32, + "Malay,Filipino,Indonesian,Chinese": 0.26666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian,English": 0.2866666666666667, + "Malay,Filipino,Chinese,Spanish": 0.25333333333333335, + "Malay,Filipino,Chinese,English": 0.26, + "Malay,Filipino,Spanish,English": 0.2866666666666667, + "Malay,Indonesian,Chinese,Spanish": 0.3466666666666667, + "Malay,Indonesian,Chinese,English": 0.32666666666666666, + "Malay,Indonesian,Spanish,English": 0.38666666666666666, + "Malay,Chinese,Spanish,English": 0.34, + "Filipino,Indonesian,Chinese,Spanish": 0.28, + "Filipino,Indonesian,Chinese,English": 0.29333333333333333, + "Filipino,Indonesian,Spanish,English": 0.30666666666666664, + "Filipino,Chinese,Spanish,English": 0.29333333333333333, + "Indonesian,Chinese,Spanish,English": 0.37333333333333335 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.2, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Chinese,English": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Spanish,English": 0.22, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.24, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.19333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.22, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.20666666666666667, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.26, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.22, + "Malay,Filipino,Indonesian,Chinese,English": 0.22, + "Malay,Filipino,Indonesian,Spanish,English": 0.24, + "Malay,Filipino,Chinese,Spanish,English": 0.22666666666666666, + "Malay,Indonesian,Chinese,Spanish,English": 0.29333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.25333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.18, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.18, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.2 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.14666666666666667 + } + }, + "AC3_2": 0.4506288424750139, + "AC3_3": 0.38044698725556436, + "AC3_4": 0.32573888086914976, + "AC3_5": 0.28116457982953924, + "AC3_6": 0.24369230764856797, + "AC3_7": 0.21119999995968003 + }, + "prompt_4": { + "overall_acc": 0.379047619047619, + "language_acc": { + "Vietnamese": 0.38666666666666666, + "Malay": 0.38, + "Filipino": 0.29333333333333333, + "Indonesian": 0.38, + "Chinese": 0.38666666666666666, + "Spanish": 0.38, + "English": 0.44666666666666666 + }, + "consistency_score_2": 0.5869841269841272, + "consistency_score_3": 0.41942857142857143, + "consistency_score_4": 0.32514285714285723, + "consistency_score_5": 0.2625396825396826, + "consistency_score_6": 0.21619047619047618, + "consistency_score_7": 0.18, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.58, + "Vietnamese,Filipino": 0.48, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Chinese": 0.5733333333333334, + "Vietnamese,Spanish": 0.5933333333333334, + "Vietnamese,English": 0.6266666666666667, + "Malay,Filipino": 0.5, + "Malay,Indonesian": 0.66, + "Malay,Chinese": 0.5733333333333334, + "Malay,Spanish": 0.5933333333333334, + "Malay,English": 0.58, + "Filipino,Indonesian": 0.5333333333333333, + "Filipino,Chinese": 0.5066666666666667, + "Filipino,Spanish": 0.52, + "Filipino,English": 0.54, + "Indonesian,Chinese": 0.64, + "Indonesian,Spanish": 0.64, + "Indonesian,English": 0.6266666666666667, + "Chinese,Spanish": 0.6133333333333333, + "Chinese,English": 0.64, + "Spanish,English": 0.7466666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.32666666666666666, + "Vietnamese,Malay,Indonesian": 0.43333333333333335, + "Vietnamese,Malay,Chinese": 0.4, + "Vietnamese,Malay,Spanish": 0.42, + "Vietnamese,Malay,English": 0.44, + "Vietnamese,Filipino,Indonesian": 0.34, + "Vietnamese,Filipino,Chinese": 0.32666666666666666, + "Vietnamese,Filipino,Spanish": 0.3333333333333333, + "Vietnamese,Filipino,English": 0.36, + "Vietnamese,Indonesian,Chinese": 0.41333333333333333, + "Vietnamese,Indonesian,Spanish": 0.43333333333333335, + "Vietnamese,Indonesian,English": 0.4533333333333333, + "Vietnamese,Chinese,Spanish": 0.4266666666666667, + "Vietnamese,Chinese,English": 0.4533333333333333, + "Vietnamese,Spanish,English": 0.5133333333333333, + "Malay,Filipino,Indonesian": 0.3933333333333333, + "Malay,Filipino,Chinese": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.3466666666666667, + "Malay,Filipino,English": 0.35333333333333333, + "Malay,Indonesian,Chinese": 0.47333333333333333, + "Malay,Indonesian,Spanish": 0.4866666666666667, + "Malay,Indonesian,English": 0.4666666666666667, + "Malay,Chinese,Spanish": 0.44, + "Malay,Chinese,English": 0.44, + "Malay,Spanish,English": 0.48, + "Filipino,Indonesian,Chinese": 0.3933333333333333, + "Filipino,Indonesian,Spanish": 0.4, + "Filipino,Indonesian,English": 0.4, + "Filipino,Chinese,Spanish": 0.36, + "Filipino,Chinese,English": 0.38666666666666666, + "Filipino,Spanish,English": 0.42, + "Indonesian,Chinese,Spanish": 0.48, + "Indonesian,Chinese,English": 0.49333333333333335, + "Indonesian,Spanish,English": 0.5333333333333333, + "Chinese,Spanish,English": 0.5066666666666667 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,Malay,Filipino,Chinese": 0.24, + "Vietnamese,Malay,Filipino,Spanish": 0.26, + "Vietnamese,Malay,Filipino,English": 0.2733333333333333, + "Vietnamese,Malay,Indonesian,Chinese": 0.3333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.35333333333333333, + "Vietnamese,Malay,Indonesian,English": 0.37333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.32, + "Vietnamese,Malay,Chinese,English": 0.34, + "Vietnamese,Malay,Spanish,English": 0.38, + "Vietnamese,Filipino,Indonesian,Chinese": 0.26, + "Vietnamese,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.30666666666666664, + "Vietnamese,Filipino,Chinese,Spanish": 0.26, + "Vietnamese,Filipino,Chinese,English": 0.28, + "Vietnamese,Filipino,Spanish,English": 0.3, + "Vietnamese,Indonesian,Chinese,Spanish": 0.3333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.36666666666666664, + "Vietnamese,Indonesian,Spanish,English": 0.4, + "Vietnamese,Chinese,Spanish,English": 0.38, + "Malay,Filipino,Indonesian,Chinese": 0.30666666666666664, + "Malay,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Malay,Filipino,Indonesian,English": 0.32, + "Malay,Filipino,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Chinese,English": 0.2866666666666667, + "Malay,Filipino,Spanish,English": 0.3, + "Malay,Indonesian,Chinese,Spanish": 0.38666666666666666, + "Malay,Indonesian,Chinese,English": 0.38, + "Malay,Indonesian,Spanish,English": 0.41333333333333333, + "Malay,Chinese,Spanish,English": 0.37333333333333335, + "Filipino,Indonesian,Chinese,Spanish": 0.32, + "Filipino,Indonesian,Chinese,English": 0.32666666666666666, + "Filipino,Indonesian,Spanish,English": 0.3466666666666667, + "Filipino,Chinese,Spanish,English": 0.3, + "Indonesian,Chinese,Spanish,English": 0.41333333333333333 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Chinese,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Spanish,English": 0.24, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.3, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3333333333333333, + "Vietnamese,Malay,Chinese,Spanish,English": 0.29333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.23333333333333334, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.32, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.25333333333333335, + "Malay,Filipino,Indonesian,Chinese,English": 0.26, + "Malay,Filipino,Indonesian,Spanish,English": 0.28, + "Malay,Filipino,Chinese,Spanish,English": 0.24, + "Malay,Indonesian,Chinese,Spanish,English": 0.3333333333333333, + "Filipino,Indonesian,Chinese,Spanish,English": 0.28 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.2, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.22, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.22666666666666666 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.18 + } + }, + "AC3_2": 0.46063690275813163, + "AC3_3": 0.3982170119457658, + "AC3_4": 0.3500320722869366, + "AC3_5": 0.31021512212124897, + "AC3_6": 0.2753401904299333, + "AC3_7": 0.24408858598700287 + }, + "prompt_5": { + "overall_acc": 0.37809523809523815, + "language_acc": { + "Vietnamese": 0.37333333333333335, + "Malay": 0.4, + "Filipino": 0.2866666666666667, + "Indonesian": 0.36666666666666664, + "Chinese": 0.38, + "Spanish": 0.37333333333333335, + "English": 0.4666666666666667 + }, + "consistency_score_2": 0.5488888888888889, + "consistency_score_3": 0.3712380952380952, + "consistency_score_4": 0.2773333333333333, + "consistency_score_5": 0.22031746031746033, + "consistency_score_6": 0.18190476190476187, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,Malay": 0.54, + "Vietnamese,Filipino": 0.4533333333333333, + "Vietnamese,Indonesian": 0.56, + "Vietnamese,Chinese": 0.5533333333333333, + "Vietnamese,Spanish": 0.56, + "Vietnamese,English": 0.58, + "Malay,Filipino": 0.46, + "Malay,Indonesian": 0.6266666666666667, + "Malay,Chinese": 0.5133333333333333, + "Malay,Spanish": 0.5333333333333333, + "Malay,English": 0.5533333333333333, + "Filipino,Indonesian": 0.5, + "Filipino,Chinese": 0.46, + "Filipino,Spanish": 0.5, + "Filipino,English": 0.52, + "Indonesian,Chinese": 0.52, + "Indonesian,Spanish": 0.6133333333333333, + "Indonesian,English": 0.6, + "Chinese,Spanish": 0.5533333333333333, + "Chinese,English": 0.62, + "Spanish,English": 0.7066666666666667 + }, + "3_combine": { + "Vietnamese,Malay,Filipino": 0.29333333333333333, + "Vietnamese,Malay,Indonesian": 0.4, + "Vietnamese,Malay,Chinese": 0.36666666666666664, + "Vietnamese,Malay,Spanish": 0.37333333333333335, + "Vietnamese,Malay,English": 0.4, + "Vietnamese,Filipino,Indonesian": 0.3, + "Vietnamese,Filipino,Chinese": 0.29333333333333333, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Filipino,English": 0.32666666666666666, + "Vietnamese,Indonesian,Chinese": 0.38, + "Vietnamese,Indonesian,Spanish": 0.41333333333333333, + "Vietnamese,Indonesian,English": 0.4266666666666667, + "Vietnamese,Chinese,Spanish": 0.3933333333333333, + "Vietnamese,Chinese,English": 0.4266666666666667, + "Vietnamese,Spanish,English": 0.4533333333333333, + "Malay,Filipino,Indonesian": 0.34, + "Malay,Filipino,Chinese": 0.2733333333333333, + "Malay,Filipino,Spanish": 0.3, + "Malay,Filipino,English": 0.32666666666666666, + "Malay,Indonesian,Chinese": 0.36666666666666664, + "Malay,Indonesian,Spanish": 0.43333333333333335, + "Malay,Indonesian,English": 0.4266666666666667, + "Malay,Chinese,Spanish": 0.37333333333333335, + "Malay,Chinese,English": 0.3933333333333333, + "Malay,Spanish,English": 0.4266666666666667, + "Filipino,Indonesian,Chinese": 0.29333333333333333, + "Filipino,Indonesian,Spanish": 0.3466666666666667, + "Filipino,Indonesian,English": 0.36, + "Filipino,Chinese,Spanish": 0.3, + "Filipino,Chinese,English": 0.34, + "Filipino,Spanish,English": 0.38666666666666666, + "Indonesian,Chinese,Spanish": 0.38, + "Indonesian,Chinese,English": 0.41333333333333333, + "Indonesian,Spanish,English": 0.49333333333333335, + "Chinese,Spanish,English": 0.47333333333333333 + }, + "4_combine": { + "Vietnamese,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Chinese": 0.2, + "Vietnamese,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,English": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Chinese": 0.30666666666666664, + "Vietnamese,Malay,Indonesian,Spanish": 0.32666666666666666, + "Vietnamese,Malay,Indonesian,English": 0.34, + "Vietnamese,Malay,Chinese,Spanish": 0.3, + "Vietnamese,Malay,Chinese,English": 0.32, + "Vietnamese,Malay,Spanish,English": 0.3333333333333333, + "Vietnamese,Filipino,Indonesian,Chinese": 0.21333333333333335, + "Vietnamese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,English": 0.25333333333333335, + "Vietnamese,Filipino,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Filipino,Chinese,English": 0.23333333333333334, + "Vietnamese,Filipino,Spanish,English": 0.25333333333333335, + "Vietnamese,Indonesian,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,Indonesian,Chinese,English": 0.32, + "Vietnamese,Indonesian,Spanish,English": 0.36, + "Vietnamese,Chinese,Spanish,English": 0.34, + "Malay,Filipino,Indonesian,Chinese": 0.20666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Malay,Filipino,Indonesian,English": 0.26666666666666666, + "Malay,Filipino,Chinese,Spanish": 0.21333333333333335, + "Malay,Filipino,Chinese,English": 0.23333333333333334, + "Malay,Filipino,Spanish,English": 0.26, + "Malay,Indonesian,Chinese,Spanish": 0.30666666666666664, + "Malay,Indonesian,Chinese,English": 0.32, + "Malay,Indonesian,Spanish,English": 0.36, + "Malay,Chinese,Spanish,English": 0.32666666666666666, + "Filipino,Indonesian,Chinese,Spanish": 0.22666666666666666, + "Filipino,Indonesian,Chinese,English": 0.26, + "Filipino,Indonesian,Spanish,English": 0.3, + "Filipino,Chinese,Spanish,English": 0.26, + "Indonesian,Chinese,Spanish,English": 0.3466666666666667 + }, + "5_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,English": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Chinese,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Chinese,English": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Spanish,English": 0.21333333333333335, + "Vietnamese,Malay,Indonesian,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,English": 0.28, + "Vietnamese,Malay,Indonesian,Spanish,English": 0.3, + "Vietnamese,Malay,Chinese,Spanish,English": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,Filipino,Indonesian,Chinese,English": 0.18666666666666668, + "Vietnamese,Filipino,Indonesian,Spanish,English": 0.22, + "Vietnamese,Filipino,Chinese,Spanish,English": 0.18666666666666668, + "Vietnamese,Indonesian,Chinese,Spanish,English": 0.2733333333333333, + "Malay,Filipino,Indonesian,Chinese,Spanish": 0.18, + "Malay,Filipino,Indonesian,Chinese,English": 0.2, + "Malay,Filipino,Indonesian,Spanish,English": 0.22666666666666666, + "Malay,Filipino,Chinese,Spanish,English": 0.19333333333333333, + "Malay,Indonesian,Chinese,Spanish,English": 0.28, + "Filipino,Indonesian,Chinese,Spanish,English": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish": 0.16, + "Vietnamese,Malay,Filipino,Indonesian,Chinese,English": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish,English": 0.19333333333333333, + "Vietnamese,Malay,Filipino,Chinese,Spanish,English": 0.16666666666666666, + "Vietnamese,Malay,Indonesian,Chinese,Spanish,English": 0.25333333333333335, + "Vietnamese,Filipino,Indonesian,Chinese,Spanish,English": 0.16, + "Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.17333333333333334 + }, + "7_combine": { + "Vietnamese,Malay,Filipino,Indonesian,Chinese,Spanish,English": 0.15333333333333332 + } + }, + "AC3_2": 0.44775799081927725, + "AC3_3": 0.3746352917891597, + "AC3_4": 0.3199690012105056, + "AC3_5": 0.2784064670495981, + "AC3_6": 0.24563330089556026, + "AC3_7": 0.2181839904009988 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.33360389610389607, + "language_acc": { + "English": 0.38636363636363635, + "Filipino": 0.26136363636363635, + "Vietnamese": 0.2840909090909091, + "Chinese": 0.3693181818181818, + "Indonesian": 0.32386363636363635, + "Malay": 0.35795454545454547, + "Spanish": 0.3522727272727273 + }, + "consistency_score_2": 0.5551948051948052, + "consistency_score_3": 0.37646103896103883, + "consistency_score_4": 0.27532467532467536, + "consistency_score_5": 0.20887445887445888, + "consistency_score_6": 0.16152597402597402, + "consistency_score_7": 0.125, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.48295454545454547, + "English,Vietnamese": 0.5738636363636364, + "English,Chinese": 0.6363636363636364, + "English,Indonesian": 0.6761363636363636, + "English,Malay": 0.5965909090909091, + "English,Spanish": 0.6420454545454546, + "Filipino,Vietnamese": 0.4147727272727273, + "Filipino,Chinese": 0.42613636363636365, + "Filipino,Indonesian": 0.48863636363636365, + "Filipino,Malay": 0.48295454545454547, + "Filipino,Spanish": 0.45454545454545453, + "Vietnamese,Chinese": 0.5568181818181818, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Spanish": 0.5397727272727273, + "Chinese,Indonesian": 0.6022727272727273, + "Chinese,Malay": 0.5284090909090909, + "Chinese,Spanish": 0.5568181818181818, + "Indonesian,Malay": 0.6477272727272727, + "Indonesian,Spanish": 0.6420454545454546, + "Malay,Spanish": 0.5681818181818182 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese": 0.32386363636363635, + "English,Filipino,Indonesian": 0.3522727272727273, + "English,Filipino,Malay": 0.3125, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese": 0.42045454545454547, + "English,Vietnamese,Indonesian": 0.45454545454545453, + "English,Vietnamese,Malay": 0.3977272727272727, + "English,Vietnamese,Spanish": 0.4318181818181818, + "English,Chinese,Indonesian": 0.48863636363636365, + "English,Chinese,Malay": 0.42045454545454547, + "English,Chinese,Spanish": 0.4715909090909091, + "English,Indonesian,Malay": 0.4772727272727273, + "English,Indonesian,Spanish": 0.5227272727272727, + "English,Malay,Spanish": 0.45454545454545453, + "Filipino,Vietnamese,Chinese": 0.2727272727272727, + "Filipino,Vietnamese,Indonesian": 0.30113636363636365, + "Filipino,Vietnamese,Malay": 0.26136363636363635, + "Filipino,Vietnamese,Spanish": 0.2727272727272727, + "Filipino,Chinese,Indonesian": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.26704545454545453, + "Filipino,Chinese,Spanish": 0.26704545454545453, + "Filipino,Indonesian,Malay": 0.3465909090909091, + "Filipino,Indonesian,Spanish": 0.32954545454545453, + "Filipino,Malay,Spanish": 0.3068181818181818, + "Vietnamese,Chinese,Indonesian": 0.4147727272727273, + "Vietnamese,Chinese,Malay": 0.35795454545454547, + "Vietnamese,Chinese,Spanish": 0.375, + "Vietnamese,Indonesian,Malay": 0.4147727272727273, + "Vietnamese,Indonesian,Spanish": 0.4318181818181818, + "Vietnamese,Malay,Spanish": 0.36363636363636365, + "Chinese,Indonesian,Malay": 0.4147727272727273, + "Chinese,Indonesian,Spanish": 0.4431818181818182, + "Chinese,Malay,Spanish": 0.38636363636363635, + "Indonesian,Malay,Spanish": 0.4715909090909091 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.2215909090909091, + "English,Filipino,Vietnamese,Indonesian": 0.25, + "English,Filipino,Vietnamese,Malay": 0.20454545454545456, + "English,Filipino,Vietnamese,Spanish": 0.23295454545454544, + "English,Filipino,Chinese,Indonesian": 0.26704545454545453, + "English,Filipino,Chinese,Malay": 0.2215909090909091, + "English,Filipino,Chinese,Spanish": 0.23863636363636365, + "English,Filipino,Indonesian,Malay": 0.26704545454545453, + "English,Filipino,Indonesian,Spanish": 0.2840909090909091, + "English,Filipino,Malay,Spanish": 0.2556818181818182, + "English,Vietnamese,Chinese,Indonesian": 0.3465909090909091, + "English,Vietnamese,Chinese,Malay": 0.29545454545454547, + "English,Vietnamese,Chinese,Spanish": 0.32954545454545453, + "English,Vietnamese,Indonesian,Malay": 0.3352272727272727, + "English,Vietnamese,Indonesian,Spanish": 0.3806818181818182, + "English,Vietnamese,Malay,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.36363636363636365, + "English,Chinese,Indonesian,Spanish": 0.4034090909090909, + "English,Chinese,Malay,Spanish": 0.3409090909090909, + "English,Indonesian,Malay,Spanish": 0.38636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.2215909090909091, + "Filipino,Vietnamese,Chinese,Malay": 0.1875, + "Filipino,Vietnamese,Chinese,Spanish": 0.18181818181818182, + "Filipino,Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Indonesian,Spanish": 0.2159090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.19318181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.23863636363636365, + "Filipino,Chinese,Indonesian,Spanish": 0.23295454545454544, + "Filipino,Chinese,Malay,Spanish": 0.20454545454545456, + "Filipino,Indonesian,Malay,Spanish": 0.2556818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.29545454545454547, + "Vietnamese,Chinese,Indonesian,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Malay,Spanish": 0.26704545454545453, + "Vietnamese,Indonesian,Malay,Spanish": 0.32386363636363635, + "Chinese,Indonesian,Malay,Spanish": 0.3352272727272727 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.19318181818181818, + "English,Filipino,Vietnamese,Chinese,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.16477272727272727, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.1875, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.20454545454545456, + "English,Filipino,Vietnamese,Malay,Spanish": 0.17613636363636365, + "English,Filipino,Chinese,Indonesian,Malay": 0.21022727272727273, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Malay,Spanish": 0.1875, + "English,Filipino,Indonesian,Malay,Spanish": 0.2215909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.26136363636363635, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "English,Vietnamese,Chinese,Malay,Spanish": 0.24431818181818182, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.2897727272727273, + "English,Chinese,Indonesian,Malay,Spanish": 0.3125, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.1875, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.23295454545454544, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.125 + } + }, + "AC3_2": 0.41677637426372877, + "AC3_3": 0.35373911063373725, + "AC3_4": 0.3016753973336666, + "AC3_5": 0.25689995785010744, + "AC3_6": 0.21766286987300748, + "AC3_7": 0.18185840703999123 + }, + "prompt_2": { + "overall_acc": 0.30275974025974023, + "language_acc": { + "English": 0.3693181818181818, + "Filipino": 0.2556818181818182, + "Vietnamese": 0.2556818181818182, + "Chinese": 0.35795454545454547, + "Indonesian": 0.26136363636363635, + "Malay": 0.3068181818181818, + "Spanish": 0.3125 + }, + "consistency_score_2": 0.5037878787878787, + "consistency_score_3": 0.314448051948052, + "consistency_score_4": 0.21249999999999997, + "consistency_score_5": 0.14880952380952378, + "consistency_score_6": 0.10714285714285716, + "consistency_score_7": 0.07954545454545454, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.48863636363636365, + "English,Vietnamese": 0.5511363636363636, + "English,Chinese": 0.5511363636363636, + "English,Indonesian": 0.5738636363636364, + "English,Malay": 0.5397727272727273, + "English,Spanish": 0.6136363636363636, + "Filipino,Vietnamese": 0.42613636363636365, + "Filipino,Chinese": 0.39204545454545453, + "Filipino,Indonesian": 0.48863636363636365, + "Filipino,Malay": 0.48863636363636365, + "Filipino,Spanish": 0.4090909090909091, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Indonesian": 0.5568181818181818, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Spanish": 0.5056818181818182, + "Chinese,Indonesian": 0.4772727272727273, + "Chinese,Malay": 0.4772727272727273, + "Chinese,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Spanish": 0.5056818181818182, + "Malay,Spanish": 0.4772727272727273 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.3068181818181818, + "English,Filipino,Chinese": 0.2840909090909091, + "English,Filipino,Indonesian": 0.32954545454545453, + "English,Filipino,Malay": 0.3125, + "English,Filipino,Spanish": 0.3181818181818182, + "English,Vietnamese,Chinese": 0.3522727272727273, + "English,Vietnamese,Indonesian": 0.38636363636363635, + "English,Vietnamese,Malay": 0.3693181818181818, + "English,Vietnamese,Spanish": 0.38636363636363635, + "English,Chinese,Indonesian": 0.3465909090909091, + "English,Chinese,Malay": 0.3352272727272727, + "English,Chinese,Spanish": 0.32386363636363635, + "English,Indonesian,Malay": 0.38636363636363635, + "English,Indonesian,Spanish": 0.38636363636363635, + "English,Malay,Spanish": 0.36363636363636365, + "Filipino,Vietnamese,Chinese": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian": 0.30113636363636365, + "Filipino,Vietnamese,Malay": 0.30113636363636365, + "Filipino,Vietnamese,Spanish": 0.26136363636363635, + "Filipino,Chinese,Indonesian": 0.24431818181818182, + "Filipino,Chinese,Malay": 0.2215909090909091, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.3181818181818182, + "Filipino,Indonesian,Spanish": 0.2897727272727273, + "Filipino,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian": 0.32954545454545453, + "Vietnamese,Chinese,Malay": 0.3125, + "Vietnamese,Chinese,Spanish": 0.29545454545454547, + "Vietnamese,Indonesian,Malay": 0.3806818181818182, + "Vietnamese,Indonesian,Spanish": 0.3465909090909091, + "Vietnamese,Malay,Spanish": 0.3409090909090909, + "Chinese,Indonesian,Malay": 0.3068181818181818, + "Chinese,Indonesian,Spanish": 0.2727272727272727, + "Chinese,Malay,Spanish": 0.26704545454545453, + "Indonesian,Malay,Spanish": 0.3352272727272727 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.1875, + "English,Filipino,Vietnamese,Indonesian": 0.23295454545454544, + "English,Filipino,Vietnamese,Malay": 0.23295454545454544, + "English,Filipino,Vietnamese,Spanish": 0.2215909090909091, + "English,Filipino,Chinese,Indonesian": 0.19318181818181818, + "English,Filipino,Chinese,Malay": 0.1875, + "English,Filipino,Chinese,Spanish": 0.16477272727272727, + "English,Filipino,Indonesian,Malay": 0.24431818181818182, + "English,Filipino,Indonesian,Spanish": 0.23295454545454544, + "English,Filipino,Malay,Spanish": 0.2159090909090909, + "English,Vietnamese,Chinese,Indonesian": 0.24431818181818182, + "English,Vietnamese,Chinese,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Spanish": 0.23295454545454544, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Malay,Spanish": 0.26136363636363635, + "English,Chinese,Indonesian,Malay": 0.24431818181818182, + "English,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Chinese,Malay,Spanish": 0.20454545454545456, + "English,Indonesian,Malay,Spanish": 0.26136363636363635, + "Filipino,Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Spanish": 0.14204545454545456, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "Filipino,Vietnamese,Malay,Spanish": 0.19318181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.18181818181818182, + "Filipino,Chinese,Indonesian,Spanish": 0.14772727272727273, + "Filipino,Chinese,Malay,Spanish": 0.13068181818181818, + "Filipino,Indonesian,Malay,Spanish": 0.20454545454545456, + "Vietnamese,Chinese,Indonesian,Malay": 0.2159090909090909, + "Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "Vietnamese,Chinese,Malay,Spanish": 0.20454545454545456, + "Vietnamese,Indonesian,Malay,Spanish": 0.26704545454545453, + "Chinese,Indonesian,Malay,Spanish": 0.19886363636363635 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.13636363636363635, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14204545454545456, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.17045454545454544, + "English,Filipino,Vietnamese,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Chinese,Indonesian,Malay": 0.1590909090909091, + "English,Filipino,Chinese,Indonesian,Spanish": 0.11931818181818182, + "English,Filipino,Chinese,Malay,Spanish": 0.10795454545454546, + "English,Filipino,Indonesian,Malay,Spanish": 0.17613636363636365, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.17045454545454544, + "English,Vietnamese,Chinese,Malay,Spanish": 0.1534090909090909, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.20454545454545456, + "English,Chinese,Indonesian,Malay,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.11931818181818182, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.09090909090909091, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.08522727272727272, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.14204545454545456, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.10227272727272728, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.09090909090909091 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.07954545454545454 + } + }, + "AC3_2": 0.37822115819774643, + "AC3_3": 0.30849322292903786, + "AC3_4": 0.2497243225728323, + "AC3_5": 0.1995420696296965, + "AC3_6": 0.1582743988298456, + "AC3_7": 0.12598919124286034 + }, + "prompt_3": { + "overall_acc": 0.31493506493506496, + "language_acc": { + "English": 0.3465909090909091, + "Filipino": 0.26136363636363635, + "Vietnamese": 0.26136363636363635, + "Chinese": 0.36363636363636365, + "Indonesian": 0.2840909090909091, + "Malay": 0.32386363636363635, + "Spanish": 0.36363636363636365 + }, + "consistency_score_2": 0.5219155844155845, + "consistency_score_3": 0.3392857142857143, + "consistency_score_4": 0.2412337662337663, + "consistency_score_5": 0.1801948051948052, + "consistency_score_6": 0.14042207792207792, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.45454545454545453, + "English,Vietnamese": 0.5511363636363636, + "English,Chinese": 0.5738636363636364, + "English,Indonesian": 0.6477272727272727, + "English,Malay": 0.5397727272727273, + "English,Spanish": 0.6590909090909091, + "Filipino,Vietnamese": 0.4318181818181818, + "Filipino,Chinese": 0.375, + "Filipino,Indonesian": 0.45454545454545453, + "Filipino,Malay": 0.4772727272727273, + "Filipino,Spanish": 0.4715909090909091, + "Vietnamese,Chinese": 0.4715909090909091, + "Vietnamese,Indonesian": 0.5170454545454546, + "Vietnamese,Malay": 0.5170454545454546, + "Vietnamese,Spanish": 0.5227272727272727, + "Chinese,Indonesian": 0.5511363636363636, + "Chinese,Malay": 0.45454545454545453, + "Chinese,Spanish": 0.5284090909090909, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Spanish": 0.6022727272727273, + "Malay,Spanish": 0.5454545454545454 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese": 0.2784090909090909, + "English,Filipino,Indonesian": 0.3181818181818182, + "English,Filipino,Malay": 0.29545454545454547, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese": 0.3522727272727273, + "English,Vietnamese,Indonesian": 0.38636363636363635, + "English,Vietnamese,Malay": 0.35795454545454547, + "English,Vietnamese,Spanish": 0.4147727272727273, + "English,Chinese,Indonesian": 0.4375, + "English,Chinese,Malay": 0.3465909090909091, + "English,Chinese,Spanish": 0.4090909090909091, + "English,Indonesian,Malay": 0.4375, + "English,Indonesian,Spanish": 0.4943181818181818, + "English,Malay,Spanish": 0.4318181818181818, + "Filipino,Vietnamese,Chinese": 0.2215909090909091, + "Filipino,Vietnamese,Indonesian": 0.2840909090909091, + "Filipino,Vietnamese,Malay": 0.29545454545454547, + "Filipino,Vietnamese,Spanish": 0.29545454545454547, + "Filipino,Chinese,Indonesian": 0.2556818181818182, + "Filipino,Chinese,Malay": 0.2215909090909091, + "Filipino,Chinese,Spanish": 0.2556818181818182, + "Filipino,Indonesian,Malay": 0.3181818181818182, + "Filipino,Indonesian,Spanish": 0.3181818181818182, + "Filipino,Malay,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Indonesian": 0.3181818181818182, + "Vietnamese,Chinese,Malay": 0.2840909090909091, + "Vietnamese,Chinese,Spanish": 0.3181818181818182, + "Vietnamese,Indonesian,Malay": 0.375, + "Vietnamese,Indonesian,Spanish": 0.3693181818181818, + "Vietnamese,Malay,Spanish": 0.35795454545454547, + "Chinese,Indonesian,Malay": 0.36363636363636365, + "Chinese,Indonesian,Spanish": 0.375, + "Chinese,Malay,Spanish": 0.32954545454545453, + "Indonesian,Malay,Spanish": 0.42613636363636365 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian": 0.23295454545454544, + "English,Filipino,Vietnamese,Malay": 0.22727272727272727, + "English,Filipino,Vietnamese,Spanish": 0.25, + "English,Filipino,Chinese,Indonesian": 0.21022727272727273, + "English,Filipino,Chinese,Malay": 0.19318181818181818, + "English,Filipino,Chinese,Spanish": 0.20454545454545456, + "English,Filipino,Indonesian,Malay": 0.24431818181818182, + "English,Filipino,Indonesian,Spanish": 0.26136363636363635, + "English,Filipino,Malay,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian": 0.2727272727272727, + "English,Vietnamese,Chinese,Malay": 0.23863636363636365, + "English,Vietnamese,Chinese,Spanish": 0.26704545454545453, + "English,Vietnamese,Indonesian,Malay": 0.2840909090909091, + "English,Vietnamese,Indonesian,Spanish": 0.3125, + "English,Vietnamese,Malay,Spanish": 0.29545454545454547, + "English,Chinese,Indonesian,Malay": 0.3068181818181818, + "English,Chinese,Indonesian,Spanish": 0.32954545454545453, + "English,Chinese,Malay,Spanish": 0.2727272727272727, + "English,Indonesian,Malay,Spanish": 0.3522727272727273, + "Filipino,Vietnamese,Chinese,Indonesian": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Malay": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Spanish": 0.17613636363636365, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.2215909090909091, + "Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, + "Filipino,Chinese,Indonesian,Malay": 0.1875, + "Filipino,Chinese,Indonesian,Spanish": 0.1875, + "Filipino,Chinese,Malay,Spanish": 0.18181818181818182, + "Filipino,Indonesian,Malay,Spanish": 0.24431818181818182, + "Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23863636363636365, + "Vietnamese,Chinese,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,Spanish": 0.2897727272727273, + "Chinese,Indonesian,Malay,Spanish": 0.2784090909090909 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.1534090909090909, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.1534090909090909, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.19886363636363635, + "English,Filipino,Vietnamese,Malay,Spanish": 0.19886363636363635, + "English,Filipino,Chinese,Indonesian,Malay": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian,Spanish": 0.16477272727272727, + "English,Filipino,Chinese,Malay,Spanish": 0.1590909090909091, + "English,Filipino,Indonesian,Malay,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.20454545454545456, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Malay,Spanish": 0.19318181818181818, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.23863636363636365, + "English,Chinese,Indonesian,Malay,Spanish": 0.24431818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13636363636363635, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.13068181818181818, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1875, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.125, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17045454545454544, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.14772727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.16477272727272727, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363 + } + }, + "AC3_2": 0.39282880063830944, + "AC3_3": 0.32665721370405726, + "AC3_4": 0.2732011129068071, + "AC3_5": 0.22923142426709428, + "AC3_6": 0.19423802574617172, + "AC3_7": 0.16701101924477732 + }, + "prompt_4": { + "overall_acc": 0.32954545454545453, + "language_acc": { + "English": 0.39204545454545453, + "Filipino": 0.26136363636363635, + "Vietnamese": 0.2784090909090909, + "Chinese": 0.3352272727272727, + "Indonesian": 0.32386363636363635, + "Malay": 0.3522727272727273, + "Spanish": 0.36363636363636365 + }, + "consistency_score_2": 0.5465367965367965, + "consistency_score_3": 0.3652597402597403, + "consistency_score_4": 0.26298701298701294, + "consistency_score_5": 0.19534632034632035, + "consistency_score_6": 0.14772727272727273, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.4659090909090909, + "English,Vietnamese": 0.5795454545454546, + "English,Chinese": 0.6193181818181818, + "English,Indonesian": 0.6534090909090909, + "English,Malay": 0.5625, + "English,Spanish": 0.6477272727272727, + "Filipino,Vietnamese": 0.4318181818181818, + "Filipino,Chinese": 0.42045454545454547, + "Filipino,Indonesian": 0.48295454545454547, + "Filipino,Malay": 0.4772727272727273, + "Filipino,Spanish": 0.4943181818181818, + "Vietnamese,Chinese": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5909090909090909, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,Spanish": 0.5795454545454546, + "Chinese,Indonesian": 0.6079545454545454, + "Chinese,Malay": 0.4659090909090909, + "Chinese,Spanish": 0.5397727272727273, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Spanish": 0.6136363636363636, + "Malay,Spanish": 0.5795454545454546 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.29545454545454547, + "English,Filipino,Chinese": 0.3125, + "English,Filipino,Indonesian": 0.3409090909090909, + "English,Filipino,Malay": 0.30113636363636365, + "English,Filipino,Spanish": 0.3352272727272727, + "English,Vietnamese,Chinese": 0.39204545454545453, + "English,Vietnamese,Indonesian": 0.44886363636363635, + "English,Vietnamese,Malay": 0.3806818181818182, + "English,Vietnamese,Spanish": 0.44886363636363635, + "English,Chinese,Indonesian": 0.4772727272727273, + "English,Chinese,Malay": 0.3806818181818182, + "English,Chinese,Spanish": 0.4375, + "English,Indonesian,Malay": 0.4431818181818182, + "English,Indonesian,Spanish": 0.4943181818181818, + "English,Malay,Spanish": 0.4375, + "Filipino,Vietnamese,Chinese": 0.25, + "Filipino,Vietnamese,Indonesian": 0.3068181818181818, + "Filipino,Vietnamese,Malay": 0.29545454545454547, + "Filipino,Vietnamese,Spanish": 0.30113636363636365, + "Filipino,Chinese,Indonesian": 0.3181818181818182, + "Filipino,Chinese,Malay": 0.22727272727272727, + "Filipino,Chinese,Spanish": 0.2784090909090909, + "Filipino,Indonesian,Malay": 0.32954545454545453, + "Filipino,Indonesian,Spanish": 0.3409090909090909, + "Filipino,Malay,Spanish": 0.32386363636363635, + "Vietnamese,Chinese,Indonesian": 0.39204545454545453, + "Vietnamese,Chinese,Malay": 0.30113636363636365, + "Vietnamese,Chinese,Spanish": 0.35795454545454547, + "Vietnamese,Indonesian,Malay": 0.4147727272727273, + "Vietnamese,Indonesian,Spanish": 0.4318181818181818, + "Vietnamese,Malay,Spanish": 0.38636363636363635, + "Chinese,Indonesian,Malay": 0.38636363636363635, + "Chinese,Indonesian,Spanish": 0.42613636363636365, + "Chinese,Malay,Spanish": 0.3522727272727273, + "Indonesian,Malay,Spanish": 0.4375 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.21022727272727273, + "English,Filipino,Vietnamese,Indonesian": 0.23863636363636365, + "English,Filipino,Vietnamese,Malay": 0.2159090909090909, + "English,Filipino,Vietnamese,Spanish": 0.22727272727272727, + "English,Filipino,Chinese,Indonesian": 0.26136363636363635, + "English,Filipino,Chinese,Malay": 0.19886363636363635, + "English,Filipino,Chinese,Spanish": 0.23295454545454544, + "English,Filipino,Indonesian,Malay": 0.25, + "English,Filipino,Indonesian,Spanish": 0.2727272727272727, + "English,Filipino,Malay,Spanish": 0.25, + "English,Vietnamese,Chinese,Indonesian": 0.3352272727272727, + "English,Vietnamese,Chinese,Malay": 0.26136363636363635, + "English,Vietnamese,Chinese,Spanish": 0.3068181818181818, + "English,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.375, + "English,Vietnamese,Malay,Spanish": 0.3181818181818182, + "English,Chinese,Indonesian,Malay": 0.32954545454545453, + "English,Chinese,Indonesian,Spanish": 0.3806818181818182, + "English,Chinese,Malay,Spanish": 0.30113636363636365, + "English,Indonesian,Malay,Spanish": 0.35795454545454547, + "Filipino,Vietnamese,Chinese,Indonesian": 0.20454545454545456, + "Filipino,Vietnamese,Chinese,Malay": 0.17045454545454544, + "Filipino,Vietnamese,Chinese,Spanish": 0.1875, + "Filipino,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Filipino,Vietnamese,Indonesian,Spanish": 0.22727272727272727, + "Filipino,Vietnamese,Malay,Spanish": 0.2215909090909091, + "Filipino,Chinese,Indonesian,Malay": 0.19886363636363635, + "Filipino,Chinese,Indonesian,Spanish": 0.24431818181818182, + "Filipino,Chinese,Malay,Spanish": 0.1875, + "Filipino,Indonesian,Malay,Spanish": 0.25, + "Vietnamese,Chinese,Indonesian,Malay": 0.26136363636363635, + "Vietnamese,Chinese,Indonesian,Spanish": 0.30113636363636365, + "Vietnamese,Chinese,Malay,Spanish": 0.25, + "Vietnamese,Indonesian,Malay,Spanish": 0.3181818181818182, + "Chinese,Indonesian,Malay,Spanish": 0.3068181818181818 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.18181818181818182, + "English,Filipino,Vietnamese,Chinese,Malay": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.1590909090909091, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.19318181818181818, + "English,Filipino,Vietnamese,Malay,Spanish": 0.17613636363636365, + "English,Filipino,Chinese,Indonesian,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Chinese,Malay,Spanish": 0.16477272727272727, + "English,Filipino,Indonesian,Malay,Spanish": 0.21022727272727273, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.23295454545454544, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Chinese,Malay,Spanish": 0.2215909090909091, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.2727272727272727, + "English,Chinese,Indonesian,Malay,Spanish": 0.2784090909090909, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.14204545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.1590909090909091, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.13636363636363635, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.17613636363636365, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.2215909090909091 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.11931818181818182, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.1534090909090909, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.1590909090909091, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.20454545454545456, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363 + } + }, + "AC3_2": 0.41116851025684636, + "AC3_3": 0.3464847068320353, + "AC3_4": 0.29252801987591104, + "AC3_5": 0.24529053416132834, + "AC3_6": 0.20400432896158524, + "AC3_7": 0.1689976689595362 + }, + "prompt_5": { + "overall_acc": 0.3141233766233766, + "language_acc": { + "English": 0.3693181818181818, + "Filipino": 0.24431818181818182, + "Vietnamese": 0.25, + "Chinese": 0.35795454545454547, + "Indonesian": 0.2727272727272727, + "Malay": 0.36363636363636365, + "Spanish": 0.3409090909090909 + }, + "consistency_score_2": 0.4897186147186148, + "consistency_score_3": 0.2982142857142857, + "consistency_score_4": 0.1961038961038961, + "consistency_score_5": 0.13284632034632035, + "consistency_score_6": 0.09090909090909091, + "consistency_score_7": 0.0625, + "detailed_consistency_score": { + "2_combine": { + "English,Filipino": 0.4659090909090909, + "English,Vietnamese": 0.5227272727272727, + "English,Chinese": 0.6079545454545454, + "English,Indonesian": 0.5965909090909091, + "English,Malay": 0.5681818181818182, + "English,Spanish": 0.5738636363636364, + "Filipino,Vietnamese": 0.4034090909090909, + "Filipino,Chinese": 0.3977272727272727, + "Filipino,Indonesian": 0.4659090909090909, + "Filipino,Malay": 0.4375, + "Filipino,Spanish": 0.4034090909090909, + "Vietnamese,Chinese": 0.42045454545454547, + "Vietnamese,Indonesian": 0.5056818181818182, + "Vietnamese,Malay": 0.4772727272727273, + "Vietnamese,Spanish": 0.4659090909090909, + "Chinese,Indonesian": 0.5113636363636364, + "Chinese,Malay": 0.4602272727272727, + "Chinese,Spanish": 0.44886363636363635, + "Indonesian,Malay": 0.5681818181818182, + "Indonesian,Spanish": 0.5, + "Malay,Spanish": 0.48295454545454547 + }, + "3_combine": { + "English,Filipino,Vietnamese": 0.2727272727272727, + "English,Filipino,Chinese": 0.29545454545454547, + "English,Filipino,Indonesian": 0.3181818181818182, + "English,Filipino,Malay": 0.2784090909090909, + "English,Filipino,Spanish": 0.2840909090909091, + "English,Vietnamese,Chinese": 0.3181818181818182, + "English,Vietnamese,Indonesian": 0.375, + "English,Vietnamese,Malay": 0.3409090909090909, + "English,Vietnamese,Spanish": 0.3522727272727273, + "English,Chinese,Indonesian": 0.4090909090909091, + "English,Chinese,Malay": 0.375, + "English,Chinese,Spanish": 0.36363636363636365, + "English,Indonesian,Malay": 0.39204545454545453, + "English,Indonesian,Spanish": 0.39204545454545453, + "English,Malay,Spanish": 0.36363636363636365, + "Filipino,Vietnamese,Chinese": 0.20454545454545456, + "Filipino,Vietnamese,Indonesian": 0.2556818181818182, + "Filipino,Vietnamese,Malay": 0.2159090909090909, + "Filipino,Vietnamese,Spanish": 0.2159090909090909, + "Filipino,Chinese,Indonesian": 0.26704545454545453, + "Filipino,Chinese,Malay": 0.21022727272727273, + "Filipino,Chinese,Spanish": 0.21022727272727273, + "Filipino,Indonesian,Malay": 0.2784090909090909, + "Filipino,Indonesian,Spanish": 0.2556818181818182, + "Filipino,Malay,Spanish": 0.22727272727272727, + "Vietnamese,Chinese,Indonesian": 0.2784090909090909, + "Vietnamese,Chinese,Malay": 0.22727272727272727, + "Vietnamese,Chinese,Spanish": 0.25, + "Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Vietnamese,Indonesian,Spanish": 0.3181818181818182, + "Vietnamese,Malay,Spanish": 0.29545454545454547, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Spanish": 0.3181818181818182, + "Chinese,Malay,Spanish": 0.29545454545454547, + "Indonesian,Malay,Spanish": 0.3409090909090909 + }, + "4_combine": { + "English,Filipino,Vietnamese,Chinese": 0.17613636363636365, + "English,Filipino,Vietnamese,Indonesian": 0.21022727272727273, + "English,Filipino,Vietnamese,Malay": 0.1590909090909091, + "English,Filipino,Vietnamese,Spanish": 0.18181818181818182, + "English,Filipino,Chinese,Indonesian": 0.23295454545454544, + "English,Filipino,Chinese,Malay": 0.18181818181818182, + "English,Filipino,Chinese,Spanish": 0.17613636363636365, + "English,Filipino,Indonesian,Malay": 0.21022727272727273, + "English,Filipino,Indonesian,Spanish": 0.2159090909090909, + "English,Filipino,Malay,Spanish": 0.1875, + "English,Vietnamese,Chinese,Indonesian": 0.25, + "English,Vietnamese,Chinese,Malay": 0.19318181818181818, + "English,Vietnamese,Chinese,Spanish": 0.2159090909090909, + "English,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "English,Vietnamese,Indonesian,Spanish": 0.2784090909090909, + "English,Vietnamese,Malay,Spanish": 0.24431818181818182, + "English,Chinese,Indonesian,Malay": 0.26704545454545453, + "English,Chinese,Indonesian,Spanish": 0.2727272727272727, + "English,Chinese,Malay,Spanish": 0.24431818181818182, + "English,Indonesian,Malay,Spanish": 0.2727272727272727, + "Filipino,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "Filipino,Vietnamese,Chinese,Malay": 0.10795454545454546, + "Filipino,Vietnamese,Chinese,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Indonesian,Malay": 0.1534090909090909, + "Filipino,Vietnamese,Indonesian,Spanish": 0.1534090909090909, + "Filipino,Vietnamese,Malay,Spanish": 0.13068181818181818, + "Filipino,Chinese,Indonesian,Malay": 0.1590909090909091, + "Filipino,Chinese,Indonesian,Spanish": 0.17613636363636365, + "Filipino,Chinese,Malay,Spanish": 0.14772727272727273, + "Filipino,Indonesian,Malay,Spanish": 0.1534090909090909, + "Vietnamese,Chinese,Indonesian,Malay": 0.17045454545454544, + "Vietnamese,Chinese,Indonesian,Spanish": 0.19886363636363635, + "Vietnamese,Chinese,Malay,Spanish": 0.17045454545454544, + "Vietnamese,Indonesian,Malay,Spanish": 0.2215909090909091, + "Chinese,Indonesian,Malay,Spanish": 0.22727272727272727 + }, + "5_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian": 0.14772727272727273, + "English,Filipino,Vietnamese,Chinese,Malay": 0.09659090909090909, + "English,Filipino,Vietnamese,Chinese,Spanish": 0.10795454545454546, + "English,Filipino,Vietnamese,Indonesian,Malay": 0.13068181818181818, + "English,Filipino,Vietnamese,Indonesian,Spanish": 0.14772727272727273, + "English,Filipino,Vietnamese,Malay,Spanish": 0.11363636363636363, + "English,Filipino,Chinese,Indonesian,Malay": 0.14772727272727273, + "English,Filipino,Chinese,Indonesian,Spanish": 0.1590909090909091, + "English,Filipino,Chinese,Malay,Spanish": 0.125, + "English,Filipino,Indonesian,Malay,Spanish": 0.14204545454545456, + "English,Vietnamese,Chinese,Indonesian,Malay": 0.1534090909090909, + "English,Vietnamese,Chinese,Indonesian,Spanish": 0.18181818181818182, + "English,Vietnamese,Chinese,Malay,Spanish": 0.14204545454545456, + "English,Vietnamese,Indonesian,Malay,Spanish": 0.19318181818181818, + "English,Chinese,Indonesian,Malay,Spanish": 0.19318181818181818, + "Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "Filipino,Vietnamese,Chinese,Malay,Spanish": 0.07954545454545454, + "Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.09090909090909091, + "Filipino,Chinese,Indonesian,Malay,Spanish": 0.11363636363636363, + "Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.13636363636363635 + }, + "6_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay": 0.08522727272727272, + "English,Filipino,Vietnamese,Chinese,Indonesian,Spanish": 0.10227272727272728, + "English,Filipino,Vietnamese,Chinese,Malay,Spanish": 0.06818181818181818, + "English,Filipino,Vietnamese,Indonesian,Malay,Spanish": 0.09090909090909091, + "English,Filipino,Chinese,Indonesian,Malay,Spanish": 0.10795454545454546, + "English,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.11931818181818182, + "Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.0625 + }, + "7_combine": { + "English,Filipino,Vietnamese,Chinese,Indonesian,Malay,Spanish": 0.0625 + } + }, + "AC3_2": 0.38274204753792046, + "AC3_3": 0.3059621647735453, + "AC3_4": 0.24146423091738334, + "AC3_5": 0.18672467056606684, + "AC3_6": 0.1410092912750782, + "AC3_7": 0.10425646548955948 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.3786407766990291 + }, + "prompt_2": { + "accuracy": 0.4077669902912621 + }, + "prompt_3": { + "accuracy": 0.36893203883495146 + }, + "prompt_4": { + "accuracy": 0.3592233009708738 + }, + "prompt_5": { + "accuracy": 0.39805825242718446 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.3047619047619048 + }, + "prompt_2": { + "accuracy": 0.3047619047619048 + }, + "prompt_3": { + "accuracy": 0.3142857142857143 + }, + "prompt_4": { + "accuracy": 0.26666666666666666 + }, + "prompt_5": { + "accuracy": 0.2857142857142857 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.37383177570093457 + }, + "prompt_2": { + "accuracy": 0.35514018691588783 + }, + "prompt_3": { + "accuracy": 0.34579439252336447 + }, + "prompt_4": { + "accuracy": 0.37383177570093457 + }, + "prompt_5": { + "accuracy": 0.3644859813084112 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.5, + "history": 0.06666666666666667, + "literature": 0.2, + "politics": 0.3, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_2": { + "accuracy": 0.29, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.4, + "history": 0.06666666666666667, + "literature": 0.2, + "politics": 0.4, + "culture": 0.2, + "film": 0.3, + "law": 0.3, + "geography": 0.7 + } + }, + "prompt_3": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.6, + "history": 0.06666666666666667, + "literature": 0.2, + "politics": 0.3, + "culture": 0.2, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_4": { + "accuracy": 0.31, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.5, + "history": 0.06666666666666667, + "literature": 0.3, + "politics": 0.3, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + }, + "prompt_5": { + "accuracy": 0.3, + "category_acc": { + "brand": 0.3, + "demographics": 0.2, + "biology": 0.5, + "history": 0.06666666666666667, + "literature": 0.3, + "politics": 0.2, + "culture": 0.3, + "film": 0.3, + "law": 0.3, + "geography": 0.6 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.08018261727594969 + }, + "prompt_2": { + "bleu_score": 0.12382019242173196 + }, + "prompt_3": { + "bleu_score": 0.1199767749511639 + }, + "prompt_4": { + "bleu_score": 0.03195403005978134 + }, + "prompt_5": { + "bleu_score": 0.061824747337313385 + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.1343785060078945 + }, + "prompt_2": { + "bleu_score": 0.1496093405264642 + }, + "prompt_3": { + "bleu_score": 0.16003855519520016 + }, + "prompt_4": { + "bleu_score": 0.10462834210636253 + }, + "prompt_5": { + "bleu_score": 0.06385242696530409 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.1087606777198467 + }, + "prompt_2": { + "bleu_score": 0.11902537481654184 + }, + "prompt_3": { + "bleu_score": 0.11904787991589674 + }, + "prompt_4": { + "bleu_score": 0.08972204187399575 + }, + "prompt_5": { + "bleu_score": 0.055067576910017214 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.09334331817883475 + }, + "prompt_2": { + "bleu_score": 0.10315026424681181 + }, + "prompt_3": { + "bleu_score": 0.10991932612344779 + }, + "prompt_4": { + "bleu_score": 0.09121174219069825 + }, + "prompt_5": { + "bleu_score": 0.056671234844644376 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.11139158189885928 + }, + "prompt_2": { + "bleu_score": 0.12086218447554221 + }, + "prompt_3": { + "bleu_score": 0.12973348185998357 + }, + "prompt_4": { + "bleu_score": 0.08166315239199984 + }, + "prompt_5": { + "bleu_score": 0.037636119241057975 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.4130688448074679 + }, + "prompt_2": { + "accuracy": 0.42473745624270715 + }, + "prompt_3": { + "accuracy": 0.4282380396732789 + }, + "prompt_4": { + "accuracy": 0.4049008168028005 + }, + "prompt_5": { + "accuracy": 0.411901983663944 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.38248122988916694, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.42424242424242425, + "clinical_knowledge": 0.39015151515151514, + "medical_genetics": 0.42424242424242425, + "high_school_us_history": 0.3891625615763547, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5042372881355932, + "virology": 0.37575757575757573, + "high_school_microeconomics": 0.3037974683544304, + "econometrics": 0.2743362831858407, + "college_computer_science": 0.31313131313131315, + "high_school_biology": 0.43042071197411, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.31316725978647686, + "philosophy": 0.3774193548387097, + "professional_medicine": 0.3062730627306273, + "nutrition": 0.4426229508196721, + "global_facts": 0.31313131313131315, + "machine_learning": 0.3783783783783784, + "security_studies": 0.36885245901639346, + "public_relations": 0.42201834862385323, + "professional_psychology": 0.397708674304419, + "prehistory": 0.3993808049535604, + "anatomy": 0.44776119402985076, + "human_sexuality": 0.43846153846153846, + "college_medicine": 0.37790697674418605, + "high_school_government_and_politics": 0.4375, + "college_chemistry": 0.20202020202020202, + "logical_fallacies": 0.41358024691358025, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.27586206896551724, + "human_aging": 0.481981981981982, + "college_mathematics": 0.30303030303030304, + "high_school_psychology": 0.5018382352941176, + "formal_logic": 0.384, + "high_school_statistics": 0.2186046511627907, + "international_law": 0.49166666666666664, + "high_school_mathematics": 0.27137546468401486, + "high_school_computer_science": 0.36363636363636365, + "conceptual_physics": 0.36324786324786323, + "miscellaneous": 0.5089514066496164, + "high_school_chemistry": 0.27722772277227725, + "marketing": 0.6137339055793991, + "professional_law": 0.31441617742987604, + "management": 0.4411764705882353, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.42990654205607476, + "world_religions": 0.5529411764705883, + "sociology": 0.485, + "us_foreign_policy": 0.5252525252525253, + "high_school_macroeconomics": 0.3676092544987147, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.4579710144927536, + "electrical_engineering": 0.4444444444444444, + "astronomy": 0.31125827814569534, + "college_biology": 0.42657342657342656 + } + }, + "prompt_2": { + "accuracy": 0.39656775116195925, + "category_acc": { + "high_school_european_history": 0.5060975609756098, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.4090909090909091, + "medical_genetics": 0.47474747474747475, + "high_school_us_history": 0.3694581280788177, + "high_school_physics": 0.2866666666666667, + "high_school_world_history": 0.5550847457627118, + "virology": 0.40606060606060607, + "high_school_microeconomics": 0.33755274261603374, + "econometrics": 0.23893805309734514, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.44983818770226536, + "abstract_algebra": 0.1919191919191919, + "professional_accounting": 0.29537366548042704, + "philosophy": 0.38387096774193546, + "professional_medicine": 0.2952029520295203, + "nutrition": 0.4721311475409836, + "global_facts": 0.2727272727272727, + "machine_learning": 0.38738738738738737, + "security_studies": 0.3770491803278688, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.4026186579378069, + "prehistory": 0.4179566563467492, + "anatomy": 0.43283582089552236, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.4895833333333333, + "college_chemistry": 0.26262626262626265, + "logical_fallacies": 0.4074074074074074, + "high_school_geography": 0.49238578680203043, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.46846846846846846, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.5680147058823529, + "formal_logic": 0.376, + "high_school_statistics": 0.25116279069767444, + "international_law": 0.525, + "high_school_mathematics": 0.241635687732342, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.5332480818414322, + "high_school_chemistry": 0.30198019801980197, + "marketing": 0.5879828326180258, + "professional_law": 0.3268101761252446, + "management": 0.46078431372549017, + "college_physics": 0.1782178217821782, + "jurisprudence": 0.4485981308411215, + "world_religions": 0.5352941176470588, + "sociology": 0.53, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.4138817480719794, + "computer_security": 0.5454545454545454, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.4753623188405797, + "electrical_engineering": 0.4652777777777778, + "astronomy": 0.33774834437086093, + "college_biology": 0.4195804195804196 + } + }, + "prompt_3": { + "accuracy": 0.39756882373972113, + "category_acc": { + "high_school_european_history": 0.5, + "business_ethics": 0.46464646464646464, + "clinical_knowledge": 0.4128787878787879, + "medical_genetics": 0.45454545454545453, + "high_school_us_history": 0.4039408866995074, + "high_school_physics": 0.28, + "high_school_world_history": 0.5550847457627118, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.3291139240506329, + "econometrics": 0.25663716814159293, + "college_computer_science": 0.2828282828282828, + "high_school_biology": 0.44660194174757284, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.30604982206405695, + "philosophy": 0.3709677419354839, + "professional_medicine": 0.2988929889298893, + "nutrition": 0.4721311475409836, + "global_facts": 0.2828282828282828, + "machine_learning": 0.36936936936936937, + "security_studies": 0.38934426229508196, + "public_relations": 0.44036697247706424, + "professional_psychology": 0.3993453355155483, + "prehistory": 0.4458204334365325, + "anatomy": 0.41044776119402987, + "human_sexuality": 0.45384615384615384, + "college_medicine": 0.4011627906976744, + "high_school_government_and_politics": 0.4895833333333333, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.38271604938271603, + "high_school_geography": 0.48223350253807107, + "elementary_mathematics": 0.27055702917771884, + "human_aging": 0.481981981981982, + "college_mathematics": 0.36363636363636365, + "high_school_psychology": 0.5606617647058824, + "formal_logic": 0.376, + "high_school_statistics": 0.24186046511627907, + "international_law": 0.5083333333333333, + "high_school_mathematics": 0.2527881040892193, + "high_school_computer_science": 0.3939393939393939, + "conceptual_physics": 0.3717948717948718, + "miscellaneous": 0.5383631713554987, + "high_school_chemistry": 0.3069306930693069, + "marketing": 0.6223175965665236, + "professional_law": 0.32746249184605347, + "management": 0.45098039215686275, + "college_physics": 0.19801980198019803, + "jurisprudence": 0.45794392523364486, + "world_religions": 0.5352941176470588, + "sociology": 0.535, + "us_foreign_policy": 0.5757575757575758, + "high_school_macroeconomics": 0.41131105398457585, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.24384787472035793, + "moral_disputes": 0.4492753623188406, + "electrical_engineering": 0.4583333333333333, + "astronomy": 0.3576158940397351, + "college_biology": 0.44755244755244755 + } + }, + "prompt_4": { + "accuracy": 0.3877011083303539, + "category_acc": { + "high_school_european_history": 0.49390243902439024, + "business_ethics": 0.41414141414141414, + "clinical_knowledge": 0.3977272727272727, + "medical_genetics": 0.42424242424242425, + "high_school_us_history": 0.4088669950738916, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.5338983050847458, + "virology": 0.3939393939393939, + "high_school_microeconomics": 0.3459915611814346, + "econometrics": 0.23008849557522124, + "college_computer_science": 0.29292929292929293, + "high_school_biology": 0.42071197411003236, + "abstract_algebra": 0.2222222222222222, + "professional_accounting": 0.3096085409252669, + "philosophy": 0.3709677419354839, + "professional_medicine": 0.3025830258302583, + "nutrition": 0.46557377049180326, + "global_facts": 0.3333333333333333, + "machine_learning": 0.3783783783783784, + "security_studies": 0.38114754098360654, + "public_relations": 0.41284403669724773, + "professional_psychology": 0.39279869067103107, + "prehistory": 0.42105263157894735, + "anatomy": 0.44029850746268656, + "human_sexuality": 0.4307692307692308, + "college_medicine": 0.3953488372093023, + "high_school_government_and_politics": 0.4322916666666667, + "college_chemistry": 0.18181818181818182, + "logical_fallacies": 0.3888888888888889, + "high_school_geography": 0.47715736040609136, + "elementary_mathematics": 0.27320954907161804, + "human_aging": 0.46846846846846846, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.5128676470588235, + "formal_logic": 0.336, + "high_school_statistics": 0.24186046511627907, + "international_law": 0.48333333333333334, + "high_school_mathematics": 0.275092936802974, + "high_school_computer_science": 0.3838383838383838, + "conceptual_physics": 0.3333333333333333, + "miscellaneous": 0.5179028132992327, + "high_school_chemistry": 0.28217821782178215, + "marketing": 0.6137339055793991, + "professional_law": 0.33072407045009783, + "management": 0.4411764705882353, + "college_physics": 0.22772277227722773, + "jurisprudence": 0.48598130841121495, + "world_religions": 0.5470588235294118, + "sociology": 0.485, + "us_foreign_policy": 0.5353535353535354, + "high_school_macroeconomics": 0.39845758354755784, + "computer_security": 0.5252525252525253, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.48405797101449277, + "electrical_engineering": 0.4375, + "astronomy": 0.304635761589404, + "college_biology": 0.40559440559440557 + } + }, + "prompt_5": -1 + }, + "c_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c_eval_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "zbench": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "ind_emotion": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "ocnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c3": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "dream": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "samsum": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "dialogsum": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "sst2": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cola": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "qqp": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "qnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "wnli": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "rte": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "mrpc": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "indommlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + } + }, + "five_shot": { + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + } + } + }, + "gemma-7b": { + "model_size": "7B", + "model_link": "https://huggingface.co/google/gemma-7b", + "zero_shot": { + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.5180952380952382, + "language_acc": { + "Vietnamese": 0.4533333333333333, + "English": 0.62, + "Malay": 0.4066666666666667, + "Chinese": 0.49333333333333335, + "Filipino": 0.5933333333333334, + "Indonesian": 0.5, + "Spanish": 0.56 + }, + "consistency_score_2": 0.5625396825396825, + "consistency_score_3": 0.38247619047619047, + "consistency_score_4": 0.2801904761904762, + "consistency_score_5": 0.21396825396825397, + "consistency_score_6": 0.16761904761904764, + "consistency_score_7": 0.13333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.4666666666666667, + "Vietnamese,Malay": 0.5333333333333333, + "Vietnamese,Chinese": 0.44666666666666666, + "Vietnamese,Filipino": 0.5, + "Vietnamese,Indonesian": 0.49333333333333335, + "Vietnamese,Spanish": 0.49333333333333335, + "English,Malay": 0.52, + "English,Chinese": 0.6066666666666667, + "English,Filipino": 0.6466666666666666, + "English,Indonesian": 0.6333333333333333, + "English,Spanish": 0.6466666666666666, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.5533333333333333, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.5266666666666666, + "Chinese,Filipino": 0.5866666666666667, + "Chinese,Indonesian": 0.56, + "Chinese,Spanish": 0.6066666666666667, + "Filipino,Indonesian": 0.62, + "Filipino,Spanish": 0.6933333333333334, + "Indonesian,Spanish": 0.6533333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.3333333333333333, + "Vietnamese,English,Chinese": 0.3, + "Vietnamese,English,Filipino": 0.3466666666666667, + "Vietnamese,English,Indonesian": 0.34, + "Vietnamese,English,Spanish": 0.3466666666666667, + "Vietnamese,Malay,Chinese": 0.3, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.32, + "Vietnamese,Chinese,Indonesian": 0.29333333333333333, + "Vietnamese,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian": 0.3466666666666667, + "Vietnamese,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish": 0.36, + "English,Malay,Chinese": 0.3466666666666667, + "English,Malay,Filipino": 0.38666666666666666, + "English,Malay,Indonesian": 0.38666666666666666, + "English,Malay,Spanish": 0.38666666666666666, + "English,Chinese,Filipino": 0.44, + "English,Chinese,Indonesian": 0.44666666666666666, + "English,Chinese,Spanish": 0.4533333333333333, + "English,Filipino,Indonesian": 0.47333333333333333, + "English,Filipino,Spanish": 0.5133333333333333, + "English,Indonesian,Spanish": 0.49333333333333335, + "Malay,Chinese,Filipino": 0.35333333333333333, + "Malay,Chinese,Indonesian": 0.34, + "Malay,Chinese,Spanish": 0.34, + "Malay,Filipino,Indonesian": 0.38666666666666666, + "Malay,Filipino,Spanish": 0.41333333333333333, + "Malay,Indonesian,Spanish": 0.38666666666666666, + "Chinese,Filipino,Indonesian": 0.42, + "Chinese,Filipino,Spanish": 0.47333333333333333, + "Chinese,Indonesian,Spanish": 0.43333333333333335, + "Filipino,Indonesian,Spanish": 0.5066666666666667 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino": 0.26666666666666666, + "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Spanish": 0.26, + "Vietnamese,English,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,English,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,English,Chinese,Spanish": 0.24, + "Vietnamese,English,Filipino,Indonesian": 0.26666666666666666, + "Vietnamese,English,Filipino,Spanish": 0.29333333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.28, + "Vietnamese,Malay,Chinese,Filipino": 0.24, + "Vietnamese,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian": 0.28, + "Vietnamese,Malay,Filipino,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,Chinese,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.3, + "English,Malay,Chinese,Filipino": 0.2733333333333333, + "English,Malay,Chinese,Indonesian": 0.2866666666666667, + "English,Malay,Chinese,Spanish": 0.2733333333333333, + "English,Malay,Filipino,Indonesian": 0.30666666666666664, + "English,Malay,Filipino,Spanish": 0.32, + "English,Malay,Indonesian,Spanish": 0.32, + "English,Chinese,Filipino,Indonesian": 0.34, + "English,Chinese,Filipino,Spanish": 0.36666666666666664, + "English,Chinese,Indonesian,Spanish": 0.34, + "English,Filipino,Indonesian,Spanish": 0.3933333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.26, + "Malay,Chinese,Filipino,Spanish": 0.28, + "Malay,Chinese,Indonesian,Spanish": 0.26666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.32, + "Chinese,Filipino,Indonesian,Spanish": 0.36 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "English,Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, + "English,Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "English,Chinese,Filipino,Indonesian,Spanish": 0.28, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.13333333333333333 + } + }, + "AC3_2": 0.5394035028318483, + "AC3_3": 0.44007412774099774, + "AC3_4": 0.3636927201825941, + "AC3_5": 0.3028587948208935, + "AC3_6": 0.25329100525406706, + "AC3_7": 0.2120857699479498 + }, + "prompt_2": { + "overall_acc": 0.5285714285714286, + "language_acc": { + "Vietnamese": 0.54, + "English": 0.6733333333333333, + "Malay": 0.38666666666666666, + "Chinese": 0.48, + "Filipino": 0.5933333333333334, + "Indonesian": 0.48, + "Spanish": 0.5466666666666666 + }, + "consistency_score_2": 0.5320634920634921, + "consistency_score_3": 0.3546666666666668, + "consistency_score_4": 0.259047619047619, + "consistency_score_5": 0.19873015873015876, + "consistency_score_6": 0.15714285714285717, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.56, + "Vietnamese,Malay": 0.43333333333333335, + "Vietnamese,Chinese": 0.5066666666666667, + "Vietnamese,Filipino": 0.6, + "Vietnamese,Indonesian": 0.5333333333333333, + "Vietnamese,Spanish": 0.54, + "English,Malay": 0.4533333333333333, + "English,Chinese": 0.49333333333333335, + "English,Filipino": 0.62, + "English,Indonesian": 0.58, + "English,Spanish": 0.62, + "Malay,Chinese": 0.5266666666666666, + "Malay,Filipino": 0.49333333333333335, + "Malay,Indonesian": 0.5266666666666666, + "Malay,Spanish": 0.47333333333333333, + "Chinese,Filipino": 0.49333333333333335, + "Chinese,Indonesian": 0.47333333333333333, + "Chinese,Spanish": 0.43333333333333335, + "Filipino,Indonesian": 0.6333333333333333, + "Filipino,Spanish": 0.6066666666666667, + "Indonesian,Spanish": 0.5733333333333334 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.29333333333333333, + "Vietnamese,English,Chinese": 0.34, + "Vietnamese,English,Filipino": 0.43333333333333335, + "Vietnamese,English,Indonesian": 0.4, + "Vietnamese,English,Spanish": 0.42, + "Vietnamese,Malay,Chinese": 0.3, + "Vietnamese,Malay,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian": 0.3, + "Vietnamese,Malay,Spanish": 0.31333333333333335, + "Vietnamese,Chinese,Filipino": 0.4, + "Vietnamese,Chinese,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian": 0.43333333333333335, + "Vietnamese,Filipino,Spanish": 0.4066666666666667, + "Vietnamese,Indonesian,Spanish": 0.38, + "English,Malay,Chinese": 0.3, + "English,Malay,Filipino": 0.32666666666666666, + "English,Malay,Indonesian": 0.32, + "English,Malay,Spanish": 0.32666666666666666, + "English,Chinese,Filipino": 0.36666666666666664, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.32666666666666666, + "English,Filipino,Indonesian": 0.46, + "English,Filipino,Spanish": 0.46, + "English,Indonesian,Spanish": 0.43333333333333335, + "Malay,Chinese,Filipino": 0.31333333333333335, + "Malay,Chinese,Indonesian": 0.30666666666666664, + "Malay,Chinese,Spanish": 0.2866666666666667, + "Malay,Filipino,Indonesian": 0.3466666666666667, + "Malay,Filipino,Spanish": 0.35333333333333333, + "Malay,Indonesian,Spanish": 0.3333333333333333, + "Chinese,Filipino,Indonesian": 0.36666666666666664, + "Chinese,Filipino,Spanish": 0.3466666666666667, + "Chinese,Indonesian,Spanish": 0.29333333333333333, + "Filipino,Indonesian,Spanish": 0.43333333333333335 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.23333333333333334, + "Vietnamese,English,Malay,Spanish": 0.24, + "Vietnamese,English,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,English,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,English,Chinese,Spanish": 0.26, + "Vietnamese,English,Filipino,Indonesian": 0.3466666666666667, + "Vietnamese,English,Filipino,Spanish": 0.3333333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.32, + "Vietnamese,Malay,Chinese,Filipino": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian": 0.24, + "Vietnamese,Malay,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,Chinese,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.32, + "English,Malay,Chinese,Filipino": 0.24, + "English,Malay,Chinese,Indonesian": 0.2, + "English,Malay,Chinese,Spanish": 0.22, + "English,Malay,Filipino,Indonesian": 0.25333333333333335, + "English,Malay,Filipino,Spanish": 0.28, + "English,Malay,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian": 0.2866666666666667, + "English,Chinese,Filipino,Spanish": 0.28, + "English,Chinese,Indonesian,Spanish": 0.24, + "English,Filipino,Indonesian,Spanish": 0.36, + "Malay,Chinese,Filipino,Indonesian": 0.23333333333333334, + "Malay,Chinese,Filipino,Spanish": 0.24, + "Malay,Chinese,Indonesian,Spanish": 0.2, + "Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.2, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.2, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.2, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.28, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.14666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "English,Malay,Chinese,Filipino,Indonesian": 0.18, + "English,Malay,Chinese,Filipino,Spanish": 0.2, + "English,Malay,Chinese,Indonesian,Spanish": 0.16, + "English,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.22, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.12666666666666668, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + } + }, + "AC3_2": 0.5303117115846695, + "AC3_3": 0.4244985981835537, + "AC3_4": 0.3476939021842192, + "AC3_5": 0.2888570181058222, + "AC3_6": 0.24226190472657494, + "AC3_7": 0.20436046508509034 + }, + "prompt_3": { + "overall_acc": 0.5114285714285715, + "language_acc": { + "Vietnamese": 0.4866666666666667, + "English": 0.6, + "Malay": 0.4066666666666667, + "Chinese": 0.44, + "Filipino": 0.58, + "Indonesian": 0.48, + "Spanish": 0.5866666666666667 + }, + "consistency_score_2": 0.5193650793650794, + "consistency_score_3": 0.33352380952380956, + "consistency_score_4": 0.24076190476190473, + "consistency_score_5": 0.18666666666666668, + "consistency_score_6": 0.15142857142857144, + "consistency_score_7": 0.12666666666666668, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.44, + "Vietnamese,Malay": 0.5866666666666667, + "Vietnamese,Chinese": 0.52, + "Vietnamese,Filipino": 0.5466666666666666, + "Vietnamese,Indonesian": 0.5533333333333333, + "Vietnamese,Spanish": 0.5, + "English,Malay": 0.47333333333333333, + "English,Chinese": 0.4666666666666667, + "English,Filipino": 0.6133333333333333, + "English,Indonesian": 0.4866666666666667, + "English,Spanish": 0.6133333333333333, + "Malay,Chinese": 0.47333333333333333, + "Malay,Filipino": 0.48, + "Malay,Indonesian": 0.5466666666666666, + "Malay,Spanish": 0.48, + "Chinese,Filipino": 0.44666666666666666, + "Chinese,Indonesian": 0.4533333333333333, + "Chinese,Spanish": 0.47333333333333333, + "Filipino,Indonesian": 0.58, + "Filipino,Spanish": 0.6333333333333333, + "Indonesian,Spanish": 0.54 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.30666666666666664, + "Vietnamese,English,Chinese": 0.3, + "Vietnamese,English,Filipino": 0.35333333333333333, + "Vietnamese,English,Indonesian": 0.29333333333333333, + "Vietnamese,English,Spanish": 0.32, + "Vietnamese,Malay,Chinese": 0.3466666666666667, + "Vietnamese,Malay,Filipino": 0.3466666666666667, + "Vietnamese,Malay,Indonesian": 0.38, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish": 0.30666666666666664, + "Vietnamese,Filipino,Indonesian": 0.38, + "Vietnamese,Filipino,Spanish": 0.3933333333333333, + "Vietnamese,Indonesian,Spanish": 0.3333333333333333, + "English,Malay,Chinese": 0.2733333333333333, + "English,Malay,Filipino": 0.34, + "English,Malay,Indonesian": 0.30666666666666664, + "English,Malay,Spanish": 0.3333333333333333, + "English,Chinese,Filipino": 0.32, + "English,Chinese,Indonesian": 0.26666666666666666, + "English,Chinese,Spanish": 0.3333333333333333, + "English,Filipino,Indonesian": 0.38666666666666666, + "English,Filipino,Spanish": 0.4666666666666667, + "English,Indonesian,Spanish": 0.38, + "Malay,Chinese,Filipino": 0.28, + "Malay,Chinese,Indonesian": 0.3, + "Malay,Chinese,Spanish": 0.28, + "Malay,Filipino,Indonesian": 0.3333333333333333, + "Malay,Filipino,Spanish": 0.36, + "Malay,Indonesian,Spanish": 0.32666666666666666, + "Chinese,Filipino,Indonesian": 0.32, + "Chinese,Filipino,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Spanish": 0.3, + "Filipino,Indonesian,Spanish": 0.4 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.22, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.24, + "Vietnamese,English,Malay,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,English,Chinese,Indonesian": 0.2, + "Vietnamese,English,Chinese,Spanish": 0.24, + "Vietnamese,English,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,English,Filipino,Spanish": 0.28, + "Vietnamese,English,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Spanish": 0.28, + "Vietnamese,Malay,Indonesian,Spanish": 0.26, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.24, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Chinese,Indonesian": 0.2, + "English,Malay,Chinese,Spanish": 0.22, + "English,Malay,Filipino,Indonesian": 0.26, + "English,Malay,Filipino,Spanish": 0.28, + "English,Malay,Indonesian,Spanish": 0.24666666666666667, + "English,Chinese,Filipino,Indonesian": 0.22, + "English,Chinese,Filipino,Spanish": 0.26666666666666666, + "English,Chinese,Indonesian,Spanish": 0.22, + "English,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.22, + "Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Spanish": 0.18, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Chinese,Filipino,Indonesian": 0.17333333333333334, + "English,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Filipino,Indonesian,Spanish": 0.22, + "English,Chinese,Filipino,Indonesian,Spanish": 0.18, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.14666666666666667, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.14, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.14, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.12666666666666668 + } + }, + "AC3_2": 0.5153662721133185, + "AC3_3": 0.4037472626084062, + "AC3_4": 0.3273971710305254, + "AC3_5": 0.27350613911498206, + "AC3_6": 0.23366995070366434, + "AC3_7": 0.2030447760875825 + }, + "prompt_4": { + "overall_acc": 0.54, + "language_acc": { + "Vietnamese": 0.5066666666666667, + "English": 0.6066666666666667, + "Malay": 0.4666666666666667, + "Chinese": 0.48, + "Filipino": 0.6066666666666667, + "Indonesian": 0.52, + "Spanish": 0.5933333333333334 + }, + "consistency_score_2": 0.5647619047619047, + "consistency_score_3": 0.3931428571428571, + "consistency_score_4": 0.2948571428571429, + "consistency_score_5": 0.22984126984126982, + "consistency_score_6": 0.18476190476190474, + "consistency_score_7": 0.15333333333333332, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.5, + "Vietnamese,Malay": 0.5533333333333333, + "Vietnamese,Chinese": 0.48, + "Vietnamese,Filipino": 0.56, + "Vietnamese,Indonesian": 0.5, + "Vietnamese,Spanish": 0.48, + "English,Malay": 0.5266666666666666, + "English,Chinese": 0.5533333333333333, + "English,Filipino": 0.6733333333333333, + "English,Indonesian": 0.6, + "English,Spanish": 0.6866666666666666, + "Malay,Chinese": 0.4866666666666667, + "Malay,Filipino": 0.5266666666666666, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.52, + "Chinese,Filipino": 0.6533333333333333, + "Chinese,Indonesian": 0.54, + "Chinese,Spanish": 0.5866666666666667, + "Filipino,Indonesian": 0.6466666666666666, + "Filipino,Spanish": 0.6466666666666666, + "Indonesian,Spanish": 0.6 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.35333333333333333, + "Vietnamese,English,Chinese": 0.32666666666666666, + "Vietnamese,English,Filipino": 0.4, + "Vietnamese,English,Indonesian": 0.35333333333333333, + "Vietnamese,English,Spanish": 0.38, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Filipino": 0.37333333333333335, + "Vietnamese,Malay,Indonesian": 0.3466666666666667, + "Vietnamese,Malay,Spanish": 0.34, + "Vietnamese,Chinese,Filipino": 0.3933333333333333, + "Vietnamese,Chinese,Indonesian": 0.30666666666666664, + "Vietnamese,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,Filipino,Indonesian": 0.41333333333333333, + "Vietnamese,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,Indonesian,Spanish": 0.3466666666666667, + "English,Malay,Chinese": 0.34, + "English,Malay,Filipino": 0.4066666666666667, + "English,Malay,Indonesian": 0.36, + "English,Malay,Spanish": 0.4066666666666667, + "English,Chinese,Filipino": 0.49333333333333335, + "English,Chinese,Indonesian": 0.3933333333333333, + "English,Chinese,Spanish": 0.4533333333333333, + "English,Filipino,Indonesian": 0.49333333333333335, + "English,Filipino,Spanish": 0.5266666666666666, + "English,Indonesian,Spanish": 0.47333333333333333, + "Malay,Chinese,Filipino": 0.37333333333333335, + "Malay,Chinese,Indonesian": 0.32666666666666666, + "Malay,Chinese,Spanish": 0.3466666666666667, + "Malay,Filipino,Indonesian": 0.3933333333333333, + "Malay,Filipino,Spanish": 0.3933333333333333, + "Malay,Indonesian,Spanish": 0.36, + "Chinese,Filipino,Indonesian": 0.4666666666666667, + "Chinese,Filipino,Spanish": 0.49333333333333335, + "Chinese,Indonesian,Spanish": 0.41333333333333333, + "Filipino,Indonesian,Spanish": 0.4866666666666667 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.24, + "Vietnamese,English,Malay,Filipino": 0.2866666666666667, + "Vietnamese,English,Malay,Indonesian": 0.25333333333333335, + "Vietnamese,English,Malay,Spanish": 0.29333333333333333, + "Vietnamese,English,Chinese,Filipino": 0.30666666666666664, + "Vietnamese,English,Chinese,Indonesian": 0.24, + "Vietnamese,English,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,English,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,English,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,English,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Chinese,Indonesian": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,Malay,Filipino,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.3, + "Vietnamese,Chinese,Filipino,Spanish": 0.30666666666666664, + "Vietnamese,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.32, + "English,Malay,Chinese,Filipino": 0.30666666666666664, + "English,Malay,Chinese,Indonesian": 0.23333333333333334, + "English,Malay,Chinese,Spanish": 0.29333333333333333, + "English,Malay,Filipino,Indonesian": 0.30666666666666664, + "English,Malay,Filipino,Spanish": 0.3466666666666667, + "English,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Chinese,Filipino,Indonesian": 0.37333333333333335, + "English,Chinese,Filipino,Spanish": 0.42, + "English,Chinese,Indonesian,Spanish": 0.32666666666666666, + "English,Filipino,Indonesian,Spanish": 0.4, + "Malay,Chinese,Filipino,Indonesian": 0.28, + "Malay,Chinese,Filipino,Spanish": 0.3, + "Malay,Chinese,Indonesian,Spanish": 0.24, + "Malay,Filipino,Indonesian,Spanish": 0.3, + "Chinese,Filipino,Indonesian,Spanish": 0.37333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.22, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,English,Malay,Filipino,Spanish": 0.26, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.26, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.24666666666666667, + "English,Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Filipino,Spanish": 0.2733333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.2, + "English,Malay,Filipino,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.31333333333333335, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332 + } + }, + "AC3_2": 0.5521034482258871, + "AC3_3": 0.45501530919802347, + "AC3_4": 0.3814373716175554, + "AC3_5": 0.3224412370715179, + "AC3_6": 0.2753219447714733, + "AC3_7": 0.23884615381170488 + }, + "prompt_5": { + "overall_acc": 0.5838095238095239, + "language_acc": { + "Vietnamese": 0.5333333333333333, + "English": 0.64, + "Malay": 0.5866666666666667, + "Chinese": 0.5733333333333334, + "Filipino": 0.6, + "Indonesian": 0.5733333333333334, + "Spanish": 0.58 + }, + "consistency_score_2": 0.654285714285714, + "consistency_score_3": 0.504, + "consistency_score_4": 0.4072380952380952, + "consistency_score_5": 0.33682539682539675, + "consistency_score_6": 0.28285714285714286, + "consistency_score_7": 0.24, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.54, + "Vietnamese,Malay": 0.5733333333333334, + "Vietnamese,Chinese": 0.58, + "Vietnamese,Filipino": 0.5933333333333334, + "Vietnamese,Indonesian": 0.5666666666666667, + "Vietnamese,Spanish": 0.58, + "English,Malay": 0.7133333333333334, + "English,Chinese": 0.64, + "English,Filipino": 0.6733333333333333, + "English,Indonesian": 0.7, + "English,Spanish": 0.7666666666666667, + "Malay,Chinese": 0.6533333333333333, + "Malay,Filipino": 0.64, + "Malay,Indonesian": 0.7333333333333333, + "Malay,Spanish": 0.7333333333333333, + "Chinese,Filipino": 0.6533333333333333, + "Chinese,Indonesian": 0.6533333333333333, + "Chinese,Spanish": 0.6866666666666666, + "Filipino,Indonesian": 0.7, + "Filipino,Spanish": 0.66, + "Indonesian,Spanish": 0.7 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.4533333333333333, + "Vietnamese,English,Chinese": 0.41333333333333333, + "Vietnamese,English,Filipino": 0.43333333333333335, + "Vietnamese,English,Indonesian": 0.4266666666666667, + "Vietnamese,English,Spanish": 0.4533333333333333, + "Vietnamese,Malay,Chinese": 0.4533333333333333, + "Vietnamese,Malay,Filipino": 0.44, + "Vietnamese,Malay,Indonesian": 0.46, + "Vietnamese,Malay,Spanish": 0.4666666666666667, + "Vietnamese,Chinese,Filipino": 0.4533333333333333, + "Vietnamese,Chinese,Indonesian": 0.43333333333333335, + "Vietnamese,Chinese,Spanish": 0.46, + "Vietnamese,Filipino,Indonesian": 0.4533333333333333, + "Vietnamese,Filipino,Spanish": 0.44666666666666666, + "Vietnamese,Indonesian,Spanish": 0.44, + "English,Malay,Chinese": 0.52, + "English,Malay,Filipino": 0.5333333333333333, + "English,Malay,Indonesian": 0.5866666666666667, + "English,Malay,Spanish": 0.62, + "English,Chinese,Filipino": 0.5066666666666667, + "English,Chinese,Indonesian": 0.5133333333333333, + "English,Chinese,Spanish": 0.5533333333333333, + "English,Filipino,Indonesian": 0.56, + "English,Filipino,Spanish": 0.58, + "English,Indonesian,Spanish": 0.5866666666666667, + "Malay,Chinese,Filipino": 0.5, + "Malay,Chinese,Indonesian": 0.54, + "Malay,Chinese,Spanish": 0.56, + "Malay,Filipino,Indonesian": 0.54, + "Malay,Filipino,Spanish": 0.5333333333333333, + "Malay,Indonesian,Spanish": 0.5933333333333334, + "Chinese,Filipino,Indonesian": 0.5266666666666666, + "Chinese,Filipino,Spanish": 0.52, + "Chinese,Indonesian,Spanish": 0.5333333333333333, + "Filipino,Indonesian,Spanish": 0.5466666666666666 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.36, + "Vietnamese,English,Malay,Filipino": 0.36, + "Vietnamese,English,Malay,Indonesian": 0.38, + "Vietnamese,English,Malay,Spanish": 0.3933333333333333, + "Vietnamese,English,Chinese,Filipino": 0.36666666666666664, + "Vietnamese,English,Chinese,Indonesian": 0.34, + "Vietnamese,English,Chinese,Spanish": 0.36666666666666664, + "Vietnamese,English,Filipino,Indonesian": 0.36, + "Vietnamese,English,Filipino,Spanish": 0.38666666666666666, + "Vietnamese,English,Indonesian,Spanish": 0.36, + "Vietnamese,Malay,Chinese,Filipino": 0.38, + "Vietnamese,Malay,Chinese,Indonesian": 0.37333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.38666666666666666, + "Vietnamese,Malay,Filipino,Indonesian": 0.36666666666666664, + "Vietnamese,Malay,Filipino,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Indonesian,Spanish": 0.37333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.36666666666666664, + "Vietnamese,Chinese,Filipino,Spanish": 0.38, + "Vietnamese,Chinese,Indonesian,Spanish": 0.36666666666666664, + "Vietnamese,Filipino,Indonesian,Spanish": 0.36666666666666664, + "English,Malay,Chinese,Filipino": 0.42, + "English,Malay,Chinese,Indonesian": 0.44666666666666666, + "English,Malay,Chinese,Spanish": 0.48, + "English,Malay,Filipino,Indonesian": 0.4666666666666667, + "English,Malay,Filipino,Spanish": 0.49333333333333335, + "English,Malay,Indonesian,Spanish": 0.5266666666666666, + "English,Chinese,Filipino,Indonesian": 0.4266666666666667, + "English,Chinese,Filipino,Spanish": 0.44666666666666666, + "English,Chinese,Indonesian,Spanish": 0.44666666666666666, + "English,Filipino,Indonesian,Spanish": 0.4866666666666667, + "Malay,Chinese,Filipino,Indonesian": 0.4266666666666667, + "Malay,Chinese,Filipino,Spanish": 0.43333333333333335, + "Malay,Chinese,Indonesian,Spanish": 0.4666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.46, + "Chinese,Filipino,Indonesian,Spanish": 0.4266666666666667 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.31333333333333335, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.30666666666666664, + "Vietnamese,English,Malay,Chinese,Spanish": 0.32666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.3333333333333333, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.32666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.3, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.3, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.32, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.31333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.32, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.31333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.30666666666666664, + "English,Malay,Chinese,Filipino,Indonesian": 0.36666666666666664, + "English,Malay,Chinese,Filipino,Spanish": 0.3933333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.41333333333333333, + "English,Malay,Filipino,Indonesian,Spanish": 0.43333333333333335, + "English,Chinese,Filipino,Indonesian,Spanish": 0.37333333333333335, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.36666666666666664 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.34 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.24 + } + }, + "AC3_2": 0.6170417581919202, + "AC3_3": 0.5409770617608103, + "AC3_4": 0.47979425784309665, + "AC3_5": 0.4271875204790501, + "AC3_6": 0.3810800627503778, + "AC3_7": 0.3401618496696913 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.46185064935064934, + "language_acc": { + "Spanish": 0.4772727272727273, + "Chinese": 0.4431818181818182, + "Vietnamese": 0.4943181818181818, + "Indonesian": 0.42045454545454547, + "Malay": 0.4375, + "Filipino": 0.4772727272727273, + "English": 0.48295454545454547 + }, + "consistency_score_2": 0.5641233766233766, + "consistency_score_3": 0.3926948051948052, + "consistency_score_4": 0.2967532467532467, + "consistency_score_5": 0.2343073593073593, + "consistency_score_6": 0.19074675324675325, + "consistency_score_7": 0.1590909090909091, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4715909090909091, + "Spanish,Vietnamese": 0.5738636363636364, + "Spanish,Indonesian": 0.5795454545454546, + "Spanish,Malay": 0.5965909090909091, + "Spanish,Filipino": 0.625, + "Spanish,English": 0.6079545454545454, + "Chinese,Vietnamese": 0.45454545454545453, + "Chinese,Indonesian": 0.4602272727272727, + "Chinese,Malay": 0.5227272727272727, + "Chinese,Filipino": 0.5170454545454546, + "Chinese,English": 0.45454545454545453, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Malay": 0.5113636363636364, + "Vietnamese,Filipino": 0.5965909090909091, + "Vietnamese,English": 0.5852272727272727, + "Indonesian,Malay": 0.6136363636363636, + "Indonesian,Filipino": 0.6420454545454546, + "Indonesian,English": 0.5795454545454546, + "Malay,Filipino": 0.6534090909090909, + "Malay,English": 0.5795454545454546, + "Filipino,English": 0.625 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Indonesian": 0.3181818181818182, + "Spanish,Chinese,Malay": 0.3465909090909091, + "Spanish,Chinese,Filipino": 0.35795454545454547, + "Spanish,Chinese,English": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian": 0.42613636363636365, + "Spanish,Vietnamese,Malay": 0.38636363636363635, + "Spanish,Vietnamese,Filipino": 0.4431818181818182, + "Spanish,Vietnamese,English": 0.4147727272727273, + "Spanish,Indonesian,Malay": 0.44886363636363635, + "Spanish,Indonesian,Filipino": 0.4715909090909091, + "Spanish,Indonesian,English": 0.42045454545454547, + "Spanish,Malay,Filipino": 0.4659090909090909, + "Spanish,Malay,English": 0.42613636363636365, + "Spanish,Filipino,English": 0.45454545454545453, + "Chinese,Vietnamese,Indonesian": 0.30113636363636365, + "Chinese,Vietnamese,Malay": 0.3181818181818182, + "Chinese,Vietnamese,Filipino": 0.3465909090909091, + "Chinese,Vietnamese,English": 0.3125, + "Chinese,Indonesian,Malay": 0.3522727272727273, + "Chinese,Indonesian,Filipino": 0.3522727272727273, + "Chinese,Indonesian,English": 0.3068181818181818, + "Chinese,Malay,Filipino": 0.39204545454545453, + "Chinese,Malay,English": 0.3465909090909091, + "Chinese,Filipino,English": 0.3409090909090909, + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Filipino": 0.4602272727272727, + "Vietnamese,Indonesian,English": 0.4090909090909091, + "Vietnamese,Malay,Filipino": 0.42045454545454547, + "Vietnamese,Malay,English": 0.3806818181818182, + "Vietnamese,Filipino,English": 0.4375, + "Indonesian,Malay,Filipino": 0.4943181818181818, + "Indonesian,Malay,English": 0.42613636363636365, + "Indonesian,Filipino,English": 0.4659090909090909, + "Malay,Filipino,English": 0.4659090909090909 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Malay": 0.24431818181818182, + "Spanish,Chinese,Vietnamese,Filipino": 0.26136363636363635, + "Spanish,Chinese,Vietnamese,English": 0.2215909090909091, + "Spanish,Chinese,Indonesian,Malay": 0.2784090909090909, + "Spanish,Chinese,Indonesian,Filipino": 0.2840909090909091, + "Spanish,Chinese,Indonesian,English": 0.23863636363636365, + "Spanish,Chinese,Malay,Filipino": 0.30113636363636365, + "Spanish,Chinese,Malay,English": 0.26136363636363635, + "Spanish,Chinese,Filipino,English": 0.26136363636363635, + "Spanish,Vietnamese,Indonesian,Malay": 0.3465909090909091, + "Spanish,Vietnamese,Indonesian,Filipino": 0.3693181818181818, + "Spanish,Vietnamese,Indonesian,English": 0.32386363636363635, + "Spanish,Vietnamese,Malay,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,Malay,English": 0.2897727272727273, + "Spanish,Vietnamese,Filipino,English": 0.3352272727272727, + "Spanish,Indonesian,Malay,Filipino": 0.38636363636363635, + "Spanish,Indonesian,Malay,English": 0.3352272727272727, + "Spanish,Indonesian,Filipino,English": 0.35795454545454547, + "Spanish,Malay,Filipino,English": 0.3522727272727273, + "Chinese,Vietnamese,Indonesian,Malay": 0.2556818181818182, + "Chinese,Vietnamese,Indonesian,Filipino": 0.26704545454545453, + "Chinese,Vietnamese,Indonesian,English": 0.2159090909090909, + "Chinese,Vietnamese,Malay,Filipino": 0.2784090909090909, + "Chinese,Vietnamese,Malay,English": 0.24431818181818182, + "Chinese,Vietnamese,Filipino,English": 0.2556818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.3125, + "Chinese,Indonesian,Malay,English": 0.2727272727272727, + "Chinese,Indonesian,Filipino,English": 0.2556818181818182, + "Chinese,Malay,Filipino,English": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,Filipino": 0.35795454545454547, + "Vietnamese,Indonesian,Malay,English": 0.3068181818181818, + "Vietnamese,Indonesian,Filipino,English": 0.3465909090909091, + "Vietnamese,Malay,Filipino,English": 0.32954545454545453, + "Indonesian,Malay,Filipino,English": 0.3693181818181818 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.1875, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Spanish,Chinese,Indonesian,Malay,English": 0.2215909090909091, + "Spanish,Chinese,Indonesian,Filipino,English": 0.21022727272727273, + "Spanish,Chinese,Malay,Filipino,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.3068181818181818, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.26136363636363635, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2840909090909091, + "Spanish,Vietnamese,Malay,Filipino,English": 0.25, + "Spanish,Indonesian,Malay,Filipino,English": 0.29545454545454547, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.19886363636363635, + "Chinese,Vietnamese,Malay,Filipino,English": 0.2215909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2784090909090909 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1875 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + } + }, + "AC3_2": 0.5078895589674293, + "AC3_3": 0.42447443793635364, + "AC3_4": 0.36133660898852776, + "AC3_5": 0.3108920810572946, + "AC3_6": 0.2699873198526651, + "AC3_7": 0.23666072485790582 + }, + "prompt_2": { + "overall_acc": 0.4837662337662338, + "language_acc": { + "Spanish": 0.4943181818181818, + "Chinese": 0.4147727272727273, + "Vietnamese": 0.5, + "Indonesian": 0.4715909090909091, + "Malay": 0.4772727272727273, + "Filipino": 0.4715909090909091, + "English": 0.5568181818181818 + }, + "consistency_score_2": 0.5446428571428573, + "consistency_score_3": 0.3696428571428572, + "consistency_score_4": 0.26801948051948055, + "consistency_score_5": 0.19994588744588743, + "consistency_score_6": 0.15097402597402595, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4090909090909091, + "Spanish,Vietnamese": 0.5681818181818182, + "Spanish,Indonesian": 0.5454545454545454, + "Spanish,Malay": 0.6079545454545454, + "Spanish,Filipino": 0.6136363636363636, + "Spanish,English": 0.6363636363636364, + "Chinese,Vietnamese": 0.4147727272727273, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Malay": 0.375, + "Chinese,Filipino": 0.3977272727272727, + "Chinese,English": 0.4375, + "Vietnamese,Indonesian": 0.5625, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Filipino": 0.5625, + "Vietnamese,English": 0.6193181818181818, + "Indonesian,Malay": 0.6534090909090909, + "Indonesian,Filipino": 0.5795454545454546, + "Indonesian,English": 0.6363636363636364, + "Malay,Filipino": 0.5909090909090909, + "Malay,English": 0.6306818181818182, + "Filipino,English": 0.6306818181818182 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2840909090909091, + "Spanish,Chinese,Indonesian": 0.26136363636363635, + "Spanish,Chinese,Malay": 0.2784090909090909, + "Spanish,Chinese,Filipino": 0.2727272727272727, + "Spanish,Chinese,English": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian": 0.3977272727272727, + "Spanish,Vietnamese,Malay": 0.4147727272727273, + "Spanish,Vietnamese,Filipino": 0.4034090909090909, + "Spanish,Vietnamese,English": 0.4431818181818182, + "Spanish,Indonesian,Malay": 0.4375, + "Spanish,Indonesian,Filipino": 0.4034090909090909, + "Spanish,Indonesian,English": 0.44886363636363635, + "Spanish,Malay,Filipino": 0.4318181818181818, + "Spanish,Malay,English": 0.4715909090909091, + "Spanish,Filipino,English": 0.4715909090909091, + "Chinese,Vietnamese,Indonesian": 0.2897727272727273, + "Chinese,Vietnamese,Malay": 0.26136363636363635, + "Chinese,Vietnamese,Filipino": 0.2556818181818182, + "Chinese,Vietnamese,English": 0.2897727272727273, + "Chinese,Indonesian,Malay": 0.29545454545454547, + "Chinese,Indonesian,Filipino": 0.26704545454545453, + "Chinese,Indonesian,English": 0.30113636363636365, + "Chinese,Malay,Filipino": 0.25, + "Chinese,Malay,English": 0.2897727272727273, + "Chinese,Filipino,English": 0.2897727272727273, + "Vietnamese,Indonesian,Malay": 0.42045454545454547, + "Vietnamese,Indonesian,Filipino": 0.39204545454545453, + "Vietnamese,Indonesian,English": 0.44886363636363635, + "Vietnamese,Malay,Filipino": 0.3977272727272727, + "Vietnamese,Malay,English": 0.44886363636363635, + "Vietnamese,Filipino,English": 0.4375, + "Indonesian,Malay,Filipino": 0.44886363636363635, + "Indonesian,Malay,English": 0.4943181818181818, + "Indonesian,Filipino,English": 0.44886363636363635, + "Malay,Filipino,English": 0.4659090909090909 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,Malay": 0.20454545454545456, + "Spanish,Chinese,Vietnamese,Filipino": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.21022727272727273, + "Spanish,Chinese,Indonesian,Filipino": 0.18181818181818182, + "Spanish,Chinese,Indonesian,English": 0.23295454545454544, + "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Malay,English": 0.23863636363636365, + "Spanish,Chinese,Filipino,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.32386363636363635, + "Spanish,Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Spanish,Vietnamese,Indonesian,English": 0.3409090909090909, + "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, + "Spanish,Vietnamese,Malay,English": 0.3465909090909091, + "Spanish,Vietnamese,Filipino,English": 0.3409090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.3352272727272727, + "Spanish,Indonesian,Malay,English": 0.3806818181818182, + "Spanish,Indonesian,Filipino,English": 0.3465909090909091, + "Spanish,Malay,Filipino,English": 0.36363636363636365, + "Chinese,Vietnamese,Indonesian,Malay": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,Filipino": 0.20454545454545456, + "Chinese,Vietnamese,Indonesian,English": 0.22727272727272727, + "Chinese,Vietnamese,Malay,Filipino": 0.1875, + "Chinese,Vietnamese,Malay,English": 0.2159090909090909, + "Chinese,Vietnamese,Filipino,English": 0.20454545454545456, + "Chinese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Chinese,Indonesian,Malay,English": 0.23863636363636365, + "Chinese,Indonesian,Filipino,English": 0.2159090909090909, + "Chinese,Malay,Filipino,English": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,Filipino": 0.3181818181818182, + "Vietnamese,Indonesian,Malay,English": 0.36363636363636365, + "Vietnamese,Indonesian,Filipino,English": 0.32386363636363635, + "Vietnamese,Malay,Filipino,English": 0.3409090909090909, + "Indonesian,Malay,Filipino,English": 0.3806818181818182 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Indonesian,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Malay,Filipino,English": 0.17045454545454544, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2556818181818182, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2897727272727273, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.26136363636363635, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino,English": 0.30113636363636365, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2784090909090909 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.125, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + } + }, + "AC3_2": 0.5124027511058862, + "AC3_3": 0.41907388782828214, + "AC3_4": 0.3449354575002427, + "AC3_5": 0.2829467722207821, + "AC3_6": 0.230129205796275, + "AC3_7": 0.18404150194547764 + }, + "prompt_3": { + "overall_acc": 0.45860389610389607, + "language_acc": { + "Spanish": 0.48863636363636365, + "Chinese": 0.39204545454545453, + "Vietnamese": 0.4318181818181818, + "Indonesian": 0.4943181818181818, + "Malay": 0.4602272727272727, + "Filipino": 0.4318181818181818, + "English": 0.5113636363636364 + }, + "consistency_score_2": 0.5462662337662338, + "consistency_score_3": 0.3678571428571428, + "consistency_score_4": 0.2683441558441559, + "consistency_score_5": 0.20535714285714285, + "consistency_score_6": 0.16396103896103895, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.44886363636363635, + "Spanish,Vietnamese": 0.5340909090909091, + "Spanish,Indonesian": 0.5284090909090909, + "Spanish,Malay": 0.5852272727272727, + "Spanish,Filipino": 0.6420454545454546, + "Spanish,English": 0.6534090909090909, + "Chinese,Vietnamese": 0.44886363636363635, + "Chinese,Indonesian": 0.44886363636363635, + "Chinese,Malay": 0.4147727272727273, + "Chinese,Filipino": 0.44886363636363635, + "Chinese,English": 0.4602272727272727, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.5284090909090909, + "Vietnamese,Filipino": 0.5681818181818182, + "Vietnamese,English": 0.5795454545454546, + "Indonesian,Malay": 0.5909090909090909, + "Indonesian,Filipino": 0.5795454545454546, + "Indonesian,English": 0.5852272727272727, + "Malay,Filipino": 0.625, + "Malay,English": 0.6306818181818182, + "Filipino,English": 0.6477272727272727 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Spanish,Chinese,Indonesian": 0.2727272727272727, + "Spanish,Chinese,Malay": 0.2897727272727273, + "Spanish,Chinese,Filipino": 0.3181818181818182, + "Spanish,Chinese,English": 0.3352272727272727, + "Spanish,Vietnamese,Indonesian": 0.35795454545454547, + "Spanish,Vietnamese,Malay": 0.3693181818181818, + "Spanish,Vietnamese,Filipino": 0.4034090909090909, + "Spanish,Vietnamese,English": 0.4147727272727273, + "Spanish,Indonesian,Malay": 0.3977272727272727, + "Spanish,Indonesian,Filipino": 0.4034090909090909, + "Spanish,Indonesian,English": 0.42613636363636365, + "Spanish,Malay,Filipino": 0.4602272727272727, + "Spanish,Malay,English": 0.4659090909090909, + "Spanish,Filipino,English": 0.4943181818181818, + "Chinese,Vietnamese,Indonesian": 0.2840909090909091, + "Chinese,Vietnamese,Malay": 0.26704545454545453, + "Chinese,Vietnamese,Filipino": 0.2840909090909091, + "Chinese,Vietnamese,English": 0.3068181818181818, + "Chinese,Indonesian,Malay": 0.2727272727272727, + "Chinese,Indonesian,Filipino": 0.2840909090909091, + "Chinese,Indonesian,English": 0.3068181818181818, + "Chinese,Malay,Filipino": 0.30113636363636365, + "Chinese,Malay,English": 0.3125, + "Chinese,Filipino,English": 0.3409090909090909, + "Vietnamese,Indonesian,Malay": 0.38636363636363635, + "Vietnamese,Indonesian,Filipino": 0.38636363636363635, + "Vietnamese,Indonesian,English": 0.39204545454545453, + "Vietnamese,Malay,Filipino": 0.42045454545454547, + "Vietnamese,Malay,English": 0.4090909090909091, + "Vietnamese,Filipino,English": 0.42613636363636365, + "Indonesian,Malay,Filipino": 0.42613636363636365, + "Indonesian,Malay,English": 0.4375, + "Indonesian,Filipino,English": 0.4375, + "Malay,Filipino,English": 0.48863636363636365 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Malay": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino": 0.22727272727272727, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.22727272727272727, + "Spanish,Chinese,Malay,Filipino": 0.24431818181818182, + "Spanish,Chinese,Malay,English": 0.26136363636363635, + "Spanish,Chinese,Filipino,English": 0.2727272727272727, + "Spanish,Vietnamese,Indonesian,Malay": 0.2897727272727273, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,Indonesian,English": 0.29545454545454547, + "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, + "Spanish,Vietnamese,Malay,English": 0.3181818181818182, + "Spanish,Vietnamese,Filipino,English": 0.32954545454545453, + "Spanish,Indonesian,Malay,Filipino": 0.32386363636363635, + "Spanish,Indonesian,Malay,English": 0.3352272727272727, + "Spanish,Indonesian,Filipino,English": 0.3409090909090909, + "Spanish,Malay,Filipino,English": 0.3977272727272727, + "Chinese,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,English": 0.23295454545454544, + "Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Chinese,Vietnamese,Malay,English": 0.22727272727272727, + "Chinese,Vietnamese,Filipino,English": 0.24431818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Chinese,Indonesian,Malay,English": 0.2159090909090909, + "Chinese,Indonesian,Filipino,English": 0.23863636363636365, + "Chinese,Malay,Filipino,English": 0.2727272727272727, + "Vietnamese,Indonesian,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Indonesian,Malay,English": 0.3125, + "Vietnamese,Indonesian,Filipino,English": 0.3068181818181818, + "Vietnamese,Malay,Filipino,English": 0.3409090909090909, + "Indonesian,Malay,Filipino,English": 0.3409090909090909 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,English": 0.19886363636363635, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay,English": 0.17613636363636365, + "Spanish,Chinese,Indonesian,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Malay,Filipino,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.25, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino,English": 0.2840909090909091, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Malay,Filipino,English": 0.20454545454545456, + "Chinese,Indonesian,Malay,Filipino,English": 0.1875, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.25 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.1590909090909091, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.21022727272727273, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + } + }, + "AC3_2": 0.49861134417965686, + "AC3_3": 0.4082484495956346, + "AC3_4": 0.33857625732713653, + "AC3_5": 0.28368407260833933, + "AC3_6": 0.24155928813065697, + "AC3_7": 0.2102195212346658 + }, + "prompt_4": { + "overall_acc": 0.476461038961039, + "language_acc": { + "Spanish": 0.4772727272727273, + "Chinese": 0.5056818181818182, + "Vietnamese": 0.4943181818181818, + "Indonesian": 0.4375, + "Malay": 0.44886363636363635, + "Filipino": 0.4659090909090909, + "English": 0.5056818181818182 + }, + "consistency_score_2": 0.5430194805194805, + "consistency_score_3": 0.37142857142857155, + "consistency_score_4": 0.27938311688311684, + "consistency_score_5": 0.22023809523809523, + "consistency_score_6": 0.17857142857142858, + "consistency_score_7": 0.14772727272727273, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.48863636363636365, + "Spanish,Vietnamese": 0.5397727272727273, + "Spanish,Indonesian": 0.5454545454545454, + "Spanish,Malay": 0.5909090909090909, + "Spanish,Filipino": 0.6193181818181818, + "Spanish,English": 0.6306818181818182, + "Chinese,Vietnamese": 0.4034090909090909, + "Chinese,Indonesian": 0.4659090909090909, + "Chinese,Malay": 0.44886363636363635, + "Chinese,Filipino": 0.4659090909090909, + "Chinese,English": 0.48863636363636365, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.5397727272727273, + "Vietnamese,Filipino": 0.5852272727272727, + "Vietnamese,English": 0.5397727272727273, + "Indonesian,Malay": 0.6193181818181818, + "Indonesian,Filipino": 0.5681818181818182, + "Indonesian,English": 0.5795454545454546, + "Malay,Filipino": 0.5625, + "Malay,English": 0.5965909090909091, + "Filipino,English": 0.6022727272727273 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Indonesian": 0.3125, + "Spanish,Chinese,Malay": 0.32386363636363635, + "Spanish,Chinese,Filipino": 0.3465909090909091, + "Spanish,Chinese,English": 0.3522727272727273, + "Spanish,Vietnamese,Indonesian": 0.3693181818181818, + "Spanish,Vietnamese,Malay": 0.3977272727272727, + "Spanish,Vietnamese,Filipino": 0.4090909090909091, + "Spanish,Vietnamese,English": 0.4090909090909091, + "Spanish,Indonesian,Malay": 0.4431818181818182, + "Spanish,Indonesian,Filipino": 0.4147727272727273, + "Spanish,Indonesian,English": 0.42045454545454547, + "Spanish,Malay,Filipino": 0.42613636363636365, + "Spanish,Malay,English": 0.45454545454545453, + "Spanish,Filipino,English": 0.4715909090909091, + "Chinese,Vietnamese,Indonesian": 0.29545454545454547, + "Chinese,Vietnamese,Malay": 0.2840909090909091, + "Chinese,Vietnamese,Filipino": 0.2897727272727273, + "Chinese,Vietnamese,English": 0.2840909090909091, + "Chinese,Indonesian,Malay": 0.32954545454545453, + "Chinese,Indonesian,Filipino": 0.30113636363636365, + "Chinese,Indonesian,English": 0.3181818181818182, + "Chinese,Malay,Filipino": 0.3068181818181818, + "Chinese,Malay,English": 0.32954545454545453, + "Chinese,Filipino,English": 0.32954545454545453, + "Vietnamese,Indonesian,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.38636363636363635, + "Vietnamese,Indonesian,English": 0.375, + "Vietnamese,Malay,Filipino": 0.3977272727272727, + "Vietnamese,Malay,English": 0.39204545454545453, + "Vietnamese,Filipino,English": 0.4034090909090909, + "Indonesian,Malay,Filipino": 0.4147727272727273, + "Indonesian,Malay,English": 0.44886363636363635, + "Indonesian,Filipino,English": 0.42613636363636365, + "Malay,Filipino,English": 0.4318181818181818 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.23863636363636365, + "Spanish,Chinese,Vietnamese,English": 0.24431818181818182, + "Spanish,Chinese,Indonesian,Malay": 0.2556818181818182, + "Spanish,Chinese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Chinese,Indonesian,English": 0.25, + "Spanish,Chinese,Malay,Filipino": 0.2556818181818182, + "Spanish,Chinese,Malay,English": 0.26704545454545453, + "Spanish,Chinese,Filipino,English": 0.26704545454545453, + "Spanish,Vietnamese,Indonesian,Malay": 0.3181818181818182, + "Spanish,Vietnamese,Indonesian,Filipino": 0.3125, + "Spanish,Vietnamese,Indonesian,English": 0.3068181818181818, + "Spanish,Vietnamese,Malay,Filipino": 0.3181818181818182, + "Spanish,Vietnamese,Malay,English": 0.32954545454545453, + "Spanish,Vietnamese,Filipino,English": 0.3465909090909091, + "Spanish,Indonesian,Malay,Filipino": 0.3465909090909091, + "Spanish,Indonesian,Malay,English": 0.36363636363636365, + "Spanish,Indonesian,Filipino,English": 0.3465909090909091, + "Spanish,Malay,Filipino,English": 0.3693181818181818, + "Chinese,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Chinese,Vietnamese,Indonesian,English": 0.2215909090909091, + "Chinese,Vietnamese,Malay,Filipino": 0.2159090909090909, + "Chinese,Vietnamese,Malay,English": 0.23863636363636365, + "Chinese,Vietnamese,Filipino,English": 0.2215909090909091, + "Chinese,Indonesian,Malay,Filipino": 0.23295454545454544, + "Chinese,Indonesian,Malay,English": 0.2556818181818182, + "Chinese,Indonesian,Filipino,English": 0.23863636363636365, + "Chinese,Malay,Filipino,English": 0.26136363636363635, + "Vietnamese,Indonesian,Malay,Filipino": 0.3125, + "Vietnamese,Indonesian,Malay,English": 0.32386363636363635, + "Vietnamese,Indonesian,Filipino,English": 0.3068181818181818, + "Vietnamese,Malay,Filipino,English": 0.32386363636363635, + "Indonesian,Malay,Filipino,English": 0.3465909090909091 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Malay,English": 0.2159090909090909, + "Spanish,Chinese,Indonesian,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Malay,Filipino,English": 0.22727272727272727, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.26704545454545453, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2727272727272727, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2727272727272727, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2897727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.3068181818181818, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.17613636363636365, + "Chinese,Vietnamese,Malay,Filipino,English": 0.19318181818181818, + "Chinese,Indonesian,Malay,Filipino,English": 0.19886363636363635, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.26704545454545453 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.24431818181818182, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.14772727272727273 + } + }, + "AC3_2": 0.5075675717096639, + "AC3_3": 0.41743934788982806, + "AC3_4": 0.3522291443056106, + "AC3_5": 0.3012343966280525, + "AC3_6": 0.2597804920828404, + "AC3_7": 0.225529022306935 + }, + "prompt_5": { + "overall_acc": 0.4675324675324676, + "language_acc": { + "Spanish": 0.45454545454545453, + "Chinese": 0.44886363636363635, + "Vietnamese": 0.4602272727272727, + "Indonesian": 0.4715909090909091, + "Malay": 0.4659090909090909, + "Filipino": 0.4659090909090909, + "English": 0.5056818181818182 + }, + "consistency_score_2": 0.5625, + "consistency_score_3": 0.3925324675324674, + "consistency_score_4": 0.2983766233766234, + "consistency_score_5": 0.23566017316017318, + "consistency_score_6": 0.18912337662337664, + "consistency_score_7": 0.1534090909090909, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4715909090909091, + "Spanish,Vietnamese": 0.5568181818181818, + "Spanish,Indonesian": 0.5852272727272727, + "Spanish,Malay": 0.6136363636363636, + "Spanish,Filipino": 0.5795454545454546, + "Spanish,English": 0.6534090909090909, + "Chinese,Vietnamese": 0.4034090909090909, + "Chinese,Indonesian": 0.45454545454545453, + "Chinese,Malay": 0.4431818181818182, + "Chinese,Filipino": 0.4602272727272727, + "Chinese,English": 0.5340909090909091, + "Vietnamese,Indonesian": 0.5965909090909091, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,Filipino": 0.5454545454545454, + "Vietnamese,English": 0.5795454545454546, + "Indonesian,Malay": 0.6590909090909091, + "Indonesian,Filipino": 0.625, + "Indonesian,English": 0.6590909090909091, + "Malay,Filipino": 0.5852272727272727, + "Malay,English": 0.625, + "Filipino,English": 0.6477272727272727 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Spanish,Chinese,Indonesian": 0.3125, + "Spanish,Chinese,Malay": 0.3181818181818182, + "Spanish,Chinese,Filipino": 0.32386363636363635, + "Spanish,Chinese,English": 0.3693181818181818, + "Spanish,Vietnamese,Indonesian": 0.4090909090909091, + "Spanish,Vietnamese,Malay": 0.4147727272727273, + "Spanish,Vietnamese,Filipino": 0.3977272727272727, + "Spanish,Vietnamese,English": 0.4375, + "Spanish,Indonesian,Malay": 0.45454545454545453, + "Spanish,Indonesian,Filipino": 0.4375, + "Spanish,Indonesian,English": 0.48295454545454547, + "Spanish,Malay,Filipino": 0.4318181818181818, + "Spanish,Malay,English": 0.48863636363636365, + "Spanish,Filipino,English": 0.4772727272727273, + "Chinese,Vietnamese,Indonesian": 0.29545454545454547, + "Chinese,Vietnamese,Malay": 0.2727272727272727, + "Chinese,Vietnamese,Filipino": 0.2784090909090909, + "Chinese,Vietnamese,English": 0.3181818181818182, + "Chinese,Indonesian,Malay": 0.3352272727272727, + "Chinese,Indonesian,Filipino": 0.32954545454545453, + "Chinese,Indonesian,English": 0.3693181818181818, + "Chinese,Malay,Filipino": 0.3068181818181818, + "Chinese,Malay,English": 0.3465909090909091, + "Chinese,Filipino,English": 0.3693181818181818, + "Vietnamese,Indonesian,Malay": 0.4375, + "Vietnamese,Indonesian,Filipino": 0.42045454545454547, + "Vietnamese,Indonesian,English": 0.4431818181818182, + "Vietnamese,Malay,Filipino": 0.38636363636363635, + "Vietnamese,Malay,English": 0.4147727272727273, + "Vietnamese,Filipino,English": 0.42613636363636365, + "Indonesian,Malay,Filipino": 0.4715909090909091, + "Indonesian,Malay,English": 0.5056818181818182, + "Indonesian,Filipino,English": 0.5, + "Malay,Filipino,English": 0.4772727272727273 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,English": 0.25, + "Spanish,Chinese,Indonesian,Malay": 0.25, + "Spanish,Chinese,Indonesian,Filipino": 0.24431818181818182, + "Spanish,Chinese,Indonesian,English": 0.2840909090909091, + "Spanish,Chinese,Malay,Filipino": 0.25, + "Spanish,Chinese,Malay,English": 0.2784090909090909, + "Spanish,Chinese,Filipino,English": 0.2840909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.3409090909090909, + "Spanish,Vietnamese,Indonesian,Filipino": 0.3352272727272727, + "Spanish,Vietnamese,Indonesian,English": 0.3522727272727273, + "Spanish,Vietnamese,Malay,Filipino": 0.32386363636363635, + "Spanish,Vietnamese,Malay,English": 0.3522727272727273, + "Spanish,Vietnamese,Filipino,English": 0.3409090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.35795454545454547, + "Spanish,Indonesian,Malay,English": 0.3977272727272727, + "Spanish,Indonesian,Filipino,English": 0.39204545454545453, + "Spanish,Malay,Filipino,English": 0.38636363636363635, + "Chinese,Vietnamese,Indonesian,Malay": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,English": 0.26136363636363635, + "Chinese,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Chinese,Vietnamese,Malay,English": 0.24431818181818182, + "Chinese,Vietnamese,Filipino,English": 0.24431818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.26136363636363635, + "Chinese,Indonesian,Malay,English": 0.2897727272727273, + "Chinese,Indonesian,Filipino,English": 0.30113636363636365, + "Chinese,Malay,Filipino,English": 0.2784090909090909, + "Vietnamese,Indonesian,Malay,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,Malay,English": 0.35795454545454547, + "Vietnamese,Indonesian,Filipino,English": 0.35795454545454547, + "Vietnamese,Malay,Filipino,English": 0.32954545454545453, + "Indonesian,Malay,Filipino,English": 0.4034090909090909 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.1875, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Malay,English": 0.23295454545454544, + "Spanish,Chinese,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Chinese,Malay,Filipino,English": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.2784090909090909, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.30113636363636365, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.29545454545454547, + "Spanish,Vietnamese,Malay,Filipino,English": 0.2840909090909091, + "Spanish,Indonesian,Malay,Filipino,English": 0.32954545454545453, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.2159090909090909, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.2215909090909091, + "Chinese,Vietnamese,Malay,Filipino,English": 0.19886363636363635, + "Chinese,Indonesian,Malay,Filipino,English": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.2897727272727273 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.17045454545454544, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.19886363636363635, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.25, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.18181818181818182 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909 + } + }, + "AC3_2": 0.5106382978227654, + "AC3_3": 0.42676236555759284, + "AC3_4": 0.3642749788946497, + "AC3_5": 0.31336727907877565, + "AC3_6": 0.2693079478825107, + "AC3_7": 0.2310160427435445 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.5825242718446602 + }, + "prompt_2": { + "accuracy": 0.5728155339805825 + }, + "prompt_3": { + "accuracy": 0.6019417475728155 + }, + "prompt_4": { + "accuracy": 0.33980582524271846 + }, + "prompt_5": { + "accuracy": 0.5825242718446602 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.38095238095238093 + }, + "prompt_2": { + "accuracy": 0.3619047619047619 + }, + "prompt_3": { + "accuracy": 0.3904761904761905 + }, + "prompt_4": { + "accuracy": 0.26666666666666666 + }, + "prompt_5": { + "accuracy": 0.3142857142857143 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.6822429906542056 + }, + "prompt_2": { + "accuracy": 0.6915887850467289 + }, + "prompt_3": { + "accuracy": 0.7289719626168224 + }, + "prompt_4": { + "accuracy": 0.4953271028037383 + }, + "prompt_5": { + "accuracy": 0.6635514018691588 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.52, + "category_acc": { + "brand": 0.5, + "demographics": 0.4, + "biology": 0.3, + "history": 0.6, + "literature": 0.5, + "politics": 0.7, + "culture": 0.6, + "film": 0.5, + "law": 0.5, + "geography": 0.5 + } + }, + "prompt_2": { + "accuracy": 0.5, + "category_acc": { + "brand": 0.4, + "demographics": 0.0, + "biology": 0.3, + "history": 0.6, + "literature": 0.4, + "politics": 0.6, + "culture": 0.6, + "film": 0.7, + "law": 0.6, + "geography": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.54, + "category_acc": { + "brand": 0.5, + "demographics": 0.2, + "biology": 0.3, + "history": 0.5333333333333333, + "literature": 0.5, + "politics": 0.8, + "culture": 0.6, + "film": 0.6, + "law": 0.5, + "geography": 0.7 + } + }, + "prompt_4": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.4, + "demographics": 0.6, + "biology": 0.2, + "history": 0.3333333333333333, + "literature": 0.2, + "politics": 0.9, + "culture": 0.3, + "film": 0.5, + "law": 0.5, + "geography": 0.3 + } + }, + "prompt_5": { + "accuracy": 0.52, + "category_acc": { + "brand": 0.5, + "demographics": 0.0, + "biology": 0.3, + "history": 0.4666666666666667, + "literature": 0.5, + "politics": 0.9, + "culture": 0.6, + "film": 0.6, + "law": 0.6, + "geography": 0.5 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.06750971426706272 + }, + "prompt_2": { + "bleu_score": 0.07721750285072881 + }, + "prompt_3": { + "bleu_score": 0.06821978311551388 + }, + "prompt_4": { + "bleu_score": 0.06804397274407875 + }, + "prompt_5": { + "bleu_score": 0.05429875271696506 + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.0982752747830463 + }, + "prompt_2": { + "bleu_score": 0.09693382479933621 + }, + "prompt_3": { + "bleu_score": 0.09211183542188045 + }, + "prompt_4": { + "bleu_score": 0.09162870818146696 + }, + "prompt_5": { + "bleu_score": 0.1809729930555669 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.07481805117090375 + }, + "prompt_2": { + "bleu_score": 0.12408586583177651 + }, + "prompt_3": { + "bleu_score": 0.11323612407691637 + }, + "prompt_4": { + "bleu_score": 0.07103730915658504 + }, + "prompt_5": { + "bleu_score": 0.09878667276041729 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.06792021752065504 + }, + "prompt_2": { + "bleu_score": 0.08249521942696134 + }, + "prompt_3": { + "bleu_score": 0.08084386708497976 + }, + "prompt_4": { + "bleu_score": 0.06431676202990873 + }, + "prompt_5": { + "bleu_score": 0.08100137822980161 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.10560765530945605 + }, + "prompt_2": { + "bleu_score": 0.13240900994121152 + }, + "prompt_3": { + "bleu_score": 0.1296920316364985 + }, + "prompt_4": { + "bleu_score": 0.09176703052478538 + }, + "prompt_5": { + "bleu_score": 0.16824113101991417 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.5974329054842473 + }, + "prompt_2": { + "accuracy": 0.588098016336056 + }, + "prompt_3": { + "accuracy": 0.6102683780630105 + }, + "prompt_4": { + "accuracy": 0.5857642940490082 + }, + "prompt_5": { + "accuracy": 0.6149358226371062 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.5864855202002145, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.5656565656565656, + "clinical_knowledge": 0.6325757575757576, + "medical_genetics": 0.6464646464646465, + "high_school_us_history": 0.7635467980295566, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.7838983050847458, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.6286919831223629, + "econometrics": 0.4424778761061947, + "college_computer_science": 0.42424242424242425, + "high_school_biology": 0.7605177993527508, + "abstract_algebra": 0.32323232323232326, + "professional_accounting": 0.49110320284697506, + "philosophy": 0.632258064516129, + "professional_medicine": 0.6346863468634686, + "nutrition": 0.6950819672131148, + "global_facts": 0.37373737373737376, + "machine_learning": 0.4774774774774775, + "security_studies": 0.6639344262295082, + "public_relations": 0.5688073394495413, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.7461300309597523, + "anatomy": 0.582089552238806, + "human_sexuality": 0.6846153846153846, + "college_medicine": 0.6162790697674418, + "high_school_government_and_politics": 0.7916666666666666, + "college_chemistry": 0.41414141414141414, + "logical_fallacies": 0.6666666666666666, + "high_school_geography": 0.7614213197969543, + "elementary_mathematics": 0.3978779840848806, + "human_aging": 0.6396396396396397, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.8014705882352942, + "formal_logic": 0.368, + "high_school_statistics": 0.5255813953488372, + "international_law": 0.7, + "high_school_mathematics": 0.3048327137546468, + "high_school_computer_science": 0.6363636363636364, + "conceptual_physics": 0.5299145299145299, + "miscellaneous": 0.7480818414322251, + "high_school_chemistry": 0.504950495049505, + "marketing": 0.8025751072961373, + "professional_law": 0.47162426614481406, + "management": 0.7254901960784313, + "college_physics": 0.3069306930693069, + "jurisprudence": 0.7102803738317757, + "world_religions": 0.8117647058823529, + "sociology": 0.75, + "us_foreign_policy": 0.7777777777777778, + "high_school_macroeconomics": 0.5681233933161953, + "computer_security": 0.6161616161616161, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.6405797101449275, + "electrical_engineering": 0.5625, + "astronomy": 0.6887417218543046, + "college_biology": 0.7482517482517482 + } + }, + "prompt_2": { + "accuracy": 0.594780121558813, + "category_acc": { + "high_school_european_history": 0.774390243902439, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.6628787878787878, + "medical_genetics": 0.6767676767676768, + "high_school_us_history": 0.7832512315270936, + "high_school_physics": 0.36, + "high_school_world_history": 0.8220338983050848, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.6286919831223629, + "econometrics": 0.39823008849557523, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7669902912621359, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.4875444839857651, + "philosophy": 0.6870967741935484, + "professional_medicine": 0.6531365313653137, + "nutrition": 0.7016393442622951, + "global_facts": 0.3939393939393939, + "machine_learning": 0.44144144144144143, + "security_studies": 0.680327868852459, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.6579378068739771, + "prehistory": 0.7275541795665634, + "anatomy": 0.582089552238806, + "human_sexuality": 0.7153846153846154, + "college_medicine": 0.5988372093023255, + "high_school_government_and_politics": 0.8125, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.7160493827160493, + "high_school_geography": 0.7563451776649747, + "elementary_mathematics": 0.41114058355437666, + "human_aging": 0.6666666666666666, + "college_mathematics": 0.3333333333333333, + "high_school_psychology": 0.8069852941176471, + "formal_logic": 0.432, + "high_school_statistics": 0.5023255813953489, + "international_law": 0.7, + "high_school_mathematics": 0.3345724907063197, + "high_school_computer_science": 0.6262626262626263, + "conceptual_physics": 0.5128205128205128, + "miscellaneous": 0.7391304347826086, + "high_school_chemistry": 0.5297029702970297, + "marketing": 0.8025751072961373, + "professional_law": 0.4644487932159165, + "management": 0.7941176470588235, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.7476635514018691, + "world_religions": 0.7823529411764706, + "sociology": 0.8, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.5732647814910026, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.24161073825503357, + "moral_disputes": 0.6231884057971014, + "electrical_engineering": 0.5555555555555556, + "astronomy": 0.6887417218543046, + "college_biology": 0.7552447552447552 + } + }, + "prompt_3": { + "accuracy": 0.6030747229174115, + "category_acc": { + "high_school_european_history": 0.7987804878048781, + "business_ethics": 0.6262626262626263, + "clinical_knowledge": 0.6742424242424242, + "medical_genetics": 0.7070707070707071, + "high_school_us_history": 0.7783251231527094, + "high_school_physics": 0.3333333333333333, + "high_school_world_history": 0.809322033898305, + "virology": 0.5515151515151515, + "high_school_microeconomics": 0.6286919831223629, + "econometrics": 0.4424778761061947, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7637540453074434, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.5160142348754448, + "philosophy": 0.6741935483870968, + "professional_medicine": 0.6457564575645757, + "nutrition": 0.6918032786885245, + "global_facts": 0.40404040404040403, + "machine_learning": 0.36936936936936937, + "security_studies": 0.6885245901639344, + "public_relations": 0.6422018348623854, + "professional_psychology": 0.6481178396072013, + "prehistory": 0.7120743034055728, + "anatomy": 0.5895522388059702, + "human_sexuality": 0.7076923076923077, + "college_medicine": 0.6046511627906976, + "high_school_government_and_politics": 0.828125, + "college_chemistry": 0.4444444444444444, + "logical_fallacies": 0.7283950617283951, + "high_school_geography": 0.7918781725888325, + "elementary_mathematics": 0.4509283819628647, + "human_aging": 0.6891891891891891, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.8106617647058824, + "formal_logic": 0.384, + "high_school_statistics": 0.5581395348837209, + "international_law": 0.7083333333333334, + "high_school_mathematics": 0.35687732342007433, + "high_school_computer_science": 0.6565656565656566, + "conceptual_physics": 0.5470085470085471, + "miscellaneous": 0.7442455242966752, + "high_school_chemistry": 0.5495049504950495, + "marketing": 0.8240343347639485, + "professional_law": 0.4794520547945205, + "management": 0.7843137254901961, + "college_physics": 0.36633663366336633, + "jurisprudence": 0.719626168224299, + "world_religions": 0.7823529411764706, + "sociology": 0.77, + "us_foreign_policy": 0.8686868686868687, + "high_school_macroeconomics": 0.6066838046272494, + "computer_security": 0.6565656565656566, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.6260869565217392, + "electrical_engineering": 0.5625, + "astronomy": 0.6821192052980133, + "college_biology": 0.7552447552447552 + } + }, + "prompt_4": { + "accuracy": 0.5804790847336432, + "category_acc": { + "high_school_european_history": 0.7804878048780488, + "business_ethics": 0.5858585858585859, + "clinical_knowledge": 0.625, + "medical_genetics": 0.6868686868686869, + "high_school_us_history": 0.7684729064039408, + "high_school_physics": 0.3466666666666667, + "high_school_world_history": 0.8008474576271186, + "virology": 0.5212121212121212, + "high_school_microeconomics": 0.6160337552742616, + "econometrics": 0.4690265486725664, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.7702265372168284, + "abstract_algebra": 0.31313131313131315, + "professional_accounting": 0.4875444839857651, + "philosophy": 0.667741935483871, + "professional_medicine": 0.6199261992619927, + "nutrition": 0.6622950819672131, + "global_facts": 0.32323232323232326, + "machine_learning": 0.45045045045045046, + "security_studies": 0.6639344262295082, + "public_relations": 0.6238532110091743, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.718266253869969, + "anatomy": 0.5522388059701493, + "human_sexuality": 0.6461538461538462, + "college_medicine": 0.622093023255814, + "high_school_government_and_politics": 0.7760416666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6604938271604939, + "high_school_geography": 0.7208121827411168, + "elementary_mathematics": 0.38992042440318303, + "human_aging": 0.6306306306306306, + "college_mathematics": 0.35353535353535354, + "high_school_psychology": 0.7867647058823529, + "formal_logic": 0.4, + "high_school_statistics": 0.4883720930232558, + "international_law": 0.6583333333333333, + "high_school_mathematics": 0.31970260223048325, + "high_school_computer_science": 0.6060606060606061, + "conceptual_physics": 0.5555555555555556, + "miscellaneous": 0.6508951406649617, + "high_school_chemistry": 0.5445544554455446, + "marketing": 0.8369098712446352, + "professional_law": 0.47423352902804955, + "management": 0.6862745098039216, + "college_physics": 0.37623762376237624, + "jurisprudence": 0.7757009345794392, + "world_religions": 0.7823529411764706, + "sociology": 0.735, + "us_foreign_policy": 0.797979797979798, + "high_school_macroeconomics": 0.570694087403599, + "computer_security": 0.6262626262626263, + "moral_scenarios": 0.23937360178970918, + "moral_disputes": 0.6231884057971014, + "electrical_engineering": 0.6111111111111112, + "astronomy": 0.6754966887417219, + "college_biology": 0.7482517482517482 + } + }, + "prompt_5": { + "accuracy": 0.5931355023239184, + "category_acc": { + "high_school_european_history": 0.75, + "business_ethics": 0.6060606060606061, + "clinical_knowledge": 0.6515151515151515, + "medical_genetics": 0.6666666666666666, + "high_school_us_history": 0.7783251231527094, + "high_school_physics": 0.32666666666666666, + "high_school_world_history": 0.8050847457627118, + "virology": 0.509090909090909, + "high_school_microeconomics": 0.6371308016877637, + "econometrics": 0.4690265486725664, + "college_computer_science": 0.48484848484848486, + "high_school_biology": 0.7508090614886731, + "abstract_algebra": 0.29292929292929293, + "professional_accounting": 0.5160142348754448, + "philosophy": 0.6580645161290323, + "professional_medicine": 0.6051660516605166, + "nutrition": 0.6852459016393443, + "global_facts": 0.3838383838383838, + "machine_learning": 0.43243243243243246, + "security_studies": 0.6475409836065574, + "public_relations": 0.5871559633027523, + "professional_psychology": 0.6268412438625205, + "prehistory": 0.7275541795665634, + "anatomy": 0.5671641791044776, + "human_sexuality": 0.7076923076923077, + "college_medicine": 0.6162790697674418, + "high_school_government_and_politics": 0.8072916666666666, + "college_chemistry": 0.42424242424242425, + "logical_fallacies": 0.6296296296296297, + "high_school_geography": 0.8020304568527918, + "elementary_mathematics": 0.4376657824933687, + "human_aging": 0.6621621621621622, + "college_mathematics": 0.32323232323232326, + "high_school_psychology": 0.7977941176470589, + "formal_logic": 0.368, + "high_school_statistics": 0.5162790697674419, + "international_law": 0.6916666666666667, + "high_school_mathematics": 0.30111524163568776, + "high_school_computer_science": 0.6161616161616161, + "conceptual_physics": 0.5811965811965812, + "miscellaneous": 0.7851662404092071, + "high_school_chemistry": 0.5099009900990099, + "marketing": 0.8454935622317596, + "professional_law": 0.4657534246575342, + "management": 0.7156862745098039, + "college_physics": 0.3465346534653465, + "jurisprudence": 0.7383177570093458, + "world_religions": 0.8176470588235294, + "sociology": 0.72, + "us_foreign_policy": 0.8080808080808081, + "high_school_macroeconomics": 0.5604113110539846, + "computer_security": 0.6666666666666666, + "moral_scenarios": 0.24272930648769575, + "moral_disputes": 0.6579710144927536, + "electrical_engineering": 0.5833333333333334, + "astronomy": 0.7019867549668874, + "college_biology": 0.7622377622377622 + } + } + }, + "c_eval": { + "prompt_1": { + "accuracy": 0.4138187221396731 + }, + "prompt_2": { + "accuracy": 0.4309063893016345 + }, + "prompt_3": { + "accuracy": 0.4063893016344725 + }, + "prompt_4": { + "accuracy": 0.4026745913818722 + }, + "prompt_5": { + "accuracy": 0.3179791976225854 + } + }, + "c_eval_full": { + "prompt_1": { + "accuracy": 0.39975093399750933, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.5476190476190477, + "college_physics": 0.5, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.08695652173913043, + "discrete_mathematics": 0.09523809523809523, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.2916666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4583333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.46153846153846156, + "middle_school_physics": 0.625, + "middle_school_chemistry": 0.36, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.38333333333333336, + "business_administration": 0.2894736842105263, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.6551724137931034, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5510204081632653, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.375, + "middle_school_politics": 0.38461538461538464, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.35714285714285715, + "ideological_and_moral_cultivation": 0.4166666666666667, + "logic": 0.14814814814814814, + "law": 0.27586206896551724, + "chinese_language_and_literature": 0.4642857142857143, + "art_studies": 0.34210526315789475, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.2857142857142857, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.4444444444444444, + "civil_servant": 0.3269230769230769, + "sports_science": 0.5416666666666666, + "plant_protection": 0.6296296296296297, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.37037037037037035, + "urban_and_rural_planner": 0.43137254901960786, + "accountant": 0.3148148148148148, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.2962962962962963, + "physician": 0.46296296296296297 + } + }, + "prompt_2": { + "accuracy": 0.4277708592777086, + "category_acc": { + "computer_network": 0.4583333333333333, + "operating_system": 0.5833333333333334, + "computer_architecture": 0.34615384615384615, + "college_programming": 0.5, + "college_physics": 0.5, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.13043478260869565, + "discrete_mathematics": 0.23809523809523808, + "electrical_engineer": 0.21428571428571427, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.4166666666666667, + "high_school_chemistry": 0.5, + "high_school_biology": 0.4166666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.5714285714285714, + "college_economics": 0.48333333333333334, + "business_administration": 0.34210526315789475, + "marxism": 0.5833333333333334, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.5882352941176471, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.2916666666666667, + "high_school_geography": 0.4583333333333333, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.29411764705882354, + "modern_chinese_history": 0.32142857142857145, + "ideological_and_moral_cultivation": 0.5, + "logic": 0.2962962962962963, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.35714285714285715, + "art_studies": 0.39473684210526316, + "professional_tour_guide": 0.3235294117647059, + "legal_professional": 0.35714285714285715, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.5185185185185185, + "civil_servant": 0.3269230769230769, + "sports_science": 0.5833333333333334, + "plant_protection": 0.5555555555555556, + "basic_medicine": 0.3333333333333333, + "clinical_medicine": 0.48148148148148145, + "urban_and_rural_planner": 0.5294117647058824, + "accountant": 0.37037037037037035, + "fire_engineer": 0.4166666666666667, + "environmental_impact_assessment_engineer": 0.4166666666666667, + "tax_accountant": 0.4074074074074074, + "physician": 0.5 + } + }, + "prompt_3": { + "accuracy": 0.4122042341220423, + "category_acc": { + "computer_network": 0.5, + "operating_system": 0.4583333333333333, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.5, + "college_chemistry": 0.41379310344827586, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.19047619047619047, + "metrology_engineer": 0.41379310344827586, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.375, + "high_school_biology": 0.3333333333333333, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.6153846153846154, + "middle_school_physics": 0.5, + "middle_school_chemistry": 0.28, + "veterinary_medicine": 0.5357142857142857, + "college_economics": 0.45, + "business_administration": 0.34210526315789475, + "marxism": 0.3333333333333333, + "mao_zedong_thought": 0.5517241379310345, + "education_science": 0.5294117647058824, + "teacher_qualification": 0.5306122448979592, + "high_school_politics": 0.25, + "high_school_geography": 0.5833333333333334, + "middle_school_politics": 0.34615384615384615, + "middle_school_geography": 0.4117647058823529, + "modern_chinese_history": 0.39285714285714285, + "ideological_and_moral_cultivation": 0.5833333333333334, + "logic": 0.25925925925925924, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.5, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.4117647058823529, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.20833333333333334, + "high_school_history": 0.52, + "middle_school_history": 0.5555555555555556, + "civil_servant": 0.3269230769230769, + "sports_science": 0.5, + "plant_protection": 0.5925925925925926, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.3333333333333333, + "urban_and_rural_planner": 0.5490196078431373, + "accountant": 0.3333333333333333, + "fire_engineer": 0.4444444444444444, + "environmental_impact_assessment_engineer": 0.3333333333333333, + "tax_accountant": 0.3333333333333333, + "physician": 0.42592592592592593 + } + }, + "prompt_4": { + "accuracy": 0.4202988792029888, + "category_acc": { + "computer_network": 0.4166666666666667, + "operating_system": 0.5416666666666666, + "computer_architecture": 0.46153846153846156, + "college_programming": 0.5238095238095238, + "college_physics": 0.5, + "college_chemistry": 0.3793103448275862, + "advanced_mathematics": 0.4583333333333333, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.14285714285714285, + "electrical_engineer": 0.23809523809523808, + "metrology_engineer": 0.4482758620689655, + "high_school_mathematics": 0.17391304347826086, + "high_school_physics": 0.3333333333333333, + "high_school_chemistry": 0.4166666666666667, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.375, + "middle_school_biology": 0.5, + "middle_school_physics": 0.5416666666666666, + "middle_school_chemistry": 0.52, + "veterinary_medicine": 0.6071428571428571, + "college_economics": 0.4, + "business_administration": 0.34210526315789475, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.4827586206896552, + "education_science": 0.5, + "teacher_qualification": 0.5102040816326531, + "high_school_politics": 0.20833333333333334, + "high_school_geography": 0.5416666666666666, + "middle_school_politics": 0.5769230769230769, + "middle_school_geography": 0.47058823529411764, + "modern_chinese_history": 0.42857142857142855, + "ideological_and_moral_cultivation": 0.5416666666666666, + "logic": 0.2222222222222222, + "law": 0.3448275862068966, + "chinese_language_and_literature": 0.5, + "art_studies": 0.47368421052631576, + "professional_tour_guide": 0.38235294117647056, + "legal_professional": 0.39285714285714285, + "high_school_chinese": 0.2916666666666667, + "high_school_history": 0.56, + "middle_school_history": 0.48148148148148145, + "civil_servant": 0.40384615384615385, + "sports_science": 0.4583333333333333, + "plant_protection": 0.5185185185185185, + "basic_medicine": 0.4583333333333333, + "clinical_medicine": 0.4074074074074074, + "urban_and_rural_planner": 0.45098039215686275, + "accountant": 0.37037037037037035, + "fire_engineer": 0.5, + "environmental_impact_assessment_engineer": 0.3611111111111111, + "tax_accountant": 0.3148148148148148, + "physician": 0.46296296296296297 + } + }, + "prompt_5": { + "accuracy": 0.3150684931506849, + "category_acc": { + "computer_network": 0.3333333333333333, + "operating_system": 0.25, + "computer_architecture": 0.5384615384615384, + "college_programming": 0.47619047619047616, + "college_physics": 0.25, + "college_chemistry": 0.3448275862068966, + "advanced_mathematics": 0.4166666666666667, + "probability_and_statistics": 0.17391304347826086, + "discrete_mathematics": 0.19047619047619047, + "electrical_engineer": 0.2619047619047619, + "metrology_engineer": 0.27586206896551724, + "high_school_mathematics": 0.21739130434782608, + "high_school_physics": 0.20833333333333334, + "high_school_chemistry": 0.375, + "high_school_biology": 0.2916666666666667, + "middle_school_mathematics": 0.3333333333333333, + "middle_school_biology": 0.38461538461538464, + "middle_school_physics": 0.20833333333333334, + "middle_school_chemistry": 0.32, + "veterinary_medicine": 0.32142857142857145, + "college_economics": 0.35, + "business_administration": 0.15789473684210525, + "marxism": 0.4583333333333333, + "mao_zedong_thought": 0.5172413793103449, + "education_science": 0.2647058823529412, + "teacher_qualification": 0.3469387755102041, + "high_school_politics": 0.16666666666666666, + "high_school_geography": 0.375, + "middle_school_politics": 0.3076923076923077, + "middle_school_geography": 0.17647058823529413, + "modern_chinese_history": 0.17857142857142858, + "ideological_and_moral_cultivation": 0.375, + "logic": 0.14814814814814814, + "law": 0.3103448275862069, + "chinese_language_and_literature": 0.2857142857142857, + "art_studies": 0.4473684210526316, + "professional_tour_guide": 0.35294117647058826, + "legal_professional": 0.17857142857142858, + "high_school_chinese": 0.25, + "high_school_history": 0.44, + "middle_school_history": 0.37037037037037035, + "civil_servant": 0.3076923076923077, + "sports_science": 0.3333333333333333, + "plant_protection": 0.48148148148148145, + "basic_medicine": 0.375, + "clinical_medicine": 0.14814814814814814, + "urban_and_rural_planner": 0.35294117647058826, + "accountant": 0.25925925925925924, + "fire_engineer": 0.3333333333333333, + "environmental_impact_assessment_engineer": 0.3055555555555556, + "tax_accountant": 0.2777777777777778, + "physician": 0.37037037037037035 + } + } + }, + "cmmlu": { + "prompt_1": { + "accuracy": 0.48028673835125446 + }, + "prompt_2": { + "accuracy": 0.5089605734767025 + }, + "prompt_3": { + "accuracy": 0.4767025089605735 + }, + "prompt_4": { + "accuracy": 0.4731182795698925 + }, + "prompt_5": { + "accuracy": 0.3906810035842294 + } + }, + "cmmlu_full": { + "prompt_1": { + "accuracy": 0.3975133828354343, + "category_acc": { + "agronomy": 0.3609467455621302, + "anatomy": 0.30405405405405406, + "ancient_chinese": 0.22560975609756098, + "arts": 0.46875, + "astronomy": 0.30303030303030304, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.3625, + "chinese_driving_rule": 0.5648854961832062, + "chinese_food_culture": 0.3088235294117647, + "chinese_foreign_policy": 0.45794392523364486, + "chinese_history": 0.4117647058823529, + "chinese_literature": 0.3333333333333333, + "chinese_teacher_qualification": 0.5027932960893855, + "clinical_knowledge": 0.38396624472573837, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.45794392523364486, + "college_engineering_hydrology": 0.49056603773584906, + "college_law": 0.2962962962962963, + "college_mathematics": 0.29523809523809524, + "college_medical_statistics": 0.41509433962264153, + "college_medicine": 0.42857142857142855, + "computer_science": 0.4264705882352941, + "computer_security": 0.5146198830409356, + "conceptual_physics": 0.3333333333333333, + "construction_project_management": 0.3597122302158273, + "economics": 0.42138364779874216, + "education": 0.4171779141104294, + "electrical_engineering": 0.46511627906976744, + "elementary_chinese": 0.2857142857142857, + "elementary_commonsense": 0.3838383838383838, + "elementary_information_and_technology": 0.5672268907563025, + "elementary_mathematics": 0.33043478260869563, + "ethnology": 0.34074074074074073, + "food_science": 0.44755244755244755, + "genetics": 0.42045454545454547, + "global_facts": 0.37583892617449666, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.22727272727272727, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.35454545454545455, + "high_school_politics": 0.3706293706293706, + "human_sexuality": 0.4365079365079365, + "international_law": 0.3675675675675676, + "journalism": 0.3953488372093023, + "jurisprudence": 0.36009732360097324, + "legal_and_moral_basis": 0.5654205607476636, + "logical": 0.5203252032520326, + "machine_learning": 0.4344262295081967, + "management": 0.4, + "marketing": 0.40555555555555556, + "marxist_theory": 0.41798941798941797, + "modern_chinese": 0.3706896551724138, + "nutrition": 0.43448275862068964, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.4342857142857143, + "professional_law": 0.33649289099526064, + "professional_medicine": 0.34308510638297873, + "professional_psychology": 0.41379310344827586, + "public_relations": 0.5172413793103449, + "security_study": 0.4888888888888889, + "sociology": 0.47345132743362833, + "sports_science": 0.3878787878787879, + "traditional_chinese_medicine": 0.35135135135135137, + "virology": 0.47337278106508873, + "world_history": 0.40993788819875776, + "world_religions": 0.46875 + } + }, + "prompt_2": { + "accuracy": 0.4317043688482127, + "category_acc": { + "agronomy": 0.4556213017751479, + "anatomy": 0.2905405405405405, + "ancient_chinese": 0.1951219512195122, + "arts": 0.5, + "astronomy": 0.4, + "business_ethics": 0.4449760765550239, + "chinese_civil_service_exam": 0.35625, + "chinese_driving_rule": 0.5114503816793893, + "chinese_food_culture": 0.36764705882352944, + "chinese_foreign_policy": 0.5046728971962616, + "chinese_history": 0.4458204334365325, + "chinese_literature": 0.3382352941176471, + "chinese_teacher_qualification": 0.5586592178770949, + "clinical_knowledge": 0.43037974683544306, + "college_actuarial_science": 0.25471698113207547, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.4339622641509434, + "college_law": 0.37962962962962965, + "college_mathematics": 0.26666666666666666, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.4358974358974359, + "computer_science": 0.45098039215686275, + "computer_security": 0.6198830409356725, + "conceptual_physics": 0.38095238095238093, + "construction_project_management": 0.38848920863309355, + "economics": 0.5220125786163522, + "education": 0.4601226993865031, + "electrical_engineering": 0.48255813953488375, + "elementary_chinese": 0.3055555555555556, + "elementary_commonsense": 0.3484848484848485, + "elementary_information_and_technology": 0.6722689075630253, + "elementary_mathematics": 0.34347826086956523, + "ethnology": 0.362962962962963, + "food_science": 0.4965034965034965, + "genetics": 0.4715909090909091, + "global_facts": 0.4429530201342282, + "high_school_biology": 0.3668639053254438, + "high_school_chemistry": 0.2727272727272727, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.34146341463414637, + "high_school_physics": 0.32727272727272727, + "high_school_politics": 0.44755244755244755, + "human_sexuality": 0.42857142857142855, + "international_law": 0.3783783783783784, + "journalism": 0.45930232558139533, + "jurisprudence": 0.38686131386861317, + "legal_and_moral_basis": 0.677570093457944, + "logical": 0.44715447154471544, + "machine_learning": 0.4344262295081967, + "management": 0.49523809523809526, + "marketing": 0.48333333333333334, + "marxist_theory": 0.4656084656084656, + "modern_chinese": 0.35344827586206895, + "nutrition": 0.5241379310344828, + "philosophy": 0.49523809523809526, + "professional_accounting": 0.5085714285714286, + "professional_law": 0.3222748815165877, + "professional_medicine": 0.3829787234042553, + "professional_psychology": 0.4525862068965517, + "public_relations": 0.5287356321839081, + "security_study": 0.5259259259259259, + "sociology": 0.4557522123893805, + "sports_science": 0.4, + "traditional_chinese_medicine": 0.34594594594594597, + "virology": 0.5207100591715976, + "world_history": 0.45962732919254656, + "world_religions": 0.46875 + } + }, + "prompt_3": { + "accuracy": 0.4194439647729235, + "category_acc": { + "agronomy": 0.4556213017751479, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.21341463414634146, + "arts": 0.54375, + "astronomy": 0.3575757575757576, + "business_ethics": 0.4258373205741627, + "chinese_civil_service_exam": 0.33125, + "chinese_driving_rule": 0.549618320610687, + "chinese_food_culture": 0.375, + "chinese_foreign_policy": 0.5607476635514018, + "chinese_history": 0.43343653250773995, + "chinese_literature": 0.31862745098039214, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.3755274261603376, + "college_actuarial_science": 0.22641509433962265, + "college_education": 0.514018691588785, + "college_engineering_hydrology": 0.41509433962264153, + "college_law": 0.3611111111111111, + "college_mathematics": 0.3047619047619048, + "college_medical_statistics": 0.4339622641509434, + "college_medicine": 0.41025641025641024, + "computer_science": 0.45098039215686275, + "computer_security": 0.5321637426900585, + "conceptual_physics": 0.36054421768707484, + "construction_project_management": 0.3597122302158273, + "economics": 0.4968553459119497, + "education": 0.49693251533742333, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.2976190476190476, + "elementary_commonsense": 0.4292929292929293, + "elementary_information_and_technology": 0.5840336134453782, + "elementary_mathematics": 0.36086956521739133, + "ethnology": 0.3925925925925926, + "food_science": 0.4755244755244755, + "genetics": 0.4715909090909091, + "global_facts": 0.47651006711409394, + "high_school_biology": 0.27218934911242604, + "high_school_chemistry": 0.2196969696969697, + "high_school_geography": 0.4067796610169492, + "high_school_mathematics": 0.2926829268292683, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.32867132867132864, + "human_sexuality": 0.40476190476190477, + "international_law": 0.3675675675675676, + "journalism": 0.46511627906976744, + "jurisprudence": 0.3722627737226277, + "legal_and_moral_basis": 0.6728971962616822, + "logical": 0.43902439024390244, + "machine_learning": 0.4098360655737705, + "management": 0.49523809523809526, + "marketing": 0.5111111111111111, + "marxist_theory": 0.4708994708994709, + "modern_chinese": 0.3017241379310345, + "nutrition": 0.42758620689655175, + "philosophy": 0.5333333333333333, + "professional_accounting": 0.5028571428571429, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.35106382978723405, + "professional_psychology": 0.4482758620689655, + "public_relations": 0.4885057471264368, + "security_study": 0.5185185185185185, + "sociology": 0.47345132743362833, + "sports_science": 0.4121212121212121, + "traditional_chinese_medicine": 0.3783783783783784, + "virology": 0.5088757396449705, + "world_history": 0.43478260869565216, + "world_religions": 0.48125 + } + }, + "prompt_4": { + "accuracy": 0.4064064928337075, + "category_acc": { + "agronomy": 0.4319526627218935, + "anatomy": 0.2702702702702703, + "ancient_chinese": 0.21341463414634146, + "arts": 0.55625, + "astronomy": 0.3393939393939394, + "business_ethics": 0.3923444976076555, + "chinese_civil_service_exam": 0.34375, + "chinese_driving_rule": 0.5419847328244275, + "chinese_food_culture": 0.29411764705882354, + "chinese_foreign_policy": 0.4953271028037383, + "chinese_history": 0.3684210526315789, + "chinese_literature": 0.29901960784313725, + "chinese_teacher_qualification": 0.4972067039106145, + "clinical_knowledge": 0.35864978902953587, + "college_actuarial_science": 0.24528301886792453, + "college_education": 0.5046728971962616, + "college_engineering_hydrology": 0.4716981132075472, + "college_law": 0.3425925925925926, + "college_mathematics": 0.3142857142857143, + "college_medical_statistics": 0.4056603773584906, + "college_medicine": 0.40293040293040294, + "computer_science": 0.44607843137254904, + "computer_security": 0.4853801169590643, + "conceptual_physics": 0.41496598639455784, + "construction_project_management": 0.37410071942446044, + "economics": 0.5220125786163522, + "education": 0.39263803680981596, + "electrical_engineering": 0.47093023255813954, + "elementary_chinese": 0.3134920634920635, + "elementary_commonsense": 0.398989898989899, + "elementary_information_and_technology": 0.6260504201680672, + "elementary_mathematics": 0.33043478260869563, + "ethnology": 0.37777777777777777, + "food_science": 0.5034965034965035, + "genetics": 0.44886363636363635, + "global_facts": 0.4228187919463087, + "high_school_biology": 0.3076923076923077, + "high_school_chemistry": 0.25, + "high_school_geography": 0.3474576271186441, + "high_school_mathematics": 0.31097560975609756, + "high_school_physics": 0.2727272727272727, + "high_school_politics": 0.3706293706293706, + "human_sexuality": 0.47619047619047616, + "international_law": 0.31351351351351353, + "journalism": 0.4418604651162791, + "jurisprudence": 0.340632603406326, + "legal_and_moral_basis": 0.5654205607476636, + "logical": 0.4878048780487805, + "machine_learning": 0.45901639344262296, + "management": 0.45714285714285713, + "marketing": 0.43333333333333335, + "marxist_theory": 0.4656084656084656, + "modern_chinese": 0.3706896551724138, + "nutrition": 0.496551724137931, + "philosophy": 0.41904761904761906, + "professional_accounting": 0.4514285714285714, + "professional_law": 0.35071090047393366, + "professional_medicine": 0.3377659574468085, + "professional_psychology": 0.44396551724137934, + "public_relations": 0.47126436781609193, + "security_study": 0.562962962962963, + "sociology": 0.3805309734513274, + "sports_science": 0.4484848484848485, + "traditional_chinese_medicine": 0.32432432432432434, + "virology": 0.5029585798816568, + "world_history": 0.43478260869565216, + "world_religions": 0.46875 + } + }, + "prompt_5": { + "accuracy": 0.32844068381972025, + "category_acc": { + "agronomy": 0.28402366863905326, + "anatomy": 0.2635135135135135, + "ancient_chinese": 0.2926829268292683, + "arts": 0.4375, + "astronomy": 0.2727272727272727, + "business_ethics": 0.3253588516746411, + "chinese_civil_service_exam": 0.325, + "chinese_driving_rule": 0.3893129770992366, + "chinese_food_culture": 0.3602941176470588, + "chinese_foreign_policy": 0.32710280373831774, + "chinese_history": 0.34055727554179566, + "chinese_literature": 0.3137254901960784, + "chinese_teacher_qualification": 0.3575418994413408, + "clinical_knowledge": 0.31223628691983124, + "college_actuarial_science": 0.2169811320754717, + "college_education": 0.3925233644859813, + "college_engineering_hydrology": 0.3018867924528302, + "college_law": 0.28703703703703703, + "college_mathematics": 0.23809523809523808, + "college_medical_statistics": 0.37735849056603776, + "college_medicine": 0.3772893772893773, + "computer_science": 0.38235294117647056, + "computer_security": 0.3333333333333333, + "conceptual_physics": 0.3401360544217687, + "construction_project_management": 0.28776978417266186, + "economics": 0.31446540880503143, + "education": 0.3496932515337423, + "electrical_engineering": 0.36627906976744184, + "elementary_chinese": 0.3134920634920635, + "elementary_commonsense": 0.3434343434343434, + "elementary_information_and_technology": 0.3319327731092437, + "elementary_mathematics": 0.3173913043478261, + "ethnology": 0.362962962962963, + "food_science": 0.34965034965034963, + "genetics": 0.30113636363636365, + "global_facts": 0.348993288590604, + "high_school_biology": 0.2485207100591716, + "high_school_chemistry": 0.23484848484848486, + "high_school_geography": 0.3389830508474576, + "high_school_mathematics": 0.25609756097560976, + "high_school_physics": 0.2636363636363636, + "high_school_politics": 0.2867132867132867, + "human_sexuality": 0.35714285714285715, + "international_law": 0.2864864864864865, + "journalism": 0.4127906976744186, + "jurisprudence": 0.30656934306569344, + "legal_and_moral_basis": 0.4485981308411215, + "logical": 0.34146341463414637, + "machine_learning": 0.319672131147541, + "management": 0.3523809523809524, + "marketing": 0.34444444444444444, + "marxist_theory": 0.3544973544973545, + "modern_chinese": 0.27586206896551724, + "nutrition": 0.3103448275862069, + "philosophy": 0.3904761904761905, + "professional_accounting": 0.36, + "professional_law": 0.2796208530805687, + "professional_medicine": 0.2553191489361702, + "professional_psychology": 0.35344827586206895, + "public_relations": 0.3850574712643678, + "security_study": 0.3333333333333333, + "sociology": 0.3672566371681416, + "sports_science": 0.2787878787878788, + "traditional_chinese_medicine": 0.3081081081081081, + "virology": 0.378698224852071, + "world_history": 0.2981366459627329, + "world_religions": 0.40625 + } + } + }, + "zbench": { + "prompt_1": { + "accuracy": 0.2727272727272727 + }, + "prompt_2": { + "accuracy": 0.21212121212121213 + }, + "prompt_3": { + "accuracy": 0.24242424242424243 + }, + "prompt_4": { + "accuracy": 0.12121212121212122 + }, + "prompt_5": { + "accuracy": 0.2727272727272727 + } + }, + "ind_emotion": { + "prompt_1": { + "accuracy": 0.5522727272727272 + }, + "prompt_2": { + "accuracy": 0.22045454545454546 + }, + "prompt_3": { + "accuracy": 0.19318181818181818 + }, + "prompt_4": { + "accuracy": 0.6022727272727273 + }, + "prompt_5": { + "accuracy": 0.5363636363636364 + } + }, + "ocnli": { + "prompt_1": { + "accuracy": 0.3423728813559322 + }, + "prompt_2": { + "accuracy": 0.34576271186440677 + }, + "prompt_3": { + "accuracy": 0.32610169491525426 + }, + "prompt_4": { + "accuracy": 0.3423728813559322 + }, + "prompt_5": { + "accuracy": 0.32508474576271185 + } + }, + "c3": { + "prompt_1": { + "accuracy": 0.6619296933433059 + }, + "prompt_2": { + "accuracy": 0.6264023934181002 + }, + "prompt_3": { + "accuracy": 0.6024682124158564 + }, + "prompt_4": { + "accuracy": 0.6510845175766642 + }, + "prompt_5": { + "accuracy": 0.5314136125654451 + } + }, + "dream": { + "prompt_1": { + "accuracy": 0.8677119059284665 + }, + "prompt_2": { + "accuracy": 0.8162665360117589 + }, + "prompt_3": { + "accuracy": 0.8451739343459088 + }, + "prompt_4": { + "accuracy": 0.8784909358157765 + }, + "prompt_5": { + "accuracy": 0.8784909358157765 + } + }, + "samsum": { + "prompt_1": { + "rouge1": 0.3148466900867728, + "rouge2": 0.13439425963384796, + "rougeL": 0.2493723840004376, + "avg_rouge": 0.23287111124035278 + }, + "prompt_2": { + "rouge1": 0.3760284317692511, + "rouge2": 0.1703326819107988, + "rougeL": 0.30239763225528216, + "avg_rouge": 0.282919581978444 + }, + "prompt_3": { + "rouge1": 0.321710625396653, + "rouge2": 0.13296655744055622, + "rougeL": 0.2552227836573773, + "avg_rouge": 0.2366333221648622 + }, + "prompt_4": { + "rouge1": 0.3492817033666253, + "rouge2": 0.1568905106341149, + "rougeL": 0.27974950173421925, + "avg_rouge": 0.2619739052449865 + }, + "prompt_5": { + "rouge1": 0.3596885364704884, + "rouge2": 0.15550623284418724, + "rougeL": 0.2853950264312206, + "avg_rouge": 0.2668632652486321 + } + }, + "dialogsum": { + "prompt_1": { + "rouge1": 0.2180485502100887, + "rouge2": 0.06097714440972195, + "rougeL": 0.16243716383480442, + "avg_rouge": 0.14715428615153836 + }, + "prompt_2": { + "rouge1": 0.21923428230963032, + "rouge2": 0.06078194919283872, + "rougeL": 0.16199188272866893, + "avg_rouge": 0.14733603807704598 + }, + "prompt_3": { + "rouge1": 0.21698867466943603, + "rouge2": 0.062382836391260764, + "rougeL": 0.16111354597032204, + "avg_rouge": 0.14682835234367295 + }, + "prompt_4": { + "rouge1": 0.22413012734377513, + "rouge2": 0.06340741342797845, + "rougeL": 0.16625918343029397, + "avg_rouge": 0.15126557473401583 + }, + "prompt_5": { + "rouge1": 0.19968281716481368, + "rouge2": 0.06461254234379582, + "rougeL": 0.15056622645650905, + "avg_rouge": 0.1382871953217062 + } + }, + "sst2": { + "prompt_1": { + "accuracy": 0.8738532110091743 + }, + "prompt_2": { + "accuracy": 0.7809633027522935 + }, + "prompt_3": { + "accuracy": 0.8772935779816514 + }, + "prompt_4": { + "accuracy": 0.8692660550458715 + }, + "prompt_5": { + "accuracy": 0.5527522935779816 + } + }, + "cola": { + "prompt_1": { + "accuracy": 0.3317353787152445 + }, + "prompt_2": { + "accuracy": 0.3211888782358581 + }, + "prompt_3": { + "accuracy": 0.36145733461169705 + }, + "prompt_4": { + "accuracy": 0.3231064237775647 + }, + "prompt_5": { + "accuracy": 0.5589645254074784 + } + }, + "qqp": { + "prompt_1": { + "accuracy": 0.602 + }, + "prompt_2": { + "accuracy": 0.533 + }, + "prompt_3": { + "accuracy": 0.553 + }, + "prompt_4": { + "accuracy": 0.596 + }, + "prompt_5": { + "accuracy": 0.569 + } + }, + "mnli": { + "prompt_1": { + "accuracy": 0.406 + }, + "prompt_2": { + "accuracy": 0.3745 + }, + "prompt_3": { + "accuracy": 0.404 + }, + "prompt_4": { + "accuracy": 0.463 + }, + "prompt_5": { + "accuracy": 0.447 + } + }, + "qnli": { + "prompt_1": { + "accuracy": 0.513 + }, + "prompt_2": { + "accuracy": 0.5325 + }, + "prompt_3": { + "accuracy": 0.524 + }, + "prompt_4": { + "accuracy": 0.5205 + }, + "prompt_5": { + "accuracy": 0.531 + } + }, + "wnli": { + "prompt_1": { + "accuracy": 0.6056338028169014 + }, + "prompt_2": { + "accuracy": 0.5774647887323944 + }, + "prompt_3": { + "accuracy": 0.5915492957746479 + }, + "prompt_4": { + "accuracy": 0.43661971830985913 + }, + "prompt_5": { + "accuracy": 0.5070422535211268 + } + }, + "rte": { + "prompt_1": { + "accuracy": 0.6462093862815884 + }, + "prompt_2": { + "accuracy": 0.5703971119133574 + }, + "prompt_3": { + "accuracy": 0.6173285198555957 + }, + "prompt_4": { + "accuracy": 0.5595667870036101 + }, + "prompt_5": { + "accuracy": 0.628158844765343 + } + }, + "mrpc": { + "prompt_1": { + "accuracy": 0.6838235294117647 + }, + "prompt_2": { + "accuracy": 0.6053921568627451 + }, + "prompt_3": { + "accuracy": 0.6642156862745098 + }, + "prompt_4": { + "accuracy": 0.6715686274509803 + }, + "prompt_5": { + "accuracy": 0.6740196078431373 + } + }, + "indommlu": { + "prompt_1": { + "accuracy": 0.38914480272381335, + "category_acc": { + "History": 0.39959839357429716, + "Geography": 0.3551020408163265, + "Lampungic": 0.32653061224489793, + "Social science": 0.4574290484140234, + "Balinese": 0.3099787685774947, + "Makassarese": 0.3118279569892473, + "Banjarese": 0.3402777777777778, + "Chemistry": 0.2846715328467153, + "Biology": 0.421301775147929, + "Science": 0.43343653250773995, + "Christian religion": 0.44776119402985076, + "Art": 0.3910149750415973, + "Islam religion": 0.41963015647226176, + "Hindu religion": 0.42, + "Madurese": 0.3050847457627119, + "Sport": 0.4594594594594595, + "Indonesian language": 0.4392901618929016, + "Physics": 0.3414141414141414, + "Minangkabau culture": 0.32663316582914576, + "Dayak language": 0.25688073394495414, + "Sociology": 0.4012096774193548, + "Economy": 0.3709016393442623, + "Sundanese": 0.34485738980121, + "Javanese": 0.3094758064516129, + "Civic education": 0.4434907010014306 + } + }, + "prompt_2": { + "accuracy": 0.45116496428333, + "category_acc": { + "History": 0.42570281124497994, + "Geography": 0.4122448979591837, + "Lampungic": 0.3469387755102041, + "Social science": 0.6243739565943238, + "Balinese": 0.31422505307855625, + "Makassarese": 0.3225806451612903, + "Banjarese": 0.3680555555555556, + "Chemistry": 0.2934306569343066, + "Biology": 0.46272189349112425, + "Science": 0.5583075335397317, + "Christian religion": 0.5323383084577115, + "Art": 0.5124792013311148, + "Islam religion": 0.5092460881934566, + "Hindu religion": 0.5, + "Madurese": 0.3254237288135593, + "Sport": 0.4594594594594595, + "Indonesian language": 0.5065379825653799, + "Physics": 0.39595959595959596, + "Minangkabau culture": 0.4020100502512563, + "Dayak language": 0.28440366972477066, + "Sociology": 0.4435483870967742, + "Economy": 0.430327868852459, + "Sundanese": 0.3863439930855661, + "Javanese": 0.34173387096774194, + "Civic education": 0.51931330472103 + } + }, + "prompt_3": { + "accuracy": 0.42285866880299083, + "category_acc": { + "History": 0.39558232931726905, + "Geography": 0.3979591836734694, + "Lampungic": 0.30612244897959184, + "Social science": 0.5709515859766278, + "Balinese": 0.31422505307855625, + "Makassarese": 0.3333333333333333, + "Banjarese": 0.3333333333333333, + "Chemistry": 0.2832116788321168, + "Biology": 0.44260355029585796, + "Science": 0.5159958720330238, + "Christian religion": 0.44776119402985076, + "Art": 0.4442595673876872, + "Islam religion": 0.45803698435277385, + "Hindu religion": 0.4266666666666667, + "Madurese": 0.29491525423728815, + "Sport": 0.44594594594594594, + "Indonesian language": 0.4853673723536737, + "Physics": 0.40404040404040403, + "Minangkabau culture": 0.36180904522613067, + "Dayak language": 0.28440366972477066, + "Sociology": 0.4153225806451613, + "Economy": 0.38114754098360654, + "Sundanese": 0.3560933448573898, + "Javanese": 0.3336693548387097, + "Civic education": 0.48068669527896996 + } + }, + "prompt_4": { + "accuracy": 0.38240202950797786, + "category_acc": { + "History": 0.3895582329317269, + "Geography": 0.35918367346938773, + "Lampungic": 0.2925170068027211, + "Social science": 0.4590984974958264, + "Balinese": 0.3333333333333333, + "Makassarese": 0.3279569892473118, + "Banjarese": 0.3194444444444444, + "Chemistry": 0.28905109489051095, + "Biology": 0.41775147928994083, + "Science": 0.3973168214654283, + "Christian religion": 0.417910447761194, + "Art": 0.3910149750415973, + "Islam religion": 0.40540540540540543, + "Hindu religion": 0.38, + "Madurese": 0.3288135593220339, + "Sport": 0.34459459459459457, + "Indonesian language": 0.42745952677459526, + "Physics": 0.3090909090909091, + "Minangkabau culture": 0.3417085427135678, + "Dayak language": 0.27522935779816515, + "Sociology": 0.39314516129032256, + "Economy": 0.3463114754098361, + "Sundanese": 0.3500432152117545, + "Javanese": 0.3316532258064516, + "Civic education": 0.44206008583690987 + } + }, + "prompt_5": { + "accuracy": 0.4477601976099873, + "category_acc": { + "History": 0.44377510040160645, + "Geography": 0.42448979591836733, + "Lampungic": 0.3129251700680272, + "Social science": 0.5893155258764607, + "Balinese": 0.28450106157112526, + "Makassarese": 0.34946236559139787, + "Banjarese": 0.3472222222222222, + "Chemistry": 0.3343065693430657, + "Biology": 0.47218934911242605, + "Science": 0.5758513931888545, + "Christian religion": 0.4925373134328358, + "Art": 0.49584026622296173, + "Islam religion": 0.4822190611664296, + "Hindu religion": 0.4533333333333333, + "Madurese": 0.29152542372881357, + "Sport": 0.4797297297297297, + "Indonesian language": 0.5003113325031133, + "Physics": 0.4121212121212121, + "Minangkabau culture": 0.3768844221105528, + "Dayak language": 0.24770642201834864, + "Sociology": 0.41935483870967744, + "Economy": 0.4323770491803279, + "Sundanese": 0.39066551426101986, + "Javanese": 0.35080645161290325, + "Civic education": 0.5021459227467812 + } + } + } + }, + "five_shot": { + "cross_mmlu": { + "prompt_1": -1 + }, + "cross_logiqa": { + "prompt_1": -1 + }, + "sg_eval": { + "prompt_1": -1 + }, + "cn_eval": { + "prompt_1": -1 + }, + "us_eval": { + "prompt_1": -1 + }, + "ph_eval": { + "prompt_1": -1 + }, + "sing2eng": { + "prompt_1": -1 + }, + "flores_ind2eng": { + "prompt_1": -1 + }, + "flores_vie2eng": { + "prompt_1": -1 + }, + "flores_zho2eng": { + "prompt_1": -1 + }, + "flores_zsm2eng": { + "prompt_1": -1 + }, + "mmlu": { + "prompt_1": -1 + }, + "mmlu_full": { + "prompt_1": -1 + }, + "c_eval": { + "prompt_1": -1 + }, + "c_eval_full": { + "prompt_1": -1 + }, + "cmmlu": { + "prompt_1": -1 + }, + "cmmlu_full": { + "prompt_1": -1 + }, + "zbench": { + "prompt_1": -1 + }, + "ind_emotion": { + "prompt_1": -1 + }, + "ocnli": { + "prompt_1": -1 + }, + "c3": { + "prompt_1": -1 + }, + "dream": { + "prompt_1": -1 + }, + "samsum": { + "prompt_1": -1 + }, + "dialogsum": { + "prompt_1": -1 + }, + "sst2": { + "prompt_1": -1 + }, + "cola": { + "prompt_1": -1 + }, + "qqp": { + "prompt_1": -1 + }, + "mnli": { + "prompt_1": -1 + }, + "qnli": { + "prompt_1": -1 + }, + "wnli": { + "prompt_1": -1 + }, + "rte": { + "prompt_1": -1 + }, + "mrpc": { + "prompt_1": -1 + }, + "indommlu": { + "prompt_1": -1 + } + } + }, + "gemma-7b-it": { + "model_size": "7B", + "model_link": "https://huggingface.co/google/gemma-7b-it", + "zero_shot": { + "cross_mmlu": { + "prompt_1": { + "overall_acc": 0.43523809523809526, + "language_acc": { + "Vietnamese": 0.4066666666666667, + "English": 0.5666666666666667, + "Malay": 0.44, + "Chinese": 0.36, + "Filipino": 0.3933333333333333, + "Indonesian": 0.41333333333333333, + "Spanish": 0.4666666666666667 + }, + "consistency_score_2": 0.493015873015873, + "consistency_score_3": 0.3224761904761904, + "consistency_score_4": 0.2441904761904762, + "consistency_score_5": 0.2, + "consistency_score_6": 0.17047619047619045, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.54, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.49333333333333335, + "Vietnamese,Indonesian": 0.5466666666666666, + "Vietnamese,Spanish": 0.5266666666666666, + "English,Malay": 0.5466666666666666, + "English,Chinese": 0.44666666666666666, + "English,Filipino": 0.52, + "English,Indonesian": 0.52, + "English,Spanish": 0.6, + "Malay,Chinese": 0.43333333333333335, + "Malay,Filipino": 0.52, + "Malay,Indonesian": 0.5466666666666666, + "Malay,Spanish": 0.4866666666666667, + "Chinese,Filipino": 0.38, + "Chinese,Indonesian": 0.4266666666666667, + "Chinese,Spanish": 0.46, + "Filipino,Indonesian": 0.52, + "Filipino,Spanish": 0.4666666666666667, + "Indonesian,Spanish": 0.49333333333333335 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.37333333333333335, + "Vietnamese,English,Chinese": 0.28, + "Vietnamese,English,Filipino": 0.34, + "Vietnamese,English,Indonesian": 0.37333333333333335, + "Vietnamese,English,Spanish": 0.4, + "Vietnamese,Malay,Chinese": 0.25333333333333335, + "Vietnamese,Malay,Filipino": 0.3333333333333333, + "Vietnamese,Malay,Indonesian": 0.38666666666666666, + "Vietnamese,Malay,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.24, + "Vietnamese,Chinese,Indonesian": 0.2733333333333333, + "Vietnamese,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian": 0.35333333333333333, + "Vietnamese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish": 0.38, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.35333333333333333, + "English,Malay,Indonesian": 0.38, + "English,Malay,Spanish": 0.3933333333333333, + "English,Chinese,Filipino": 0.26666666666666666, + "English,Chinese,Indonesian": 0.28, + "English,Chinese,Spanish": 0.32, + "English,Filipino,Indonesian": 0.36666666666666664, + "English,Filipino,Spanish": 0.36, + "English,Indonesian,Spanish": 0.38666666666666666, + "Malay,Chinese,Filipino": 0.26666666666666666, + "Malay,Chinese,Indonesian": 0.28, + "Malay,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian": 0.35333333333333333, + "Malay,Filipino,Spanish": 0.31333333333333335, + "Malay,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Filipino,Indonesian": 0.2733333333333333, + "Chinese,Filipino,Spanish": 0.26, + "Chinese,Indonesian,Spanish": 0.2733333333333333, + "Filipino,Indonesian,Spanish": 0.32666666666666666 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino": 0.2733333333333333, + "Vietnamese,English,Malay,Indonesian": 0.32, + "Vietnamese,English,Malay,Spanish": 0.3, + "Vietnamese,English,Chinese,Filipino": 0.2, + "Vietnamese,English,Chinese,Indonesian": 0.21333333333333335, + "Vietnamese,English,Chinese,Spanish": 0.23333333333333334, + "Vietnamese,English,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,English,Filipino,Spanish": 0.2733333333333333, + "Vietnamese,English,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Malay,Chinese,Filipino": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.2, + "Vietnamese,Malay,Filipino,Indonesian": 0.28, + "Vietnamese,Malay,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Indonesian,Spanish": 0.30666666666666664, + "Vietnamese,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Filipino,Indonesian,Spanish": 0.28, + "English,Malay,Chinese,Filipino": 0.21333333333333335, + "English,Malay,Chinese,Indonesian": 0.21333333333333335, + "English,Malay,Chinese,Spanish": 0.21333333333333335, + "English,Malay,Filipino,Indonesian": 0.29333333333333333, + "English,Malay,Filipino,Spanish": 0.28, + "English,Malay,Indonesian,Spanish": 0.3, + "English,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Chinese,Filipino,Spanish": 0.22666666666666666, + "English,Chinese,Indonesian,Spanish": 0.23333333333333334, + "English,Filipino,Indonesian,Spanish": 0.2866666666666667, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.2, + "Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.17333333333333334, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Spanish": 0.17333333333333334, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.18, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.18, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.18, + "English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Spanish": 0.18, + "English,Malay,Chinese,Indonesian,Spanish": 0.18, + "English,Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.15333333333333332, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.16, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + } + }, + "AC3_2": 0.46232883846234274, + "AC3_3": 0.3704666139471331, + "AC3_4": 0.3128540723370417, + "AC3_5": 0.2740629684725988, + "AC3_6": 0.2449925126879149, + "AC3_7": 0.21939989085154882 + }, + "prompt_2": { + "overall_acc": 0.4485714285714285, + "language_acc": { + "Vietnamese": 0.41333333333333333, + "English": 0.5466666666666666, + "Malay": 0.4066666666666667, + "Chinese": 0.48, + "Filipino": 0.42, + "Indonesian": 0.42, + "Spanish": 0.4533333333333333 + }, + "consistency_score_2": 0.48730158730158746, + "consistency_score_3": 0.32476190476190475, + "consistency_score_4": 0.2554285714285714, + "consistency_score_5": 0.21714285714285717, + "consistency_score_6": 0.1923809523809524, + "consistency_score_7": 0.17333333333333334, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.52, + "Vietnamese,Malay": 0.48, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Filipino": 0.49333333333333335, + "Vietnamese,Indonesian": 0.5133333333333333, + "Vietnamese,Spanish": 0.5066666666666667, + "English,Malay": 0.4666666666666667, + "English,Chinese": 0.5, + "English,Filipino": 0.48, + "English,Indonesian": 0.4866666666666667, + "English,Spanish": 0.54, + "Malay,Chinese": 0.4666666666666667, + "Malay,Filipino": 0.4666666666666667, + "Malay,Indonesian": 0.5066666666666667, + "Malay,Spanish": 0.49333333333333335, + "Chinese,Filipino": 0.47333333333333333, + "Chinese,Indonesian": 0.4866666666666667, + "Chinese,Spanish": 0.5266666666666666, + "Filipino,Indonesian": 0.41333333333333333, + "Filipino,Spanish": 0.46, + "Indonesian,Spanish": 0.4533333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.3333333333333333, + "Vietnamese,English,Chinese": 0.36666666666666664, + "Vietnamese,English,Filipino": 0.32, + "Vietnamese,English,Indonesian": 0.36, + "Vietnamese,English,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Chinese": 0.32, + "Vietnamese,Malay,Filipino": 0.30666666666666664, + "Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Spanish": 0.3333333333333333, + "Vietnamese,Chinese,Filipino": 0.32666666666666666, + "Vietnamese,Chinese,Indonesian": 0.32666666666666666, + "Vietnamese,Chinese,Spanish": 0.3466666666666667, + "Vietnamese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Filipino,Spanish": 0.32666666666666666, + "Vietnamese,Indonesian,Spanish": 0.35333333333333333, + "English,Malay,Chinese": 0.32, + "English,Malay,Filipino": 0.31333333333333335, + "English,Malay,Indonesian": 0.32, + "English,Malay,Spanish": 0.34, + "English,Chinese,Filipino": 0.32, + "English,Chinese,Indonesian": 0.3333333333333333, + "English,Chinese,Spanish": 0.36, + "English,Filipino,Indonesian": 0.29333333333333333, + "English,Filipino,Spanish": 0.3333333333333333, + "English,Indonesian,Spanish": 0.32, + "Malay,Chinese,Filipino": 0.30666666666666664, + "Malay,Chinese,Indonesian": 0.31333333333333335, + "Malay,Chinese,Spanish": 0.31333333333333335, + "Malay,Filipino,Indonesian": 0.2866666666666667, + "Malay,Filipino,Spanish": 0.30666666666666664, + "Malay,Indonesian,Spanish": 0.31333333333333335, + "Chinese,Filipino,Indonesian": 0.29333333333333333, + "Chinese,Filipino,Spanish": 0.32666666666666666, + "Chinese,Indonesian,Spanish": 0.32666666666666666, + "Filipino,Indonesian,Spanish": 0.3 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.28, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,English,Malay,Spanish": 0.2733333333333333, + "Vietnamese,English,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,English,Chinese,Indonesian": 0.28, + "Vietnamese,English,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,English,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Filipino,Spanish": 0.26, + "Vietnamese,English,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Spanish": 0.24, + "Vietnamese,Malay,Indonesian,Spanish": 0.26, + "Vietnamese,Chinese,Filipino,Indonesian": 0.24, + "Vietnamese,Chinese,Filipino,Spanish": 0.26, + "Vietnamese,Chinese,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Chinese,Indonesian": 0.24, + "English,Malay,Chinese,Spanish": 0.25333333333333335, + "English,Malay,Filipino,Indonesian": 0.24, + "English,Malay,Filipino,Spanish": 0.26666666666666666, + "English,Malay,Indonesian,Spanish": 0.25333333333333335, + "English,Chinese,Filipino,Indonesian": 0.24666666666666667, + "English,Chinese,Filipino,Spanish": 0.26666666666666666, + "English,Chinese,Indonesian,Spanish": 0.26, + "English,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Malay,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Malay,Chinese,Filipino,Spanish": 0.25333333333333335, + "Malay,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Malay,Filipino,Indonesian,Spanish": 0.23333333333333334, + "Chinese,Filipino,Indonesian,Spanish": 0.25333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,English,Malay,Chinese,Spanish": 0.24, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.22, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.22, + "English,Malay,Chinese,Filipino,Indonesian": 0.2, + "English,Malay,Chinese,Filipino,Spanish": 0.22, + "English,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "English,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + } + }, + "AC3_2": 0.46713510365235356, + "AC3_3": 0.3767558057218656, + "AC3_4": 0.32550556581647205, + "AC3_5": 0.2926302881228115, + "AC3_6": 0.2692761621316243, + "AC3_7": 0.2500459417668379 + }, + "prompt_3": { + "overall_acc": 0.44, + "language_acc": { + "Vietnamese": 0.38, + "English": 0.5333333333333333, + "Malay": 0.38666666666666666, + "Chinese": 0.4266666666666667, + "Filipino": 0.41333333333333333, + "Indonesian": 0.46, + "Spanish": 0.48 + }, + "consistency_score_2": 0.4825396825396825, + "consistency_score_3": 0.3135238095238095, + "consistency_score_4": 0.24000000000000002, + "consistency_score_5": 0.1980952380952381, + "consistency_score_6": 0.1695238095238095, + "consistency_score_7": 0.14666666666666667, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.5066666666666667, + "Vietnamese,Malay": 0.4866666666666667, + "Vietnamese,Chinese": 0.37333333333333335, + "Vietnamese,Filipino": 0.44666666666666666, + "Vietnamese,Indonesian": 0.48, + "Vietnamese,Spanish": 0.5266666666666666, + "English,Malay": 0.47333333333333333, + "English,Chinese": 0.43333333333333335, + "English,Filipino": 0.5, + "English,Indonesian": 0.5533333333333333, + "English,Spanish": 0.5733333333333334, + "Malay,Chinese": 0.4533333333333333, + "Malay,Filipino": 0.49333333333333335, + "Malay,Indonesian": 0.54, + "Malay,Spanish": 0.48, + "Chinese,Filipino": 0.44666666666666666, + "Chinese,Indonesian": 0.4666666666666667, + "Chinese,Spanish": 0.46, + "Filipino,Indonesian": 0.4533333333333333, + "Filipino,Spanish": 0.47333333333333333, + "Indonesian,Spanish": 0.5133333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.34, + "Vietnamese,English,Chinese": 0.2866666666666667, + "Vietnamese,English,Filipino": 0.30666666666666664, + "Vietnamese,English,Indonesian": 0.34, + "Vietnamese,English,Spanish": 0.36666666666666664, + "Vietnamese,Malay,Chinese": 0.28, + "Vietnamese,Malay,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.26, + "Vietnamese,Chinese,Indonesian": 0.2733333333333333, + "Vietnamese,Chinese,Spanish": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian": 0.28, + "Vietnamese,Filipino,Spanish": 0.3, + "Vietnamese,Indonesian,Spanish": 0.34, + "English,Malay,Chinese": 0.29333333333333333, + "English,Malay,Filipino": 0.32, + "English,Malay,Indonesian": 0.35333333333333333, + "English,Malay,Spanish": 0.34, + "English,Chinese,Filipino": 0.2866666666666667, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.31333333333333335, + "English,Filipino,Indonesian": 0.32666666666666666, + "English,Filipino,Spanish": 0.35333333333333333, + "English,Indonesian,Spanish": 0.4, + "Malay,Chinese,Filipino": 0.28, + "Malay,Chinese,Indonesian": 0.30666666666666664, + "Malay,Chinese,Spanish": 0.2733333333333333, + "Malay,Filipino,Indonesian": 0.31333333333333335, + "Malay,Filipino,Spanish": 0.30666666666666664, + "Malay,Indonesian,Spanish": 0.34, + "Chinese,Filipino,Indonesian": 0.28, + "Chinese,Filipino,Spanish": 0.29333333333333333, + "Chinese,Indonesian,Spanish": 0.31333333333333335, + "Filipino,Indonesian,Spanish": 0.32 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.24, + "Vietnamese,English,Malay,Filipino": 0.25333333333333335, + "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Spanish": 0.2733333333333333, + "Vietnamese,English,Chinese,Filipino": 0.22, + "Vietnamese,English,Chinese,Indonesian": 0.24, + "Vietnamese,English,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,English,Filipino,Indonesian": 0.24, + "Vietnamese,English,Filipino,Spanish": 0.24, + "Vietnamese,English,Indonesian,Spanish": 0.29333333333333333, + "Vietnamese,Malay,Chinese,Filipino": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,Malay,Chinese,Spanish": 0.22, + "Vietnamese,Malay,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,Malay,Filipino,Spanish": 0.24, + "Vietnamese,Malay,Indonesian,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,Chinese,Indonesian,Spanish": 0.22, + "Vietnamese,Filipino,Indonesian,Spanish": 0.22666666666666666, + "English,Malay,Chinese,Filipino": 0.22, + "English,Malay,Chinese,Indonesian": 0.23333333333333334, + "English,Malay,Chinese,Spanish": 0.23333333333333334, + "English,Malay,Filipino,Indonesian": 0.26, + "English,Malay,Filipino,Spanish": 0.26, + "English,Malay,Indonesian,Spanish": 0.2866666666666667, + "English,Chinese,Filipino,Indonesian": 0.22666666666666666, + "English,Chinese,Filipino,Spanish": 0.24666666666666667, + "English,Chinese,Indonesian,Spanish": 0.26, + "English,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.20666666666666667, + "Malay,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Malay,Filipino,Indonesian,Spanish": 0.24666666666666667, + "Chinese,Filipino,Indonesian,Spanish": 0.24 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.2, + "Vietnamese,English,Malay,Chinese,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.2, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.18, + "English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "English,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "English,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "English,Malay,Filipino,Indonesian,Spanish": 0.22666666666666666, + "English,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.16, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.16666666666666666, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.18, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.18, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.15333333333333332, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.14666666666666667 + } + }, + "AC3_2": 0.46028905707329965, + "AC3_3": 0.366147623813896, + "AC3_4": 0.3105882352484429, + "AC3_5": 0.2731940298079323, + "AC3_6": 0.2447499999598457, + "AC3_7": 0.2199999999625 + }, + "prompt_4": { + "overall_acc": 0.45809523809523817, + "language_acc": { + "Vietnamese": 0.4266666666666667, + "English": 0.54, + "Malay": 0.4066666666666667, + "Chinese": 0.43333333333333335, + "Filipino": 0.46, + "Indonesian": 0.4533333333333333, + "Spanish": 0.4866666666666667 + }, + "consistency_score_2": 0.49142857142857144, + "consistency_score_3": 0.33219047619047615, + "consistency_score_4": 0.2678095238095239, + "consistency_score_5": 0.233968253968254, + "consistency_score_6": 0.21142857142857144, + "consistency_score_7": 0.19333333333333333, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.49333333333333335, + "Vietnamese,Malay": 0.5066666666666667, + "Vietnamese,Chinese": 0.5, + "Vietnamese,Filipino": 0.47333333333333333, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish": 0.49333333333333335, + "English,Malay": 0.4533333333333333, + "English,Chinese": 0.5066666666666667, + "English,Filipino": 0.5, + "English,Indonesian": 0.5266666666666666, + "English,Spanish": 0.58, + "Malay,Chinese": 0.43333333333333335, + "Malay,Filipino": 0.5, + "Malay,Indonesian": 0.5133333333333333, + "Malay,Spanish": 0.49333333333333335, + "Chinese,Filipino": 0.46, + "Chinese,Indonesian": 0.49333333333333335, + "Chinese,Spanish": 0.4866666666666667, + "Filipino,Indonesian": 0.46, + "Filipino,Spanish": 0.47333333333333333, + "Indonesian,Spanish": 0.5 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.32666666666666666, + "Vietnamese,English,Chinese": 0.3466666666666667, + "Vietnamese,English,Filipino": 0.32, + "Vietnamese,English,Indonesian": 0.34, + "Vietnamese,English,Spanish": 0.36, + "Vietnamese,Malay,Chinese": 0.31333333333333335, + "Vietnamese,Malay,Filipino": 0.34, + "Vietnamese,Malay,Indonesian": 0.34, + "Vietnamese,Malay,Spanish": 0.34, + "Vietnamese,Chinese,Filipino": 0.3333333333333333, + "Vietnamese,Chinese,Indonesian": 0.3333333333333333, + "Vietnamese,Chinese,Spanish": 0.34, + "Vietnamese,Filipino,Indonesian": 0.30666666666666664, + "Vietnamese,Filipino,Spanish": 0.32, + "Vietnamese,Indonesian,Spanish": 0.3466666666666667, + "English,Malay,Chinese": 0.3, + "English,Malay,Filipino": 0.32, + "English,Malay,Indonesian": 0.32666666666666666, + "English,Malay,Spanish": 0.36, + "English,Chinese,Filipino": 0.32666666666666666, + "English,Chinese,Indonesian": 0.36, + "English,Chinese,Spanish": 0.36666666666666664, + "English,Filipino,Indonesian": 0.34, + "English,Filipino,Spanish": 0.3466666666666667, + "English,Indonesian,Spanish": 0.35333333333333333, + "Malay,Chinese,Filipino": 0.31333333333333335, + "Malay,Chinese,Indonesian": 0.32666666666666666, + "Malay,Chinese,Spanish": 0.3, + "Malay,Filipino,Indonesian": 0.32666666666666666, + "Malay,Filipino,Spanish": 0.32666666666666666, + "Malay,Indonesian,Spanish": 0.3466666666666667, + "Chinese,Filipino,Indonesian": 0.32666666666666666, + "Chinese,Filipino,Spanish": 0.31333333333333335, + "Chinese,Indonesian,Spanish": 0.32666666666666666, + "Filipino,Indonesian,Spanish": 0.31333333333333335 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.26666666666666666, + "Vietnamese,English,Malay,Filipino": 0.26, + "Vietnamese,English,Malay,Indonesian": 0.2733333333333333, + "Vietnamese,English,Malay,Spanish": 0.28, + "Vietnamese,English,Chinese,Filipino": 0.28, + "Vietnamese,English,Chinese,Indonesian": 0.28, + "Vietnamese,English,Chinese,Spanish": 0.29333333333333333, + "Vietnamese,English,Filipino,Indonesian": 0.2733333333333333, + "Vietnamese,English,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,English,Indonesian,Spanish": 0.2866666666666667, + "Vietnamese,Malay,Chinese,Filipino": 0.26666666666666666, + "Vietnamese,Malay,Chinese,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,Malay,Filipino,Indonesian": 0.25333333333333335, + "Vietnamese,Malay,Filipino,Spanish": 0.25333333333333335, + "Vietnamese,Malay,Indonesian,Spanish": 0.28, + "Vietnamese,Chinese,Filipino,Indonesian": 0.26, + "Vietnamese,Chinese,Filipino,Spanish": 0.26666666666666666, + "Vietnamese,Chinese,Indonesian,Spanish": 0.2733333333333333, + "Vietnamese,Filipino,Indonesian,Spanish": 0.26, + "English,Malay,Chinese,Filipino": 0.24666666666666667, + "English,Malay,Chinese,Indonesian": 0.24666666666666667, + "English,Malay,Chinese,Spanish": 0.26666666666666666, + "English,Malay,Filipino,Indonesian": 0.2733333333333333, + "English,Malay,Filipino,Spanish": 0.2733333333333333, + "English,Malay,Indonesian,Spanish": 0.28, + "English,Chinese,Filipino,Indonesian": 0.28, + "English,Chinese,Filipino,Spanish": 0.26666666666666666, + "English,Chinese,Indonesian,Spanish": 0.28, + "English,Filipino,Indonesian,Spanish": 0.2733333333333333, + "Malay,Chinese,Filipino,Indonesian": 0.25333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.24666666666666667, + "Malay,Chinese,Indonesian,Spanish": 0.26, + "Malay,Filipino,Indonesian,Spanish": 0.26666666666666666, + "Chinese,Filipino,Indonesian,Spanish": 0.26666666666666666 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.23333333333333334, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.22666666666666666, + "Vietnamese,English,Malay,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.23333333333333334, + "Vietnamese,English,Malay,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.24666666666666667, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.24666666666666667, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.24, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.22, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.22, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.24, + "English,Malay,Chinese,Filipino,Indonesian": 0.22, + "English,Malay,Chinese,Filipino,Spanish": 0.22666666666666666, + "English,Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, + "English,Malay,Filipino,Indonesian,Spanish": 0.24, + "English,Chinese,Filipino,Indonesian,Spanish": 0.24, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.20666666666666667, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.20666666666666667 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333 + } + }, + "AC3_2": 0.47417681611283563, + "AC3_3": 0.3851135671906498, + "AC3_4": 0.3380120202955533, + "AC3_5": 0.3097396242453271, + "AC3_6": 0.2893233082274634, + "AC3_7": 0.271910331342275 + }, + "prompt_5": { + "overall_acc": 0.4476190476190475, + "language_acc": { + "Vietnamese": 0.4266666666666667, + "English": 0.5466666666666666, + "Malay": 0.42, + "Chinese": 0.44666666666666666, + "Filipino": 0.41333333333333333, + "Indonesian": 0.42, + "Spanish": 0.46 + }, + "consistency_score_2": 0.47777777777777786, + "consistency_score_3": 0.3095238095238096, + "consistency_score_4": 0.24209523809523809, + "consistency_score_5": 0.20761904761904765, + "consistency_score_6": 0.18476190476190477, + "consistency_score_7": 0.16666666666666666, + "detailed_consistency_score": { + "2_combine": { + "Vietnamese,English": 0.47333333333333333, + "Vietnamese,Malay": 0.5266666666666666, + "Vietnamese,Chinese": 0.4666666666666667, + "Vietnamese,Filipino": 0.4266666666666667, + "Vietnamese,Indonesian": 0.47333333333333333, + "Vietnamese,Spanish": 0.46, + "English,Malay": 0.4866666666666667, + "English,Chinese": 0.44666666666666666, + "English,Filipino": 0.5133333333333333, + "English,Indonesian": 0.5, + "English,Spanish": 0.5666666666666667, + "Malay,Chinese": 0.44, + "Malay,Filipino": 0.44666666666666666, + "Malay,Indonesian": 0.5466666666666666, + "Malay,Spanish": 0.5266666666666666, + "Chinese,Filipino": 0.38666666666666666, + "Chinese,Indonesian": 0.5066666666666667, + "Chinese,Spanish": 0.46, + "Filipino,Indonesian": 0.48, + "Filipino,Spanish": 0.44666666666666666, + "Indonesian,Spanish": 0.4533333333333333 + }, + "3_combine": { + "Vietnamese,English,Malay": 0.32, + "Vietnamese,English,Chinese": 0.31333333333333335, + "Vietnamese,English,Filipino": 0.29333333333333333, + "Vietnamese,English,Indonesian": 0.32, + "Vietnamese,English,Spanish": 0.32, + "Vietnamese,Malay,Chinese": 0.32, + "Vietnamese,Malay,Filipino": 0.31333333333333335, + "Vietnamese,Malay,Indonesian": 0.3333333333333333, + "Vietnamese,Malay,Spanish": 0.3466666666666667, + "Vietnamese,Chinese,Filipino": 0.28, + "Vietnamese,Chinese,Indonesian": 0.32, + "Vietnamese,Chinese,Spanish": 0.31333333333333335, + "Vietnamese,Filipino,Indonesian": 0.2866666666666667, + "Vietnamese,Filipino,Spanish": 0.2866666666666667, + "Vietnamese,Indonesian,Spanish": 0.30666666666666664, + "English,Malay,Chinese": 0.2866666666666667, + "English,Malay,Filipino": 0.31333333333333335, + "English,Malay,Indonesian": 0.36, + "English,Malay,Spanish": 0.35333333333333333, + "English,Chinese,Filipino": 0.28, + "English,Chinese,Indonesian": 0.32, + "English,Chinese,Spanish": 0.3333333333333333, + "English,Filipino,Indonesian": 0.32666666666666666, + "English,Filipino,Spanish": 0.3466666666666667, + "English,Indonesian,Spanish": 0.3333333333333333, + "Malay,Chinese,Filipino": 0.26, + "Malay,Chinese,Indonesian": 0.3, + "Malay,Chinese,Spanish": 0.28, + "Malay,Filipino,Indonesian": 0.32, + "Malay,Filipino,Spanish": 0.29333333333333333, + "Malay,Indonesian,Spanish": 0.32666666666666666, + "Chinese,Filipino,Indonesian": 0.2866666666666667, + "Chinese,Filipino,Spanish": 0.26666666666666666, + "Chinese,Indonesian,Spanish": 0.2866666666666667, + "Filipino,Indonesian,Spanish": 0.2866666666666667 + }, + "4_combine": { + "Vietnamese,English,Malay,Chinese": 0.26, + "Vietnamese,English,Malay,Filipino": 0.24666666666666667, + "Vietnamese,English,Malay,Indonesian": 0.26666666666666666, + "Vietnamese,English,Malay,Spanish": 0.25333333333333335, + "Vietnamese,English,Chinese,Filipino": 0.24, + "Vietnamese,English,Chinese,Indonesian": 0.26, + "Vietnamese,English,Chinese,Spanish": 0.26666666666666666, + "Vietnamese,English,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,English,Filipino,Spanish": 0.24, + "Vietnamese,English,Indonesian,Spanish": 0.26, + "Vietnamese,Malay,Chinese,Filipino": 0.22666666666666666, + "Vietnamese,Malay,Chinese,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Chinese,Spanish": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Indonesian": 0.24666666666666667, + "Vietnamese,Malay,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Malay,Indonesian,Spanish": 0.25333333333333335, + "Vietnamese,Chinese,Filipino,Indonesian": 0.22666666666666666, + "Vietnamese,Chinese,Filipino,Spanish": 0.23333333333333334, + "Vietnamese,Chinese,Indonesian,Spanish": 0.23333333333333334, + "Vietnamese,Filipino,Indonesian,Spanish": 0.23333333333333334, + "English,Malay,Chinese,Filipino": 0.22666666666666666, + "English,Malay,Chinese,Indonesian": 0.22666666666666666, + "English,Malay,Chinese,Spanish": 0.24, + "English,Malay,Filipino,Indonesian": 0.26666666666666666, + "English,Malay,Filipino,Spanish": 0.26, + "English,Malay,Indonesian,Spanish": 0.26, + "English,Chinese,Filipino,Indonesian": 0.23333333333333334, + "English,Chinese,Filipino,Spanish": 0.23333333333333334, + "English,Chinese,Indonesian,Spanish": 0.24666666666666667, + "English,Filipino,Indonesian,Spanish": 0.25333333333333335, + "Malay,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Malay,Chinese,Filipino,Spanish": 0.21333333333333335, + "Malay,Chinese,Indonesian,Spanish": 0.22666666666666666, + "Malay,Filipino,Indonesian,Spanish": 0.24, + "Chinese,Filipino,Indonesian,Spanish": 0.21333333333333335 + }, + "5_combine": { + "Vietnamese,English,Malay,Chinese,Filipino": 0.21333333333333335, + "Vietnamese,English,Malay,Chinese,Indonesian": 0.22, + "Vietnamese,English,Malay,Chinese,Spanish": 0.22666666666666666, + "Vietnamese,English,Malay,Filipino,Indonesian": 0.22, + "Vietnamese,English,Malay,Filipino,Spanish": 0.20666666666666667, + "Vietnamese,English,Malay,Indonesian,Spanish": 0.22, + "Vietnamese,English,Chinese,Filipino,Indonesian": 0.21333333333333335, + "Vietnamese,English,Chinese,Filipino,Spanish": 0.21333333333333335, + "Vietnamese,English,Chinese,Indonesian,Spanish": 0.22, + "Vietnamese,English,Filipino,Indonesian,Spanish": 0.21333333333333335, + "Vietnamese,Malay,Chinese,Filipino,Indonesian": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Indonesian,Spanish": 0.20666666666666667, + "Vietnamese,Malay,Filipino,Indonesian,Spanish": 0.2, + "Vietnamese,Chinese,Filipino,Indonesian,Spanish": 0.2, + "English,Malay,Chinese,Filipino,Indonesian": 0.19333333333333333, + "English,Malay,Chinese,Filipino,Spanish": 0.2, + "English,Malay,Chinese,Indonesian,Spanish": 0.2, + "English,Malay,Filipino,Indonesian,Spanish": 0.22, + "English,Chinese,Filipino,Indonesian,Spanish": 0.2, + "Malay,Chinese,Filipino,Indonesian,Spanish": 0.18666666666666668 + }, + "6_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Filipino,Spanish": 0.18666666666666668, + "Vietnamese,English,Malay,Chinese,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,English,Malay,Filipino,Indonesian,Spanish": 0.18666666666666668, + "Vietnamese,English,Chinese,Filipino,Indonesian,Spanish": 0.19333333333333333, + "Vietnamese,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334, + "English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.17333333333333334 + }, + "7_combine": { + "Vietnamese,English,Malay,Chinese,Filipino,Indonesian,Spanish": 0.16666666666666666 + } + }, + "AC3_2": 0.46220697536457567, + "AC3_3": 0.3659778376276142, + "AC3_4": 0.3142357411992944, + "AC3_5": 0.2836655592036626, + "AC3_6": 0.26156052778422684, + "AC3_7": 0.2428940568080043 + } + }, + "cross_logiqa": { + "prompt_1": { + "overall_acc": 0.3774350649350649, + "language_acc": { + "Spanish": 0.3806818181818182, + "Chinese": 0.39204545454545453, + "Vietnamese": 0.3977272727272727, + "Indonesian": 0.39204545454545453, + "Malay": 0.38636363636363635, + "Filipino": 0.26136363636363635, + "English": 0.4318181818181818 + }, + "consistency_score_2": 0.48376623376623373, + "consistency_score_3": 0.2928571428571429, + "consistency_score_4": 0.20081168831168825, + "consistency_score_5": 0.1482683982683983, + "consistency_score_6": 0.11444805194805195, + "consistency_score_7": 0.09090909090909091, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.44886363636363635, + "Spanish,Vietnamese": 0.5625, + "Spanish,Indonesian": 0.5284090909090909, + "Spanish,Malay": 0.5170454545454546, + "Spanish,Filipino": 0.4375, + "Spanish,English": 0.5965909090909091, + "Chinese,Vietnamese": 0.4431818181818182, + "Chinese,Indonesian": 0.3409090909090909, + "Chinese,Malay": 0.39204545454545453, + "Chinese,Filipino": 0.3352272727272727, + "Chinese,English": 0.4318181818181818, + "Vietnamese,Indonesian": 0.5397727272727273, + "Vietnamese,Malay": 0.625, + "Vietnamese,Filipino": 0.4715909090909091, + "Vietnamese,English": 0.5625, + "Indonesian,Malay": 0.5454545454545454, + "Indonesian,Filipino": 0.4431818181818182, + "Indonesian,English": 0.48863636363636365, + "Malay,Filipino": 0.48863636363636365, + "Malay,English": 0.5227272727272727, + "Filipino,English": 0.4375 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.2784090909090909, + "Spanish,Chinese,Indonesian": 0.23295454545454544, + "Spanish,Chinese,Malay": 0.25, + "Spanish,Chinese,Filipino": 0.19886363636363635, + "Spanish,Chinese,English": 0.3125, + "Spanish,Vietnamese,Indonesian": 0.38636363636363635, + "Spanish,Vietnamese,Malay": 0.39204545454545453, + "Spanish,Vietnamese,Filipino": 0.3068181818181818, + "Spanish,Vietnamese,English": 0.42613636363636365, + "Spanish,Indonesian,Malay": 0.3522727272727273, + "Spanish,Indonesian,Filipino": 0.2840909090909091, + "Spanish,Indonesian,English": 0.375, + "Spanish,Malay,Filipino": 0.2897727272727273, + "Spanish,Malay,English": 0.39204545454545453, + "Spanish,Filipino,English": 0.30113636363636365, + "Chinese,Vietnamese,Indonesian": 0.23863636363636365, + "Chinese,Vietnamese,Malay": 0.2784090909090909, + "Chinese,Vietnamese,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,English": 0.29545454545454547, + "Chinese,Indonesian,Malay": 0.21022727272727273, + "Chinese,Indonesian,Filipino": 0.1590909090909091, + "Chinese,Indonesian,English": 0.2215909090909091, + "Chinese,Malay,Filipino": 0.1875, + "Chinese,Malay,English": 0.25, + "Chinese,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay": 0.4034090909090909, + "Vietnamese,Indonesian,Filipino": 0.29545454545454547, + "Vietnamese,Indonesian,English": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.3352272727272727, + "Vietnamese,Malay,English": 0.38636363636363635, + "Vietnamese,Filipino,English": 0.30113636363636365, + "Indonesian,Malay,Filipino": 0.30113636363636365, + "Indonesian,Malay,English": 0.32954545454545453, + "Indonesian,Filipino,English": 0.2727272727272727, + "Malay,Filipino,English": 0.2784090909090909 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay": 0.19886363636363635, + "Spanish,Chinese,Vietnamese,Filipino": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,English": 0.23295454545454544, + "Spanish,Chinese,Indonesian,Malay": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Filipino": 0.13636363636363635, + "Spanish,Chinese,Indonesian,English": 0.18181818181818182, + "Spanish,Chinese,Malay,Filipino": 0.14204545454545456, + "Spanish,Chinese,Malay,English": 0.19886363636363635, + "Spanish,Chinese,Filipino,English": 0.1534090909090909, + "Spanish,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Spanish,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,English": 0.29545454545454547, + "Spanish,Vietnamese,Malay,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Malay,English": 0.3181818181818182, + "Spanish,Vietnamese,Filipino,English": 0.2556818181818182, + "Spanish,Indonesian,Malay,Filipino": 0.21022727272727273, + "Spanish,Indonesian,Malay,English": 0.2840909090909091, + "Spanish,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Malay,Filipino,English": 0.23863636363636365, + "Chinese,Vietnamese,Indonesian,Malay": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Filipino": 0.125, + "Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, + "Chinese,Vietnamese,Malay,Filipino": 0.14772727272727273, + "Chinese,Vietnamese,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Filipino,English": 0.14772727272727273, + "Chinese,Indonesian,Malay,Filipino": 0.11363636363636363, + "Chinese,Indonesian,Malay,English": 0.16477272727272727, + "Chinese,Indonesian,Filipino,English": 0.125, + "Chinese,Malay,Filipino,English": 0.125, + "Vietnamese,Indonesian,Malay,Filipino": 0.24431818181818182, + "Vietnamese,Indonesian,Malay,English": 0.2727272727272727, + "Vietnamese,Indonesian,Filipino,English": 0.21022727272727273, + "Vietnamese,Malay,Filipino,English": 0.23863636363636365, + "Indonesian,Malay,Filipino,English": 0.20454545454545456 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.11363636363636363, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.10227272727272728, + "Spanish,Chinese,Indonesian,Malay,English": 0.14204545454545456, + "Spanish,Chinese,Indonesian,Filipino,English": 0.11931818181818182, + "Spanish,Chinese,Malay,Filipino,English": 0.11931818181818182, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.24431818181818182, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.10795454545454546, + "Chinese,Vietnamese,Malay,Filipino,English": 0.11931818181818182, + "Chinese,Indonesian,Malay,Filipino,English": 0.09659090909090909, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.17613636363636365 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.09659090909090909, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.125, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.10795454545454546, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.11363636363636363, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.09659090909090909, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09090909090909091 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09090909090909091 + } + }, + "AC3_2": 0.42403637827554763, + "AC3_3": 0.32981005427041693, + "AC3_4": 0.26214889120939544, + "AC3_5": 0.2129021261400631, + "AC3_6": 0.17563809948853085, + "AC3_7": 0.14652591772513443 + }, + "prompt_2": { + "overall_acc": 0.372564935064935, + "language_acc": { + "Spanish": 0.375, + "Chinese": 0.35795454545454547, + "Vietnamese": 0.38636363636363635, + "Indonesian": 0.3465909090909091, + "Malay": 0.38636363636363635, + "Filipino": 0.32386363636363635, + "English": 0.4318181818181818 + }, + "consistency_score_2": 0.48430735930735935, + "consistency_score_3": 0.2982142857142857, + "consistency_score_4": 0.210064935064935, + "consistency_score_5": 0.1609848484848485, + "consistency_score_6": 0.1314935064935065, + "consistency_score_7": 0.11363636363636363, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4659090909090909, + "Spanish,Vietnamese": 0.5625, + "Spanish,Indonesian": 0.5, + "Spanish,Malay": 0.4715909090909091, + "Spanish,Filipino": 0.4772727272727273, + "Spanish,English": 0.6022727272727273, + "Chinese,Vietnamese": 0.4431818181818182, + "Chinese,Indonesian": 0.3977272727272727, + "Chinese,Malay": 0.4431818181818182, + "Chinese,Filipino": 0.38636363636363635, + "Chinese,English": 0.4602272727272727, + "Vietnamese,Indonesian": 0.5511363636363636, + "Vietnamese,Malay": 0.5454545454545454, + "Vietnamese,Filipino": 0.48295454545454547, + "Vietnamese,English": 0.5795454545454546, + "Indonesian,Malay": 0.5625, + "Indonesian,Filipino": 0.3806818181818182, + "Indonesian,English": 0.5284090909090909, + "Malay,Filipino": 0.4147727272727273, + "Malay,English": 0.4659090909090909, + "Filipino,English": 0.44886363636363635 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.29545454545454547, + "Spanish,Chinese,Indonesian": 0.2556818181818182, + "Spanish,Chinese,Malay": 0.2784090909090909, + "Spanish,Chinese,Filipino": 0.24431818181818182, + "Spanish,Chinese,English": 0.3181818181818182, + "Spanish,Vietnamese,Indonesian": 0.375, + "Spanish,Vietnamese,Malay": 0.3465909090909091, + "Spanish,Vietnamese,Filipino": 0.32386363636363635, + "Spanish,Vietnamese,English": 0.42613636363636365, + "Spanish,Indonesian,Malay": 0.3409090909090909, + "Spanish,Indonesian,Filipino": 0.2727272727272727, + "Spanish,Indonesian,English": 0.3806818181818182, + "Spanish,Malay,Filipino": 0.25, + "Spanish,Malay,English": 0.3409090909090909, + "Spanish,Filipino,English": 0.32954545454545453, + "Chinese,Vietnamese,Indonesian": 0.2840909090909091, + "Chinese,Vietnamese,Malay": 0.29545454545454547, + "Chinese,Vietnamese,Filipino": 0.23863636363636365, + "Chinese,Vietnamese,English": 0.3068181818181818, + "Chinese,Indonesian,Malay": 0.2784090909090909, + "Chinese,Indonesian,Filipino": 0.18181818181818182, + "Chinese,Indonesian,English": 0.26136363636363635, + "Chinese,Malay,Filipino": 0.22727272727272727, + "Chinese,Malay,English": 0.2556818181818182, + "Chinese,Filipino,English": 0.22727272727272727, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.2840909090909091, + "Vietnamese,Indonesian,English": 0.3806818181818182, + "Vietnamese,Malay,Filipino": 0.2897727272727273, + "Vietnamese,Malay,English": 0.3522727272727273, + "Vietnamese,Filipino,English": 0.32386363636363635, + "Indonesian,Malay,Filipino": 0.26136363636363635, + "Indonesian,Malay,English": 0.32386363636363635, + "Indonesian,Filipino,English": 0.26136363636363635, + "Malay,Filipino,English": 0.23295454545454544 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.2215909090909091, + "Spanish,Chinese,Vietnamese,Malay": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,Filipino": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.1534090909090909, + "Spanish,Chinese,Indonesian,English": 0.21022727272727273, + "Spanish,Chinese,Malay,Filipino": 0.17613636363636365, + "Spanish,Chinese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Filipino,English": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay": 0.2727272727272727, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,English": 0.3125, + "Spanish,Vietnamese,Malay,Filipino": 0.21022727272727273, + "Spanish,Vietnamese,Malay,English": 0.2727272727272727, + "Spanish,Vietnamese,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.19318181818181818, + "Spanish,Indonesian,Malay,English": 0.26136363636363635, + "Spanish,Indonesian,Filipino,English": 0.23295454545454544, + "Spanish,Malay,Filipino,English": 0.20454545454545456, + "Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,English": 0.20454545454545456, + "Chinese,Vietnamese,Malay,Filipino": 0.17613636363636365, + "Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Chinese,Vietnamese,Filipino,English": 0.18181818181818182, + "Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Indonesian,Malay,English": 0.1875, + "Chinese,Indonesian,Filipino,English": 0.14772727272727273, + "Chinese,Malay,Filipino,English": 0.1590909090909091, + "Vietnamese,Indonesian,Malay,Filipino": 0.2159090909090909, + "Vietnamese,Indonesian,Malay,English": 0.26704545454545453, + "Vietnamese,Indonesian,Filipino,English": 0.2159090909090909, + "Vietnamese,Malay,Filipino,English": 0.20454545454545456, + "Indonesian,Malay,Filipino,English": 0.18181818181818182 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.14772727272727273, + "Spanish,Chinese,Vietnamese,Malay,English": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,English": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.14772727272727273, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.1875, + "Spanish,Indonesian,Malay,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Chinese,Vietnamese,Malay,Filipino,English": 0.14204545454545456, + "Chinese,Indonesian,Malay,Filipino,English": 0.125, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.11931818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.13636363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.125, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.11363636363636363 + } + }, + "AC3_2": 0.42115013178933064, + "AC3_3": 0.3312690153011882, + "AC3_4": 0.26865367840138227, + "AC3_5": 0.22482366766946138, + "AC3_6": 0.19438170521270712, + "AC3_7": 0.17415389281590324 + }, + "prompt_3": { + "overall_acc": 0.3547077922077922, + "language_acc": { + "Spanish": 0.38636363636363635, + "Chinese": 0.375, + "Vietnamese": 0.36363636363636365, + "Indonesian": 0.32954545454545453, + "Malay": 0.35795454545454547, + "Filipino": 0.26136363636363635, + "English": 0.4090909090909091 + }, + "consistency_score_2": 0.4862012987012986, + "consistency_score_3": 0.3025974025974026, + "consistency_score_4": 0.21120129870129875, + "consistency_score_5": 0.15557359307359309, + "consistency_score_6": 0.11931818181818181, + "consistency_score_7": 0.09659090909090909, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.4090909090909091, + "Spanish,Vietnamese": 0.5454545454545454, + "Spanish,Indonesian": 0.5454545454545454, + "Spanish,Malay": 0.5340909090909091, + "Spanish,Filipino": 0.5227272727272727, + "Spanish,English": 0.6022727272727273, + "Chinese,Vietnamese": 0.5056818181818182, + "Chinese,Indonesian": 0.39204545454545453, + "Chinese,Malay": 0.4147727272727273, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,English": 0.4602272727272727, + "Vietnamese,Indonesian": 0.5227272727272727, + "Vietnamese,Malay": 0.5340909090909091, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.5511363636363636, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.4147727272727273, + "Indonesian,English": 0.48863636363636365, + "Malay,Filipino": 0.4318181818181818, + "Malay,English": 0.4715909090909091, + "Filipino,English": 0.44886363636363635 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.30113636363636365, + "Spanish,Chinese,Indonesian": 0.26136363636363635, + "Spanish,Chinese,Malay": 0.2556818181818182, + "Spanish,Chinese,Filipino": 0.2215909090909091, + "Spanish,Chinese,English": 0.3068181818181818, + "Spanish,Vietnamese,Indonesian": 0.375, + "Spanish,Vietnamese,Malay": 0.35795454545454547, + "Spanish,Vietnamese,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,English": 0.4034090909090909, + "Spanish,Indonesian,Malay": 0.4034090909090909, + "Spanish,Indonesian,Filipino": 0.30113636363636365, + "Spanish,Indonesian,English": 0.39204545454545453, + "Spanish,Malay,Filipino": 0.3125, + "Spanish,Malay,English": 0.36363636363636365, + "Spanish,Filipino,English": 0.3352272727272727, + "Chinese,Vietnamese,Indonesian": 0.29545454545454547, + "Chinese,Vietnamese,Malay": 0.30113636363636365, + "Chinese,Vietnamese,Filipino": 0.2556818181818182, + "Chinese,Vietnamese,English": 0.32386363636363635, + "Chinese,Indonesian,Malay": 0.26136363636363635, + "Chinese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Indonesian,English": 0.25, + "Chinese,Malay,Filipino": 0.21022727272727273, + "Chinese,Malay,English": 0.24431818181818182, + "Chinese,Filipino,English": 0.2159090909090909, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,English": 0.35795454545454547, + "Vietnamese,Malay,Filipino": 0.29545454545454547, + "Vietnamese,Malay,English": 0.32954545454545453, + "Vietnamese,Filipino,English": 0.3125, + "Indonesian,Malay,Filipino": 0.2897727272727273, + "Indonesian,Malay,English": 0.32386363636363635, + "Indonesian,Filipino,English": 0.26704545454545453, + "Malay,Filipino,English": 0.24431818181818182 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Malay": 0.20454545454545456, + "Spanish,Chinese,Vietnamese,Filipino": 0.17613636363636365, + "Spanish,Chinese,Vietnamese,English": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Malay": 0.20454545454545456, + "Spanish,Chinese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Indonesian,English": 0.20454545454545456, + "Spanish,Chinese,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Filipino,English": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Spanish,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,English": 0.30113636363636365, + "Spanish,Vietnamese,Malay,Filipino": 0.2215909090909091, + "Spanish,Vietnamese,Malay,English": 0.2727272727272727, + "Spanish,Vietnamese,Filipino,English": 0.26136363636363635, + "Spanish,Indonesian,Malay,Filipino": 0.23295454545454544, + "Spanish,Indonesian,Malay,English": 0.29545454545454547, + "Spanish,Indonesian,Filipino,English": 0.24431818181818182, + "Spanish,Malay,Filipino,English": 0.22727272727272727, + "Chinese,Vietnamese,Indonesian,Malay": 0.2215909090909091, + "Chinese,Vietnamese,Indonesian,Filipino": 0.17613636363636365, + "Chinese,Vietnamese,Indonesian,English": 0.20454545454545456, + "Chinese,Vietnamese,Malay,Filipino": 0.17045454545454544, + "Chinese,Vietnamese,Malay,English": 0.19318181818181818, + "Chinese,Vietnamese,Filipino,English": 0.1875, + "Chinese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Indonesian,Malay,English": 0.17045454545454544, + "Chinese,Indonesian,Filipino,English": 0.14772727272727273, + "Chinese,Malay,Filipino,English": 0.14204545454545456, + "Vietnamese,Indonesian,Malay,Filipino": 0.22727272727272727, + "Vietnamese,Indonesian,Malay,English": 0.25, + "Vietnamese,Indonesian,Filipino,English": 0.2215909090909091, + "Vietnamese,Malay,Filipino,English": 0.19886363636363635, + "Indonesian,Malay,Filipino,English": 0.19318181818181818 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.13068181818181818, + "Spanish,Chinese,Vietnamese,Malay,English": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.13068181818181818, + "Spanish,Chinese,Indonesian,Malay,English": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Filipino,English": 0.14204545454545456, + "Spanish,Chinese,Malay,Filipino,English": 0.13636363636363635, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Malay,Filipino,English": 0.1875, + "Spanish,Indonesian,Malay,Filipino,English": 0.1875, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.14204545454545456, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.13068181818181818, + "Chinese,Vietnamese,Malay,Filipino,English": 0.11931818181818182, + "Chinese,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1590909090909091 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.10795454545454546, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.125, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.125, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.11363636363636363, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.11363636363636363, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.1534090909090909, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09659090909090909 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.09659090909090909 + } + }, + "AC3_2": 0.41017368244633357, + "AC3_3": 0.3265869718818157, + "AC3_4": 0.26475894296086744, + "AC3_5": 0.21628523910870737, + "AC3_6": 0.17856864877926584, + "AC3_7": 0.15183534986825256 + }, + "prompt_4": { + "overall_acc": 0.37905844155844154, + "language_acc": { + "Spanish": 0.39204545454545453, + "Chinese": 0.4034090909090909, + "Vietnamese": 0.375, + "Indonesian": 0.38636363636363635, + "Malay": 0.375, + "Filipino": 0.3068181818181818, + "English": 0.4147727272727273 + }, + "consistency_score_2": 0.5016233766233766, + "consistency_score_3": 0.3220779220779221, + "consistency_score_4": 0.23522727272727276, + "consistency_score_5": 0.18560606060606064, + "consistency_score_6": 0.15503246753246755, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.48295454545454547, + "Spanish,Vietnamese": 0.5454545454545454, + "Spanish,Indonesian": 0.5170454545454546, + "Spanish,Malay": 0.5, + "Spanish,Filipino": 0.4772727272727273, + "Spanish,English": 0.5795454545454546, + "Chinese,Vietnamese": 0.5170454545454546, + "Chinese,Indonesian": 0.42045454545454547, + "Chinese,Malay": 0.4659090909090909, + "Chinese,Filipino": 0.36363636363636365, + "Chinese,English": 0.5056818181818182, + "Vietnamese,Indonesian": 0.5852272727272727, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.5909090909090909, + "Indonesian,Malay": 0.5852272727272727, + "Indonesian,Filipino": 0.44886363636363635, + "Indonesian,English": 0.48295454545454547, + "Malay,Filipino": 0.44886363636363635, + "Malay,English": 0.5, + "Filipino,English": 0.4715909090909091 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.32954545454545453, + "Spanish,Chinese,Indonesian": 0.2897727272727273, + "Spanish,Chinese,Malay": 0.3125, + "Spanish,Chinese,Filipino": 0.24431818181818182, + "Spanish,Chinese,English": 0.36363636363636365, + "Spanish,Vietnamese,Indonesian": 0.39204545454545453, + "Spanish,Vietnamese,Malay": 0.36363636363636365, + "Spanish,Vietnamese,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,English": 0.4147727272727273, + "Spanish,Indonesian,Malay": 0.3693181818181818, + "Spanish,Indonesian,Filipino": 0.3125, + "Spanish,Indonesian,English": 0.375, + "Spanish,Malay,Filipino": 0.2727272727272727, + "Spanish,Malay,English": 0.375, + "Spanish,Filipino,English": 0.32954545454545453, + "Chinese,Vietnamese,Indonesian": 0.3181818181818182, + "Chinese,Vietnamese,Malay": 0.32954545454545453, + "Chinese,Vietnamese,Filipino": 0.26136363636363635, + "Chinese,Vietnamese,English": 0.35795454545454547, + "Chinese,Indonesian,Malay": 0.3125, + "Chinese,Indonesian,Filipino": 0.2159090909090909, + "Chinese,Indonesian,English": 0.30113636363636365, + "Chinese,Malay,Filipino": 0.23295454545454544, + "Chinese,Malay,English": 0.3125, + "Chinese,Filipino,English": 0.23863636363636365, + "Vietnamese,Indonesian,Malay": 0.4090909090909091, + "Vietnamese,Indonesian,Filipino": 0.32954545454545453, + "Vietnamese,Indonesian,English": 0.38636363636363635, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,English": 0.35795454545454547, + "Vietnamese,Filipino,English": 0.3409090909090909, + "Indonesian,Malay,Filipino": 0.29545454545454547, + "Indonesian,Malay,English": 0.3352272727272727, + "Indonesian,Filipino,English": 0.2840909090909091, + "Malay,Filipino,English": 0.2727272727272727 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.23863636363636365, + "Spanish,Chinese,Vietnamese,Malay": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Filipino": 0.20454545454545456, + "Spanish,Chinese,Vietnamese,English": 0.2784090909090909, + "Spanish,Chinese,Indonesian,Malay": 0.23295454545454544, + "Spanish,Chinese,Indonesian,Filipino": 0.17613636363636365, + "Spanish,Chinese,Indonesian,English": 0.25, + "Spanish,Chinese,Malay,Filipino": 0.19318181818181818, + "Spanish,Chinese,Malay,English": 0.26136363636363635, + "Spanish,Chinese,Filipino,English": 0.20454545454545456, + "Spanish,Vietnamese,Indonesian,Malay": 0.30113636363636365, + "Spanish,Vietnamese,Indonesian,Filipino": 0.2556818181818182, + "Spanish,Vietnamese,Indonesian,English": 0.3181818181818182, + "Spanish,Vietnamese,Malay,Filipino": 0.22727272727272727, + "Spanish,Vietnamese,Malay,English": 0.30113636363636365, + "Spanish,Vietnamese,Filipino,English": 0.2784090909090909, + "Spanish,Indonesian,Malay,Filipino": 0.2159090909090909, + "Spanish,Indonesian,Malay,English": 0.2897727272727273, + "Spanish,Indonesian,Filipino,English": 0.25, + "Spanish,Malay,Filipino,English": 0.23295454545454544, + "Chinese,Vietnamese,Indonesian,Malay": 0.25, + "Chinese,Vietnamese,Indonesian,Filipino": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,English": 0.25, + "Chinese,Vietnamese,Malay,Filipino": 0.19886363636363635, + "Chinese,Vietnamese,Malay,English": 0.23863636363636365, + "Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Chinese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Chinese,Indonesian,Malay,English": 0.23295454545454544, + "Chinese,Indonesian,Filipino,English": 0.17045454545454544, + "Chinese,Malay,Filipino,English": 0.18181818181818182, + "Vietnamese,Indonesian,Malay,Filipino": 0.23863636363636365, + "Vietnamese,Indonesian,Malay,English": 0.2840909090909091, + "Vietnamese,Indonesian,Filipino,English": 0.24431818181818182, + "Vietnamese,Malay,Filipino,English": 0.22727272727272727, + "Indonesian,Malay,Filipino,English": 0.21022727272727273 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19318181818181818, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.1590909090909091, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.2159090909090909, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.18181818181818182, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Indonesian,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Malay,Filipino,English": 0.17045454545454544, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.1875, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.2556818181818182, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.22727272727272727, + "Spanish,Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.19318181818181818, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1590909090909091, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.1534090909090909, + "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.1875 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.18181818181818182, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.1534090909090909, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.1534090909090909, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14204545454545456, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.18181818181818182, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + } + }, + "AC3_2": 0.43181219697053874, + "AC3_3": 0.3482528121994379, + "AC3_4": 0.290304271526833, + "AC3_5": 0.24919414553238128, + "AC3_6": 0.2200612836598984, + "AC3_7": 0.20057265565185162 + }, + "prompt_5": { + "overall_acc": 0.37987012987012997, + "language_acc": { + "Spanish": 0.4375, + "Chinese": 0.4147727272727273, + "Vietnamese": 0.39204545454545453, + "Indonesian": 0.3465909090909091, + "Malay": 0.38636363636363635, + "Filipino": 0.30113636363636365, + "English": 0.3806818181818182 + }, + "consistency_score_2": 0.510551948051948, + "consistency_score_3": 0.3287337662337663, + "consistency_score_4": 0.23912337662337663, + "consistency_score_5": 0.18641774891774893, + "consistency_score_6": 0.15422077922077923, + "consistency_score_7": 0.13636363636363635, + "detailed_consistency_score": { + "2_combine": { + "Spanish,Chinese": 0.5056818181818182, + "Spanish,Vietnamese": 0.5681818181818182, + "Spanish,Indonesian": 0.5284090909090909, + "Spanish,Malay": 0.5284090909090909, + "Spanish,Filipino": 0.5113636363636364, + "Spanish,English": 0.6363636363636364, + "Chinese,Vietnamese": 0.5340909090909091, + "Chinese,Indonesian": 0.4715909090909091, + "Chinese,Malay": 0.5056818181818182, + "Chinese,Filipino": 0.375, + "Chinese,English": 0.5113636363636364, + "Vietnamese,Indonesian": 0.5738636363636364, + "Vietnamese,Malay": 0.5511363636363636, + "Vietnamese,Filipino": 0.4943181818181818, + "Vietnamese,English": 0.5909090909090909, + "Indonesian,Malay": 0.5568181818181818, + "Indonesian,Filipino": 0.4090909090909091, + "Indonesian,English": 0.4772727272727273, + "Malay,Filipino": 0.45454545454545453, + "Malay,English": 0.4659090909090909, + "Filipino,English": 0.4715909090909091 + }, + "3_combine": { + "Spanish,Chinese,Vietnamese": 0.35795454545454547, + "Spanish,Chinese,Indonesian": 0.3125, + "Spanish,Chinese,Malay": 0.32954545454545453, + "Spanish,Chinese,Filipino": 0.26704545454545453, + "Spanish,Chinese,English": 0.3806818181818182, + "Spanish,Vietnamese,Indonesian": 0.39204545454545453, + "Spanish,Vietnamese,Malay": 0.39204545454545453, + "Spanish,Vietnamese,Filipino": 0.32954545454545453, + "Spanish,Vietnamese,English": 0.4602272727272727, + "Spanish,Indonesian,Malay": 0.3693181818181818, + "Spanish,Indonesian,Filipino": 0.30113636363636365, + "Spanish,Indonesian,English": 0.375, + "Spanish,Malay,Filipino": 0.3068181818181818, + "Spanish,Malay,English": 0.38636363636363635, + "Spanish,Filipino,English": 0.3522727272727273, + "Chinese,Vietnamese,Indonesian": 0.3465909090909091, + "Chinese,Vietnamese,Malay": 0.3409090909090909, + "Chinese,Vietnamese,Filipino": 0.2727272727272727, + "Chinese,Vietnamese,English": 0.38636363636363635, + "Chinese,Indonesian,Malay": 0.3181818181818182, + "Chinese,Indonesian,Filipino": 0.23295454545454544, + "Chinese,Indonesian,English": 0.3068181818181818, + "Chinese,Malay,Filipino": 0.24431818181818182, + "Chinese,Malay,English": 0.29545454545454547, + "Chinese,Filipino,English": 0.23863636363636365, + "Vietnamese,Indonesian,Malay": 0.39204545454545453, + "Vietnamese,Indonesian,Filipino": 0.30113636363636365, + "Vietnamese,Indonesian,English": 0.39204545454545453, + "Vietnamese,Malay,Filipino": 0.3068181818181818, + "Vietnamese,Malay,English": 0.3693181818181818, + "Vietnamese,Filipino,English": 0.32386363636363635, + "Indonesian,Malay,Filipino": 0.2727272727272727, + "Indonesian,Malay,English": 0.32954545454545453, + "Indonesian,Filipino,English": 0.26136363636363635, + "Malay,Filipino,English": 0.26136363636363635 + }, + "4_combine": { + "Spanish,Chinese,Vietnamese,Indonesian": 0.26704545454545453, + "Spanish,Chinese,Vietnamese,Malay": 0.24431818181818182, + "Spanish,Chinese,Vietnamese,Filipino": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,English": 0.3125, + "Spanish,Chinese,Indonesian,Malay": 0.23863636363636365, + "Spanish,Chinese,Indonesian,Filipino": 0.19886363636363635, + "Spanish,Chinese,Indonesian,English": 0.2556818181818182, + "Spanish,Chinese,Malay,Filipino": 0.21022727272727273, + "Spanish,Chinese,Malay,English": 0.25, + "Spanish,Chinese,Filipino,English": 0.2215909090909091, + "Spanish,Vietnamese,Indonesian,Malay": 0.29545454545454547, + "Spanish,Vietnamese,Indonesian,Filipino": 0.23295454545454544, + "Spanish,Vietnamese,Indonesian,English": 0.32954545454545453, + "Spanish,Vietnamese,Malay,Filipino": 0.23863636363636365, + "Spanish,Vietnamese,Malay,English": 0.3181818181818182, + "Spanish,Vietnamese,Filipino,English": 0.2840909090909091, + "Spanish,Indonesian,Malay,Filipino": 0.2215909090909091, + "Spanish,Indonesian,Malay,English": 0.2784090909090909, + "Spanish,Indonesian,Filipino,English": 0.23863636363636365, + "Spanish,Malay,Filipino,English": 0.24431818181818182, + "Chinese,Vietnamese,Indonesian,Malay": 0.26136363636363635, + "Chinese,Vietnamese,Indonesian,Filipino": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,English": 0.26704545454545453, + "Chinese,Vietnamese,Malay,Filipino": 0.19318181818181818, + "Chinese,Vietnamese,Malay,English": 0.24431818181818182, + "Chinese,Vietnamese,Filipino,English": 0.21022727272727273, + "Chinese,Indonesian,Malay,Filipino": 0.18181818181818182, + "Chinese,Indonesian,Malay,English": 0.22727272727272727, + "Chinese,Indonesian,Filipino,English": 0.18181818181818182, + "Chinese,Malay,Filipino,English": 0.17613636363636365, + "Vietnamese,Indonesian,Malay,Filipino": 0.21022727272727273, + "Vietnamese,Indonesian,Malay,English": 0.2784090909090909, + "Vietnamese,Indonesian,Filipino,English": 0.22727272727272727, + "Vietnamese,Malay,Filipino,English": 0.2215909090909091, + "Indonesian,Malay,Filipino,English": 0.19886363636363635 + }, + "5_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay": 0.19886363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,English": 0.23295454545454544, + "Spanish,Chinese,Vietnamese,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,English": 0.21022727272727273, + "Spanish,Chinese,Vietnamese,Filipino,English": 0.19886363636363635, + "Spanish,Chinese,Indonesian,Malay,Filipino": 0.16477272727272727, + "Spanish,Chinese,Indonesian,Malay,English": 0.19318181818181818, + "Spanish,Chinese,Indonesian,Filipino,English": 0.17613636363636365, + "Spanish,Chinese,Malay,Filipino,English": 0.17613636363636365, + "Spanish,Vietnamese,Indonesian,Malay,Filipino": 0.17613636363636365, + "Spanish,Vietnamese,Indonesian,Malay,English": 0.23863636363636365, + "Spanish,Vietnamese,Indonesian,Filipino,English": 0.2159090909090909, + "Spanish,Vietnamese,Malay,Filipino,English": 0.21022727272727273, + "Spanish,Indonesian,Malay,Filipino,English": 0.1875, + "Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.1534090909090909, + "Chinese,Vietnamese,Indonesian,Malay,English": 0.19886363636363635, + "Chinese,Vietnamese,Indonesian,Filipino,English": 0.17045454545454544, + "Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Chinese,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Vietnamese,Indonesian,Malay,Filipino,English": 0.17045454545454544 + }, + "6_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino": 0.13636363636363635, + "Spanish,Chinese,Vietnamese,Indonesian,Malay,English": 0.17045454545454544, + "Spanish,Chinese,Vietnamese,Indonesian,Filipino,English": 0.16477272727272727, + "Spanish,Chinese,Vietnamese,Malay,Filipino,English": 0.1590909090909091, + "Spanish,Chinese,Indonesian,Malay,Filipino,English": 0.14772727272727273, + "Spanish,Vietnamese,Indonesian,Malay,Filipino,English": 0.16477272727272727, + "Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + }, + "7_combine": { + "Spanish,Chinese,Vietnamese,Indonesian,Malay,Filipino,English": 0.13636363636363635 + } + }, + "AC3_2": 0.4356213521631312, + "AC3_3": 0.35245682147924695, + "AC3_4": 0.2934952537153448, + "AC3_5": 0.2501008308020075, + "AC3_6": 0.21937788651006385, + "AC3_7": 0.20068610630760866 + } + }, + "sg_eval": { + "prompt_1": { + "accuracy": 0.5145631067961165 + }, + "prompt_2": { + "accuracy": 0.5242718446601942 + }, + "prompt_3": { + "accuracy": 0.5533980582524272 + }, + "prompt_4": { + "accuracy": 0.5242718446601942 + }, + "prompt_5": { + "accuracy": 0.5339805825242718 + } + }, + "cn_eval": { + "prompt_1": { + "accuracy": 0.37142857142857144 + }, + "prompt_2": { + "accuracy": 0.37142857142857144 + }, + "prompt_3": { + "accuracy": 0.3619047619047619 + }, + "prompt_4": { + "accuracy": 0.3523809523809524 + }, + "prompt_5": { + "accuracy": 0.38095238095238093 + } + }, + "us_eval": { + "prompt_1": { + "accuracy": 0.4672897196261682 + }, + "prompt_2": { + "accuracy": 0.4485981308411215 + }, + "prompt_3": { + "accuracy": 0.48598130841121495 + }, + "prompt_4": { + "accuracy": 0.4766355140186916 + }, + "prompt_5": { + "accuracy": 0.4205607476635514 + } + }, + "ph_eval": { + "prompt_1": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.6, + "history": 0.4666666666666667, + "literature": 0.4, + "politics": 0.4, + "culture": 0.7, + "film": 0.4, + "law": 0.1, + "geography": 0.4 + } + }, + "prompt_2": { + "accuracy": 0.41, + "category_acc": { + "brand": 0.3, + "demographics": 0.0, + "biology": 0.7, + "history": 0.4, + "literature": 0.4, + "politics": 0.2, + "culture": 0.7, + "film": 0.6, + "law": 0.2, + "geography": 0.4 + } + }, + "prompt_3": { + "accuracy": 0.44, + "category_acc": { + "brand": 0.3, + "demographics": 0.4, + "biology": 0.5, + "history": 0.5333333333333333, + "literature": 0.4, + "politics": 0.3, + "culture": 0.7, + "film": 0.4, + "law": 0.3, + "geography": 0.5 + } + }, + "prompt_4": { + "accuracy": 0.4, + "category_acc": { + "brand": 0.2, + "demographics": 0.0, + "biology": 0.6, + "history": 0.3333333333333333, + "literature": 0.3, + "politics": 0.3, + "culture": 0.6, + "film": 0.5, + "law": 0.6, + "geography": 0.4 + } + }, + "prompt_5": { + "accuracy": 0.38, + "category_acc": { + "brand": 0.4, + "demographics": 0.2, + "biology": 0.5, + "history": 0.4666666666666667, + "literature": 0.3, + "politics": 0.3, + "culture": 0.6, + "film": 0.4, + "law": 0.1, + "geography": 0.4 + } + } + }, + "sing2eng": { + "prompt_1": { + "bleu_score": 0.15501284774251278 + }, + "prompt_2": { + "bleu_score": 0.15198585128851483 + }, + "prompt_3": { + "bleu_score": 0.15081158203998288 + }, + "prompt_4": { + "bleu_score": 0.15210517965664572 + }, + "prompt_5": { + "bleu_score": 0.12416601209312363 + } + }, + "flores_ind2eng": { + "prompt_1": { + "bleu_score": 0.12538511120752693 + }, + "prompt_2": { + "bleu_score": 0.16192990491157527 + }, + "prompt_3": { + "bleu_score": 0.15766460992501238 + }, + "prompt_4": { + "bleu_score": 0.14443671193775645 + }, + "prompt_5": { + "bleu_score": 0.14643957455420825 + } + }, + "flores_vie2eng": { + "prompt_1": { + "bleu_score": 0.10643823932021736 + }, + "prompt_2": { + "bleu_score": 0.13501374407353342 + }, + "prompt_3": { + "bleu_score": 0.1323068208710863 + }, + "prompt_4": { + "bleu_score": 0.12773445658677324 + }, + "prompt_5": { + "bleu_score": 0.12708312423895515 + } + }, + "flores_zho2eng": { + "prompt_1": { + "bleu_score": 0.10376840053827412 + }, + "prompt_2": { + "bleu_score": 0.11120920584140642 + }, + "prompt_3": { + "bleu_score": 0.10930912476728209 + }, + "prompt_4": { + "bleu_score": 0.10573670220898011 + }, + "prompt_5": { + "bleu_score": 0.11153238338109045 + } + }, + "flores_zsm2eng": { + "prompt_1": { + "bleu_score": 0.10950210929702053 + }, + "prompt_2": { + "bleu_score": 0.15023363197616493 + }, + "prompt_3": { + "bleu_score": 0.1393033398727237 + }, + "prompt_4": { + "bleu_score": 0.13261968387717776 + }, + "prompt_5": { + "bleu_score": 0.12920092254734256 + } + }, + "mmlu": { + "prompt_1": { + "accuracy": 0.4515752625437573 + }, + "prompt_2": { + "accuracy": 0.3652275379229872 + }, + "prompt_3": { + "accuracy": 0.4574095682613769 + }, + "prompt_4": { + "accuracy": 0.47141190198366395 + }, + "prompt_5": { + "accuracy": 0.4632438739789965 + } + }, + "mmlu_full": { + "prompt_1": { + "accuracy": 0.4366821594565606, + "category_acc": { + "high_school_european_history": 0.20121951219512196, + "business_ethics": 0.48484848484848486, + "clinical_knowledge": 0.4734848484848485, + "medical_genetics": 0.48484848484848486, + "high_school_us_history": 0.22660098522167488, + "high_school_physics": 0.29333333333333333, + "high_school_world_history": 0.2966101694915254, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.459915611814346, + "econometrics": 0.35398230088495575, + "college_computer_science": 0.32323232323232326, + "high_school_biology": 0.5598705501618123, + "abstract_algebra": 0.16161616161616163, + "professional_accounting": 0.3807829181494662, + "philosophy": 0.5451612903225806, + "professional_medicine": 0.22878228782287824, + "nutrition": 0.4557377049180328, + "global_facts": 0.31313131313131315, + "machine_learning": 0.36036036036036034, + "security_studies": 0.4385245901639344, + "public_relations": 0.5963302752293578, + "professional_psychology": 0.4369885433715221, + "prehistory": 0.5046439628482973, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5230769230769231, + "college_medicine": 0.38372093023255816, + "high_school_government_and_politics": 0.5989583333333334, + "college_chemistry": 0.36363636363636365, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6395939086294417, + "elementary_mathematics": 0.2440318302387268, + "human_aging": 0.5900900900900901, + "college_mathematics": 0.20202020202020202, + "high_school_psychology": 0.6727941176470589, + "formal_logic": 0.336, + "high_school_statistics": 0.2651162790697674, + "international_law": 0.6, + "high_school_mathematics": 0.22676579925650558, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.44017094017094016, + "miscellaneous": 0.6355498721227621, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.7553648068669528, + "professional_law": 0.34833659491193736, + "management": 0.6176470588235294, + "college_physics": 0.27722772277227725, + "jurisprudence": 0.6074766355140186, + "world_religions": 0.6411764705882353, + "sociology": 0.54, + "us_foreign_policy": 0.5858585858585859, + "high_school_macroeconomics": 0.4652956298200514, + "computer_security": 0.5757575757575758, + "moral_scenarios": 0.2606263982102908, + "moral_disputes": 0.4927536231884058, + "electrical_engineering": 0.3819444444444444, + "astronomy": 0.47019867549668876, + "college_biology": 0.5874125874125874 + } + }, + "prompt_2": { + "accuracy": 0.3429388630675724, + "category_acc": { + "high_school_european_history": 0.24390243902439024, + "business_ethics": 0.5454545454545454, + "clinical_knowledge": 0.3446969696969697, + "medical_genetics": 0.3939393939393939, + "high_school_us_history": 0.27586206896551724, + "high_school_physics": 0.2733333333333333, + "high_school_world_history": 0.2584745762711864, + "virology": 0.2727272727272727, + "high_school_microeconomics": 0.4177215189873418, + "econometrics": 0.3185840707964602, + "college_computer_science": 0.32323232323232326, + "high_school_biology": 0.4110032362459547, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.35587188612099646, + "philosophy": 0.31290322580645163, + "professional_medicine": 0.24723247232472326, + "nutrition": 0.3442622950819672, + "global_facts": 0.30303030303030304, + "machine_learning": 0.32432432432432434, + "security_studies": 0.45491803278688525, + "public_relations": 0.43119266055045874, + "professional_psychology": 0.36661211129296234, + "prehistory": 0.33126934984520123, + "anatomy": 0.23880597014925373, + "human_sexuality": 0.3076923076923077, + "college_medicine": 0.29651162790697677, + "high_school_government_and_politics": 0.4635416666666667, + "college_chemistry": 0.23232323232323232, + "logical_fallacies": 0.4691358024691358, + "high_school_geography": 0.4619289340101523, + "elementary_mathematics": 0.20424403183023873, + "human_aging": 0.31981981981981983, + "college_mathematics": 0.29292929292929293, + "high_school_psychology": 0.4834558823529412, + "formal_logic": 0.344, + "high_school_statistics": 0.31627906976744186, + "international_law": 0.5416666666666666, + "high_school_mathematics": 0.24535315985130113, + "high_school_computer_science": 0.41414141414141414, + "conceptual_physics": 0.26495726495726496, + "miscellaneous": 0.3145780051150895, + "high_school_chemistry": 0.297029702970297, + "marketing": 0.6223175965665236, + "professional_law": 0.3359425962165688, + "management": 0.38235294117647056, + "college_physics": 0.24752475247524752, + "jurisprudence": 0.4953271028037383, + "world_religions": 0.35294117647058826, + "sociology": 0.485, + "us_foreign_policy": 0.43434343434343436, + "high_school_macroeconomics": 0.3393316195372751, + "computer_security": 0.494949494949495, + "moral_scenarios": 0.22483221476510068, + "moral_disputes": 0.3884057971014493, + "electrical_engineering": 0.2916666666666667, + "astronomy": 0.33774834437086093, + "college_biology": 0.3356643356643357 + } + }, + "prompt_3": { + "accuracy": 0.4423310690025027, + "category_acc": { + "high_school_european_history": 0.22560975609756098, + "business_ethics": 0.5353535353535354, + "clinical_knowledge": 0.48484848484848486, + "medical_genetics": 0.46464646464646464, + "high_school_us_history": 0.2561576354679803, + "high_school_physics": 0.26666666666666666, + "high_school_world_history": 0.2584745762711864, + "virology": 0.3878787878787879, + "high_school_microeconomics": 0.48523206751054854, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.3434343434343434, + "high_school_biology": 0.5760517799352751, + "abstract_algebra": 0.24242424242424243, + "professional_accounting": 0.33807829181494664, + "philosophy": 0.5548387096774193, + "professional_medicine": 0.25092250922509224, + "nutrition": 0.4459016393442623, + "global_facts": 0.23232323232323232, + "machine_learning": 0.40540540540540543, + "security_studies": 0.46311475409836067, + "public_relations": 0.6697247706422018, + "professional_psychology": 0.44844517184942717, + "prehistory": 0.5108359133126935, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5153846153846153, + "college_medicine": 0.38372093023255816, + "high_school_government_and_politics": 0.6041666666666666, + "college_chemistry": 0.3838383838383838, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6446700507614214, + "elementary_mathematics": 0.22811671087533156, + "human_aging": 0.6486486486486487, + "college_mathematics": 0.24242424242424243, + "high_school_psychology": 0.6727941176470589, + "formal_logic": 0.352, + "high_school_statistics": 0.26976744186046514, + "international_law": 0.6083333333333333, + "high_school_mathematics": 0.24907063197026022, + "high_school_computer_science": 0.42424242424242425, + "conceptual_physics": 0.45726495726495725, + "miscellaneous": 0.6662404092071611, + "high_school_chemistry": 0.3465346534653465, + "marketing": 0.7725321888412017, + "professional_law": 0.33268101761252444, + "management": 0.6764705882352942, + "college_physics": 0.32673267326732675, + "jurisprudence": 0.6448598130841121, + "world_religions": 0.6882352941176471, + "sociology": 0.53, + "us_foreign_policy": 0.6262626262626263, + "high_school_macroeconomics": 0.4601542416452442, + "computer_security": 0.5656565656565656, + "moral_scenarios": 0.24496644295302014, + "moral_disputes": 0.527536231884058, + "electrical_engineering": 0.4027777777777778, + "astronomy": 0.40397350993377484, + "college_biology": 0.5944055944055944 + } + }, + "prompt_4": { + "accuracy": 0.44869503038970326, + "category_acc": { + "high_school_european_history": 0.27439024390243905, + "business_ethics": 0.5555555555555556, + "clinical_knowledge": 0.4659090909090909, + "medical_genetics": 0.5858585858585859, + "high_school_us_history": 0.30049261083743845, + "high_school_physics": 0.35333333333333333, + "high_school_world_history": 0.288135593220339, + "virology": 0.38181818181818183, + "high_school_microeconomics": 0.4767932489451477, + "econometrics": 0.30973451327433627, + "college_computer_science": 0.41414141414141414, + "high_school_biology": 0.5857605177993528, + "abstract_algebra": 0.2828282828282828, + "professional_accounting": 0.3665480427046263, + "philosophy": 0.5580645161290323, + "professional_medicine": 0.2656826568265683, + "nutrition": 0.4491803278688525, + "global_facts": 0.36363636363636365, + "machine_learning": 0.36936936936936937, + "security_studies": 0.45081967213114754, + "public_relations": 0.6788990825688074, + "professional_psychology": 0.45662847790507366, + "prehistory": 0.5232198142414861, + "anatomy": 0.47761194029850745, + "human_sexuality": 0.5692307692307692, + "college_medicine": 0.4127906976744186, + "high_school_government_and_politics": 0.6041666666666666, + "college_chemistry": 0.3434343434343434, + "logical_fallacies": 0.5493827160493827, + "high_school_geography": 0.6598984771573604, + "elementary_mathematics": 0.27586206896551724, + "human_aging": 0.5990990990990991, + "college_mathematics": 0.1919191919191919, + "high_school_psychology": 0.6746323529411765, + "formal_logic": 0.328, + "high_school_statistics": 0.31627906976744186, + "international_law": 0.5583333333333333, + "high_school_mathematics": 0.23048327137546468, + "high_school_computer_science": 0.4444444444444444, + "conceptual_physics": 0.43162393162393164, + "miscellaneous": 0.6521739130434783, + "high_school_chemistry": 0.3415841584158416, + "marketing": 0.759656652360515, + "professional_law": 0.3385518590998043, + "management": 0.6764705882352942, + "college_physics": 0.2871287128712871, + "jurisprudence": 0.6542056074766355, + "world_religions": 0.711764705882353, + "sociology": 0.58, + "us_foreign_policy": 0.6767676767676768, + "high_school_macroeconomics": 0.4473007712082262, + "computer_security": 0.5555555555555556, + "moral_scenarios": 0.23154362416107382, + "moral_disputes": 0.5217391304347826, + "electrical_engineering": 0.4513888888888889, + "astronomy": 0.4105960264900662, + "college_biology": 0.5804195804195804 + } + }, + "prompt_5": -1 + }, + "c_eval": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "c_eval_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "cmmlu_full": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 + }, + "zbench": { + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "ind_emotion": { - "prompt_1": { - "accuracy": 0.5522727272727272 - }, - "prompt_2": { - "accuracy": 0.22045454545454546 - }, - "prompt_3": { - "accuracy": 0.19318181818181818 - }, - "prompt_4": { - "accuracy": 0.6022727272727273 - }, - "prompt_5": { - "accuracy": 0.5363636363636364 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "ocnli": { - "prompt_1": { - "accuracy": 0.3423728813559322 - }, - "prompt_2": { - "accuracy": 0.34576271186440677 - }, - "prompt_3": { - "accuracy": 0.32610169491525426 - }, - "prompt_4": { - "accuracy": 0.3423728813559322 - }, - "prompt_5": { - "accuracy": 0.32508474576271185 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "c3": { - "prompt_1": { - "accuracy": 0.6619296933433059 - }, - "prompt_2": { - "accuracy": 0.6264023934181002 - }, - "prompt_3": { - "accuracy": 0.6024682124158564 - }, - "prompt_4": { - "accuracy": 0.6510845175766642 - }, - "prompt_5": { - "accuracy": 0.5314136125654451 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "dream": { - "prompt_1": { - "accuracy": 0.8677119059284665 - }, - "prompt_2": { - "accuracy": 0.8162665360117589 - }, - "prompt_3": { - "accuracy": 0.8451739343459088 - }, - "prompt_4": { - "accuracy": 0.8784909358157765 - }, - "prompt_5": { - "accuracy": 0.8784909358157765 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "samsum": { - "prompt_1": { - "rouge1": 0.3148466900867728, - "rouge2": 0.13439425963384796, - "rougeL": 0.2493723840004376, - "avg_rouge": 0.23287111124035278 - }, - "prompt_2": { - "rouge1": 0.3760284317692511, - "rouge2": 0.1703326819107988, - "rougeL": 0.30239763225528216, - "avg_rouge": 0.282919581978444 - }, - "prompt_3": { - "rouge1": 0.321710625396653, - "rouge2": 0.13296655744055622, - "rougeL": 0.2552227836573773, - "avg_rouge": 0.2366333221648622 - }, - "prompt_4": { - "rouge1": 0.3492817033666253, - "rouge2": 0.1568905106341149, - "rougeL": 0.27974950173421925, - "avg_rouge": 0.2619739052449865 - }, - "prompt_5": { - "rouge1": 0.3596885364704884, - "rouge2": 0.15550623284418724, - "rougeL": 0.2853950264312206, - "avg_rouge": 0.2668632652486321 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "dialogsum": { - "prompt_1": { - "rouge1": 0.2180485502100887, - "rouge2": 0.06097714440972195, - "rougeL": 0.16243716383480442, - "avg_rouge": 0.14715428615153836 - }, - "prompt_2": { - "rouge1": 0.21923428230963032, - "rouge2": 0.06078194919283872, - "rougeL": 0.16199188272866893, - "avg_rouge": 0.14733603807704598 - }, - "prompt_3": { - "rouge1": 0.21698867466943603, - "rouge2": 0.062382836391260764, - "rougeL": 0.16111354597032204, - "avg_rouge": 0.14682835234367295 - }, - "prompt_4": { - "rouge1": 0.22413012734377513, - "rouge2": 0.06340741342797845, - "rougeL": 0.16625918343029397, - "avg_rouge": 0.15126557473401583 - }, - "prompt_5": { - "rouge1": 0.19968281716481368, - "rouge2": 0.06461254234379582, - "rougeL": 0.15056622645650905, - "avg_rouge": 0.1382871953217062 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "sst2": { - "prompt_1": { - "accuracy": 0.8738532110091743 - }, - "prompt_2": { - "accuracy": 0.7809633027522935 - }, - "prompt_3": { - "accuracy": 0.8772935779816514 - }, - "prompt_4": { - "accuracy": 0.8692660550458715 - }, - "prompt_5": { - "accuracy": 0.5527522935779816 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "cola": { - "prompt_1": { - "accuracy": 0.3317353787152445 - }, - "prompt_2": { - "accuracy": 0.3211888782358581 - }, - "prompt_3": { - "accuracy": 0.36145733461169705 - }, - "prompt_4": { - "accuracy": 0.3231064237775647 - }, - "prompt_5": { - "accuracy": 0.5589645254074784 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "qqp": { - "prompt_1": { - "accuracy": 0.602 - }, - "prompt_2": { - "accuracy": 0.533 - }, - "prompt_3": { - "accuracy": 0.553 - }, - "prompt_4": { - "accuracy": 0.596 - }, - "prompt_5": { - "accuracy": 0.569 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "mnli": { - "prompt_1": { - "accuracy": 0.406 - }, - "prompt_2": { - "accuracy": 0.3745 - }, - "prompt_3": { - "accuracy": 0.404 - }, - "prompt_4": { - "accuracy": 0.463 - }, - "prompt_5": { - "accuracy": 0.447 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "qnli": { - "prompt_1": { - "accuracy": 0.513 - }, - "prompt_2": { - "accuracy": 0.5325 - }, - "prompt_3": { - "accuracy": 0.524 - }, - "prompt_4": { - "accuracy": 0.5205 - }, - "prompt_5": { - "accuracy": 0.531 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "wnli": { - "prompt_1": { - "accuracy": 0.6056338028169014 - }, - "prompt_2": { - "accuracy": 0.5774647887323944 - }, - "prompt_3": { - "accuracy": 0.5915492957746479 - }, - "prompt_4": { - "accuracy": 0.43661971830985913 - }, - "prompt_5": { - "accuracy": 0.5070422535211268 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "rte": { - "prompt_1": { - "accuracy": 0.6462093862815884 - }, - "prompt_2": { - "accuracy": 0.5703971119133574 - }, - "prompt_3": { - "accuracy": 0.6173285198555957 - }, - "prompt_4": { - "accuracy": 0.5595667870036101 - }, - "prompt_5": { - "accuracy": 0.628158844765343 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "mrpc": { - "prompt_1": { - "accuracy": 0.6838235294117647 - }, - "prompt_2": { - "accuracy": 0.6053921568627451 - }, - "prompt_3": { - "accuracy": 0.6642156862745098 - }, - "prompt_4": { - "accuracy": 0.6715686274509803 - }, - "prompt_5": { - "accuracy": 0.6740196078431373 - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 }, "indommlu": { - "prompt_1": { - "accuracy": 0.38914480272381335, - "category_acc": { - "History": 0.39959839357429716, - "Geography": 0.3551020408163265, - "Lampungic": 0.32653061224489793, - "Social science": 0.4574290484140234, - "Balinese": 0.3099787685774947, - "Makassarese": 0.3118279569892473, - "Banjarese": 0.3402777777777778, - "Chemistry": 0.2846715328467153, - "Biology": 0.421301775147929, - "Science": 0.43343653250773995, - "Christian religion": 0.44776119402985076, - "Art": 0.3910149750415973, - "Islam religion": 0.41963015647226176, - "Hindu religion": 0.42, - "Madurese": 0.3050847457627119, - "Sport": 0.4594594594594595, - "Indonesian language": 0.4392901618929016, - "Physics": 0.3414141414141414, - "Minangkabau culture": 0.32663316582914576, - "Dayak language": 0.25688073394495414, - "Sociology": 0.4012096774193548, - "Economy": 0.3709016393442623, - "Sundanese": 0.34485738980121, - "Javanese": 0.3094758064516129, - "Civic education": 0.4434907010014306 - } - }, - "prompt_2": { - "accuracy": 0.45116496428333, - "category_acc": { - "History": 0.42570281124497994, - "Geography": 0.4122448979591837, - "Lampungic": 0.3469387755102041, - "Social science": 0.6243739565943238, - "Balinese": 0.31422505307855625, - "Makassarese": 0.3225806451612903, - "Banjarese": 0.3680555555555556, - "Chemistry": 0.2934306569343066, - "Biology": 0.46272189349112425, - "Science": 0.5583075335397317, - "Christian religion": 0.5323383084577115, - "Art": 0.5124792013311148, - "Islam religion": 0.5092460881934566, - "Hindu religion": 0.5, - "Madurese": 0.3254237288135593, - "Sport": 0.4594594594594595, - "Indonesian language": 0.5065379825653799, - "Physics": 0.39595959595959596, - "Minangkabau culture": 0.4020100502512563, - "Dayak language": 0.28440366972477066, - "Sociology": 0.4435483870967742, - "Economy": 0.430327868852459, - "Sundanese": 0.3863439930855661, - "Javanese": 0.34173387096774194, - "Civic education": 0.51931330472103 - } - }, - "prompt_3": { - "accuracy": 0.42285866880299083, - "category_acc": { - "History": 0.39558232931726905, - "Geography": 0.3979591836734694, - "Lampungic": 0.30612244897959184, - "Social science": 0.5709515859766278, - "Balinese": 0.31422505307855625, - "Makassarese": 0.3333333333333333, - "Banjarese": 0.3333333333333333, - "Chemistry": 0.2832116788321168, - "Biology": 0.44260355029585796, - "Science": 0.5159958720330238, - "Christian religion": 0.44776119402985076, - "Art": 0.4442595673876872, - "Islam religion": 0.45803698435277385, - "Hindu religion": 0.4266666666666667, - "Madurese": 0.29491525423728815, - "Sport": 0.44594594594594594, - "Indonesian language": 0.4853673723536737, - "Physics": 0.40404040404040403, - "Minangkabau culture": 0.36180904522613067, - "Dayak language": 0.28440366972477066, - "Sociology": 0.4153225806451613, - "Economy": 0.38114754098360654, - "Sundanese": 0.3560933448573898, - "Javanese": 0.3336693548387097, - "Civic education": 0.48068669527896996 - } - }, - "prompt_4": { - "accuracy": 0.38240202950797786, - "category_acc": { - "History": 0.3895582329317269, - "Geography": 0.35918367346938773, - "Lampungic": 0.2925170068027211, - "Social science": 0.4590984974958264, - "Balinese": 0.3333333333333333, - "Makassarese": 0.3279569892473118, - "Banjarese": 0.3194444444444444, - "Chemistry": 0.28905109489051095, - "Biology": 0.41775147928994083, - "Science": 0.3973168214654283, - "Christian religion": 0.417910447761194, - "Art": 0.3910149750415973, - "Islam religion": 0.40540540540540543, - "Hindu religion": 0.38, - "Madurese": 0.3288135593220339, - "Sport": 0.34459459459459457, - "Indonesian language": 0.42745952677459526, - "Physics": 0.3090909090909091, - "Minangkabau culture": 0.3417085427135678, - "Dayak language": 0.27522935779816515, - "Sociology": 0.39314516129032256, - "Economy": 0.3463114754098361, - "Sundanese": 0.3500432152117545, - "Javanese": 0.3316532258064516, - "Civic education": 0.44206008583690987 - } - }, - "prompt_5": { - "accuracy": 0.4477601976099873, - "category_acc": { - "History": 0.44377510040160645, - "Geography": 0.42448979591836733, - "Lampungic": 0.3129251700680272, - "Social science": 0.5893155258764607, - "Balinese": 0.28450106157112526, - "Makassarese": 0.34946236559139787, - "Banjarese": 0.3472222222222222, - "Chemistry": 0.3343065693430657, - "Biology": 0.47218934911242605, - "Science": 0.5758513931888545, - "Christian religion": 0.4925373134328358, - "Art": 0.49584026622296173, - "Islam religion": 0.4822190611664296, - "Hindu religion": 0.4533333333333333, - "Madurese": 0.29152542372881357, - "Sport": 0.4797297297297297, - "Indonesian language": 0.5003113325031133, - "Physics": 0.4121212121212121, - "Minangkabau culture": 0.3768844221105528, - "Dayak language": 0.24770642201834864, - "Sociology": 0.41935483870967744, - "Economy": 0.4323770491803279, - "Sundanese": 0.39066551426101986, - "Javanese": 0.35080645161290325, - "Civic education": 0.5021459227467812 - } - } + "prompt_1": -1, + "prompt_2": -1, + "prompt_3": -1, + "prompt_4": -1, + "prompt_5": -1 } }, "five_shot": {