Update README.md
README.md
CHANGED
@@ -127,30 +127,27 @@ All models were evaluated using our proprietary evaluation pipeline and [LM Eval
 The evaluation scores of ALLaM can be found in JSON format [here](https://huggingface.co/ALLaM-AI/ALLaM-7B-Instruct-preview/tree/main/evaluation).

Removed: the previous Arabic benchmark table (rows for the jais-family, jais-adapted, Qwen2.5, Mistral, and Llama models, reported without an average column), including:

- | Llama-3.3-70B-Instruct | 68.84 | 79.6 | 78.81 | 70.49 | 70.91 | **70.9** | **88.6** | **65.74** | 76.93 | 72.01 | 70.25 | 44.12 |
- <!-- | AceGPT-v2-8B-Chat | nan | nan | nan | nan | nan | nan | nan | 51.96 | 72.69 | 57.02 | 49.99 | 36.15 | -->
Added: the updated benchmark table, now including an AVG column:

+ | Model | ETEC <br>0 shot | IEN-MCQ <br>0 shot | IEN-TF <br>0 shot | AraPro <br>0 shot | AraMath <br>5 shot | Ar-IFEval <br>(prompt strict) <br>0 shot | Ar-IFEval <br>(inst strict) <br>0 shot | ExamsAR <br>5 shot | ACVA <br>5 shot | Arabic MMLU <br>0 shot | Openai MMLU <br>0 shot | GAT <br>0 shot | AVG |
+ |:----|:----|:----|:----|:----|:----|:----|:----|:----|:----|:----|:----|:----|:----|
+ | ALLaM_7b-v1.27.2.25 | 66.67 | **91.77** | 82.95 | 69.71 | 66.78 | 31.34 | 67.65 | 51.58 | 76.33 | 67.78 | 55.91 | 44.53 | 64.42 |
+ | AceGPT-v2-32B-Chat | 64.81 | 81.6 | 80.35 | 67.19 | 64.46 | 25.75 | 63.41 | 55.31 | 71.57 | 68.3 | 60.8 | 43.21 | 62.23 |
+ | jais-family-6p7b-chat | 45.47 | 46.22 | 63.92 | 54.31 | 25.29 | 13.99 | 52.97 | 46.93 | 73.8 | 56.15 | 44.96 | 31.71 | 46.31 |
+ | jais-family-13b-chat | 48.65 | 62.95 | 68.68 | 57.53 | 26.61 | 17.16 | 54.27 | 45.07 | 71.18 | 58.14 | 47.73 | 31.72 | 49.14 |
+ | jais-family-30b-8k-chat | 53.52 | 72.76 | 70.65 | 61.27 | 33.39 | 16.79 | 54.68 | 50.28 | 74.47 | 63.11 | 50.9 | 36.44 | 53.19 |
+ | jais-family-30b-16k-chat | 53.31 | 74.88 | 68.76 | 62.79 | 41.49 | 16.6 | 54.95 | 49.72 | 60.08 | 62.04 | 50.98 | 34.85 | 52.54 |
+ | jais-adapted-7b-chat | 40.49 | 57.38 | 67.18 | 50.59 | 28.43 | 14.93 | 54.27 | 40.6 | 70.44 | 49.75 | 38.54 | 29.68 | 45.19 |
+ | jais-adapted-13b-chat | 48.12 | 69.65 | 71.85 | 59.07 | 37.02 | 23.32 | 60.61 | 48.23 | 67.78 | 56.42 | 46.83 | 33.4 | 51.86 |
+ | jais-adapted-70b-chat | 56.81 | 74.51 | 76.47 | 64.59 | 45.62 | 27.05 | 65.05 | 54.75 | 73.33 | 65.74 | 56.82 | 39.15 | 58.32 |
+ | Qwen2.5-7B-Instruct | 64.12 | 66.38 | 78.46 | 64.63 | 71.74 | 28.17 | 65.19 | 50.65 | 78.17 | 61.54 | 56.1 | 41.42 | 60.55 |
+ | Qwen2.5-14B-Instruct | 72.18 | 80.51 | 77.64 | 69.11 | 82.81 | 68.66 | 86.76 | 57.54 | 75.04 | 69.36 | 63.8 | 51.7 | 71.26 |
+ | Qwen2.5-72B-Instruct | **78.7** | 86.88 | **86.62** | **74.69** | **92.89** | 67.72 | 87.51 | 60.71 | **79.92** | **74.1** | **73.59** | **59.54** | **76.91** |
+ | Mistral-7B-Instruct-v0.3 | 35.67 | 53.59 | 63.4 | 43.85 | 27.11 | 30.41 | 64.03 | 34.08 | 60.25 | 45.27 | 32.3 | 26.65 | 43.05 |
+ | Mistral-Nemo-Instruct-2407 | 49.28 | 68.43 | 71.78 | 57.61 | 40.0 | 35.82 | 70.58 | 47.49 | 76.92 | 55.97 | 46.15 | 25.44 | 53.79 |
+ | Mistral-Small-Instruct-2409 | 40.96 | 60.64 | 63.66 | 47.73 | 44.46 | 51.12 | 78.16 | 38.73 | 68.93 | 50.43 | 39.63 | 28.82 | 51.11 |
+ | falcon-mamba-7b-instruct | 37.52 | 52.65 | 57.63 | 41.47 | 56.53 | 8.58 | 47.92 | 28.49 | 63.52 | 39.27 | 28.45 | 29.69 | 40.98 |
+ | Llama-3.1-8B-Instruct | 45.68 | 59.23 | 71.7 | 52.51 | 34.38 | 51.87 | 79.11 | 54.0 | 70.54 | 56.53 | 44.67 | 30.76 | 54.25 |
+ | Llama-3.3-70B-Instruct | 68.84 | 79.6 | 78.81 | 70.49 | 70.91 | **70.9** | **88.6** | **65.74** | 76.93 | 72.01 | 70.25 | 44.12 | 71.43 |
+ <!-- | AceGPT-v2-8B-Chat | nan | nan | nan | nan | nan | nan | nan | 51.96 | 72.69 | 57.02 | 49.99 | 36.15 | 53.56 | -->
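If useful, the per-benchmark score files linked above can be pulled locally and inspected. A minimal sketch, assuming the `huggingface_hub` package and that the files under `evaluation/` are plain JSON (their exact names are not listed in this commit):

```python
# Minimal sketch: download only the evaluation/ folder of the model repo and
# load every JSON score file it contains. File names are assumptions; only the
# repo id and the evaluation/ path come from the link above.
import json
from pathlib import Path

from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="ALLaM-AI/ALLaM-7B-Instruct-preview",
    allow_patterns="evaluation/*",  # skip the model weights
)

scores = {}
for path in Path(local_dir, "evaluation").glob("*.json"):
    scores[path.stem] = json.loads(path.read_text())

print(sorted(scores))  # one key per downloaded score file
```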
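The AVG column introduced in this revision is consistent with a plain unweighted mean of the twelve benchmark scores in each row; a quick check against the ALLaM_7b-v1.27.2.25 row (the averaging rule is an assumption, only the numbers come from the table):

```python
# Sketch: recompute the AVG cell of the ALLaM_7b-v1.27.2.25 row as the
# unweighted mean of its twelve benchmark scores (ETEC through GAT).
allam_scores = [66.67, 91.77, 82.95, 69.71, 66.78, 31.34,
                67.65, 51.58, 76.33, 67.78, 55.91, 44.53]

avg = sum(allam_scores) / len(allam_scores)
print(f"{avg:.2f}")  # 64.42, matching the AVG column in the table
```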
Closed models evaluations: