David Pomerenke committed on
Commit
d1a7111
·
1 Parent(s): 9dbdcb2

Basic language table

Browse files
evals/languages.py CHANGED
@@ -21,21 +21,6 @@ languages["language_name"] = languages["bcp_47"].apply(
21
  lambda x: Language.get(x).display_name()
22
  )
23
 
24
- # load script codes and names
25
- scripts = pd.read_csv("data/ScriptCodes.csv").rename(
26
- columns={"Code": "iso15924", "English Name": "script_name"}
27
- )
28
-
29
-
30
- def population(bcp_47):
31
- items = {
32
- re.sub(r"^[a-z]+-", "", lang): pop
33
- for lang, pop in LANGUAGE_SPEAKING_POPULATION.items()
34
- if re.match(rf"^{bcp_47}-[A-Z]{{2}}$", lang)
35
- }
36
- return items
37
-
38
-
39
  glottolog = pd.read_csv(
40
  "data/glottolog_languoid.csv/languoid.csv", na_values=[""], keep_default_na=False
41
  ) # Min _Nan_ Chinese is not N/A!
@@ -43,7 +28,6 @@ glottolog["bcp_47"] = glottolog["iso639P3code"].apply(
43
  lambda x: standardize_tag(x, macro=True) if not pd.isna(x) else None
44
  )
45
 
46
-
47
  @cache
48
  def language_family(bcp_47):
49
  languoid = glottolog[glottolog["bcp_47"] == bcp_47].iloc[0]
@@ -52,6 +36,21 @@ def language_family(bcp_47):
52
  family = glottolog[glottolog["id"] == languoid["family_id"]].iloc[0]
53
  return family["name"]
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def script_name(iso15924):
57
  return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
 
21
  lambda x: Language.get(x).display_name()
22
  )
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  glottolog = pd.read_csv(
25
  "data/glottolog_languoid.csv/languoid.csv", na_values=[""], keep_default_na=False
26
  ) # Min _Nan_ Chinese is not N/A!
 
28
  lambda x: standardize_tag(x, macro=True) if not pd.isna(x) else None
29
  )
30
 
 
31
  @cache
32
  def language_family(bcp_47):
33
  languoid = glottolog[glottolog["bcp_47"] == bcp_47].iloc[0]
 
36
  family = glottolog[glottolog["id"] == languoid["family_id"]].iloc[0]
37
  return family["name"]
38
 
39
+ languages["family"] = languages["bcp_47"].apply(language_family)
40
+
41
+ # load script codes and names
42
+ scripts = pd.read_csv("data/ScriptCodes.csv").rename(
43
+ columns={"Code": "iso15924", "English Name": "script_name"}
44
+ )
45
+
46
+
47
def population(bcp_47):
    """Collect the per-region entries of one language from the population table.

    Scans ``LANGUAGE_SPEAKING_POPULATION`` for keys shaped like
    ``<bcp_47>-<two uppercase letters>`` and returns a dict that maps each
    such key, with its leading lowercase ``xx-`` prefix stripped, to the value
    stored under it (presumably a speaker count -- confirm against the data
    source).
    """
    region_key = rf"^{bcp_47}-[A-Z]{{2}}$"
    by_region = {}
    for tag, count in LANGUAGE_SPEAKING_POPULATION.items():
        if not re.match(region_key, tag):
            continue
        by_region[re.sub(r"^[a-z]+-", "", tag)] = count
    return by_region
54
 
55
  def script_name(iso15924):
56
  return scripts[scripts["iso15924"] == iso15924]["script_name"].values[0]
evals/main.py CHANGED
@@ -95,6 +95,20 @@ def make_model_table(df):
95
  return df
96
 
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  async def main():
99
  results = await evaluate()
100
  results, lang_results, model_results, task_results = aggregate(results)
@@ -107,9 +121,9 @@ async def main():
107
  with open("results.json", "w") as f:
108
  json.dump(all_results, f, indent=2, ensure_ascii=False)
109
 
110
- model_table = make_model_table(model_results)
111
  all_tables = {
112
- "model_table": serialize(model_table),
 
113
  }
114
  with open("frontend/public/results.json", "w") as f:
115
  json.dump(all_tables, f, indent=2, ensure_ascii=False)
 
95
  return df
96
 
97
 
98
def make_language_table(df):
    """Build the per-language results table.

    Pivots long-format scores into one row per ``bcp_47`` with one column per
    ``<task>_<metric>`` combination, adds their row-wise mean as ``average``,
    joins the result with the ``languages`` frame that is in scope, and orders
    the rows by ``average``, highest first.

    Args:
        df: DataFrame with ``bcp_47``, ``task``, ``metric`` and ``score``
            columns. Each (``bcp_47``, task, metric) combination must occur
            at most once; ``pivot`` raises ``ValueError`` on duplicates.

    Returns:
        A new DataFrame with the columns ``language_name``, ``speakers``,
        ``family``, ``average``, ``in_benchmark``, followed by one column per
        task/metric pair. The caller's ``df`` is not modified.
    """
    # Build the combined key on a copy: assigning the column on ``df`` itself
    # would leak a "task_metric" column into the caller's frame.
    scores = df.assign(task_metric=df["task"] + "_" + df["metric"])
    task_metrics = list(scores["task_metric"].unique())

    table = (
        scores.pivot(index="bcp_47", columns="task_metric", values="score")
        .fillna(0)  # a language with no score for a task counts as 0 there
        .reset_index()
    )
    table["average"] = table[task_metrics].mean(axis=1)

    score_columns = [*task_metrics, "average"]
    table[score_columns] = table[score_columns].round(2)

    # The outer join keeps languages that have no results at all; their score
    # columns are NaN and therefore sort to the end.
    table = pd.merge(languages, table, on="bcp_47", how="outer")
    table = table.sort_values(by="average", ascending=False)
    return table[
        [
            "language_name",
            "speakers",
            "family",
            "average",
            "in_benchmark",
            *task_metrics,
        ]
    ]
110
+
111
+
112
  async def main():
113
  results = await evaluate()
114
  results, lang_results, model_results, task_results = aggregate(results)
 
121
  with open("results.json", "w") as f:
122
  json.dump(all_results, f, indent=2, ensure_ascii=False)
123
 
 
124
  all_tables = {
125
+ "model_table": serialize(make_model_table(model_results)),
126
+ "language_table": serialize(make_language_table(lang_results)),
127
  }
128
  with open("frontend/public/results.json", "w") as f:
129
  json.dump(all_tables, f, indent=2, ensure_ascii=False)
frontend/public/results.json CHANGED
The diff for this file is too large to render. See raw diff
 
frontend/src/App.js CHANGED
@@ -1,53 +1,63 @@
1
- import './App.css';
2
- import { useState, useEffect } from 'react';
3
- import { PrimeReactProvider } from 'primereact/api';
4
- import "primereact/resources/themes/lara-light-cyan/theme.css";
5
- import ModelTable from './components/ModelTable';
 
6
 
7
-
8
-
9
- function App() {
10
- const [data, setData] = useState(null);
11
- const [loading, setLoading] = useState(true);
12
- const [error, setError] = useState(null);
13
 
14
  useEffect(() => {
15
  fetch('/results.json')
16
  .then(response => {
17
  if (!response.ok) {
18
- throw new Error('Network response was not ok');
19
  }
20
- return response.json();
21
  })
22
  .then(jsonData => {
23
- setData(jsonData);
24
- setLoading(false);
25
  })
26
  .catch(err => {
27
- setError(err.message);
28
- setLoading(false);
29
- });
30
- }, []);
31
 
32
  return (
33
- <div className="App">
34
- <header className="App-header">
35
- <div className="emoji-container">
36
- <span role="img" aria-label="Hugging Face Emoji" className="header-emoji">🌍</span>
 
 
 
 
 
 
37
  </div>
38
  <h1>Global AI Language Monitor</h1>
39
  <p>Tracking language proficiency of AI models for every language</p>
40
-
41
- <div className="data-container" style={{ width: '100%' }}>
42
  <PrimeReactProvider>
43
  {loading && <p>...</p>}
44
  {error && <p>Error: {error}</p>}
45
- {data && <ModelTable data={data} />}
 
 
 
 
 
46
  </PrimeReactProvider>
47
  </div>
48
  </header>
49
  </div>
50
- );
51
  }
52
 
53
- export default App;
 
1
+ import './App.css'
2
+ import { useState, useEffect } from 'react'
3
+ import { PrimeReactProvider } from 'primereact/api'
4
+ import 'primereact/resources/themes/lara-light-cyan/theme.css'
5
+ import ModelTable from './components/ModelTable'
6
+ import LanguageTable from './components/LanguageTable'
7
 
8
+ function App () {
9
+ const [data, setData] = useState(null)
10
+ const [loading, setLoading] = useState(true)
11
+ const [error, setError] = useState(null)
 
 
12
 
13
  useEffect(() => {
14
  fetch('/results.json')
15
  .then(response => {
16
  if (!response.ok) {
17
+ throw new Error('Network response was not ok')
18
  }
19
+ return response.json()
20
  })
21
  .then(jsonData => {
22
+ setData(jsonData)
23
+ setLoading(false)
24
  })
25
  .catch(err => {
26
+ setError(err.message)
27
+ setLoading(false)
28
+ })
29
+ }, [])
30
 
31
  return (
32
+ <div className='App'>
33
+ <header className='App-header'>
34
+ <div className='emoji-container'>
35
+ <span
36
+ role='img'
37
+ aria-label='Hugging Face Emoji'
38
+ className='header-emoji'
39
+ >
40
+ 🌍
41
+ </span>
42
  </div>
43
  <h1>Global AI Language Monitor</h1>
44
  <p>Tracking language proficiency of AI models for every language</p>
45
+
46
+ <div className='data-container' style={{ width: '100%' }}>
47
  <PrimeReactProvider>
48
  {loading && <p>...</p>}
49
  {error && <p>Error: {error}</p>}
50
+ {data && (
51
+ <div style={{ display: 'flex', flexDirection: 'row', gap: '2rem' }}>
52
+ <ModelTable data={data} />
53
+ <LanguageTable data={data} />
54
+ </div>
55
+ )}
56
  </PrimeReactProvider>
57
  </div>
58
  </header>
59
  </div>
60
+ )
61
  }
62
 
63
+ export default App
frontend/src/components/LanguageTable.js ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { DataTable } from 'primereact/datatable'
import { Column } from 'primereact/column'
import { FilterMatchMode } from 'primereact/api'
import { MultiSelect } from 'primereact/multiselect'
import { useState, useEffect, useRef } from 'react'
import { Slider } from 'primereact/slider'
import ScoreField from './ScoreField'
8
+
9
+ const LanguageTable = ({ data }) => {
10
+ const [filters, setFilters] = useState({
11
+ language_name: { value: null, matchMode: FilterMatchMode.CONTAINS },
12
+ family: { value: null, matchMode: FilterMatchMode.IN },
13
+ speakers: { value: null, matchMode: FilterMatchMode.BETWEEN },
14
+ })
15
+ const table = data.language_table
16
+
17
+ const families = [...new Set(table.map(item => item.family))]
18
+ const familyRowFilterTemplate = options => {
19
+ return (
20
+ <MultiSelect
21
+ value={options.value}
22
+ options={families}
23
+ onChange={e => {
24
+ options.filterApplyCallback(e.value)
25
+ setFilters(prevFilters => ({
26
+ ...prevFilters,
27
+ family: { value: e.value, matchMode: FilterMatchMode.IN }
28
+ }))
29
+ }}
30
+ placeholder='All families'
31
+ />
32
+ )
33
+ }
34
+
35
+ const formatPopulation = population => {
36
+ if (population === null) {
37
+ return ''
38
+ } else if (population < 1000) {
39
+ return population.toFixed(0) + ''
40
+ } else if (population < 1000 * 1000) {
41
+ return (population / 1000).toFixed(1) + 'K'
42
+ } else if (population < 1000 * 1000 * 1000) {
43
+ return (population / 1000 / 1000).toFixed(1) + 'M'
44
+ } else {
45
+ return (population / 1000 / 1000 / 1000).toFixed(1) + 'B'
46
+ }
47
+ }
48
+
49
+ const SliderWithLabel = ({ value, onChange }) => {
50
+ const p = 10
51
+ const min = 2
52
+ const max = 12
53
+ const start = value === null ? min : Math.log(value[0]) / Math.log(p)
54
+ const stop = value === null ? max : Math.log(value[1]) / Math.log(p)
55
+ const [_value, _setValue] = useState([start, stop])
56
+ useEffect(() => {
57
+ const timer = setTimeout(() => {
58
+ onChange({
59
+ value:
60
+ _value[0] <= min + 0.1 && _value[1] >= max - 0.1
61
+ ? null
62
+ : [p ** _value[0], p ** _value[1]]
63
+ })
64
+ }, 1000)
65
+ return () => clearTimeout(timer)
66
+ }, [_value, onChange])
67
+ return (
68
+ <div style={{ minWidth: '20rem' }}>
69
+ <div>{formatPopulation(p ** _value[0])}</div>
70
+ <div>{formatPopulation(p ** _value[1])}</div>
71
+ <Slider
72
+ value={_value}
73
+ onChange={e => _setValue(e.value)}
74
+ placeholder='All sizes'
75
+ min={min}
76
+ max={max}
77
+ step={0.01}
78
+ range
79
+ style={{ marginTop: '5rem' }}
80
+ />
81
+ </div>
82
+ )
83
+ }
84
+
85
+ const speakerFilterTemplate = options => {
86
+ return (
87
+ <SliderWithLabel
88
+ value={options.value}
89
+ onChange={e => {
90
+ options.filterApplyCallback(e.value)
91
+ setFilters(prevFilters => ({
92
+ ...prevFilters,
93
+ speakers: { value: e.value, matchMode: FilterMatchMode.BETWEEN }
94
+ }))
95
+ }}
96
+ />
97
+ )
98
+ }
99
+
100
+ const speakerBodyTemplate = rowData => {
101
+ const populationStr = formatPopulation(rowData.speakers)
102
+ return <div>{populationStr}</div>
103
+ }
104
+
105
+ const languageBodyTemplate = rowData => {
106
+ return <div style={{ fontWeight: 'bold' }}>{rowData.language_name}</div>
107
+ }
108
+
109
+ const scoreBodyTemplate = (field, options = {}) => {
110
+ const { minScore = 0, maxScore = 1 } = options
111
+
112
+ return rowData => {
113
+ const score = rowData[field]
114
+ return ScoreField(score, minScore, maxScore)
115
+ }
116
+ }
117
+
118
+ return (
119
+ <DataTable
120
+ value={table}
121
+ header={<>Languages</>}
122
+ sortField='speakers'
123
+ removableSort
124
+ filters={filters}
125
+ filterDisplay='menu'
126
+ scrollable
127
+ scrollHeight='500px'
128
+ style={{ minWidth: '200px' }}
129
+ >
130
+ <Column
131
+ field='language_name'
132
+ header='Language'
133
+ body={languageBodyTemplate}
134
+ filter
135
+ showFilterMatchModes={false}
136
+ style={{ minWidth: '5rem' }}
137
+ frozen
138
+ />
139
+ <Column
140
+ field='speakers'
141
+ header='Speakers'
142
+ body={speakerBodyTemplate}
143
+ filter
144
+ filterElement={speakerFilterTemplate}
145
+ showFilterMatchModes={false}
146
+ style={{ minWidth: '5rem' }}
147
+ />
148
+ <Column
149
+ field='family'
150
+ header='Family'
151
+ filter
152
+ showFilterMatchModes={false}
153
+ filterElement={familyRowFilterTemplate}
154
+ style={{ minWidth: '10rem' }}
155
+ />
156
+ <Column
157
+ field='average'
158
+ header='Average'
159
+ sortable
160
+ body={scoreBodyTemplate('average', { minScore: 0.4, maxScore: 0.8 })}
161
+ style={{ minWidth: '5rem', maxWidth: '10rem' }}
162
+ />
163
+ <Column
164
+ field='translation_chrf'
165
+ header='Translation'
166
+ sortable
167
+ body={scoreBodyTemplate('translation_chrf', {
168
+ minScore: 0.4,
169
+ maxScore: 0.7
170
+ })}
171
+ style={{ minWidth: '5rem', maxWidth: '10rem' }}
172
+ />
173
+ <Column
174
+ field='classification_accuracy'
175
+ header='Classification'
176
+ sortable
177
+ body={scoreBodyTemplate('classification_accuracy', {
178
+ minScore: 0.4,
179
+ maxScore: 1
180
+ })}
181
+ style={{ minWidth: '5rem', maxWidth: '10rem' }}
182
+ />
183
+ <Column
184
+ field='language_modeling_chrf'
185
+ header='Language Modeling'
186
+ sortable
187
+ body={scoreBodyTemplate('language_modeling_chrf', {
188
+ minScore: 0.8,
189
+ maxScore: 1
190
+ })}
191
+ style={{ minWidth: '5rem', maxWidth: '10rem' }}
192
+ />
193
+ </DataTable>
194
+ )
195
+ }
196
+
197
+ export default LanguageTable
frontend/src/components/ModelTable.js CHANGED
@@ -124,7 +124,7 @@ const ModelTable = ({ data }) => {
124
  }
125
 
126
  const modelBodyTemplate = rowData => {
127
- return <div style={{ fontWeight: 'bold' }}>{rowData.model}</div>
128
  }
129
 
130
  const scoreBodyTemplate = (field, options = {}) => {
@@ -162,7 +162,7 @@ const ModelTable = ({ data }) => {
162
  header='Model'
163
  filter
164
  showFilterMatchModes={false}
165
- style={{ minWidth: '15rem' }}
166
  body={modelBodyTemplate}
167
  frozen
168
  />
@@ -188,7 +188,7 @@ const ModelTable = ({ data }) => {
188
  field='average'
189
  header='Average'
190
  sortable
191
- body={scoreBodyTemplate('average', { minScore: 0.4, maxScore: 0.8 })}
192
  style={{ minWidth: '5rem', maxWidth: '10rem' }}
193
  />
194
  <Column
@@ -196,7 +196,7 @@ const ModelTable = ({ data }) => {
196
  header='Translation'
197
  sortable
198
  body={scoreBodyTemplate('translation_chrf', {
199
- minScore: 0.4,
200
  maxScore: 0.7
201
  })}
202
  style={{ minWidth: '5rem', maxWidth: '10rem' }}
@@ -206,8 +206,8 @@ const ModelTable = ({ data }) => {
206
  header='Classification'
207
  sortable
208
  body={scoreBodyTemplate('classification_accuracy', {
209
- minScore: 0.4,
210
- maxScore: 1
211
  })}
212
  style={{ minWidth: '5rem', maxWidth: '10rem' }}
213
  />
 
124
  }
125
 
126
  const modelBodyTemplate = rowData => {
127
+ return <div style={{ fontWeight: 'bold', height: '100%' }}>{rowData.model}</div>
128
  }
129
 
130
  const scoreBodyTemplate = (field, options = {}) => {
 
162
  header='Model'
163
  filter
164
  showFilterMatchModes={false}
165
+ style={{ minWidth: '10rem' }}
166
  body={modelBodyTemplate}
167
  frozen
168
  />
 
188
  field='average'
189
  header='Average'
190
  sortable
191
+ body={scoreBodyTemplate('average', { minScore: 0.3, maxScore: 0.6 })}
192
  style={{ minWidth: '5rem', maxWidth: '10rem' }}
193
  />
194
  <Column
 
196
  header='Translation'
197
  sortable
198
  body={scoreBodyTemplate('translation_chrf', {
199
+ minScore: 0.3,
200
  maxScore: 0.7
201
  })}
202
  style={{ minWidth: '5rem', maxWidth: '10rem' }}
 
206
  header='Classification'
207
  sortable
208
  body={scoreBodyTemplate('classification_accuracy', {
209
+ minScore: 0.3,
210
+ maxScore: 0.8
211
  })}
212
  style={{ minWidth: '5rem', maxWidth: '10rem' }}
213
  />
results.json CHANGED
The diff for this file is too large to render. See raw diff