David Pomerenke committed on
Commit
a32a92f
·
1 Parent(s): a0679b4

Get popular models from OpenRouter

Browse files
Files changed (1) hide show
  1. evals/models.py +42 -4
evals/models.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  from os import getenv
2
 
3
  import pandas as pd
@@ -40,13 +44,33 @@ transcription_models = [
40
  # "facebook/seamless-m4t-v2-large",
41
  ]
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  load_dotenv()
44
  client = AsyncOpenAI(
45
  base_url="https://openrouter.ai/api/v1",
46
  api_key=getenv("OPENROUTER_API_KEY"),
47
  )
48
 
49
- cache = Memory(location=".cache", verbose=0).cache
50
  openrouter_rate_limit = AsyncLimiter(max_rate=20, time_period=1)
51
  elevenlabs_rate_limit = AsyncLimiter(max_rate=2, time_period=1)
52
  huggingface_rate_limit = AsyncLimiter(max_rate=5, time_period=1)
@@ -117,7 +141,10 @@ def get_hf_metadata(row):
117
  "type": "Commercial",
118
  "license": None,
119
  }
120
- id = row["hf_slug"] or row["slug"]
 
 
 
121
  if not id:
122
  return empty
123
  try:
@@ -126,7 +153,7 @@ def get_hf_metadata(row):
126
  return {
127
  "hf_id": info.id,
128
  "creation_date": info.created_at,
129
- "size": info.safetensors.total,
130
  "type": "Open",
131
  "license": license,
132
  }
@@ -143,13 +170,24 @@ def get_cost(row):
143
  return round(cost * 1_000_000, 2)
144
 
145
 
 
 
 
 
 
 
 
 
 
 
 
146
  models = models.assign(
147
  name=or_metadata.str["short_name"],
148
  provider_name=or_metadata.str["name"].str.split(": ").str[0],
149
  cost=or_metadata.apply(get_cost),
150
  hf_id=hf_metadata.str["hf_id"],
151
- creation_date=pd.to_datetime(hf_metadata.str["creation_date"]),
152
  size=hf_metadata.str["size"],
153
  type=hf_metadata.str["type"],
154
  license=hf_metadata.str["license"],
 
155
  )
 
1
+ import json
2
+ import re
3
+ from collections import defaultdict
4
+ from datetime import date
5
  from os import getenv
6
 
7
  import pandas as pd
 
44
  # "facebook/seamless-m4t-v2-large",
45
  ]
46
 
47
+ cache = Memory(location=".cache", verbose=0).cache
48
+
49
+
50
+ @cache
51
+ def get_popular_models(date: date):
52
+ raw = get("https://openrouter.ai/rankings").text
53
+ data = re.search(r'{\\"data\\":(.*),\\"isPercentage\\"', raw).group(1)
54
+ data = json.loads(data.replace("\\", ""))
55
+ counts = defaultdict(int)
56
+ for day in data:
57
+ for model, count in day["ys"].items():
58
+ if model.startswith("openrouter") or model == "Others":
59
+ continue
60
+ counts[model.split(":")[0]] += count
61
+ counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)
62
+ return [model for model, _ in counts]
63
+
64
+
65
+ pop_models = get_popular_models(date.today())
66
+ models += [m for m in pop_models if m not in models][:1]
67
+
68
  load_dotenv()
69
  client = AsyncOpenAI(
70
  base_url="https://openrouter.ai/api/v1",
71
  api_key=getenv("OPENROUTER_API_KEY"),
72
  )
73
 
 
74
  openrouter_rate_limit = AsyncLimiter(max_rate=20, time_period=1)
75
  elevenlabs_rate_limit = AsyncLimiter(max_rate=2, time_period=1)
76
  huggingface_rate_limit = AsyncLimiter(max_rate=5, time_period=1)
 
141
  "type": "Commercial",
142
  "license": None,
143
  }
144
+ if not row:
145
+ return empty
146
+ id = row["hf_slug"] or row["slug"].split(":")[0]
147
+ print(id)
148
  if not id:
149
  return empty
150
  try:
 
153
  return {
154
  "hf_id": info.id,
155
  "creation_date": info.created_at,
156
+ "size": info.safetensors.total if info.safetensors else None,
157
  "type": "Open",
158
  "license": license,
159
  }
 
170
  return round(cost * 1_000_000, 2)
171
 
172
 
173
+ exists = or_metadata.apply(lambda x: x is not None)
174
+ models, or_metadata, hf_metadata = (
175
+ models[exists],
176
+ or_metadata[exists],
177
+ hf_metadata[exists],
178
+ )
179
+ creation_date_hf = pd.to_datetime(hf_metadata.str["creation_date"]).dt.date
180
+ creation_date_or = pd.to_datetime(
181
+ or_metadata.str["created_at"].str.split("T").str[0]
182
+ ).dt.date
183
+
184
  models = models.assign(
185
  name=or_metadata.str["short_name"],
186
  provider_name=or_metadata.str["name"].str.split(": ").str[0],
187
  cost=or_metadata.apply(get_cost),
188
  hf_id=hf_metadata.str["hf_id"],
 
189
  size=hf_metadata.str["size"],
190
  type=hf_metadata.str["type"],
191
  license=hf_metadata.str["license"],
192
+ creation_date=creation_date_hf.combine_first(creation_date_or),
193
  )