from os import getenv
import pandas as pd
from aiolimiter import AsyncLimiter
from dotenv import load_dotenv
from elevenlabs import AsyncElevenLabs
from huggingface_hub import AsyncInferenceClient, HfApi
from joblib.memory import Memory
from openai import AsyncOpenAI
from requests import HTTPError

# for development purposes, all languages will be evaluated on the fast models
# and only a sample of languages will be evaluated on all models
models = [
    "openai/gpt-4o-mini",  # 0.6$/M tokens
    # "anthropic/claude-3.5-haiku",  # 4$/M tokens -> too expensive for dev
    "meta-llama/llama-3.3-70b-instruct",  # 0.3$/M tokens
    "mistralai/mistral-small-24b-instruct-2501",  # 0.14$/M tokens
    "google/gemini-2.0-flash-001",  # 0.4$/M tokens
    # "qwen/qwen-turbo",  # 0.2$/M tokens; recognizes "inappropriate content"
    # "deepseek/deepseek-chat",  # 0.9$/M tokens
    # "microsoft/phi-4",  # 0.07$/M tokens; only 16k tokens context
    "google/gemma-3-27b-it",  # 0.2$/M tokens
]
model_fast = "meta-llama/llama-3.3-70b-instruct"
transcription_models = [
    "elevenlabs/scribe_v1",
    "openai/whisper-large-v3",
    # "openai/whisper-small",
    # "facebook/seamless-m4t-v2-large",
]
transcription_model_fast = "elevenlabs/scribe_v1"
load_dotenv()

client = AsyncOpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)
# cache expensive API results on disk, so repeated runs don't re-query
cache = Memory(location=".cache", verbose=0).cache

# per-provider rate limits, in requests per second
openrouter_rate_limit = AsyncLimiter(max_rate=20, time_period=1)
elevenlabs_rate_limit = AsyncLimiter(max_rate=2, time_period=1)
huggingface_rate_limit = AsyncLimiter(max_rate=5, time_period=1)

@cache
async def complete(**kwargs):
    # chat completion via OpenRouter, rate-limited and cached on disk
    async with openrouter_rate_limit:
        response = await client.chat.completions.create(**kwargs)
    if not response.choices:
        raise Exception(response)
    return response
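
# Usage sketch (illustrative only; assumes an async caller, and the prompt is made up):
# response = await complete(
#     model=model_fast,
#     messages=[{"role": "user", "content": "Translate 'good morning' to Swahili."}],
# )
# print(response.choices[0].message.content)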

@cache
async def transcribe_elevenlabs(path, model):
    # speech-to-text via the ElevenLabs API
    modelname = model.split("/")[-1]
    client = AsyncElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
    async with elevenlabs_rate_limit:
        with open(path, "rb") as file:
            response = await client.speech_to_text.convert(
                model_id=modelname, file=file
            )
    return response.text

@cache
async def transcribe_huggingface(path, model):
    # speech-to-text via the Hugging Face inference API
    client = AsyncInferenceClient(api_key=getenv("HUGGINGFACE_ACCESS_TOKEN"))
    async with huggingface_rate_limit:
        output = await client.automatic_speech_recognition(model=model, audio=path)
    return output.text

async def transcribe(path, model="elevenlabs/scribe_v1"):
    # dispatch to the right backend based on the provider prefix of the model id
    provider, modelname = model.split("/")
    match provider:
        case "elevenlabs":
            return await transcribe_elevenlabs(path, modelname)
        case "openai" | "facebook":
            return await transcribe_huggingface(path, model)
        case _:
            raise ValueError(f"Model {model} not supported")
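
# Usage sketch (illustrative; "sample.wav" is a placeholder path):
# text = await transcribe("sample.wav", model=transcription_model_fast)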

# collect metadata for each model from the Hugging Face Hub
models = pd.DataFrame(models, columns=["id"])
api = HfApi()


def get_metadata(id):
    try:
        info = api.model_info(id)
        license = info.card_data.license.replace("-", " ").replace("mit", "MIT").title()
        return {
            "hf_id": info.id,
            "creation_date": info.created_at,
            "size": info.safetensors.total,
            "type": "Open",
            "license": license,
        }
    except HTTPError:
        # models without a Hub repository (commercial APIs) have no public metadata
        return {
            "hf_id": None,
            "creation_date": None,
            "size": None,
            "type": "Commercial",
            "license": None,
        }
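
# Sketch of the two outcomes (values are illustrative, not real outputs):
# get_metadata("meta-llama/llama-3.3-70b-instruct") -> {"type": "Open", "size": ..., ...}
# get_metadata("openai/gpt-4o-mini") -> {"type": "Commercial", "hf_id": None, ...}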
models["hf_id"] = models["id"].apply(get_metadata).str["hf_id"]
models["creation_date"] = models["id"].apply(get_metadata).str["creation_date"]
models["creation_date"] = pd.to_datetime(models["creation_date"])
models["size"] = models["id"].apply(get_metadata).str["size"]
models["type"] = models["id"].apply(get_metadata).str["type"]
models["license"] = models["id"].apply(get_metadata).str["license"]