from os import getenv

from aiolimiter import AsyncLimiter
from dotenv import load_dotenv
from elevenlabs import AsyncElevenLabs
from huggingface_hub import AsyncInferenceClient
from joblib.memory import Memory
from openai import AsyncOpenAI

# for development purposes, all languages will be evaluated on the fast models
# and only a sample of languages will be evaluated on all models
models = [
    "openai/gpt-4o-mini",  # 0.6$/M tokens
    # "anthropic/claude-3.5-haiku",  # 4$/M tokens -> too expensive for dev
    "meta-llama/llama-3.3-70b-instruct",  # 0.3$/M tokens
    "mistralai/mistral-small-24b-instruct-2501",  # 0.14$/M tokens
    "google/gemini-2.0-flash-001",  # 0.4$/M tokens
    # "qwen/qwen-turbo",  # 0.2$/M tokens; recognizes "inappropriate content"
    # "deepseek/deepseek-chat",  # 0.9$/M tokens
    # "microsoft/phi-4",  # 0.07$/M tokens; only 16k tokens context
    "google/gemma-3-27b-it",  # 0.2$/M tokens
]
model_fast = "meta-llama/llama-3.3-70b-instruct"

transcription_models = [
    "elevenlabs/scribe_v1",
    "openai/whisper-large-v3",
    # "openai/whisper-small",
    # "facebook/seamless-m4t-v2-large",
]
transcription_model_fast = "elevenlabs/scribe_v1"

load_dotenv()
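
# OpenRouter exposes an OpenAI-compatible API, so the standard OpenAI SDK
# works against it when pointed at a custom base URL.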
client = AsyncOpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)
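# Disk-backed memoization: results persist under .cache/ across runs.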
cache = Memory(location=".cache", verbose=0).cache
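# Client-side rate limits, in requests per second, for each provider.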
openrouter_rate_limit = AsyncLimiter(max_rate=20, time_period=1)
elevenlabs_rate_limit = AsyncLimiter(max_rate=2, time_period=1)
huggingface_rate_limit = AsyncLimiter(max_rate=5, time_period=1)
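
# Cached, rate-limited chat completion; a response without choices signals an
# upstream error, so raise instead of caching an empty result.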
@cache
async def complete(**kwargs):
    async with openrouter_rate_limit:
        response = await client.chat.completions.create(**kwargs)
    if not response.choices:
        raise Exception(response)
    return response
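
# Speech-to-text via the ElevenLabs SDK; accepts either the bare model id
# (e.g. "scribe_v1") or a "provider/model" string.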
@cache
async def transcribe_elevenlabs(path, model):
    modelname = model.split("/")[-1]
    client = AsyncElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
    async with elevenlabs_rate_limit:
        with open(path, "rb") as file:
            response = await client.speech_to_text.convert(
                model_id=modelname, file=file
            )
    return response.text
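
# Speech-to-text via the Hugging Face Inference API; takes the full
# "provider/model" id as hosted on the Hub.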
@cache
async def transcribe_huggingface(path, model):
    client = AsyncInferenceClient(api_key=getenv("HUGGINGFACE_ACCESS_TOKEN"))
    async with huggingface_rate_limit:
        output = await client.automatic_speech_recognition(model=model, audio=path)
    return output.text
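
# Dispatch on the provider prefix of the model id.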
async def transcribe(path, model="elevenlabs/scribe_v1"):
    provider, modelname = model.split("/")
    match provider:
        case "elevenlabs":
            return await transcribe_elevenlabs(path, modelname)
        case "openai" | "facebook":
            return await transcribe_huggingface(path, model)
        case _:
            raise ValueError(f"Model {model} not supported")
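
# A minimal usage sketch, assuming the API keys are set in .env and that a
# local audio file "sample.wav" exists (the file name is hypothetical).
if __name__ == "__main__":
    import asyncio

    async def main():
        response = await complete(
            model=model_fast,
            messages=[{"role": "user", "content": "Say hello."}],
        )
        print(response.choices[0].message.content)
        print(await transcribe("sample.wav", model=transcription_model_fast))

    asyncio.run(main())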