David Pomerenke committed on
Commit c4c59ec · 1 Parent(s): 08735bb

Transcription APIs

Files changed (4):
  1. .gitignore +1 -0
  2. evals.py +25 -1
  3. pyproject.toml +2 -0
  4. uv.lock +21 -0
.gitignore CHANGED
@@ -1,5 +1,6 @@
 floresp-*
 glottolog-*
+*.m4a
 LanguageCodes.tab
 ScriptCodes.csv
 .cache
evals.py CHANGED
@@ -11,15 +11,17 @@ import pandas as pd
 import requests
 from aiolimiter import AsyncLimiter
 from dotenv import load_dotenv
+from elevenlabs import ElevenLabs
 from joblib.memory import Memory
 from langcodes import Language, standardize_tag
 from language_data.population_data import LANGUAGE_SPEAKING_POPULATION
 from openai import AsyncOpenAI
+from pyglottolog import Glottolog
 from requests import get
 from rich import print
 from tqdm.asyncio import tqdm_asyncio
 from transformers import NllbTokenizer
-from pyglottolog import Glottolog
+from huggingface_hub import InferenceClient
 
 # config
 models = [
@@ -48,6 +50,28 @@ tokenizer = NllbTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
 rate_limit = AsyncLimiter(max_rate=20, time_period=1)
 
 
+@cache
+def transcribe(filename, model="elevenlabs/scribe_v1"):
+    provider, modelname = model.split("/")
+    with open(filename, "rb") as f:
+        audio = f.read()
+    match provider:
+        case "elevenlabs":
+            client = ElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
+            response = client.speech_to_text.convert(model_id=modelname, file=audio)
+            return response.text
+        case "openai":
+            client = InferenceClient(api_key=getenv("HUGGINGFACE_ACCESS_TOKEN"))
+            output = client.automatic_speech_recognition(model=model, audio=audio)
+            return output.text
+        case _:
+            raise ValueError(f"Model {model} not supported")
+
+
+print(transcribe("data/test.m4a", "openai/whisper-large-v3-turbo"))
+exit()
+
+
 # load general language data
 languages = {
     lang: pop
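For reference, a minimal standalone sketch of the two transcription calls that the new transcribe() helper wraps, assuming ELEVENLABS_API_KEY and HUGGINGFACE_ACCESS_TOKEN are supplied via the .env file that evals.py loads, and that a sample recording exists at data/test.m4a; the path and model ids are illustrative, mirroring the diff above rather than prescribing anything new.

# Sketch only: mirrors the transcribe() branches added in this commit.
from os import getenv

from dotenv import load_dotenv
from elevenlabs import ElevenLabs
from huggingface_hub import InferenceClient

load_dotenv()  # expects ELEVENLABS_API_KEY and HUGGINGFACE_ACCESS_TOKEN

with open("data/test.m4a", "rb") as f:  # assumed sample recording
    audio = f.read()

# ElevenLabs Scribe via the ElevenLabs SDK
eleven = ElevenLabs(api_key=getenv("ELEVENLABS_API_KEY"))
print(eleven.speech_to_text.convert(model_id="scribe_v1", file=audio).text)

# Whisper large-v3-turbo via the Hugging Face Inference API
hf = InferenceClient(api_key=getenv("HUGGINGFACE_ACCESS_TOKEN"))
print(hf.automatic_speech_recognition(model="openai/whisper-large-v3-turbo", audio=audio).text)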
pyproject.toml CHANGED
@@ -15,7 +15,9 @@ dependencies = [
 dev-dependencies = [
     "aiolimiter>=1.1.0",
     "bert-score>=0.3.13",
+    "elevenlabs>=1.53.0",
     "evaluate==0.4.0",
+    "huggingface-hub>=0.29.1",
     "joblib>=1.4.2",
     "langcodes>=3.5.0",
     "openai>=1.52.2",
uv.lock CHANGED
@@ -565,6 +565,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
 ]
 
+[[package]]
+name = "elevenlabs"
+version = "1.53.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "pydantic-core" },
+    { name = "requests" },
+    { name = "typing-extensions" },
+    { name = "websockets" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/83/c1/5bf18b8f8be29032196484a4f55d1ec85ada125d3a5df87a5ebe5b754a0e/elevenlabs-1.53.0.tar.gz", hash = "sha256:bc900f7e6123575014672e5f4f004a12c75508d6b22e370a1327bd0d536b0f0a", size = 149562 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5f/b8/63c7bbbe9d02b1a16afe02512727e4afa17e25ed85c2a0f298b31ac1ddb1/elevenlabs-1.53.0-py3-none-any.whl", hash = "sha256:90b33135204bdd538ab3624dff31aa57b3192f660f72b71c2bf18b8436a53e2d", size = 344998 },
+]
+
 [[package]]
 name = "evaluate"
 version = "0.4.0"
@@ -1194,7 +1211,9 @@ dependencies = [
 dev = [
     { name = "aiolimiter" },
     { name = "bert-score" },
+    { name = "elevenlabs" },
     { name = "evaluate" },
+    { name = "huggingface-hub" },
     { name = "joblib" },
     { name = "langcodes" },
     { name = "openai" },
@@ -1220,7 +1239,9 @@ requires-dist = [
 dev = [
     { name = "aiolimiter", specifier = ">=1.1.0" },
     { name = "bert-score", specifier = ">=0.3.13" },
+    { name = "elevenlabs", specifier = ">=1.53.0" },
     { name = "evaluate", specifier = "==0.4.0" },
+    { name = "huggingface-hub" },
     { name = "joblib", specifier = ">=1.4.2" },
     { name = "langcodes", specifier = ">=3.5.0" },
     { name = "openai", specifier = ">=1.52.2" },