Update README.md
Browse files
README.md
CHANGED
@@ -50,3 +50,55 @@ pipeline_tag: audio-text-to-text
base_model:
- fixie-ai/ultravox-v0_5-llama-3_2-1b
---

## Usage (Transformers.js)

If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
```bash
npm i @huggingface/transformers
```

You can then use the model like this:
```js
import { UltravoxProcessor, UltravoxModel, read_audio } from "@huggingface/transformers";

const processor = await UltravoxProcessor.from_pretrained(
  "onnx-community/ultravox-v0_5-llama-3_2-1b-ONNX",
);
const model = await UltravoxModel.from_pretrained(
  "onnx-community/ultravox-v0_5-llama-3_2-1b-ONNX",
  {
    dtype: {
      embed_tokens: "q8", // "fp32", "fp16", "q8"
      audio_encoder: "q4", // "fp32", "fp16", "q8", "q4", "q4f16"
      decoder_model_merged: "q4", // "q8", "q4", "q4f16"
    },
  },
);

const audio = await read_audio("http://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav", 16000);
const messages = [
  {
    role: "system",
    content: "You are a helpful assistant.",
  },
  { role: "user", content: "Transcribe this audio:<|audio|>" },
];
const text = processor.tokenizer.apply_chat_template(messages, {
  add_generation_prompt: true,
  tokenize: false,
});

const inputs = await processor(text, audio);
const generated_ids = await model.generate({
  ...inputs,
  max_new_tokens: 128,
});

const generated_texts = processor.batch_decode(
  generated_ids.slice(null, [inputs.input_ids.dims.at(-1), null]),
  { skip_special_tokens: true },
);
console.log(generated_texts[0]);
// "I can transcribe the audio for you. Here's the transcription:\n\n\"I have a dream that one day this nation will rise up and live out the true meaning of its creed.\"\n\n- Martin Luther King Jr.\n\nWould you like me to provide the transcription in a specific format (e.g., word-for-word, character-for-character, or a specific font)?"
```