jbilcke-hf (HF Staff) committed
Commit 422262e · 1 Parent(s): 8e4eac4

fix issue with YAML parsing

api_core.py CHANGED
@@ -319,6 +319,7 @@ class VideoGenerationAPI:
                              model_override: Optional[str] = None) -> str:
         """
         Helper method to generate text using the appropriate client and configuration.
+        Tries chat_completion first (modern standard), falls back to text_generation.
 
         Args:
             prompt: The prompt to generate text from
@@ -333,37 +334,83 @@ class VideoGenerationAPI:
         # Get the appropriate client
         client = self._get_inference_client(llm_config)
 
-        # For third-party providers, we don't need to specify model in text_generation
-        # as it's already configured in the client
-        if llm_config and llm_config.get('provider') != 'huggingface':
-            response = await asyncio.get_event_loop().run_in_executor(
-                None,
-                lambda: client.text_generation(
-                    prompt,
-                    max_new_tokens=max_new_tokens,
-                    temperature=temperature
-                )
-            )
-        else:
-            # For HuggingFace models, we need to specify the model
-            if model_override:
-                model_to_use = model_override
-            elif llm_config:
-                model_to_use = llm_config.get('model', TEXT_MODEL)
-            else:
-                model_to_use = TEXT_MODEL
-
-            response = await asyncio.get_event_loop().run_in_executor(
-                None,
-                lambda: client.text_generation(
-                    prompt,
-                    model=model_to_use,
-                    max_new_tokens=max_new_tokens,
-                    temperature=temperature
-                )
-            )
-
-        return response
+        # Determine the model to use
+        if model_override:
+            model_to_use = model_override
+        elif llm_config:
+            model_to_use = llm_config.get('model', TEXT_MODEL)
+        else:
+            model_to_use = TEXT_MODEL
+
+        # Try chat_completion first (modern standard, more widely supported)
+        try:
+            messages = [{"role": "user", "content": prompt}]
+
+            if llm_config and llm_config.get('provider') != 'huggingface':
+                # For third-party providers
+                completion = await asyncio.get_event_loop().run_in_executor(
+                    None,
+                    lambda: client.chat.completions.create(
+                        messages=messages,
+                        max_tokens=max_new_tokens,
+                        temperature=temperature
+                    )
+                )
+            else:
+                # For HuggingFace models, specify the model
+                completion = await asyncio.get_event_loop().run_in_executor(
+                    None,
+                    lambda: client.chat.completions.create(
+                        model=model_to_use,
+                        messages=messages,
+                        max_tokens=max_new_tokens,
+                        temperature=temperature
+                    )
+                )
+
+            # Extract the generated text from the chat completion response
+            return completion.choices[0].message.content
+
+        except Exception as e:
+            error_message = str(e).lower()
+            # Check if the error is related to task compatibility or API not supported
+            if ("not supported for task" in error_message or
+                "conversational" in error_message or
+                "chat" in error_message):
+                logger.info(f"chat_completion not supported, falling back to text_generation: {e}")
+
+                # Fall back to text_generation API
+                try:
+                    if llm_config and llm_config.get('provider') != 'huggingface':
+                        # For third-party providers
+                        response = await asyncio.get_event_loop().run_in_executor(
+                            None,
+                            lambda: client.text_generation(
+                                prompt,
+                                max_new_tokens=max_new_tokens,
+                                temperature=temperature
+                            )
+                        )
+                    else:
+                        # For HuggingFace models, specify the model
+                        response = await asyncio.get_event_loop().run_in_executor(
+                            None,
+                            lambda: client.text_generation(
+                                prompt,
+                                model=model_to_use,
+                                max_new_tokens=max_new_tokens,
+                                temperature=temperature
+                            )
+                        )
+                    return response
+
+                except Exception as text_error:
+                    logger.error(f"Both chat_completion and text_generation failed: {text_error}")
+                    raise text_error
+            else:
+                # Re-raise the original error if it's not a task compatibility issue
+                logger.error(f"chat_completion failed with non-compatibility error: {e}")
+                raise e
 
 
     def _add_event(self, video_id: str, event: Dict[str, Any]):
@@ -486,16 +533,35 @@ Describe the first scene/shot for: "{query}".
     title: \""""
 
         try:
-            response = await self._generate_text(
+            raw_yaml_str = await self._generate_text(
                 prompt,
                 llm_config=llm_config,
                 max_new_tokens=200,
                 temperature=temperature
             )
 
-            response_text = re.sub(r'^\s*\.\s*\n', '', f"title: \"{response.strip()}")
-            sanitized_yaml = sanitize_yaml_response(response_text)
+            raw_yaml_str = raw_yaml_str.strip()
+
+            #logger.info(f"search_video(): raw_yaml_str = {raw_yaml_str}")
+
+            if raw_yaml_str.startswith("```yaml"):
+                # Remove the "```yaml" at the beginning and closing ```
+                raw_yaml_str = raw_yaml_str[7:]  # Remove "```yaml" (7 characters)
+                if raw_yaml_str.endswith("```"):
+                    raw_yaml_str = raw_yaml_str[:-3]  # Remove closing ```
+                raw_yaml_str = raw_yaml_str.strip()
+            elif raw_yaml_str.startswith("```"):
+                # Remove the "```" at the beginning and closing ```
+                raw_yaml_str = raw_yaml_str[3:]  # Remove opening ```
+                if raw_yaml_str.endswith("```"):
+                    raw_yaml_str = raw_yaml_str[:-3]  # Remove closing ```
+                raw_yaml_str = raw_yaml_str.strip()
+            else:
+                raw_yaml_str = re.sub(r'^\s*\.\s*\n', '', f"title: \"{raw_yaml_str}")
+
+            sanitized_yaml = sanitize_yaml_response(raw_yaml_str)
+            #logger.info(f"search_video(): sanitized_yaml = {sanitized_yaml}")
+
             try:
                 result = yaml.safe_load(sanitized_yaml)
             except yaml.YAMLError as e:
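For reference, the core of the YAML-parsing fix above can be exercised in isolation. This is a minimal sketch, not the application code itself: the strip_markdown_fences helper name is hypothetical, and it uses plain yaml.safe_load instead of the project's sanitize_yaml_response:

import yaml

def strip_markdown_fences(raw: str) -> str:
    # Remove a leading ```yaml / ``` fence pair that chat models often wrap around YAML output.
    raw = raw.strip()
    if raw.startswith("```yaml"):
        raw = raw[7:]          # drop the opening ```yaml (7 characters)
    elif raw.startswith("```"):
        raw = raw[3:]          # drop a bare opening ```
    if raw.endswith("```"):
        raw = raw[:-3]         # drop the closing ```
    return raw.strip()

# Example: a chat-style response wrapped in a fenced block
response = """```yaml
title: "A walk on the beach"
description: "Waves at sunset"
```"""

parsed = yaml.safe_load(strip_markdown_fences(response))
print(parsed["title"])  # -> A walk on the beach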
assets/config/curated_models.yaml CHANGED
@@ -74,9 +74,9 @@ models:
     display_name: Qwen3 235B A22B
     num_of_parameters: 235B
 
-  - model_id: deepseek-ai/DeepSeek-V3-0324
-    display_name: DeepSeek V3
-    num_of_parameters: 685B
+  #- model_id: deepseek-ai/DeepSeek-V3-0324
+  #  display_name: DeepSeek V3
+  #  num_of_parameters: 685B
 
   - model_id: moonshotai/Kimi-K2-Instruct
     display_name: Kimi K2
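As a small illustration of how entries in this file are shaped, a curated models list with the structure shown above can be read with PyYAML; this loader is a sketch for illustration only, not the app's actual loading code:

import yaml

# Illustrative reader for a curated_models.yaml with a top-level "models:" list.
with open("assets/config/curated_models.yaml") as f:
    config = yaml.safe_load(f)

for model in config.get("models", []):
    # Commented-out entries (like DeepSeek V3 above) simply never appear here.
    print(model["model_id"], "-", model.get("display_name"), model.get("num_of_parameters"))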
build/web/assets/assets/config/curated_models.yaml CHANGED
@@ -74,9 +74,9 @@ models:
     display_name: Qwen3 235B A22B
     num_of_parameters: 235B
 
-  - model_id: deepseek-ai/DeepSeek-V3-0324
-    display_name: DeepSeek V3
-    num_of_parameters: 685B
+  #- model_id: deepseek-ai/DeepSeek-V3-0324
+  #  display_name: DeepSeek V3
+  #  num_of_parameters: 685B
 
   - model_id: moonshotai/Kimi-K2-Instruct
     display_name: Kimi K2
build/web/flutter_bootstrap.js CHANGED
@@ -38,6 +38,6 @@ _flutter.buildConfig = {"engineRevision":"1c9c20e7c3dd48c66f400a24d48ea806b4ab31
 
 _flutter.loader.load({
   serviceWorkerSettings: {
-    serviceWorkerVersion: "485624187"
+    serviceWorkerVersion: "3912302714"
   }
 });
build/web/flutter_service_worker.js CHANGED
@@ -3,7 +3,7 @@ const MANIFEST = 'flutter-app-manifest';
 const TEMP = 'flutter-temp-cache';
 const CACHE_NAME = 'flutter-app-cache';
 
-const RESOURCES = {"flutter_bootstrap.js": "67612d11664e1438c6f25ef6f2340c5f",
+const RESOURCES = {"flutter_bootstrap.js": "f833cb89d68c8ddba5bc70cec281205c",
 "version.json": "68350cac7987de2728345c72918dd067",
 "tikslop.png": "570e1db759046e2d224fef729983634e",
 "index.html": "3a7029b3672560e7938aab6fa4d30a46",
@@ -28,7 +28,7 @@ const RESOURCES = {"flutter_bootstrap.js": "67612d11664e1438c6f25ef6f2340c5f",
 "assets/assets/ads/smolagents.gif": "45338af5a4d440b707d02f364be8195c",
 "assets/assets/ads/README.md": "1959fb6b85a966348396f2f0f9c3f32a",
 "assets/assets/ads/lerobot.gif": "0f90b2fc4d15eefb5572363724d6d925",
-"assets/assets/config/curated_models.yaml": "32e4a868fcaf5617bccfb5a1a26f9690",
+"assets/assets/config/curated_models.yaml": "94e54843953b4f90c454cd8e5a3176fb",
 "assets/assets/config/README.md": "07a87720dd00dd1ca98c9d6884440e31",
 "assets/assets/config/custom.yaml": "52bd30aa4d8b980626a5eb02d0871c01",
 "assets/assets/config/default.yaml": "9ca1d05d06721c2b6f6382a1ba40af48",
build/web/index.html CHANGED
@@ -156,7 +156,7 @@
 </script>
 
 <!-- Add version parameter for cache busting -->
-<script src="flutter_bootstrap.js?v=1753274269" async></script>
+<script src="flutter_bootstrap.js?v=1753281547" async></script>
 
 <!-- Add cache busting script -->
 <script>
docs/for-bots/huggingface/chat-completion.md ADDED
@@ -0,0 +1,734 @@
Chat Completion
---------------

Generate a response given a list of messages in a conversational context, supporting both conversational Language Models (LLMs) and conversational Vision-Language Models (VLMs). This is a subtask of [`text-generation`](https://huggingface.co/docs/inference-providers/tasks/text-generation) and [`image-text-to-text`](https://huggingface.co/docs/inference-providers/tasks/image-text-to-text).

### Recommended models

#### Conversational Large Language Models (LLMs)

* [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
* [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models.
* [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
* [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft.
* [Qwen/Qwen2.5-7B-Instruct-1M](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M): Strong conversational model that supports very long instructions.
* [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code.
* [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model.

#### Conversational Vision-Language Models (VLMs)

* [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model.

Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending).

### API Playground

For Chat Completion models, we provide an interactive UI Playground for easier testing:

* Quickly iterate on your prompts from the UI.
* Set and override system, assistant and user messages.
* Browse and select models currently available on the Inference API.
* Compare the output of two models side-by-side.
* Adjust requests parameters from the UI.
* Easily switch between UI view and code snippets.

[![Playground](https://cdn-uploads.huggingface.co/production/uploads/5f17f0a0925b9863e28ad517/9_Tgf0Tv65srhBirZQMTp.png)](https://huggingface.co/playground)

Access the Inference UI Playground and start exploring: [https://huggingface.co/playground](https://huggingface.co/playground)

### Using the API

The API supports:

* Using the chat completion API compatible with the OpenAI SDK.
* Using grammars, constraints, and tools.
* Streaming the output.

#### Code snippet example for conversational LLMs

Python example using the `huggingface_hub` client (the original page also offers JavaScript and cURL variants, `requests` and `openai` clients, and a provider selector):

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

print(completion.choices[0].message)

#### Code snippet example for conversational VLMs

Python example using the `huggingface_hub` client (JavaScript, cURL, `requests` and `openai` variants are also available):

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="fireworks-ai",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image in one sentence."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    }
                }
            ]
        }
    ],
)

print(completion.choices[0].message)

### API specification

#### Request

Headers:

* **authorization** (string): Authentication header in the form `'Bearer: hf_****'` where `hf_****` is a personal user access token with "Inference Providers" permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained).

Payload:

* **frequency_penalty** (number): Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
* **logprobs** (boolean): Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.
* **max_tokens** (integer): The maximum number of tokens that can be generated in the chat completion.
* **messages*** (object[]): A list of messages comprising the conversation so far. Each message is either a content message ({role*, name, content*}, where content is a string or an array of {type: "text", text*} / {type: "image_url", image_url*: {url*}} parts) or a tool-call message ({role*, tool_calls*: [{id*, type*, function*: {name*, description, parameters*}}]}).
* **presence_penalty** (number): Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
* **response_format**: One of {type: "text"}, {type: "json_schema", json_schema*: {name*, description, schema, strict}}, or {type: "json_object"}. The schema is described as a JSON Schema object (learn how to build JSON schemas [here](https://json-schema.org/)); if strict is set to true, the model will always follow the exact schema defined in the `schema` field.
* **seed** (integer)
* **stop** (string[]): Up to 4 sequences where the API will stop generating further tokens.
* **stream** (boolean)
* **stream_options** (object): include_usage (boolean): if set, an additional chunk will be streamed before the data: [DONE] message; the usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
* **temperature** (number): What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
* **tool_choice**: One of "auto", "none", "required", or {function*: {name*}}.
* **tool_prompt** (string): A prompt to be appended before the tools.
* **tools** (object[]): A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. Each entry has type* (string) and function* ({name*, description, parameters*}).
* **top_logprobs** (integer): An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.
* **top_p** (number): An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.

#### Response

Output type depends on the `stream` input parameter. If `stream` is `false` (default), the response will be a JSON object with the following fields:

* **choices** (object[]): each choice has finish_reason (string), index (integer), logprobs (object: content[] of {token, logprob, top_logprobs[] of {token, logprob}}), and message, which is either {content (string), role (string), tool_call_id (string)} or {role (string), tool_calls (object[] of {id, type, function: {name, description, arguments}})}.
* **created** (integer)
* **id** (string)
* **model** (string)
* **system_fingerprint** (string)
* **usage** (object): completion_tokens (integer), prompt_tokens (integer), total_tokens (integer).

If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). Each streamed chunk is a JSON object with the following fields:

* **choices** (object[]): each choice has delta (either {content (string), role (string), tool_call_id (string)} or {role (string), tool_calls (object[] of {id, index, type, function: {name, arguments}})}), finish_reason (string), index (integer), and logprobs (object: content[] of {token, logprob, top_logprobs[] of {token, logprob}}).
* **created** (integer)
* **id** (string)
* **model** (string)
* **system_fingerprint** (string)
* **usage** (object): completion_tokens (integer), prompt_tokens (integer), total_tokens (integer).

[< > Update on GitHub](https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/tasks/chat-completion.md)
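The task page above notes that streaming is supported. As an illustrative sketch (not one of the page's own snippets), the huggingface_hub client can stream tokens by passing stream=True and iterating over the returned chunks; the model name is only an example:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# stream=True returns an iterator of chunks (Server-Sent Events under the hood)
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # example model
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    max_tokens=128,
    stream=True,
)

for chunk in stream:
    # Each chunk carries an incremental delta; content can be None on the final chunk.
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="")
print()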
docs/for-bots/huggingface/text-generation.md ADDED
@@ -0,0 +1,493 @@
Text Generation
---------------

Generate text based on a prompt.

If you are interested in a Chat Completion task, which generates a response based on a list of messages, check out the [`chat-completion`](./chat_completion) task.

For more details about the `text-generation` task, check out its [dedicated page](https://huggingface.co/tasks/text-generation)! You will find examples and related materials.

### Recommended models

* [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
* [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models.
* [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
* [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft.
* [Qwen/Qwen2.5-7B-Instruct-1M](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M): Strong conversational model that supports very long instructions.
* [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code.
* [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning based open large language model.

Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending).

### Using the API

Python example using the `huggingface_hub` client (JavaScript, cURL, `requests` and `openai` variants are also available on the original page). The generated snippet calls the chat-completion endpoint; its `messages` argument has been fixed to a proper message list:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="mistralai/Magistral-Small-2506",
    messages=[
        {
            "role": "user",
            "content": "Can you please let us know more details about your "
        }
    ],
)

print(completion.choices[0].message)

### API specification

#### Request

Headers:

* **authorization** (string): Authentication header in the form `'Bearer: hf_****'` where `hf_****` is a personal user access token with "Inference Providers" permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained).

Payload:

* **inputs*** (string)
* **parameters** (object):
  * **adapter_id** (string): Lora adapter id.
  * **best_of** (integer): Generate best_of sequences and return the one with the highest token logprobs.
  * **decoder_input_details** (boolean): Whether to return decoder input token logprobs and ids.
  * **details** (boolean): Whether to return generation details.
  * **do_sample** (boolean): Activate logits sampling.
  * **frequency_penalty** (number): The parameter for frequency penalty. 1.0 means no penalty. Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
  * **grammar**: One of {type: "json", value*: a string that represents a [JSON Schema](https://json-schema.org/)}, {type: "regex", value*: a regular expression string}, or {type: "json_schema", value*: {name (optional identifier for the schema), schema* (the actual JSON schema definition)}}.
  * **max_new_tokens** (integer): Maximum number of tokens to generate.
  * **repetition_penalty** (number): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
  * **return_full_text** (boolean): Whether to prepend the prompt to the generated text.
  * **seed** (integer): Random sampling seed.
  * **stop** (string[]): Stop generating tokens if a member of `stop` is generated.
  * **temperature** (number): The value used to modulate the logits distribution.
  * **top_k** (integer): The number of highest probability vocabulary tokens to keep for top-k-filtering.
  * **top_n_tokens** (integer): The number of highest probability vocabulary tokens to keep for top-n-filtering.
  * **top_p** (number): Top-p value for nucleus sampling.
  * **truncate** (integer): Truncate inputs tokens to the given size.
  * **typical_p** (number): Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
  * **watermark** (boolean): Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
* **stream** (boolean)

#### Response

Output type depends on the `stream` input parameter. If `stream` is `false` (default), the response will be a JSON object with the following fields:

* **details** (object):
  * **best_of_sequences** (object[]): each with finish_reason (enum: length, eos_token, stop_sequence), generated_text (string), generated_tokens (integer), prefill (object[] of {id, logprob, text}), seed (integer), tokens (object[] of {id, logprob, special, text}), top_tokens (array[] of {id, logprob, special, text}).
  * **finish_reason** (enum): Possible values: length, eos_token, stop_sequence.
  * **generated_tokens** (integer)
  * **prefill** (object[] of {id (integer), logprob (number), text (string)})
  * **seed** (integer)
  * **tokens** (object[] of {id (integer), logprob (number), special (boolean), text (string)})
  * **top_tokens** (array[] of {id (integer), logprob (number), special (boolean), text (string)})
* **generated_text** (string)

If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming). Each streamed chunk is a JSON object with the following fields:

* **details** (object): finish_reason (enum: length, eos_token, stop_sequence), generated_tokens (integer), input_length (integer), seed (integer).
* **generated_text** (string)
* **index** (integer)
* **token** (object): id (integer), logprob (number), special (boolean), text (string).
* **top_tokens** (object[]): id (integer), logprob (number), special (boolean), text (string).

[< > Update on GitHub](https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/tasks/text-generation.md)
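To round out the text-generation reference above, here is an illustrative sketch that calls the raw text-generation task through huggingface_hub (the page's own snippet goes through the chat-completion endpoint instead); the model name and parameter values are examples only:

import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# Plain prompt-in, continuation-out call; the keyword arguments mirror the payload above.
output = client.text_generation(
    "Can you please let us know more details about your ",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",  # example model
    max_new_tokens=64,
    temperature=0.7,
    return_full_text=False,
)

print(output)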