Commit 422262e (parent 8e4eac4)

fix issue with YAML parsing
Files changed:

- api_core.py (+97, -31)
- assets/config/curated_models.yaml (+3, -3)
- build/web/assets/assets/config/curated_models.yaml (+3, -3)
- build/web/flutter_bootstrap.js (+1, -1)
- build/web/flutter_service_worker.js (+2, -2)
- build/web/index.html (+1, -1)
- docs/for-bots/huggingface/chat-completion.md (+734, new file)
- docs/for-bots/huggingface/text-generation.md (+493, new file)
api_core.py (CHANGED)

````diff
@@ -319,6 +319,7 @@ class VideoGenerationAPI:
                              model_override: Optional[str] = None) -> str:
         """
         Helper method to generate text using the appropriate client and configuration.
+        Tries chat_completion first (modern standard), falls back to text_generation.
 
         Args:
             prompt: The prompt to generate text from
@@ -333,37 +334,83 @@
         # Get the appropriate client
         client = self._get_inference_client(llm_config)
 
-            lambda: client.text_generation(
-                prompt,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature
-            )
-        )
+        # Determine the model to use
+        if model_override:
+            model_to_use = model_override
+        elif llm_config:
+            model_to_use = llm_config.get('model', TEXT_MODEL)
         else:
+            model_to_use = TEXT_MODEL
+
+        # Try chat_completion first (modern standard, more widely supported)
+        try:
+            messages = [{"role": "user", "content": prompt}]
+
+            if llm_config and llm_config.get('provider') != 'huggingface':
+                # For third-party providers
+                completion = await asyncio.get_event_loop().run_in_executor(
+                    None,
+                    lambda: client.chat.completions.create(
+                        messages=messages,
+                        max_tokens=max_new_tokens,
+                        temperature=temperature
+                    )
+                )
             else:
+                # For HuggingFace models, specify the model
+                completion = await asyncio.get_event_loop().run_in_executor(
+                    None,
+                    lambda: client.chat.completions.create(
+                        model=model_to_use,
+                        messages=messages,
+                        max_tokens=max_new_tokens,
+                        temperature=temperature
+                    )
                 )
+
+            # Extract the generated text from the chat completion response
+            return completion.choices[0].message.content
+
+        except Exception as e:
+            error_message = str(e).lower()
+            # Check if the error is related to task compatibility or API not supported
+            if ("not supported for task" in error_message or
+                "conversational" in error_message or
+                "chat" in error_message):
+                logger.info(f"chat_completion not supported, falling back to text_generation: {e}")
+
+                # Fall back to text_generation API
+                try:
+                    if llm_config and llm_config.get('provider') != 'huggingface':
+                        # For third-party providers
+                        response = await asyncio.get_event_loop().run_in_executor(
+                            None,
+                            lambda: client.text_generation(
+                                prompt,
+                                max_new_tokens=max_new_tokens,
+                                temperature=temperature
+                            )
+                        )
+                    else:
+                        # For HuggingFace models, specify the model
+                        response = await asyncio.get_event_loop().run_in_executor(
+                            None,
+                            lambda: client.text_generation(
+                                prompt,
+                                model=model_to_use,
+                                max_new_tokens=max_new_tokens,
+                                temperature=temperature
+                            )
+                        )
+                    return response
+
+                except Exception as text_error:
+                    logger.error(f"Both chat_completion and text_generation failed: {text_error}")
+                    raise text_error
+            else:
+                # Re-raise the original error if it's not a task compatibility issue
+                logger.error(f"chat_completion failed with non-compatibility error: {e}")
+                raise e
 
 
     def _add_event(self, video_id: str, event: Dict[str, Any]):
@@ -486,16 +533,35 @@ Describe the first scene/shot for: "{query}".
 title: \""""
 
         try:
+            raw_yaml_str = await self._generate_text(
                 prompt,
                 llm_config=llm_config,
                 max_new_tokens=200,
                 temperature=temperature
             )
 
+            raw_yaml_str = raw_yaml_str.strip()
+
+            #logger.info(f"search_video(): raw_yaml_str = {raw_yaml_str}")
+
+            if raw_yaml_str.startswith("```yaml"):
+                # Remove the "```yaml" at the beginning and closing ```
+                raw_yaml_str = raw_yaml_str[7:]  # Remove "```yaml" (7 characters)
+                if raw_yaml_str.endswith("```"):
+                    raw_yaml_str = raw_yaml_str[:-3]  # Remove closing ```
+                raw_yaml_str = raw_yaml_str.strip()
+            elif raw_yaml_str.startswith("```"):
+                # Remove the "```" at the beginning and closing ```
+                raw_yaml_str = raw_yaml_str[3:]  # Remove opening ```
+                if raw_yaml_str.endswith("```"):
+                    raw_yaml_str = raw_yaml_str[:-3]  # Remove closing ```
+                raw_yaml_str = raw_yaml_str.strip()
+            else:
+                raw_yaml_str = re.sub(r'^\s*\.\s*\n', '', f"title: \"{raw_yaml_str}")
 
+            sanitized_yaml = sanitize_yaml_response(raw_yaml_str)
+            #logger.info(f"search_video(): sanitized_yaml = {sanitized_yaml}")
+
             try:
                 result = yaml.safe_load(sanitized_yaml)
             except yaml.YAMLError as e:
````
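The fence-stripping above is the heart of the YAML parsing fix. For a quick sanity check of the behaviour, a minimal standalone sketch (the helper name and the sample response string are hypothetical, written for illustration only):

````python
import yaml

def strip_code_fences(raw: str) -> str:
    # Mirrors the cleanup added in the commit: drop a leading ```yaml or ```
    # fence and the matching closing fence before parsing.
    raw = raw.strip()
    if raw.startswith("```yaml"):
        raw = raw[7:]
    elif raw.startswith("```"):
        raw = raw[3:]
    if raw.endswith("```"):
        raw = raw[:-3]
    return raw.strip()

# Chat models often wrap structured output in a Markdown fence, which is
# exactly what made yaml.safe_load() fail before this commit.
sample = '```yaml\ntitle: "Example"\ndescription: "A test clip"\n```'
print(yaml.safe_load(strip_code_fences(sample)))
# {'title': 'Example', 'description': 'A test clip'}
````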
assets/config/curated_models.yaml (CHANGED)

```diff
@@ -74,9 +74,9 @@ models:
   display_name: Qwen3 235B A22B
   num_of_parameters: 235B
 
-- model_id: deepseek-ai/DeepSeek-V3-0324
-  display_name: DeepSeek V3
-  num_of_parameters: 685B
+#- model_id: deepseek-ai/DeepSeek-V3-0324
+#  display_name: DeepSeek V3
+#  num_of_parameters: 685B
 
 - model_id: moonshotai/Kimi-K2-Instruct
   display_name: Kimi K2
```
build/web/assets/assets/config/curated_models.yaml (CHANGED)

```diff
@@ -74,9 +74,9 @@ models:
   display_name: Qwen3 235B A22B
   num_of_parameters: 235B
 
-- model_id: deepseek-ai/DeepSeek-V3-0324
-  display_name: DeepSeek V3
-  num_of_parameters: 685B
+#- model_id: deepseek-ai/DeepSeek-V3-0324
+#  display_name: DeepSeek V3
+#  num_of_parameters: 685B
 
 - model_id: moonshotai/Kimi-K2-Instruct
   display_name: Kimi K2
```
build/web/flutter_bootstrap.js (CHANGED)

```diff
@@ -38,6 +38,6 @@ _flutter.buildConfig = {"engineRevision":"1c9c20e7c3dd48c66f400a24d48ea806b4ab31
 
 _flutter.loader.load({
   serviceWorkerSettings: {
-    serviceWorkerVersion: "
+    serviceWorkerVersion: "3912302714"
   }
 });
```
build/web/flutter_service_worker.js (CHANGED)

```diff
@@ -3,7 +3,7 @@ const MANIFEST = 'flutter-app-manifest';
 const TEMP = 'flutter-temp-cache';
 const CACHE_NAME = 'flutter-app-cache';
 
-const RESOURCES = {"flutter_bootstrap.js": "
+const RESOURCES = {"flutter_bootstrap.js": "f833cb89d68c8ddba5bc70cec281205c",
 "version.json": "68350cac7987de2728345c72918dd067",
 "tikslop.png": "570e1db759046e2d224fef729983634e",
 "index.html": "3a7029b3672560e7938aab6fa4d30a46",
@@ -28,7 +28,7 @@ const RESOURCES = {"flutter_bootstrap.js": "67612d11664e1438c6f25ef6f2340c5f",
 "assets/assets/ads/smolagents.gif": "45338af5a4d440b707d02f364be8195c",
 "assets/assets/ads/README.md": "1959fb6b85a966348396f2f0f9c3f32a",
 "assets/assets/ads/lerobot.gif": "0f90b2fc4d15eefb5572363724d6d925",
-"assets/assets/config/curated_models.yaml": "
+"assets/assets/config/curated_models.yaml": "94e54843953b4f90c454cd8e5a3176fb",
 "assets/assets/config/README.md": "07a87720dd00dd1ca98c9d6884440e31",
 "assets/assets/config/custom.yaml": "52bd30aa4d8b980626a5eb02d0871c01",
 "assets/assets/config/default.yaml": "9ca1d05d06721c2b6f6382a1ba40af48",
```
build/web/index.html (CHANGED)

```diff
@@ -156,7 +156,7 @@
   </script>
 
   <!-- Add version parameter for cache busting -->
-  <script src="flutter_bootstrap.js?v=
+  <script src="flutter_bootstrap.js?v=1753281547" async></script>
 
   <!-- Add cache busting script -->
   <script>
```
docs/for-bots/huggingface/chat-completion.md (ADDED)

Chat Completion
---------------

Generate a response given a list of messages in a conversational context, supporting both conversational Language Models (LLMs) and conversational Vision-Language Models (VLMs). This is a subtask of [`text-generation`](https://huggingface.co/docs/inference-providers/tasks/text-generation) and [`image-text-to-text`](https://huggingface.co/docs/inference-providers/tasks/image-text-to-text).

### Recommended models

#### Conversational Large Language Models (LLMs)

* [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
* [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models.
* [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
* [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft.
* [Qwen/Qwen2.5-7B-Instruct-1M](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M): Strong conversational model that supports very long instructions.
* [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code.
* [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning-based open large language model.

#### Conversational Vision-Language Models (VLMs)

* [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct): Strong image-text-to-text model.

Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=image-text-to-text&sort=trending).

### API Playground

For Chat Completion models, we provide an interactive UI Playground for easier testing:

* Quickly iterate on your prompts from the UI.
* Set and override system, assistant and user messages.
* Browse and select models currently available on the Inference API.
* Compare the output of two models side-by-side.
* Adjust request parameters from the UI.
* Easily switch between UI view and code snippets.

Access the Inference UI Playground and start exploring: [https://huggingface.co/playground](https://huggingface.co/playground)

### Using the API

The API supports:

* Using the chat completion API compatible with the OpenAI SDK.
* Using grammars, constraints, and tools.
* Streaming the output (see the streaming sketch after the first snippet below).

#### Code snippet example for conversational LLMs

Python, `huggingface_hub` client, Featherless AI provider:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[
        {
            "role": "user",
            "content": "What is the capital of France?"
        }
    ],
)

print(completion.choices[0].message)
```
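The feature list above mentions streaming the output; a minimal sketch of what that can look like with the same client, assuming the `stream=True` flag on `chat.completions.create` that recent `huggingface_hub` releases expose (it yields OpenAI-style chunks with incremental deltas):

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# With stream=True the call returns an iterator of chunks instead of a
# single completion; each chunk carries an incremental piece of the answer.
stream = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    stream=True,
)
for chunk in stream:
    # delta.content can be None on boundary chunks, hence the fallback.
    print(chunk.choices[0].delta.content or "", end="")
```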
#### Code snippet example for conversational VLMs

Python, `huggingface_hub` client, Fireworks AI provider:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="fireworks-ai",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image in one sentence."
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                    }
                }
            ]
        }
    ],
)

print(completion.choices[0].message)
```
### API specification

#### Request

Headers:

* **authorization** (_string_): Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with "Inference Providers" permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained).

Payload:

* **frequency_penalty** (_number_): Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
* **logprobs** (_boolean_): Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.
* **max_tokens** (_integer_): The maximum number of tokens that can be generated in the chat completion.
* **messages*** (_object[]_): A list of messages comprising the conversation so far. Each message is an object with a **role*** (_string_), an optional **name** (_string_), and one of the following:
  * **content***: either a _string_, or an _object[]_ of content parts, each part being either {**type***: `text`, **text***: _string_} or {**type***: `image_url`, **image_url***: {**url***: _string_}}.
  * **tool_calls*** (_object[]_): each with **id*** (_string_), **type*** (_string_), and **function*** (_object_ with **name*** _string_, **description** _string_, **parameters*** _unknown_).
* **presence_penalty** (_number_): Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
* **response_format**: one of the following:
  * (#1) _object_ with **type*** (_enum_, possible values: `text`).
  * (#2) _object_ with **type*** (_enum_, possible values: `json_schema`) and **json_schema*** (_object_ with **name*** (_string_): the name of the response format; **description** (_string_): a description of what the response format is for, used by the model to determine how to respond in the format; **schema** (_object_): the schema for the response format, described as a JSON Schema object, see [json-schema.org](https://json-schema.org/); **strict** (_boolean_): whether to enable strict schema adherence when generating the output; if set to true, the model will always follow the exact schema defined in the `schema` field).
  * (#3) _object_ with **type*** (_enum_, possible values: `json_object`).
* **seed** (_integer_)
* **stop** (_string[]_): Up to 4 sequences where the API will stop generating further tokens.
* **stream** (_boolean_)
* **stream_options** (_object_): **include_usage** (_boolean_): If set, an additional chunk will be streamed before the `data: [DONE]` message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
* **temperature** (_number_): What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
* **tool_choice**: one of the enum values `auto`, `none`, or `required`, or an _object_ with **function*** (_object_ with **name*** _string_).
* **tool_prompt** (_string_): A prompt to be appended before the tools.
* **tools** (_object[]_): A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. Each tool has a **type*** (_string_) and a **function*** (_object_ with **name*** _string_, **description** _string_, **parameters*** _unknown_).
* **top_logprobs** (_integer_): An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to true if this parameter is used.
* **top_p** (_number_): An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
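To make the `response_format` payload above concrete, here is a sketch of a schema-constrained request. The schema and its name are invented for the example, provider support for `json_schema` varies, and it assumes the client forwards the documented payload verbatim:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# Ask the model to answer as JSON that validates against a small schema.
completion = client.chat.completions.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    messages=[{"role": "user", "content": "Name a city and its country."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "city_answer",  # hypothetical schema name
            "schema": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                    "country": {"type": "string"},
                },
                "required": ["city", "country"],
            },
            "strict": True,
        },
    },
)
print(completion.choices[0].message.content)  # a JSON string matching the schema
```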
#### Response

Output type depends on the `stream` input parameter. If `stream` is `false` (default), the response will be a JSON object with the following fields:

Body:

* **choices** (_object[]_): each with:
  * **finish_reason** (_string_)
  * **index** (_integer_)
  * **logprobs** (_object_): **content** (_object[]_ of {**logprob** _number_, **token** _string_, **top_logprobs** _object[]_ of {**logprob** _number_, **token** _string_}}).
  * **message**: one of the following:
    * (#1) _object_ with **content** (_string_), **role** (_string_), **tool_call_id** (_string_).
    * (#2) _object_ with **role** (_string_) and **tool_calls** (_object[]_ of {**function** {**arguments** _string_, **description** _string_, **name** _string_}, **id** _string_, **type** _string_}).
* **created** (_integer_)
* **id** (_string_)
* **model** (_string_)
* **system_fingerprint** (_string_)
* **usage** (_object_): **completion_tokens** (_integer_), **prompt_tokens** (_integer_), **total_tokens** (_integer_).

If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming).

Body:

* **choices** (_object[]_): each with:
  * **delta**: one of the following:
    * (#1) _object_ with **content** (_string_), **role** (_string_), **tool_call_id** (_string_).
    * (#2) _object_ with **role** (_string_) and **tool_calls** (_object[]_ of {**function** {**arguments** _string_, **name** _string_}, **id** _string_, **index** _integer_, **type** _string_}).
  * **finish_reason** (_string_)
  * **index** (_integer_)
  * **logprobs** (_object_): **content** (_object[]_ of {**logprob** _number_, **token** _string_, **top_logprobs** _object[]_ of {**logprob** _number_, **token** _string_}}).
* **created** (_integer_)
* **id** (_string_)
* **model** (_string_)
* **system_fingerprint** (_string_)
* **usage** (_object_): **completion_tokens** (_integer_), **prompt_tokens** (_integer_), **total_tokens** (_integer_).

[< > Update on GitHub](https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/tasks/chat-completion.md)
docs/for-bots/huggingface/text-generation.md (ADDED)

Text Generation
---------------

Generate text based on a prompt.

If you are interested in a Chat Completion task, which generates a response based on a list of messages, check out the [`chat-completion`](./chat_completion) task.

For more details about the `text-generation` task, check out its [dedicated page](https://huggingface.co/tasks/text-generation)! You will find examples and related materials.

### Recommended models

* [google/gemma-2-2b-it](https://huggingface.co/google/gemma-2-2b-it): A text-generation model trained to follow instructions.
* [deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B): Smaller variant of one of the most powerful models.
* [meta-llama/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct): Very powerful text generation model trained to follow instructions.
* [microsoft/phi-4](https://huggingface.co/microsoft/phi-4): Powerful text generation model by Microsoft.
* [Qwen/Qwen2.5-7B-Instruct-1M](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct-1M): Strong conversational model that supports very long instructions.
* [Qwen/Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct): Text generation model used to write code.
* [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1): Powerful reasoning-based open large language model.

Explore all available models and find the one that suits you best [here](https://huggingface.co/models?inference=warm&pipeline_tag=text-generation&sort=trending).

### Using the API

Python, `huggingface_hub` client, Featherless AI provider:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(
    provider="featherless-ai",
    api_key=os.environ["HF_TOKEN"],
)

completion = client.chat.completions.create(
    model="mistralai/Magistral-Small-2506",
    messages=[
        {
            "role": "user",
            "content": "Can you please let us know more details about your ",
        }
    ],
)

print(completion.choices[0].message)
```
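The snippet above goes through the OpenAI-compatible chat endpoint; the task's lower-level `text_generation` method can also be called with a raw prompt. A minimal sketch, with the model choice and parameter values purely illustrative:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# Raw prompt in, raw continuation out: no chat template is applied.
output = client.text_generation(
    "Can you please let us know more details about your ",
    model="mistralai/Magistral-Small-2506",
    max_new_tokens=100,
    temperature=0.7,
)
print(output)
```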
### API specification

#### Request

Headers:

* **authorization** (_string_): Authentication header in the form `'Bearer: hf_****'` when `hf_****` is a personal user access token with "Inference Providers" permission. You can generate one from [your settings page](https://huggingface.co/settings/tokens/new?ownUserPermissions=inference.serverless.write&tokenType=fineGrained).

Payload:

* **inputs*** (_string_)
* **parameters** (_object_):
  * **adapter_id** (_string_): Lora adapter id.
  * **best_of** (_integer_): Generate `best_of` sequences and return the one with the highest token logprobs.
  * **decoder_input_details** (_boolean_): Whether to return decoder input token logprobs and ids.
  * **details** (_boolean_): Whether to return generation details.
  * **do_sample** (_boolean_): Activate logits sampling.
  * **frequency_penalty** (_number_): The parameter for frequency penalty. 1.0 means no penalty. Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
  * **grammar**: one of the following (see the sketch after this list):
    * (#1) _object_ with **type*** (_enum_, possible values: `json`) and **value***: a string that represents a [JSON Schema](https://json-schema.org/). JSON Schema is a declarative language that allows you to annotate JSON documents with types and descriptions.
    * (#2) _object_ with **type*** (_enum_, possible values: `regex`) and **value*** (_string_).
    * (#3) _object_ with **type*** (_enum_, possible values: `json_schema`) and **value*** (_object_ with **name** (_string_): optional name identifier for the schema, and **schema***: the actual JSON schema definition).
  * **max_new_tokens** (_integer_): Maximum number of tokens to generate.
  * **repetition_penalty** (_number_): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
  * **return_full_text** (_boolean_): Whether to prepend the prompt to the generated text.
  * **seed** (_integer_): Random sampling seed.
  * **stop** (_string[]_): Stop generating tokens if a member of `stop` is generated.
  * **temperature** (_number_): The value used to modulate the logits distribution.
  * **top_k** (_integer_): The number of highest probability vocabulary tokens to keep for top-k-filtering.
  * **top_n_tokens** (_integer_): The number of highest probability vocabulary tokens to keep for top-n-filtering.
  * **top_p** (_number_): Top-p value for nucleus sampling.
  * **truncate** (_integer_): Truncate input tokens to the given size.
  * **typical_p** (_number_): Typical Decoding mass. See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.
  * **watermark** (_boolean_): Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226).
* **stream** (_boolean_)
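To make the `grammar` parameter concrete, here is a sketch of regex-constrained generation. It assumes a TGI-backed model with grammar support and that the client accepts the raw grammar payload; the prompt and pattern are invented for the example:

```python
import os
from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.environ["HF_TOKEN"])

# Constrain the continuation to an ISO-date-shaped string.
output = client.text_generation(
    "The French Revolution started on: ",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    max_new_tokens=20,
    grammar={"type": "regex", "value": r"\d{4}-\d{2}-\d{2}"},
)
print(output)  # e.g. "1789-07-14"
```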
#### Response

Output type depends on the `stream` input parameter. If `stream` is `false` (default), the response will be a JSON object with the following fields:

Body:

* **details** (_object_):
  * **best_of_sequences** (_object[]_): each with:
    * **finish_reason** (_enum_, possible values: `length`, `eos_token`, `stop_sequence`)
    * **generated_text** (_string_)
    * **generated_tokens** (_integer_)
    * **prefill** (_object[]_ of {**id** _integer_, **logprob** _number_, **text** _string_})
    * **seed** (_integer_)
    * **tokens** (_object[]_ of {**id** _integer_, **logprob** _number_, **special** _boolean_, **text** _string_})
    * **top_tokens** (_array[]_ of {**id** _integer_, **logprob** _number_, **special** _boolean_, **text** _string_})
  * **finish_reason** (_enum_, possible values: `length`, `eos_token`, `stop_sequence`)
  * **generated_tokens** (_integer_)
  * **prefill** (_object[]_ of {**id** _integer_, **logprob** _number_, **text** _string_})
  * **seed** (_integer_)
  * **tokens** (_object[]_ of {**id** _integer_, **logprob** _number_, **special** _boolean_, **text** _string_})
  * **top_tokens** (_array[]_ of {**id** _integer_, **logprob** _number_, **special** _boolean_, **text** _string_})
* **generated_text** (_string_)

If `stream` is `true`, generated tokens are returned as a stream, using Server-Sent Events (SSE). For more information about streaming, check out [this guide](https://huggingface.co/docs/text-generation-inference/conceptual/streaming).

Body:

* **details** (_object_): **finish_reason** (_enum_, possible values: `length`, `eos_token`, `stop_sequence`), **generated_tokens** (_integer_), **input_length** (_integer_), **seed** (_integer_).
* **generated_text** (_string_)
* **index** (_integer_)
* **token** (_object_): **id** (_integer_), **logprob** (_number_), **special** (_boolean_), **text** (_string_).
* **top_tokens** (_object[]_): **id** (_integer_), **logprob** (_number_), **special** (_boolean_), **text** (_string_).

[< > Update on GitHub](https://github.com/huggingface/hub-docs/blob/main/docs/inference-providers/tasks/text-generation.md)