Spaces:
Runtime error
Runtime error
File size: 1,124 Bytes
9dfb71c eedee66 9dfb71c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import transformers
import torch

# Model to load: an 8B Llama-3.1 variant tuned for ultra-long (4M token) contexts.
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Build the text-generation pipeline ONCE. (The original script constructed a
# second, CPU-only pipeline after generation, which reloaded the entire model,
# doubled memory use, and was never used — removed here.)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},  # bf16 halves memory vs fp32
    device_map="auto",  # place weights on available GPU(s), fall back to CPU
)

# Conversation to run: a system persona plus one user turn.
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Pass the FULL message list, not just the user text: the pipeline then applies
# the model's chat template, so the system prompt (the pirate persona) actually
# reaches the model. The original passed only messages[1]["content"] and
# silently dropped the system turn.
outputs = pipeline(
    messages,
    max_new_tokens=256,  # cap the length of the generated reply
)

# With chat-format input, outputs[0]["generated_text"] is the conversation
# list (including the prompt turns); the last entry is the assistant's reply.
print(outputs[0]["generated_text"][-1]["content"])
|