import transformers
import torch

# Specify the model you want to use
model_id = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# Set up the text-generation pipeline
pipeline = transformers.pipeline(
    "text-generation",  # Task: causal text generation
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},  # Load weights in bfloat16 to reduce memory use
    device_map="auto",  # Place the model on available hardware (GPU if present, else CPU)
)

# Define the conversation/messages you want the model to handle
messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"}
]

# Use the pipeline to generate a response.
# Chat models expect the full message list (including the system prompt),
# so pass `messages` rather than just the user string.
outputs = pipeline(
    messages,  # The pipeline applies the model's chat template automatically
    max_new_tokens=256,  # Limit the number of tokens generated
)
# To force CPU-only execution instead, build the pipeline with an explicit
# device and omit device_map:
# pipeline = transformers.pipeline(
#     "text-generation",
#     model=model_id,
#     model_kwargs={"torch_dtype": torch.bfloat16},
#     device=-1,  # -1 selects the CPU
# )


# For chat input, `generated_text` holds the whole conversation;
# the last entry is the assistant's newly generated reply.
print(outputs[0]["generated_text"][-1]["content"])
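

# --- Alternative: the same generation without the pipeline helper ---
# A minimal sketch of the equivalent lower-level flow, assuming the
# checkpoint ships a chat template (true for Llama-3.1-style instruct
# models). Variable names below are illustrative, not part of any API.

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Render the chat messages into the model's prompt format and tokenize
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the assistant-turn header
    return_tensors="pt",
).to(model.device)

# Generate, then decode only the newly produced tokens
output_ids = model.generate(input_ids, max_new_tokens=256)
reply = tokenizer.decode(
    output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
)
print(reply)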