Pratham Bhat committed
Commit e165882 · 1 Parent(s): 09d5f8b

Added trace logs

Files changed (1)
main.py +13 -0
main.py CHANGED
@@ -12,6 +12,7 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
+import sys
 import uvicorn
 import torch
 # torch.mps.empty_cache()
@@ -67,20 +68,32 @@ def generate(item: Item):
     device = "cuda" if torch.cuda.is_available() else "cpu"

     model_path = "ibm-granite/granite-34b-code-instruct-8k"
+
+    print("Loading tokenizer for model: " + model_path, file=sys.stderr)
     tokenizer = AutoTokenizer.from_pretrained(model_path, cache_dir="/code/huggingface/transformers")
     # drop device_map if running on CPU
+
+    print("Loading Model for causal LM for model: " + model_path, file=sys.stderr)
     model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
     model.eval()
     # change input text as desired
     chat = format_prompt(item.system_prompt, item.prompt, item.history)
     chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+
+    print("Tokenizing text", file=sys.stderr)
     # tokenize the text
     input_tokens = tokenizer(chat, return_tensors="pt")
+
+    print("Transferring tokens to device: " + device, file=sys.stderr)
     # transfer tokenized inputs to the device
     for i in input_tokens:
         input_tokens[i] = input_tokens[i].to(device)
+
+    print("Generating output tokens", file=sys.stderr)
     # generate output tokens
     output = model.generate(**input_tokens, max_new_tokens=900)
+
+    print("Decoding output tokens", file=sys.stderr)
     output_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
     return output_text

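
The trace points added in this commit are plain print(..., file=sys.stderr) calls. As a minimal sketch (not part of the commit), the same messages could instead go through Python's standard logging module, which also writes to stderr by default and adds timestamps and levels; the logger name, format string, and placeholder device value below are illustrative assumptions.

import logging

# Sketch only: mirrors the commit's trace messages with the stdlib logging module.
# Logger name, format, and the placeholder device value are assumptions.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s %(levelname)s %(message)s")  # stderr by default
log = logging.getLogger("main")

model_path = "ibm-granite/granite-34b-code-instruct-8k"
device = "cpu"  # placeholder; the real code selects "cuda" when available

log.info("Loading tokenizer for model: %s", model_path)
log.info("Loading model for causal LM: %s", model_path)
log.info("Tokenizing text")
log.info("Transferring tokens to device: %s", device)
log.info("Generating output tokens")
log.info("Decoding output tokens")

One practical note: writing to stderr (whether via print or logging) keeps the trace output unbuffered relative to stdout in most container setups, so the messages show up promptly in the service logs.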