Spaces:

janbanot
/

bielik_goblin_zero

Running on Zero

janbanot committed on Feb 19

Commit

16d3aa3

1 Parent(s): daeec09

fix: refactor

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,40 +8,38 @@ from transformers import (
     TextStreamer,
 )
-@spaces.GPU
-def test():
-    if torch.cuda.is_available():
-        device = torch.device("cuda")
-        print("Using GPU:", torch.cuda.get_device_name(0))
-    else:
-        device = torch.device("cpu")
-        print("CUDA is not available. Using CPU.")
-    device = "cuda"
-    model_name = "speakleash/Bielik-11B-v2.3-Instruct"
     max_tokens = 5000
     temperature = 0
     top_k = 0
     top_p = 0
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
-    )
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        quantization_config=quantization_config,
-        low_cpu_mem_usage=True,
-    )
     model.generation_config.pad_token_id = tokenizer.pad_token_id
     prompt = "Kim jesteś?"

     TextStreamer,
 )
+MODEL_ID = "speakleash/Bielik-11B-v2.3-Instruct"
+MODEL_NAME = MODEL_ID.split("/")[-1]
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print("Using GPU:", torch.cuda.get_device_name(0))
+else:
+    device = torch.device("cpu")
+    print("CUDA is not available. Using CPU.")
+quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
+    )
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    quantization_config=quantization_config,
+    low_cpu_mem_usage=True,
+)
+@spaces.GPU
+def test():
     max_tokens = 5000
     temperature = 0
     top_k = 0
     top_p = 0
     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     model.generation_config.pad_token_id = tokenizer.pad_token_id
     prompt = "Kim jesteś?"