janbanot committed
Commit 16d3aa3 · 1 Parent(s): daeec09

fix: refactor

Files changed (1): app.py (+22 -24)
app.py CHANGED
@@ -8,40 +8,38 @@ from transformers import (
     TextStreamer,
 )

+MODEL_ID = "speakleash/Bielik-11B-v2.3-Instruct"
+MODEL_NAME = MODEL_ID.split("/")[-1]

-@spaces.GPU
-def test():
-    if torch.cuda.is_available():
-        device = torch.device("cuda")
-        print("Using GPU:", torch.cuda.get_device_name(0))
-    else:
-        device = torch.device("cpu")
-        print("CUDA is not available. Using CPU.")
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print("Using GPU:", torch.cuda.get_device_name(0))
+else:
+    device = torch.device("cpu")
+    print("CUDA is not available. Using CPU.")
+
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
+)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    quantization_config=quantization_config,
+    low_cpu_mem_usage=True,
+)

-    device = "cuda"
-    model_name = "speakleash/Bielik-11B-v2.3-Instruct"

+@spaces.GPU
+def test():
     max_tokens = 5000
     temperature = 0
     top_k = 0
     top_p = 0

-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    tokenizer.pad_token = tokenizer.eos_token
-
     streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16
-    )
-
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.bfloat16,
-        quantization_config=quantization_config,
-        low_cpu_mem_usage=True,
-    )
-
     model.generation_config.pad_token_id = tokenizer.pad_token_id

     prompt = "Kim jesteś?"
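In short, the refactor hoists the device check, the 4-bit BitsAndBytesConfig, and the tokenizer/model loading out of test() to module level, so the heavy one-time setup runs at import and the @spaces.GPU decorator only wraps the work that actually needs the GPU. The hunk stops at the prompt ("Kim jesteś?" is Polish for "Who are you?"), so the generation call itself is not visible in this diff. A minimal sketch of how the body of test() might continue, reusing the module-level tokenizer, model, and streamer together with the locals above; the messages and input_ids names are illustrative, not taken from the commit:

    # Sketch only, not part of this commit: format the prompt with the
    # model's chat template and stream the completion to stdout.
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        do_sample=False,    # temperature/top_k/top_p of 0 amount to greedy decoding
        streamer=streamer,  # TextStreamer prints tokens as they are generated
    )

Since temperature, top_k, and top_p are all set to 0 in the file, the sketch passes do_sample=False rather than forwarding those values; transformers rejects a temperature of 0 when sampling is enabled.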