gpt-99 committed on
Commit
4dae401
·
verified ·
1 Parent(s): a090ab6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -1,17 +1,23 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from einops import einsum
5
  from tqdm import tqdm
6
 
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
  model_name = 'microsoft/Phi-3-mini-4k-instruct'
9
 
 
 
 
 
 
10
  model = AutoModelForCausalLM.from_pretrained(
11
  model_name,
12
  device_map=device,
13
  torch_dtype="auto",
14
  trust_remote_code=True,
 
15
  )
16
 
17
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
  from einops import einsum
5
  from tqdm import tqdm
6
 
7
  device = "cuda" if torch.cuda.is_available() else "cpu"
8
  model_name = 'microsoft/Phi-3-mini-4k-instruct'
9
 
10
+ quantization_config = BitsAndBytesConfig(
11
+ load_in_4bit=True,
12
+ bnb_4bit_compute_dtype=torch.float16
13
+ )
14
+
15
  model = AutoModelForCausalLM.from_pretrained(
16
  model_name,
17
  device_map=device,
18
  torch_dtype="auto",
19
  trust_remote_code=True,
20
+ quantization_config=quantization_config,
21
  )
22
 
23
  tokenizer = AutoTokenizer.from_pretrained(model_name)