nikravan committed on
Commit
ada1a42
·
verified ·
1 Parent(s): 655a147

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -4,7 +4,12 @@ import spaces
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
5
  import os
6
  from threading import Thread
 
7
 
 
 
 
 
8
  MODEL_LIST = ["THUDM/GLM-4-Z1-32B-0414"]
9
 
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
@@ -46,7 +51,8 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
46
  low_cpu_mem_usage=True,
47
  trust_remote_code=True,
48
  quantization_config=quantization_config,
49
- device_map="auto"
 
50
  )
51
 
52
  print(f'message is - {message}')
 
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
5
  import os
6
  from threading import Thread
7
+ from accelerate import init_empty_weights
8
 
9
+ max_memory = {
10
+ 0: "40GiB", # Depends on your GPU's VRAM; e.g. if you have 8 GB, set this to 6 or 5
11
+ "cpu": "32GiB", # Depends on your system's RAM
12
+ }
13
  MODEL_LIST = ["THUDM/GLM-4-Z1-32B-0414"]
14
 
15
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
51
  low_cpu_mem_usage=True,
52
  trust_remote_code=True,
53
  quantization_config=quantization_config,
54
+ device_map="auto",
55
+ max_memory=max_memory,
56
  )
57
 
58
  print(f'message is - {message}')