Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -7,8 +7,8 @@ from threading import Thread
|
|
7 |
from accelerate import init_empty_weights
|
8 |
|
9 |
max_memory = {
|
10 |
-
0: "50GiB",
|
11 |
-
"cpu": "32GiB",
|
12 |
}
|
13 |
MODEL_LIST = ["THUDM/GLM-4-Z1-32B-0414"]
|
14 |
|
@@ -51,7 +51,7 @@ def stream_chat(message, history: list, temperature: float, max_length: int, top
|
|
51 |
low_cpu_mem_usage=True,
|
52 |
trust_remote_code=True,
|
53 |
quantization_config=quantization_config,
|
54 |
-
device_map="auto"
|
55 |
max_memory=max_memory,
|
56 |
)
|
57 |
|
|
|
7 |
from accelerate import init_empty_weights
|
8 |
|
9 |
max_memory = {
|
10 |
+
0: "50GiB",
|
11 |
+
"cpu": "32GiB",
|
12 |
}
|
13 |
MODEL_LIST = ["THUDM/GLM-4-Z1-32B-0414"]
|
14 |
|
|
|
51 |
low_cpu_mem_usage=True,
|
52 |
trust_remote_code=True,
|
53 |
quantization_config=quantization_config,
|
54 |
+
device_map="auto",
|
55 |
max_memory=max_memory,
|
56 |
)
|
57 |
|