Update README.md
Browse files
README.md
CHANGED
@@ -96,7 +96,7 @@ for n, m in block.named_modules():
|
|
96 |
if isinstance(m, (torch.nn.Linear, transformers.modeling_utils.Conv1D)):
|
97 |
if "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2])<63 :
|
98 |
device ="cuda:1"
|
99 |
-
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 63 and int(n.split('.')[-2]) < 128
|
100 |
device = "cuda:2"
|
101 |
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 128 and int(
|
102 |
n.split('.')[-2]) < 192:
|
@@ -114,7 +114,7 @@ from auto_round import AutoRound
|
|
114 |
autoround = AutoRound(model=model, tokenizer=tokenizer, device_map=device_map,
|
115 |
iters=200, batch_size=8, seqlen=512, enable_torch_compile=False)
|
116 |
autoround.quantize()
|
117 |
-
autoround.save_quantized(format="gguf:q4_0", output_dir="tmp_autoround"
|
118 |
```
|
119 |
|
120 |
## Ethical Considerations and Limitations
|
|
|
96 |
if isinstance(m, (torch.nn.Linear, transformers.modeling_utils.Conv1D)):
|
97 |
if "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2])<63 :
|
98 |
device ="cuda:1"
|
99 |
+
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 63 and int(n.split('.')[-2]) < 128:
|
100 |
device = "cuda:2"
|
101 |
elif "experts" in n and ("shared_experts" not in n) and int(n.split('.')[-2]) >= 128 and int(
|
102 |
n.split('.')[-2]) < 192:
|
|
|
114 |
autoround = AutoRound(model=model, tokenizer=tokenizer, device_map=device_map,
|
115 |
iters=200, batch_size=8, seqlen=512, enable_torch_compile=False)
|
116 |
autoround.quantize()
|
117 |
+
autoround.save_quantized(format="gguf:q4_0", output_dir="tmp_autoround")
|
118 |
```
|
119 |
|
120 |
## Ethical Considerations and Limitations
|