nm-research committed · verified
Commit 1f006c1 · 1 Parent(s): a9d5df7

Update README.md

Files changed (1): README.md (+6, -6)
README.md CHANGED
@@ -8,7 +8,7 @@ base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
 library_name: transformers
 ---
 
-# DeepSeek-R1-Distill-Llama-70B-FP8-Dynamic
+# DeepSeek-R1-Distill-Llama-70B-FP8-dynamic
 
 ## Model Overview
 - **Model Architecture:** LlamaForCausalLM
@@ -43,7 +43,7 @@ from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
 number_gpus = 2
-model_name = "neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-Dynamic"
+model_name = "neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic"
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 sampling_params = SamplingParams(temperature=0.6, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])
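The hunk above touches the README's vLLM deployment snippet. For context, a minimal runnable sketch of that snippet with the renamed repo id follows; the `LLM(...)` construction, the chat-templated prompt, and the `generate(...)` call are assumptions reconstructed around the lines visible in the hunk, not part of this diff.

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

number_gpus = 2
model_name = "neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic"

tokenizer = AutoTokenizer.from_pretrained(model_name)
sampling_params = SamplingParams(
    temperature=0.6, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id]
)

# Assumed continuation (not shown in the hunk): load the model across two GPUs
# and generate from a chat-templated prompt.
llm = LLM(model=model_name, tensor_parallel_size=number_gpus)
messages = [{"role": "user", "content": "Who are you?"}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
outputs = llm.generate(prompt, sampling_params)
print(outputs[0].outputs[0].text)
```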
@@ -87,7 +87,7 @@ device_map = calculate_offload_device_map(
 )
 
 model = AutoModelForCausalLM.from_pretrained(
-    model_stup,
+    model_stub,
     device_map=device_map,
     torch_dtype="auto",
 )
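The hunk above fixes a typo in the README's model-creation snippet (`model_stup` → `model_stub`). For orientation, a hedged sketch of the surrounding llmcompressor workflow is below; the `model_stub` value, the `calculate_offload_device_map` keyword arguments, and the FP8_DYNAMIC recipe are assumptions inferred from the model name, since the hunk elides them.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from llmcompressor.transformers import oneshot
from llmcompressor.transformers.compression.helpers import calculate_offload_device_map
from llmcompressor.modifiers.quantization import QuantizationModifier

# Hypothetical stub for illustration; the hunk shows only the corrected argument.
model_stub = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"

# Keyword arguments are assumptions; the hunk shows only the closing parenthesis.
device_map = calculate_offload_device_map(
    model_stub, reserve_for_hessians=False, num_gpus=2, torch_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_stub,
    device_map=device_map,
    torch_dtype="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_stub)

# Assumed recipe matching the FP8-dynamic name: FP8 weights with dynamic
# per-token activation scales, lm_head left unquantized.
recipe = QuantizationModifier(targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"])
oneshot(model=model, recipe=recipe)
```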
@@ -122,7 +122,7 @@ OpenLLM Leaderboard V1:
 ```
 lm_eval \
   --model vllm \
-  --model_args pretrained="neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-Dynamic",dtype=auto,max_model_len=4096,tensor_parallel_size=2,enable_chunked_prefill=True \
+  --model_args pretrained="neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic",dtype=auto,max_model_len=4096,tensor_parallel_size=2,enable_chunked_prefill=True \
   --tasks openllm \
   --write_out \
   --batch_size auto \
@@ -134,7 +134,7 @@ OpenLLM Leaderboard V2:
 ```
 lm_eval \
   --model vllm \
-  --model_args pretrained="neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-Dynamic",dtype=auto,max_model_len=4096,tensor_parallel_size=2,enable_chunked_prefill=True \
+  --model_args pretrained="neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic",dtype=auto,max_model_len=4096,tensor_parallel_size=2,enable_chunked_prefill=True \
   --apply_chat_template \
   --fewshot_as_multiturn \
   --tasks leaderboard \
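The two lm_eval invocations in the hunks above change only the checkpoint id. If the same evaluation were driven from Python rather than the CLI, a hedged sketch might look like this; `simple_evaluate` and its keyword arguments are assumed to mirror the CLI flags, and the task list matches the V2 command.

```python
from lm_eval import simple_evaluate

# Assumed Python equivalent of the CLI invocation above.
results = simple_evaluate(
    model="vllm",
    model_args=(
        "pretrained=neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic,"
        "dtype=auto,max_model_len=4096,tensor_parallel_size=2,enable_chunked_prefill=True"
    ),
    tasks=["leaderboard"],
    apply_chat_template=True,
    fewshot_as_multiturn=True,
    batch_size="auto",
)
print(results["results"])
```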
@@ -152,7 +152,7 @@ lm_eval \
   <th>Category</th>
   <th>Metric</th>
   <th>deepseek-ai/DeepSeek-R1-Distill-Llama-70B</th>
-  <th>neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-Dynamic</th>
+  <th>neuralmagic/DeepSeek-R1-Distill-Llama-70B-FP8-dynamic</th>
   <th>Recovery</th>
   </tr>
 </thead>
 