Update README.md
README.md CHANGED
@@ -94,7 +94,7 @@ from llmcompressor import oneshot
 from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.quantization import QuantizationModifier

-MODEL_ID = "
+MODEL_ID = "nm-testing/Llama-3.3-70B-Instruct-FP8-block"

 # Load model.
 model = LlamaForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
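For context, these imports come from llmcompressor's data-free FP8 flow. A minimal sketch of how such a block-quantization recipe is typically assembled; the `FP8_BLOCK` scheme string, the `oneshot` call, and the save path are assumptions based on llmcompressor's published examples, not part of this diff:

```python
from transformers import AutoTokenizer, LlamaForCausalLM

from llmcompressor import oneshot
from llmcompressor.modeling import replace_modules_for_calibration
from llmcompressor.modifiers.quantization import QuantizationModifier

MODEL_ID = "nm-testing/Llama-3.3-70B-Instruct-FP8-block"

# Load model and tokenizer.
model = LlamaForCausalLM.from_pretrained(MODEL_ID, dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Swap in calibration-friendly module definitions where needed
# (effectively a no-op for plain dense Llama layers).
model = replace_modules_for_calibration(model)

# Quantize every Linear layer to FP8 with block-wise scales,
# keeping the lm_head in higher precision (scheme name assumed).
recipe = QuantizationModifier(targets="Linear", scheme="FP8_BLOCK", ignore=["lm_head"])

# Block FP8 is data-free, so no calibration dataset is passed.
oneshot(model=model, recipe=recipe)

# Save the compressed checkpoint (output path hypothetical).
SAVE_DIR = "Llama-3.3-70B-Instruct-FP8-block"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)
```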
@@ -135,7 +135,7 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
 ```
 lm_eval \
   --model vllm \
-  --model_args pretrained
+  --model_args pretrained="nm-testing/Llama-3.3-70B-Instruct-FP8-block",dtype=auto,add_bos_token=True,max_model_len=16384,tensor_parallel_size=4,gpu_memory_utilization=0.9,enable_chunked_prefill=True,trust_remote_code=True \
   --tasks openllm \
   --write_out \
   --batch_size auto \
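The same OpenLLMv1 run can also be launched from Python rather than the CLI; a rough equivalent using lm-evaluation-harness's `simple_evaluate`, with the argument string mirroring the command above (a sketch, not taken from the README):

```python
import lm_eval

# Programmatic counterpart of the `lm_eval` CLI invocation above.
results = lm_eval.simple_evaluate(
    model="vllm",
    model_args=(
        "pretrained=nm-testing/Llama-3.3-70B-Instruct-FP8-block,"
        "dtype=auto,add_bos_token=True,max_model_len=16384,"
        "tensor_parallel_size=4,gpu_memory_utilization=0.9,"
        "enable_chunked_prefill=True,trust_remote_code=True"
    ),
    tasks=["openllm"],
    batch_size="auto",
)
print(results["results"])
```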
@@ -148,7 +148,7 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
 ```
 lm_eval \
   --model vllm \
-  --model_args pretrained
+  --model_args pretrained="nm-testing/Llama-3.3-70B-Instruct-FP8-block",dtype=auto,add_bos_token=False,max_model_len=16384,tensor_parallel_size=4,gpu_memory_utilization=0.7,disable_log_stats=True,enable_chunked_prefill=True,trust_remote_code=True \
   --tasks leaderboard \
   --apply_chat_template \
   --fewshot_as_multiturn \
@@ -162,12 +162,12 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
 **Coding Benchmarks**

 ```
-evalplus.evaluate --model
+evalplus.evaluate --model "nm-testing/Llama-3.3-70B-Instruct-FP8-block" \
   --dataset "humaneval" \
   --backend vllm \
   --tp 4 \
   --greedy
-evalplus.evaluate --model
+evalplus.evaluate --model "nm-testing/Llama-3.3-70B-Instruct-FP8-block" \
   --dataset "mbpp" \
   --backend vllm \
   --tp 4 \
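Since the checkpoint presumably ships compressed-tensors quantization metadata, vLLM can serve it directly with no extra flags; a minimal sketch (the prompt and sampling parameters are illustrative):

```python
from vllm import LLM, SamplingParams

# vLLM reads the FP8-block quantization config from the model repo;
# tensor_parallel_size=4 matches the evaluation setup above.
llm = LLM(model="nm-testing/Llama-3.3-70B-Instruct-FP8-block", tensor_parallel_size=4)

sampling = SamplingParams(temperature=0.7, max_tokens=256)
outputs = llm.generate(["Give a one-sentence summary of FP8 block quantization."], sampling)
print(outputs[0].outputs[0].text)
```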