Model save

Browse files

Files changed (5) hide show

README.md +58 -0
all_results.json +8 -0
generation_config.json +14 -0
train_results.json +8 -0
trainer_state.json +468 -0

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: Qwen/Qwen2.5-1.5B-Instruct
+library_name: transformers
+model_name: Qwen2.5-1.5B-Open-R1-Distill
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+# Model Card for Qwen2.5-1.5B-Open-R1-Distill
+This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="qgallouedec/Qwen2.5-1.5B-Open-R1-Distill", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/huggingface/huggingface/runs/6u2cdkxu)
+This model was trained with supervised fine-tuning (SFT).
+### Framework versions
+- TRL: 0.16.0.dev0
+- Transformers: 4.50.0.dev0
+- PyTorch: 2.6.0
+- Datasets: 3.0.0
+- Tokenizers: 0.21.0
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 490444463013888.0,
+    "train_loss": 0.5787169348351574,
+    "train_runtime": 3181.4532,
+    "train_samples": 93733,
+    "train_samples_per_second": 10.784,
+    "train_steps_per_second": 0.085
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.1,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.50.0.dev0"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "total_flos": 490444463013888.0,
+    "train_loss": 0.5787169348351574,
+    "train_runtime": 3181.4532,
+    "train_samples": 93733,
+    "train_samples_per_second": 10.784,
+    "train_steps_per_second": 0.085
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,468 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 269,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01858736059479554,
+      "grad_norm": 1.7748656124882543,
+      "learning_rate": 1.785714285714286e-05,
+      "loss": 0.8465,
+      "num_tokens": 10465392.0,
+      "step": 5
+    },
+    {
+      "epoch": 0.03717472118959108,
+      "grad_norm": 0.7775323625994008,
+      "learning_rate": 3.571428571428572e-05,
+      "loss": 0.7778,
+      "num_tokens": 20951152.0,
+      "step": 10
+    },
+    {
+      "epoch": 0.055762081784386616,
+      "grad_norm": 0.48936318299981746,
+      "learning_rate": 4.9998292477583695e-05,
+      "loss": 0.7095,
+      "num_tokens": 31436912.0,
+      "step": 15
+    },
+    {
+      "epoch": 0.07434944237918216,
+      "grad_norm": 0.4073886070673796,
+      "learning_rate": 4.993855640118024e-05,
+      "loss": 0.6711,
+      "num_tokens": 41909269.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.09293680297397769,
+      "grad_norm": 0.3704019213957072,
+      "learning_rate": 4.9793703194560106e-05,
+      "loss": 0.652,
+      "num_tokens": 52353719.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.11152416356877323,
+      "grad_norm": 0.2594836074576581,
+      "learning_rate": 4.9564282335552e-05,
+      "loss": 0.6366,
+      "num_tokens": 62792124.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.13011152416356878,
+      "grad_norm": 0.25594286080216416,
+      "learning_rate": 4.9251164096056716e-05,
+      "loss": 0.6218,
+      "num_tokens": 73260442.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.14869888475836432,
+      "grad_norm": 0.23970074498248772,
+      "learning_rate": 4.885553624080778e-05,
+      "loss": 0.6054,
+      "num_tokens": 83727556.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.16728624535315986,
+      "grad_norm": 0.2511053919569689,
+      "learning_rate": 4.8378899521772935e-05,
+      "loss": 0.6042,
+      "num_tokens": 94201938.0,
+      "step": 45
+    },
+    {
+      "epoch": 0.18587360594795538,
+      "grad_norm": 0.29733281598714095,
+      "learning_rate": 4.78230619852879e-05,
+      "loss": 0.5968,
+      "num_tokens": 104670740.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.20446096654275092,
+      "grad_norm": 0.2960638661673597,
+      "learning_rate": 4.719013211351733e-05,
+      "loss": 0.5998,
+      "num_tokens": 115151933.0,
+      "step": 55
+    },
+    {
+      "epoch": 0.22304832713754646,
+      "grad_norm": 0.3607422222807977,
+      "learning_rate": 4.648251082625975e-05,
+      "loss": 0.5906,
+      "num_tokens": 125607410.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.241635687732342,
+      "grad_norm": 0.36527321547019587,
+      "learning_rate": 4.570288237343632e-05,
+      "loss": 0.5882,
+      "num_tokens": 136036466.0,
+      "step": 65
+    },
+    {
+      "epoch": 0.26022304832713755,
+      "grad_norm": 0.3483411666820654,
+      "learning_rate": 4.4854204152811567e-05,
+      "loss": 0.5848,
+      "num_tokens": 146502251.0,
+      "step": 70
+    },
+    {
+      "epoch": 0.2788104089219331,
+      "grad_norm": 0.32469856470379493,
+      "learning_rate": 4.39396954915706e-05,
+      "loss": 0.5822,
+      "num_tokens": 156977308.0,
+      "step": 75
+    },
+    {
+      "epoch": 0.29739776951672864,
+      "grad_norm": 0.24670208955198386,
+      "learning_rate": 4.2962825434308415e-05,
+      "loss": 0.576,
+      "num_tokens": 167423794.0,
+      "step": 80
+    },
+    {
+      "epoch": 0.3159851301115242,
+      "grad_norm": 0.28914180510023146,
+      "learning_rate": 4.1927299583755515e-05,
+      "loss": 0.5685,
+      "num_tokens": 177900466.0,
+      "step": 85
+    },
+    {
+      "epoch": 0.3345724907063197,
+      "grad_norm": 0.28196774826939586,
+      "learning_rate": 4.083704604415748e-05,
+      "loss": 0.5725,
+      "num_tokens": 188373813.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.35315985130111527,
+      "grad_norm": 0.26866232768746134,
+      "learning_rate": 3.969620052063012e-05,
+      "loss": 0.5707,
+      "num_tokens": 198859382.0,
+      "step": 95
+    },
+    {
+      "epoch": 0.37174721189591076,
+      "grad_norm": 0.29490730565713075,
+      "learning_rate": 3.850909063101328e-05,
+      "loss": 0.5676,
+      "num_tokens": 209345142.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.3903345724907063,
+      "grad_norm": 0.28997329137722166,
+      "learning_rate": 3.728021948973421e-05,
+      "loss": 0.5668,
+      "num_tokens": 219820636.0,
+      "step": 105
+    },
+    {
+      "epoch": 0.40892193308550184,
+      "grad_norm": 0.2427463235734826,
+      "learning_rate": 3.6014248625951984e-05,
+      "loss": 0.559,
+      "num_tokens": 230306396.0,
+      "step": 110
+    },
+    {
+      "epoch": 0.4275092936802974,
+      "grad_norm": 0.30000276433313355,
+      "learning_rate": 3.4715980300780745e-05,
+      "loss": 0.5627,
+      "num_tokens": 240792156.0,
+      "step": 115
+    },
+    {
+      "epoch": 0.44609665427509293,
+      "grad_norm": 0.2840658840182488,
+      "learning_rate": 3.339033929066841e-05,
+      "loss": 0.5612,
+      "num_tokens": 251259365.0,
+      "step": 120
+    },
+    {
+      "epoch": 0.4646840148698885,
+      "grad_norm": 0.27262466075161956,
+      "learning_rate": 3.204235420603273e-05,
+      "loss": 0.5644,
+      "num_tokens": 261736444.0,
+      "step": 125
+    },
+    {
+      "epoch": 0.483271375464684,
+      "grad_norm": 0.27049031008365026,
+      "learning_rate": 3.0677138416019556e-05,
+      "loss": 0.563,
+      "num_tokens": 272205866.0,
+      "step": 130
+    },
+    {
+      "epoch": 0.5018587360594795,
+      "grad_norm": 0.23124549228542488,
+      "learning_rate": 2.9299870651742188e-05,
+      "loss": 0.5587,
+      "num_tokens": 282683327.0,
+      "step": 135
+    },
+    {
+      "epoch": 0.5204460966542751,
+      "grad_norm": 0.22494436138190718,
+      "learning_rate": 2.7915775361580428e-05,
+      "loss": 0.5612,
+      "num_tokens": 293122806.0,
+      "step": 140
+    },
+    {
+      "epoch": 0.5390334572490706,
+      "grad_norm": 0.18937691441460602,
+      "learning_rate": 2.6530102893058357e-05,
+      "loss": 0.5605,
+      "num_tokens": 303598947.0,
+      "step": 145
+    },
+    {
+      "epoch": 0.5576208178438662,
+      "grad_norm": 0.2351559125981879,
+      "learning_rate": 2.5148109576477802e-05,
+      "loss": 0.5578,
+      "num_tokens": 314077641.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.5762081784386617,
+      "grad_norm": 0.19092993827655919,
+      "learning_rate": 2.3775037785857073e-05,
+      "loss": 0.5488,
+      "num_tokens": 324545386.0,
+      "step": 155
+    },
+    {
+      "epoch": 0.5947955390334573,
+      "grad_norm": 0.17223510717812895,
+      "learning_rate": 2.2416096052810688e-05,
+      "loss": 0.5609,
+      "num_tokens": 335031146.0,
+      "step": 160
+    },
+    {
+      "epoch": 0.6133828996282528,
+      "grad_norm": 0.19945511088993198,
+      "learning_rate": 2.1076439308804808e-05,
+      "loss": 0.5411,
+      "num_tokens": 345504904.0,
+      "step": 165
+    },
+    {
+      "epoch": 0.6319702602230484,
+      "grad_norm": 0.17394579706852514,
+      "learning_rate": 1.976114933073662e-05,
+      "loss": 0.5507,
+      "num_tokens": 355956520.0,
+      "step": 170
+    },
+    {
+      "epoch": 0.6505576208178439,
+      "grad_norm": 0.16263450610078103,
+      "learning_rate": 1.847521546401383e-05,
+      "loss": 0.5515,
+      "num_tokens": 366434348.0,
+      "step": 175
+    },
+    {
+      "epoch": 0.6691449814126395,
+      "grad_norm": 0.18127092614108742,
+      "learning_rate": 1.7223515696258592e-05,
+      "loss": 0.5543,
+      "num_tokens": 376908695.0,
+      "step": 180
+    },
+    {
+      "epoch": 0.6877323420074349,
+      "grad_norm": 0.1797771983319447,
+      "learning_rate": 1.60107981534296e-05,
+      "loss": 0.5571,
+      "num_tokens": 387384301.0,
+      "step": 185
+    },
+    {
+      "epoch": 0.7063197026022305,
+      "grad_norm": 0.16421806343683648,
+      "learning_rate": 1.484166308855398e-05,
+      "loss": 0.5479,
+      "num_tokens": 397835323.0,
+      "step": 190
+    },
+    {
+      "epoch": 0.724907063197026,
+      "grad_norm": 0.15793988144816604,
+      "learning_rate": 1.372054543139188e-05,
+      "loss": 0.5467,
+      "num_tokens": 408298322.0,
+      "step": 195
+    },
+    {
+      "epoch": 0.7434944237918215,
+      "grad_norm": 0.15338002718093754,
+      "learning_rate": 1.2651697965228748e-05,
+      "loss": 0.5525,
+      "num_tokens": 418781566.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.7620817843866171,
+      "grad_norm": 0.16149871221553752,
+      "learning_rate": 1.1639175194611693e-05,
+      "loss": 0.5484,
+      "num_tokens": 429246222.0,
+      "step": 205
+    },
+    {
+      "epoch": 0.7806691449814126,
+      "grad_norm": 0.16060225092798314,
+      "learning_rate": 1.0686817965224952e-05,
+      "loss": 0.5425,
+      "num_tokens": 439717644.0,
+      "step": 210
+    },
+    {
+      "epoch": 0.7992565055762082,
+      "grad_norm": 0.1488501132096454,
+      "learning_rate": 9.798238894246628e-06,
+      "loss": 0.5517,
+      "num_tokens": 450170885.0,
+      "step": 215
+    },
+    {
+      "epoch": 0.8178438661710037,
+      "grad_norm": 0.17478653780365394,
+      "learning_rate": 8.976808666454292e-06,
+      "loss": 0.5477,
+      "num_tokens": 460656645.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.8364312267657993,
+      "grad_norm": 0.17024941986957184,
+      "learning_rate": 8.225643248063091e-06,
+      "loss": 0.54,
+      "num_tokens": 471142405.0,
+      "step": 225
+    },
+    {
+      "epoch": 0.8550185873605948,
+      "grad_norm": 0.1374034986500908,
+      "learning_rate": 7.547592066798609e-06,
+      "loss": 0.5443,
+      "num_tokens": 481616409.0,
+      "step": 230
+    },
+    {
+      "epoch": 0.8736059479553904,
+      "grad_norm": 0.1555520491850222,
+      "learning_rate": 6.94522720304148e-06,
+      "loss": 0.5436,
+      "num_tokens": 492097674.0,
+      "step": 235
+    },
+    {
+      "epoch": 0.8921933085501859,
+      "grad_norm": 0.14362154894139065,
+      "learning_rate": 6.420833633045514e-06,
+      "loss": 0.5413,
+      "num_tokens": 502555042.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.9107806691449815,
+      "grad_norm": 0.1403050136321079,
+      "learning_rate": 5.976400561240085e-06,
+      "loss": 0.5393,
+      "num_tokens": 513031521.0,
+      "step": 245
+    },
+    {
+      "epoch": 0.929368029739777,
+      "grad_norm": 0.13648147962434784,
+      "learning_rate": 5.613613874496393e-06,
+      "loss": 0.5475,
+      "num_tokens": 523517281.0,
+      "step": 250
+    },
+    {
+      "epoch": 0.9479553903345725,
+      "grad_norm": 0.14324784070203328,
+      "learning_rate": 5.333849746981104e-06,
+      "loss": 0.5439,
+      "num_tokens": 533998146.0,
+      "step": 255
+    },
+    {
+      "epoch": 0.966542750929368,
+      "grad_norm": 0.13166747340193674,
+      "learning_rate": 5.138169419856345e-06,
+      "loss": 0.5399,
+      "num_tokens": 544470561.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.9851301115241635,
+      "grad_norm": 0.14044943374018817,
+      "learning_rate": 5.027315175628478e-06,
+      "loss": 0.5362,
+      "num_tokens": 554934577.0,
+      "step": 265
+    },
+    {
+      "epoch": 1.0,
+      "num_tokens": 562274609.0,
+      "step": 269,
+      "total_flos": 490444463013888.0,
+      "train_loss": 0.5787169348351574,
+      "train_runtime": 3181.4532,
+      "train_samples_per_second": 10.784,
+      "train_steps_per_second": 0.085
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 269,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 490444463013888.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}