rootxhacker commited on
Commit
ae2f3cf
·
verified ·
1 Parent(s): c33f4bd

Training in progress, step 38000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d960d4cf1314782ff9fa34b4fa4ff52147a6a3edb83af4b92d07e2bfe2de068c
3
  size 132187888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f14125fba192c696e9ea3e36ca9f9429c24cac19e9b8c350f6135ab01f7ad59
3
  size 132187888
last-checkpoint/ar_diffusion_info.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3c47f9755f99713864a7b4e3ec92efc30fda75d652ffd65ed16182316bd0cf3
3
  size 1800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea672d9cf3b33b67a7d7775ed77487a43c2ed825e9c82fdac570ac86bb7b82e0
3
  size 1800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae4ce6863400bf14083af33a7f6b1eb29b1d7a79f7ec21fe556f80564974288f
3
  size 264665786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcbfe92a4edd63a8b647a0117b3f07614d7a6540fc40ddbb287c8b91dd837c1
3
  size 264665786
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52e415f584d39fd895ae020039e8e625bf0351da63110c6500e3a11320d6621f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc11b8250e89187182c146356c8dd099d66b6a75fadfa56262487e142dbe4b90
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:689e59e066b48be0feeeeb12da6d1787ebe3b841285102ea7503ee5943525726
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:102e993b93214454fb03b9ca20f482147915e5a724733ab537e52178df0f84f1
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23ce717bb1ff7d19ce8b39673c5e006d14b3fec124190d834c88a63ab05da6d0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541cb6dea9c80e2359e5134078f161ec83a5de3af8e29b3e9b6b880e5dd9058f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 37500,
3
- "best_metric": 0.595755934715271,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37500",
5
- "epoch": 2.8843935081916774,
6
  "eval_steps": 250,
7
- "global_step": 37500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -6458,6 +6458,92 @@
6458
  "eval_samples_per_second": 22.655,
6459
  "eval_steps_per_second": 5.664,
6460
  "step": 37500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6461
  }
6462
  ],
6463
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 37750,
3
+ "best_metric": 0.5946066975593567,
4
  "best_model_checkpoint": "./qwen3-4b-ar-diffusion-checkpoints/checkpoint-37500",
5
+ "epoch": 2.9228520883008997,
6
  "eval_steps": 250,
7
+ "global_step": 38000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
6458
  "eval_samples_per_second": 22.655,
6459
  "eval_steps_per_second": 5.664,
6460
  "step": 37500
6461
+ },
6462
+ {
6463
+ "epoch": 2.8882393662026,
6464
+ "grad_norm": 0.6938227415084839,
6465
+ "learning_rate": 7.630574240968237e-06,
6466
+ "loss": 0.6071,
6467
+ "step": 37550
6468
+ },
6469
+ {
6470
+ "epoch": 2.892085224213522,
6471
+ "grad_norm": 0.8504564166069031,
6472
+ "learning_rate": 7.3708542191517545e-06,
6473
+ "loss": 0.5963,
6474
+ "step": 37600
6475
+ },
6476
+ {
6477
+ "epoch": 2.895931082224444,
6478
+ "grad_norm": 0.5555813312530518,
6479
+ "learning_rate": 7.111134197335273e-06,
6480
+ "loss": 0.628,
6481
+ "step": 37650
6482
+ },
6483
+ {
6484
+ "epoch": 2.8997769402353666,
6485
+ "grad_norm": 0.9856501221656799,
6486
+ "learning_rate": 6.851414175518792e-06,
6487
+ "loss": 0.6144,
6488
+ "step": 37700
6489
+ },
6490
+ {
6491
+ "epoch": 2.903622798246289,
6492
+ "grad_norm": 1.2680622339248657,
6493
+ "learning_rate": 6.591694153702309e-06,
6494
+ "loss": 0.6156,
6495
+ "step": 37750
6496
+ },
6497
+ {
6498
+ "epoch": 2.903622798246289,
6499
+ "eval_loss": 0.5946066975593567,
6500
+ "eval_runtime": 21.1442,
6501
+ "eval_samples_per_second": 23.647,
6502
+ "eval_steps_per_second": 5.912,
6503
+ "step": 37750
6504
+ },
6505
+ {
6506
+ "epoch": 2.907468656257211,
6507
+ "grad_norm": 0.7417711615562439,
6508
+ "learning_rate": 6.331974131885828e-06,
6509
+ "loss": 0.6468,
6510
+ "step": 37800
6511
+ },
6512
+ {
6513
+ "epoch": 2.9113145142681334,
6514
+ "grad_norm": 1.0585455894470215,
6515
+ "learning_rate": 6.072254110069346e-06,
6516
+ "loss": 0.585,
6517
+ "step": 37850
6518
+ },
6519
+ {
6520
+ "epoch": 2.9151603722790553,
6521
+ "grad_norm": 0.5458203554153442,
6522
+ "learning_rate": 5.812534088252864e-06,
6523
+ "loss": 0.6077,
6524
+ "step": 37900
6525
+ },
6526
+ {
6527
+ "epoch": 2.9190062302899777,
6528
+ "grad_norm": 0.7168423533439636,
6529
+ "learning_rate": 5.552814066436382e-06,
6530
+ "loss": 0.5597,
6531
+ "step": 37950
6532
+ },
6533
+ {
6534
+ "epoch": 2.9228520883008997,
6535
+ "grad_norm": 0.8493395447731018,
6536
+ "learning_rate": 5.2930940446198996e-06,
6537
+ "loss": 0.5849,
6538
+ "step": 38000
6539
+ },
6540
+ {
6541
+ "epoch": 2.9228520883008997,
6542
+ "eval_loss": 0.5946918725967407,
6543
+ "eval_runtime": 22.1504,
6544
+ "eval_samples_per_second": 22.573,
6545
+ "eval_steps_per_second": 5.643,
6546
+ "step": 38000
6547
  }
6548
  ],
6549
  "logging_steps": 50,