pere committed on
Commit
e466c9c
·
verified ·
1 Parent(s): 1a90f29

Saving train state of step 2000

Browse files
checkpoint-2000-epoch-3/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c74bc5c3109ae439029036150ad00022243260fa7e8821c38767aa4b7331cd4
3
  size 3025686376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3383ce8239fd9a5346296993f4068931faff75aafbb1e863f55802be68be183b
3
  size 3025686376
checkpoint-2000-epoch-3/model_1.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28897ec4b789c0dc382a6975366fcb16206be64b6b691a60b218831c8f6af1ea
3
- size 4361070048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56570ecf66e2cbf1e212810317afdc44b85396298beab22e66ff759a1116f26a
3
+ size 4361069272
checkpoint-2000-epoch-3/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:004272d1268cb87da3d4f93da3e6e5e2184d6627957db9c697c6c8c45823211e
3
  size 950951226
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69da6783a2bb6483a2623217bf874ce7cd7d99e80a36638e0ffcc67bf80de6e7
3
  size 950951226
checkpoint-2000-epoch-3/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2379f6d25b7b3c11f41a4f66cb6a54cec5f124966fe580594bb5386afcd0fca1
3
+ size 988
checkpoint-2000-epoch-3/scheduler.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e72412f426c58539f1dcfef4d31369e79764f60ce3a6e20df06cde830d8946e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dad6b56b74593b411aa2335a4636d028f73ce8d740f99b52582f884503cebaa
3
  size 1064
run_large_training.sh CHANGED
@@ -29,7 +29,7 @@ accelerate launch run_distillation.py \
29
  --dataloader_num_workers 8 \
30
  --preprocessing_num_workers 8 \
31
  --ddp_timeout 7200 \
32
- --dtype "bfloat16" \
33
  --attn_implementation "flash_attention_2" \
34
  --output_dir "./" \
35
  --do_train \
 
29
  --dataloader_num_workers 8 \
30
  --preprocessing_num_workers 8 \
31
  --ddp_timeout 7200 \
32
+ --dtype "float16" \
33
  --attn_implementation "flash_attention_2" \
34
  --output_dir "./" \
35
  --do_train \