rbelanec committed on
Commit 42c8961 (verified)
Parent(s): 0c1e956

End of training

README.md CHANGED
@@ -4,6 +4,7 @@ license: gemma
 base_model: google/gemma-3-1b-it
 tags:
 - llama-factory
+- prompt-tuning
 - generated_from_trainer
 model-index:
 - name: train_2025-04-10-10-34-00
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # train_2025-04-10-10-34-00
 
-This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) on an unknown dataset.
+This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it) on the mnli_train dataset.
 
 ## Model description
 
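For context, the README now describes a prompt-tuning adapter trained with LLaMA-Factory on top of google/gemma-3-1b-it. A minimal sketch of how such an adapter could be loaded for inference with PEFT is shown below; the repository id `rbelanec/train_2025-04-10-10-34-00` and the exact prompt format are assumptions, not confirmed by this commit.

```python
# Minimal sketch (assumptions: the adapter is published under the repo id below
# and was saved in PEFT prompt-tuning format by LLaMA-Factory).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "google/gemma-3-1b-it"
adapter_id = "rbelanec/train_2025-04-10-10-34-00"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)
model = PeftModel.from_pretrained(base_model, adapter_id)  # attaches the learned prompt embeddings

# Example MNLI-style query; the prompt template is a placeholder.
inputs = tokenizer("premise: ... hypothesis: ...", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```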
all_results.json ADDED
@@ -0,0 +1,13 @@
+{
+    "epoch": 2.634920634920635,
+    "eval_mnli_eval_loss": 0.5607374906539917,
+    "eval_mnli_eval_runtime": 10.1933,
+    "eval_mnli_eval_samples_per_second": 98.104,
+    "eval_mnli_eval_steps_per_second": 6.181,
+    "num_input_tokens_seen": 256448,
+    "total_flos": 1073844298924032.0,
+    "train_loss": 5.759574245838892,
+    "train_runtime": 153.0301,
+    "train_samples_per_second": 19.604,
+    "train_steps_per_second": 0.137
+}
eval_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 2.634920634920635,
+    "eval_mnli_eval_loss": 0.5607374906539917,
+    "eval_mnli_eval_runtime": 10.1933,
+    "eval_mnli_eval_samples_per_second": 98.104,
+    "eval_mnli_eval_steps_per_second": 6.181,
+    "num_input_tokens_seen": 256448
+}
train_results.json ADDED
@@ -0,0 +1,9 @@
+{
+    "epoch": 2.634920634920635,
+    "num_input_tokens_seen": 256448,
+    "total_flos": 1073844298924032.0,
+    "train_loss": 5.759574245838892,
+    "train_runtime": 153.0301,
+    "train_samples_per_second": 19.604,
+    "train_steps_per_second": 0.137
+}
trainer_state.json ADDED
@@ -0,0 +1,139 @@
+{
+  "best_global_step": 21,
+  "best_metric": 0.5607374906539917,
+  "best_model_checkpoint": "saves/prompt-tuning/gemma-3-1b-it/train_2025-04-10-10-34-00/checkpoint-21",
+  "epoch": 2.634920634920635,
+  "eval_steps": 3,
+  "global_step": 21,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.38095238095238093,
+      "eval_mnli_eval_loss": 11.7061185836792,
+      "eval_mnli_eval_runtime": 10.1405,
+      "eval_mnli_eval_samples_per_second": 98.615,
+      "eval_mnli_eval_steps_per_second": 6.213,
+      "num_input_tokens_seen": 39424,
+      "step": 3
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 13.432448387145996,
+      "learning_rate": 0.2739358161473992,
+      "loss": 11.6642,
+      "num_input_tokens_seen": 62976,
+      "step": 5
+    },
+    {
+      "epoch": 0.7619047619047619,
+      "eval_mnli_eval_loss": 9.128030776977539,
+      "eval_mnli_eval_runtime": 10.2011,
+      "eval_mnli_eval_samples_per_second": 98.029,
+      "eval_mnli_eval_steps_per_second": 6.176,
+      "num_input_tokens_seen": 76288,
+      "step": 6
+    },
+    {
+      "epoch": 1.126984126984127,
+      "eval_mnli_eval_loss": 5.672970294952393,
+      "eval_mnli_eval_runtime": 10.1995,
+      "eval_mnli_eval_samples_per_second": 98.044,
+      "eval_mnli_eval_steps_per_second": 6.177,
+      "num_input_tokens_seen": 110336,
+      "step": 9
+    },
+    {
+      "epoch": 1.253968253968254,
+      "grad_norm": 2.507275342941284,
+      "learning_rate": 0.18337814009344713,
+      "loss": 7.965,
+      "num_input_tokens_seen": 121856,
+      "step": 10
+    },
+    {
+      "epoch": 1.507936507936508,
+      "eval_mnli_eval_loss": 2.735905170440674,
+      "eval_mnli_eval_runtime": 10.2202,
+      "eval_mnli_eval_samples_per_second": 97.845,
+      "eval_mnli_eval_steps_per_second": 6.164,
+      "num_input_tokens_seen": 149120,
+      "step": 12
+    },
+    {
+      "epoch": 1.8888888888888888,
+      "grad_norm": 1.7794915437698364,
+      "learning_rate": 0.07500000000000002,
+      "loss": 3.4004,
+      "num_input_tokens_seen": 187264,
+      "step": 15
+    },
+    {
+      "epoch": 1.8888888888888888,
+      "eval_mnli_eval_loss": 1.661912441253662,
+      "eval_mnli_eval_runtime": 10.2019,
+      "eval_mnli_eval_samples_per_second": 98.021,
+      "eval_mnli_eval_steps_per_second": 6.175,
+      "num_input_tokens_seen": 187264,
+      "step": 15
+    },
+    {
+      "epoch": 2.253968253968254,
+      "eval_mnli_eval_loss": 0.739750325679779,
+      "eval_mnli_eval_runtime": 10.2066,
+      "eval_mnli_eval_samples_per_second": 97.976,
+      "eval_mnli_eval_steps_per_second": 6.172,
+      "num_input_tokens_seen": 219072,
+      "step": 18
+    },
+    {
+      "epoch": 2.507936507936508,
+      "grad_norm": 0.5217874050140381,
+      "learning_rate": 0.0066640791320788815,
+      "loss": 1.0424,
+      "num_input_tokens_seen": 244032,
+      "step": 20
+    },
+    {
+      "epoch": 2.634920634920635,
+      "eval_mnli_eval_loss": 0.5607374906539917,
+      "eval_mnli_eval_runtime": 10.1954,
+      "eval_mnli_eval_samples_per_second": 98.083,
+      "eval_mnli_eval_steps_per_second": 6.179,
+      "num_input_tokens_seen": 256448,
+      "step": 21
+    },
+    {
+      "epoch": 2.634920634920635,
+      "num_input_tokens_seen": 256448,
+      "step": 21,
+      "total_flos": 1073844298924032.0,
+      "train_loss": 5.759574245838892,
+      "train_runtime": 153.0301,
+      "train_samples_per_second": 19.604,
+      "train_steps_per_second": 0.137
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 21,
+  "num_input_tokens_seen": 256448,
+  "num_train_epochs": 3,
+  "save_steps": 3,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1073844298924032.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
training_eval_mnli_eval_loss.png ADDED
training_loss.png ADDED
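The two PNGs above plot the curves recorded in the committed trainer_state.json. A minimal sketch of how the eval-loss curve can be reproduced from that file is shown below; it assumes matplotlib is available and only uses the keys visible in the diff above.

```python
# Minimal sketch: rebuild the eval-loss plot from trainer_state.json.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only log entries that carry the evaluation loss.
eval_entries = [e for e in state["log_history"] if "eval_mnli_eval_loss" in e]
steps = [e["step"] for e in eval_entries]
losses = [e["eval_mnli_eval_loss"] for e in eval_entries]

plt.plot(steps, losses, marker="o")
plt.xlabel("step")
plt.ylabel("eval_mnli_eval_loss")
plt.savefig("training_eval_mnli_eval_loss.png")
```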