|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0396000396000396, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.000198000198000198, |
|
"eval_loss": 1.1046350002288818, |
|
"eval_runtime": 177.7012, |
|
"eval_samples_per_second": 11.97, |
|
"eval_steps_per_second": 5.988, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00198000198000198, |
|
"grad_norm": 1.1289453506469727, |
|
"learning_rate": 0.0002, |
|
"loss": 4.1006, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00396000396000396, |
|
"grad_norm": 1.3300156593322754, |
|
"learning_rate": 0.0002, |
|
"loss": 3.7418, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00594000594000594, |
|
"grad_norm": 1.6589006185531616, |
|
"learning_rate": 0.0002, |
|
"loss": 3.7895, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.00792000792000792, |
|
"grad_norm": 1.0909771919250488, |
|
"learning_rate": 0.0002, |
|
"loss": 3.6444, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0099000099000099, |
|
"grad_norm": 1.8193849325180054, |
|
"learning_rate": 0.0002, |
|
"loss": 3.92, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0099000099000099, |
|
"eval_loss": 0.8933662176132202, |
|
"eval_runtime": 177.8394, |
|
"eval_samples_per_second": 11.96, |
|
"eval_steps_per_second": 5.983, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01188001188001188, |
|
"grad_norm": 1.5985904932022095, |
|
"learning_rate": 0.0002, |
|
"loss": 3.7766, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01386001386001386, |
|
"grad_norm": 1.0298365354537964, |
|
"learning_rate": 0.0002, |
|
"loss": 3.9574, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01584001584001584, |
|
"grad_norm": 1.3167791366577148, |
|
"learning_rate": 0.0002, |
|
"loss": 3.5841, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01782001782001782, |
|
"grad_norm": 1.6314549446105957, |
|
"learning_rate": 0.0002, |
|
"loss": 3.5142, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0198000198000198, |
|
"grad_norm": 0.9245526790618896, |
|
"learning_rate": 0.0002, |
|
"loss": 3.4552, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0198000198000198, |
|
"eval_loss": 0.8583095669746399, |
|
"eval_runtime": 177.6926, |
|
"eval_samples_per_second": 11.97, |
|
"eval_steps_per_second": 5.988, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02178002178002178, |
|
"grad_norm": 0.9592312574386597, |
|
"learning_rate": 0.0002, |
|
"loss": 3.1673, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02376002376002376, |
|
"grad_norm": 1.070432186126709, |
|
"learning_rate": 0.0002, |
|
"loss": 3.1173, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02574002574002574, |
|
"grad_norm": 2.247570753097534, |
|
"learning_rate": 0.0002, |
|
"loss": 3.7768, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02772002772002772, |
|
"grad_norm": 1.5730074644088745, |
|
"learning_rate": 0.0002, |
|
"loss": 3.3516, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0297000297000297, |
|
"grad_norm": 1.3724550008773804, |
|
"learning_rate": 0.0002, |
|
"loss": 3.2577, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0297000297000297, |
|
"eval_loss": 0.8379923701286316, |
|
"eval_runtime": 177.6782, |
|
"eval_samples_per_second": 11.971, |
|
"eval_steps_per_second": 5.988, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03168003168003168, |
|
"grad_norm": 1.7976691722869873, |
|
"learning_rate": 0.0002, |
|
"loss": 3.2878, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03366003366003366, |
|
"grad_norm": 1.4198904037475586, |
|
"learning_rate": 0.0002, |
|
"loss": 3.3432, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03564003564003564, |
|
"grad_norm": 1.6277859210968018, |
|
"learning_rate": 0.0002, |
|
"loss": 3.8561, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03762003762003762, |
|
"grad_norm": 1.1398273706436157, |
|
"learning_rate": 0.0002, |
|
"loss": 3.5024, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0396000396000396, |
|
"grad_norm": 1.2557786703109741, |
|
"learning_rate": 0.0002, |
|
"loss": 3.0593, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0396000396000396, |
|
"eval_loss": 0.8267537951469421, |
|
"eval_runtime": 177.798, |
|
"eval_samples_per_second": 11.963, |
|
"eval_steps_per_second": 5.984, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.680730813628416e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|