|
{ |
|
"best_metric": 1.2524471282958984, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.020525451559934318, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008210180623973727, |
|
"grad_norm": 7.479497909545898, |
|
"learning_rate": 2e-05, |
|
"loss": 6.8396, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008210180623973727, |
|
"eval_loss": 7.077064514160156, |
|
"eval_runtime": 55.7918, |
|
"eval_samples_per_second": 9.195, |
|
"eval_steps_per_second": 4.606, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0016420361247947454, |
|
"grad_norm": 6.5878400802612305, |
|
"learning_rate": 4e-05, |
|
"loss": 6.7554, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0024630541871921183, |
|
"grad_norm": 7.17900276184082, |
|
"learning_rate": 6e-05, |
|
"loss": 7.4692, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003284072249589491, |
|
"grad_norm": 7.042717933654785, |
|
"learning_rate": 8e-05, |
|
"loss": 7.6165, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004105090311986864, |
|
"grad_norm": 8.648153305053711, |
|
"learning_rate": 0.0001, |
|
"loss": 7.2808, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004105090311986864, |
|
"eval_loss": 6.476822376251221, |
|
"eval_runtime": 55.4437, |
|
"eval_samples_per_second": 9.253, |
|
"eval_steps_per_second": 4.635, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0049261083743842365, |
|
"grad_norm": 8.955443382263184, |
|
"learning_rate": 0.00012, |
|
"loss": 5.9208, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.005747126436781609, |
|
"grad_norm": 9.691061019897461, |
|
"learning_rate": 0.00014, |
|
"loss": 6.18, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006568144499178982, |
|
"grad_norm": 8.714971542358398, |
|
"learning_rate": 0.00016, |
|
"loss": 4.8694, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007389162561576354, |
|
"grad_norm": 10.449569702148438, |
|
"learning_rate": 0.00018, |
|
"loss": 4.5233, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.008210180623973728, |
|
"grad_norm": 7.942665100097656, |
|
"learning_rate": 0.0002, |
|
"loss": 3.3926, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008210180623973728, |
|
"eval_loss": 3.5349371433258057, |
|
"eval_runtime": 55.2423, |
|
"eval_samples_per_second": 9.286, |
|
"eval_steps_per_second": 4.652, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0090311986863711, |
|
"grad_norm": 6.665732383728027, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 3.9181, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.009852216748768473, |
|
"grad_norm": 7.836021900177002, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 3.0558, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.010673234811165846, |
|
"grad_norm": 11.377631187438965, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 3.2821, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.011494252873563218, |
|
"grad_norm": 8.392828941345215, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 2.391, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.012315270935960592, |
|
"grad_norm": 7.50490665435791, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.895, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.012315270935960592, |
|
"eval_loss": 2.026073932647705, |
|
"eval_runtime": 55.3509, |
|
"eval_samples_per_second": 9.268, |
|
"eval_steps_per_second": 4.643, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.013136288998357963, |
|
"grad_norm": 7.33989143371582, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 1.9689, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.013957307060755337, |
|
"grad_norm": 9.091341018676758, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 1.6151, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.014778325123152709, |
|
"grad_norm": 7.225343227386475, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 1.8492, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.015599343185550082, |
|
"grad_norm": 9.886344909667969, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 1.9566, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.016420361247947456, |
|
"grad_norm": 6.689027786254883, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 1.2539, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.016420361247947456, |
|
"eval_loss": 1.3548338413238525, |
|
"eval_runtime": 56.0188, |
|
"eval_samples_per_second": 9.158, |
|
"eval_steps_per_second": 4.588, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.017241379310344827, |
|
"grad_norm": 8.534672737121582, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 1.5264, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0180623973727422, |
|
"grad_norm": 6.850774765014648, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 1.2807, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.018883415435139574, |
|
"grad_norm": 7.599748611450195, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 1.2068, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.019704433497536946, |
|
"grad_norm": 10.296943664550781, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 1.5661, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.020525451559934318, |
|
"grad_norm": 7.845312118530273, |
|
"learning_rate": 0.0, |
|
"loss": 1.4555, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.020525451559934318, |
|
"eval_loss": 1.2524471282958984, |
|
"eval_runtime": 59.8902, |
|
"eval_samples_per_second": 8.566, |
|
"eval_steps_per_second": 4.291, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1223694483456000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|