{
  "best_metric": 0.20430698990821838,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.07931786634939521,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.000396589331746976,
      "grad_norm": 6.674250602722168,
      "learning_rate": 1.001e-05,
      "loss": 4.3034,
      "step": 1
    },
    {
      "epoch": 0.000396589331746976,
      "eval_loss": 3.849916934967041,
      "eval_runtime": 46.9826,
      "eval_samples_per_second": 22.604,
      "eval_steps_per_second": 5.662,
      "step": 1
    },
    {
      "epoch": 0.000793178663493952,
      "grad_norm": 13.111907958984375,
      "learning_rate": 2.002e-05,
      "loss": 6.024,
      "step": 2
    },
    {
      "epoch": 0.001189767995240928,
      "grad_norm": 9.942648887634277,
      "learning_rate": 3.0029999999999995e-05,
      "loss": 5.3876,
      "step": 3
    },
    {
      "epoch": 0.001586357326987904,
      "grad_norm": 8.252339363098145,
      "learning_rate": 4.004e-05,
      "loss": 2.8694,
      "step": 4
    },
    {
      "epoch": 0.00198294665873488,
      "grad_norm": 10.045882225036621,
      "learning_rate": 5.005e-05,
      "loss": 2.5194,
      "step": 5
    },
    {
      "epoch": 0.002379535990481856,
      "grad_norm": 6.507627487182617,
      "learning_rate": 6.005999999999999e-05,
      "loss": 2.0718,
      "step": 6
    },
    {
      "epoch": 0.002776125322228832,
      "grad_norm": 7.497224807739258,
      "learning_rate": 7.006999999999998e-05,
      "loss": 2.275,
      "step": 7
    },
    {
      "epoch": 0.003172714653975808,
      "grad_norm": 5.831888675689697,
      "learning_rate": 8.008e-05,
      "loss": 1.198,
      "step": 8
    },
    {
      "epoch": 0.003569303985722784,
      "grad_norm": 6.313211917877197,
      "learning_rate": 9.009e-05,
      "loss": 1.3938,
      "step": 9
    },
    {
      "epoch": 0.00396589331746976,
      "grad_norm": 4.629086017608643,
      "learning_rate": 0.0001001,
      "loss": 1.3091,
      "step": 10
    },
    {
      "epoch": 0.004362482649216736,
      "grad_norm": 4.553199291229248,
      "learning_rate": 9.957315789473684e-05,
      "loss": 1.1133,
      "step": 11
    },
    {
      "epoch": 0.004759071980963712,
      "grad_norm": 5.859588623046875,
      "learning_rate": 9.904631578947367e-05,
      "loss": 0.9577,
      "step": 12
    },
    {
      "epoch": 0.005155661312710688,
      "grad_norm": 4.0679707527160645,
      "learning_rate": 9.851947368421052e-05,
      "loss": 0.8043,
      "step": 13
    },
    {
      "epoch": 0.005552250644457664,
      "grad_norm": 3.8273086547851562,
      "learning_rate": 9.799263157894736e-05,
      "loss": 0.6524,
      "step": 14
    },
    {
      "epoch": 0.00594883997620464,
      "grad_norm": 5.04181432723999,
      "learning_rate": 9.746578947368421e-05,
      "loss": 0.785,
      "step": 15
    },
    {
      "epoch": 0.006345429307951616,
      "grad_norm": 4.491847038269043,
      "learning_rate": 9.693894736842104e-05,
      "loss": 0.8275,
      "step": 16
    },
    {
      "epoch": 0.0067420186396985925,
      "grad_norm": 7.865641117095947,
      "learning_rate": 9.641210526315789e-05,
      "loss": 1.1221,
      "step": 17
    },
    {
      "epoch": 0.007138607971445568,
      "grad_norm": 5.287400722503662,
      "learning_rate": 9.588526315789473e-05,
      "loss": 0.7424,
      "step": 18
    },
    {
      "epoch": 0.007535197303192544,
      "grad_norm": 5.461188793182373,
      "learning_rate": 9.535842105263157e-05,
      "loss": 0.4901,
      "step": 19
    },
    {
      "epoch": 0.00793178663493952,
      "grad_norm": 3.19498348236084,
      "learning_rate": 9.483157894736841e-05,
      "loss": 0.5121,
      "step": 20
    },
    {
      "epoch": 0.008328375966686495,
      "grad_norm": 6.649216175079346,
      "learning_rate": 9.430473684210526e-05,
      "loss": 0.203,
      "step": 21
    },
    {
      "epoch": 0.008724965298433472,
      "grad_norm": 6.187960624694824,
      "learning_rate": 9.37778947368421e-05,
      "loss": 1.141,
      "step": 22
    },
    {
      "epoch": 0.009121554630180448,
      "grad_norm": 3.0972740650177,
      "learning_rate": 9.325105263157894e-05,
      "loss": 0.5017,
      "step": 23
    },
    {
      "epoch": 0.009518143961927425,
      "grad_norm": 4.420783042907715,
      "learning_rate": 9.272421052631578e-05,
      "loss": 0.7917,
      "step": 24
    },
    {
      "epoch": 0.009914733293674401,
      "grad_norm": 2.6894378662109375,
      "learning_rate": 9.219736842105263e-05,
      "loss": 0.2605,
      "step": 25
    },
    {
      "epoch": 0.010311322625421376,
      "grad_norm": 2.54012131690979,
      "learning_rate": 9.167052631578946e-05,
      "loss": 0.4831,
      "step": 26
    },
    {
      "epoch": 0.010707911957168352,
      "grad_norm": 2.170597791671753,
      "learning_rate": 9.114368421052632e-05,
      "loss": 0.3522,
      "step": 27
    },
    {
      "epoch": 0.011104501288915328,
      "grad_norm": 5.271856307983398,
      "learning_rate": 9.061684210526315e-05,
      "loss": 1.2457,
      "step": 28
    },
    {
      "epoch": 0.011501090620662305,
      "grad_norm": 2.632899522781372,
      "learning_rate": 9.009e-05,
      "loss": 0.4753,
      "step": 29
    },
    {
      "epoch": 0.01189767995240928,
      "grad_norm": 3.3594906330108643,
      "learning_rate": 8.956315789473683e-05,
      "loss": 0.6309,
      "step": 30
    },
    {
      "epoch": 0.012294269284156256,
      "grad_norm": 2.8951773643493652,
      "learning_rate": 8.903631578947368e-05,
      "loss": 0.439,
      "step": 31
    },
    {
      "epoch": 0.012690858615903232,
      "grad_norm": 4.174351215362549,
      "learning_rate": 8.850947368421052e-05,
      "loss": 0.5533,
      "step": 32
    },
    {
      "epoch": 0.013087447947650209,
      "grad_norm": 4.82841157913208,
      "learning_rate": 8.798263157894736e-05,
      "loss": 0.8201,
      "step": 33
    },
    {
      "epoch": 0.013484037279397185,
      "grad_norm": 4.0411858558654785,
      "learning_rate": 8.745578947368422e-05,
      "loss": 0.5246,
      "step": 34
    },
    {
      "epoch": 0.01388062661114416,
      "grad_norm": 3.1116089820861816,
      "learning_rate": 8.692894736842105e-05,
      "loss": 0.5387,
      "step": 35
    },
    {
      "epoch": 0.014277215942891136,
      "grad_norm": 3.7147281169891357,
      "learning_rate": 8.64021052631579e-05,
      "loss": 0.4417,
      "step": 36
    },
    {
      "epoch": 0.014673805274638112,
      "grad_norm": 3.4639275074005127,
      "learning_rate": 8.587526315789473e-05,
      "loss": 0.6997,
      "step": 37
    },
    {
      "epoch": 0.015070394606385089,
      "grad_norm": 5.578957557678223,
      "learning_rate": 8.534842105263157e-05,
      "loss": 1.3211,
      "step": 38
    },
    {
      "epoch": 0.015466983938132063,
      "grad_norm": 2.867260456085205,
      "learning_rate": 8.482157894736842e-05,
      "loss": 0.5126,
      "step": 39
    },
    {
      "epoch": 0.01586357326987904,
      "grad_norm": 3.506054401397705,
      "learning_rate": 8.429473684210525e-05,
      "loss": 0.4488,
      "step": 40
    },
    {
      "epoch": 0.016260162601626018,
      "grad_norm": 3.3142287731170654,
      "learning_rate": 8.376789473684211e-05,
      "loss": 0.373,
      "step": 41
    },
    {
      "epoch": 0.01665675193337299,
      "grad_norm": 1.8013468980789185,
      "learning_rate": 8.324105263157894e-05,
      "loss": 0.1376,
      "step": 42
    },
    {
      "epoch": 0.017053341265119967,
      "grad_norm": 2.2930386066436768,
      "learning_rate": 8.271421052631579e-05,
      "loss": 0.0869,
      "step": 43
    },
    {
      "epoch": 0.017449930596866944,
      "grad_norm": 0.5169830918312073,
      "learning_rate": 8.218736842105262e-05,
      "loss": 0.0196,
      "step": 44
    },
    {
      "epoch": 0.01784651992861392,
      "grad_norm": 1.5832732915878296,
      "learning_rate": 8.166052631578947e-05,
      "loss": 0.0804,
      "step": 45
    },
    {
      "epoch": 0.018243109260360896,
      "grad_norm": 2.339470386505127,
      "learning_rate": 8.113368421052631e-05,
      "loss": 0.4453,
      "step": 46
    },
    {
      "epoch": 0.018639698592107873,
      "grad_norm": 2.9261984825134277,
      "learning_rate": 8.060684210526315e-05,
      "loss": 0.2987,
      "step": 47
    },
    {
      "epoch": 0.01903628792385485,
      "grad_norm": 0.43078291416168213,
      "learning_rate": 8.008e-05,
      "loss": 0.0167,
      "step": 48
    },
    {
      "epoch": 0.019432877255601826,
      "grad_norm": 25.611412048339844,
      "learning_rate": 7.955315789473684e-05,
      "loss": 0.0732,
      "step": 49
    },
    {
      "epoch": 0.019829466587348802,
      "grad_norm": 2.956575393676758,
      "learning_rate": 7.902631578947368e-05,
      "loss": 0.1045,
      "step": 50
    },
    {
      "epoch": 0.019829466587348802,
      "eval_loss": 0.5711997747421265,
      "eval_runtime": 47.4495,
      "eval_samples_per_second": 22.382,
      "eval_steps_per_second": 5.606,
      "step": 50
    },
    {
      "epoch": 0.020226055919095775,
      "grad_norm": 8.624287605285645,
      "learning_rate": 7.849947368421052e-05,
      "loss": 1.3873,
      "step": 51
    },
    {
      "epoch": 0.02062264525084275,
      "grad_norm": 7.417449951171875,
      "learning_rate": 7.797263157894736e-05,
      "loss": 0.8735,
      "step": 52
    },
    {
      "epoch": 0.021019234582589728,
      "grad_norm": 4.000467300415039,
      "learning_rate": 7.744578947368421e-05,
      "loss": 0.8792,
      "step": 53
    },
    {
      "epoch": 0.021415823914336704,
      "grad_norm": 1.1016407012939453,
      "learning_rate": 7.691894736842104e-05,
      "loss": 0.1425,
      "step": 54
    },
    {
      "epoch": 0.02181241324608368,
      "grad_norm": 1.845973014831543,
      "learning_rate": 7.63921052631579e-05,
      "loss": 0.1722,
      "step": 55
    },
    {
      "epoch": 0.022209002577830657,
      "grad_norm": 2.5237021446228027,
      "learning_rate": 7.586526315789473e-05,
      "loss": 0.5188,
      "step": 56
    },
    {
      "epoch": 0.022605591909577633,
      "grad_norm": 1.7162082195281982,
      "learning_rate": 7.533842105263158e-05,
      "loss": 0.2142,
      "step": 57
    },
    {
      "epoch": 0.02300218124132461,
      "grad_norm": 3.1163854598999023,
      "learning_rate": 7.481157894736841e-05,
      "loss": 0.8212,
      "step": 58
    },
    {
      "epoch": 0.023398770573071586,
      "grad_norm": 1.6111253499984741,
      "learning_rate": 7.428473684210526e-05,
      "loss": 0.302,
      "step": 59
    },
    {
      "epoch": 0.02379535990481856,
      "grad_norm": 2.3448877334594727,
      "learning_rate": 7.375789473684209e-05,
      "loss": 0.5013,
      "step": 60
    },
    {
      "epoch": 0.024191949236565535,
      "grad_norm": 1.4863951206207275,
      "learning_rate": 7.323105263157895e-05,
      "loss": 0.0785,
      "step": 61
    },
    {
      "epoch": 0.02458853856831251,
      "grad_norm": 0.46337777376174927,
      "learning_rate": 7.270421052631578e-05,
      "loss": 0.0252,
      "step": 62
    },
    {
      "epoch": 0.024985127900059488,
      "grad_norm": 1.6508891582489014,
      "learning_rate": 7.217736842105263e-05,
      "loss": 0.2864,
      "step": 63
    },
    {
      "epoch": 0.025381717231806464,
      "grad_norm": 1.8168219327926636,
      "learning_rate": 7.165052631578947e-05,
      "loss": 0.1634,
      "step": 64
    },
    {
      "epoch": 0.02577830656355344,
      "grad_norm": 1.4724931716918945,
      "learning_rate": 7.11236842105263e-05,
      "loss": 0.1565,
      "step": 65
    },
    {
      "epoch": 0.026174895895300417,
      "grad_norm": 1.3949257135391235,
      "learning_rate": 7.059684210526315e-05,
      "loss": 0.2016,
      "step": 66
    },
    {
      "epoch": 0.026571485227047394,
      "grad_norm": 5.898593902587891,
      "learning_rate": 7.006999999999998e-05,
      "loss": 0.2504,
      "step": 67
    },
    {
      "epoch": 0.02696807455879437,
      "grad_norm": 1.9633386135101318,
      "learning_rate": 6.954315789473684e-05,
      "loss": 0.221,
      "step": 68
    },
    {
      "epoch": 0.027364663890541343,
      "grad_norm": 2.800323963165283,
      "learning_rate": 6.901631578947368e-05,
      "loss": 0.457,
      "step": 69
    },
    {
      "epoch": 0.02776125322228832,
      "grad_norm": 1.5002562999725342,
      "learning_rate": 6.848947368421052e-05,
      "loss": 0.0964,
      "step": 70
    },
    {
      "epoch": 0.028157842554035296,
      "grad_norm": 2.7115468978881836,
      "learning_rate": 6.796263157894737e-05,
      "loss": 0.4686,
      "step": 71
    },
    {
      "epoch": 0.028554431885782272,
      "grad_norm": 4.224088668823242,
      "learning_rate": 6.74357894736842e-05,
      "loss": 0.9701,
      "step": 72
    },
    {
      "epoch": 0.02895102121752925,
      "grad_norm": 4.742822647094727,
      "learning_rate": 6.690894736842105e-05,
      "loss": 0.7263,
      "step": 73
    },
    {
      "epoch": 0.029347610549276225,
      "grad_norm": 2.388258934020996,
      "learning_rate": 6.638210526315788e-05,
      "loss": 0.3153,
      "step": 74
    },
    {
      "epoch": 0.0297441998810232,
      "grad_norm": 2.4255616664886475,
      "learning_rate": 6.585526315789474e-05,
      "loss": 0.2614,
      "step": 75
    },
    {
      "epoch": 0.030140789212770178,
      "grad_norm": 1.9446436166763306,
      "learning_rate": 6.532842105263157e-05,
      "loss": 0.1088,
      "step": 76
    },
    {
      "epoch": 0.030537378544517154,
      "grad_norm": 2.5515191555023193,
      "learning_rate": 6.480157894736842e-05,
      "loss": 0.4212,
      "step": 77
    },
    {
      "epoch": 0.030933967876264127,
      "grad_norm": 1.997567892074585,
      "learning_rate": 6.427473684210526e-05,
      "loss": 0.2342,
      "step": 78
    },
    {
      "epoch": 0.0313305572080111,
      "grad_norm": 2.1094398498535156,
      "learning_rate": 6.37478947368421e-05,
      "loss": 0.2247,
      "step": 79
    },
    {
      "epoch": 0.03172714653975808,
      "grad_norm": 2.6329636573791504,
      "learning_rate": 6.322105263157894e-05,
      "loss": 0.431,
      "step": 80
    },
    {
      "epoch": 0.032123735871505056,
      "grad_norm": 2.7955644130706787,
      "learning_rate": 6.269421052631577e-05,
      "loss": 0.2616,
      "step": 81
    },
    {
      "epoch": 0.032520325203252036,
      "grad_norm": 2.9589076042175293,
      "learning_rate": 6.216736842105263e-05,
      "loss": 0.3463,
      "step": 82
    },
    {
      "epoch": 0.03291691453499901,
      "grad_norm": 2.4867746829986572,
      "learning_rate": 6.164052631578947e-05,
      "loss": 0.4058,
      "step": 83
    },
    {
      "epoch": 0.03331350386674598,
      "grad_norm": 12.15013313293457,
      "learning_rate": 6.111368421052631e-05,
      "loss": 0.845,
      "step": 84
    },
    {
      "epoch": 0.03371009319849296,
      "grad_norm": 1.4896100759506226,
      "learning_rate": 6.058684210526315e-05,
      "loss": 0.0909,
      "step": 85
    },
    {
      "epoch": 0.034106682530239935,
      "grad_norm": 3.909092426300049,
      "learning_rate": 6.005999999999999e-05,
      "loss": 0.7803,
      "step": 86
    },
    {
      "epoch": 0.034503271861986914,
      "grad_norm": 3.4637069702148438,
      "learning_rate": 5.953315789473684e-05,
      "loss": 0.2718,
      "step": 87
    },
    {
      "epoch": 0.03489986119373389,
      "grad_norm": 3.0589916706085205,
      "learning_rate": 5.9006315789473676e-05,
      "loss": 0.4081,
      "step": 88
    },
    {
      "epoch": 0.03529645052548087,
      "grad_norm": 0.2933317720890045,
      "learning_rate": 5.847947368421053e-05,
      "loss": 0.0098,
      "step": 89
    },
    {
      "epoch": 0.03569303985722784,
      "grad_norm": 2.7271687984466553,
      "learning_rate": 5.795263157894737e-05,
      "loss": 0.1337,
      "step": 90
    },
    {
      "epoch": 0.03608962918897482,
      "grad_norm": 0.08103972673416138,
      "learning_rate": 5.742578947368421e-05,
      "loss": 0.0037,
      "step": 91
    },
    {
      "epoch": 0.03648621852072179,
      "grad_norm": 2.7736382484436035,
      "learning_rate": 5.6898947368421046e-05,
      "loss": 0.2963,
      "step": 92
    },
    {
      "epoch": 0.036882807852468766,
      "grad_norm": 2.4266488552093506,
      "learning_rate": 5.6372105263157886e-05,
      "loss": 0.1306,
      "step": 93
    },
    {
      "epoch": 0.037279397184215746,
      "grad_norm": 1.565101981163025,
      "learning_rate": 5.584526315789473e-05,
      "loss": 0.0939,
      "step": 94
    },
    {
      "epoch": 0.03767598651596272,
      "grad_norm": 0.204191654920578,
      "learning_rate": 5.531842105263158e-05,
      "loss": 0.0056,
      "step": 95
    },
    {
      "epoch": 0.0380725758477097,
      "grad_norm": 1.7860132455825806,
      "learning_rate": 5.4791578947368424e-05,
      "loss": 0.2133,
      "step": 96
    },
    {
      "epoch": 0.03846916517945667,
      "grad_norm": 3.4425370693206787,
      "learning_rate": 5.426473684210526e-05,
      "loss": 0.2545,
      "step": 97
    },
    {
      "epoch": 0.03886575451120365,
      "grad_norm": 2.9134604930877686,
      "learning_rate": 5.37378947368421e-05,
      "loss": 0.282,
      "step": 98
    },
    {
      "epoch": 0.039262343842950624,
      "grad_norm": 2.0041611194610596,
      "learning_rate": 5.321105263157894e-05,
      "loss": 0.175,
      "step": 99
    },
    {
      "epoch": 0.039658933174697604,
      "grad_norm": 2.66727352142334,
      "learning_rate": 5.268421052631578e-05,
      "loss": 0.2079,
      "step": 100
    },
    {
      "epoch": 0.039658933174697604,
      "eval_loss": 0.2756502330303192,
      "eval_runtime": 47.4815,
      "eval_samples_per_second": 22.367,
      "eval_steps_per_second": 5.602,
      "step": 100
    },
    {
      "epoch": 0.04005552250644458,
      "grad_norm": 4.0711822509765625,
      "learning_rate": 5.2157368421052626e-05,
      "loss": 0.6342,
      "step": 101
    },
    {
      "epoch": 0.04045211183819155,
      "grad_norm": 1.8220709562301636,
      "learning_rate": 5.163052631578947e-05,
      "loss": 0.2614,
      "step": 102
    },
    {
      "epoch": 0.04084870116993853,
      "grad_norm": 0.1610630750656128,
      "learning_rate": 5.110368421052632e-05,
      "loss": 0.0077,
      "step": 103
    },
    {
      "epoch": 0.0412452905016855,
      "grad_norm": 1.660254955291748,
      "learning_rate": 5.057684210526316e-05,
      "loss": 0.2139,
      "step": 104
    },
    {
      "epoch": 0.04164187983343248,
      "grad_norm": 1.2034050226211548,
      "learning_rate": 5.005e-05,
      "loss": 0.1488,
      "step": 105
    },
    {
      "epoch": 0.042038469165179455,
      "grad_norm": 1.123096227645874,
      "learning_rate": 4.9523157894736836e-05,
      "loss": 0.1183,
      "step": 106
    },
    {
      "epoch": 0.042435058496926435,
      "grad_norm": 1.3603556156158447,
      "learning_rate": 4.899631578947368e-05,
      "loss": 0.2229,
      "step": 107
    },
    {
      "epoch": 0.04283164782867341,
      "grad_norm": 1.2517480850219727,
      "learning_rate": 4.846947368421052e-05,
      "loss": 0.1483,
      "step": 108
    },
    {
      "epoch": 0.04322823716042039,
      "grad_norm": 0.9318723082542419,
      "learning_rate": 4.794263157894737e-05,
      "loss": 0.0662,
      "step": 109
    },
    {
      "epoch": 0.04362482649216736,
      "grad_norm": 1.0203633308410645,
      "learning_rate": 4.7415789473684206e-05,
      "loss": 0.1234,
      "step": 110
    },
    {
      "epoch": 0.044021415823914334,
      "grad_norm": 1.8892359733581543,
      "learning_rate": 4.688894736842105e-05,
      "loss": 0.1998,
      "step": 111
    },
    {
      "epoch": 0.044418005155661314,
      "grad_norm": 0.6476733088493347,
      "learning_rate": 4.636210526315789e-05,
      "loss": 0.039,
      "step": 112
    },
    {
      "epoch": 0.04481459448740829,
      "grad_norm": 1.5910590887069702,
      "learning_rate": 4.583526315789473e-05,
      "loss": 0.2231,
      "step": 113
    },
    {
      "epoch": 0.045211183819155266,
      "grad_norm": 1.6052759885787964,
      "learning_rate": 4.530842105263158e-05,
      "loss": 0.2969,
      "step": 114
    },
    {
      "epoch": 0.04560777315090224,
      "grad_norm": 2.3403847217559814,
      "learning_rate": 4.4781578947368416e-05,
      "loss": 0.1473,
      "step": 115
    },
    {
      "epoch": 0.04600436248264922,
      "grad_norm": 3.6207292079925537,
      "learning_rate": 4.425473684210526e-05,
      "loss": 0.5701,
      "step": 116
    },
    {
      "epoch": 0.04640095181439619,
      "grad_norm": 0.6984104514122009,
      "learning_rate": 4.372789473684211e-05,
      "loss": 0.0504,
      "step": 117
    },
    {
      "epoch": 0.04679754114614317,
      "grad_norm": 2.5241734981536865,
      "learning_rate": 4.320105263157895e-05,
      "loss": 0.2509,
      "step": 118
    },
    {
      "epoch": 0.047194130477890145,
      "grad_norm": 1.5402919054031372,
      "learning_rate": 4.2674210526315786e-05,
      "loss": 0.1996,
      "step": 119
    },
    {
      "epoch": 0.04759071980963712,
      "grad_norm": 2.3560476303100586,
      "learning_rate": 4.2147368421052626e-05,
      "loss": 0.3034,
      "step": 120
    },
    {
      "epoch": 0.0479873091413841,
      "grad_norm": 1.9385439157485962,
      "learning_rate": 4.162052631578947e-05,
      "loss": 0.3015,
      "step": 121
    },
    {
      "epoch": 0.04838389847313107,
      "grad_norm": 1.4906829595565796,
      "learning_rate": 4.109368421052631e-05,
      "loss": 0.1302,
      "step": 122
    },
    {
      "epoch": 0.04878048780487805,
      "grad_norm": 2.4340176582336426,
      "learning_rate": 4.056684210526316e-05,
      "loss": 0.4329,
      "step": 123
    },
    {
      "epoch": 0.04917707713662502,
      "grad_norm": 2.110347032546997,
      "learning_rate": 4.004e-05,
      "loss": 0.3068,
      "step": 124
    },
    {
      "epoch": 0.049573666468372,
      "grad_norm": 1.2856132984161377,
      "learning_rate": 3.951315789473684e-05,
      "loss": 0.1518,
      "step": 125
    },
    {
      "epoch": 0.049970255800118976,
      "grad_norm": 2.3550281524658203,
      "learning_rate": 3.898631578947368e-05,
      "loss": 0.117,
      "step": 126
    },
    {
      "epoch": 0.050366845131865956,
      "grad_norm": 2.4217026233673096,
      "learning_rate": 3.845947368421052e-05,
      "loss": 0.4333,
      "step": 127
    },
    {
      "epoch": 0.05076343446361293,
      "grad_norm": 1.6932317018508911,
      "learning_rate": 3.7932631578947367e-05,
      "loss": 0.148,
      "step": 128
    },
    {
      "epoch": 0.0511600237953599,
      "grad_norm": 1.6000185012817383,
      "learning_rate": 3.7405789473684206e-05,
      "loss": 0.0932,
      "step": 129
    },
    {
      "epoch": 0.05155661312710688,
      "grad_norm": 1.805180311203003,
      "learning_rate": 3.6878947368421045e-05,
      "loss": 0.2301,
      "step": 130
    },
    {
      "epoch": 0.051953202458853855,
      "grad_norm": 2.063298463821411,
      "learning_rate": 3.635210526315789e-05,
      "loss": 0.1673,
      "step": 131
    },
    {
      "epoch": 0.052349791790600834,
      "grad_norm": 2.706456422805786,
      "learning_rate": 3.582526315789474e-05,
      "loss": 0.389,
      "step": 132
    },
    {
      "epoch": 0.05274638112234781,
      "grad_norm": 2.0284225940704346,
      "learning_rate": 3.5298421052631576e-05,
      "loss": 0.1468,
      "step": 133
    },
    {
      "epoch": 0.05314297045409479,
      "grad_norm": 1.6976109743118286,
      "learning_rate": 3.477157894736842e-05,
      "loss": 0.0762,
      "step": 134
    },
    {
      "epoch": 0.05353955978584176,
      "grad_norm": 1.7858450412750244,
      "learning_rate": 3.424473684210526e-05,
      "loss": 0.2102,
      "step": 135
    },
    {
      "epoch": 0.05393614911758874,
      "grad_norm": 1.9341448545455933,
      "learning_rate": 3.37178947368421e-05,
      "loss": 0.2661,
      "step": 136
    },
    {
      "epoch": 0.05433273844933571,
      "grad_norm": 3.936488628387451,
      "learning_rate": 3.319105263157894e-05,
      "loss": 0.6706,
      "step": 137
    },
    {
      "epoch": 0.054729327781082686,
      "grad_norm": 1.7080624103546143,
      "learning_rate": 3.2664210526315786e-05,
      "loss": 0.1062,
      "step": 138
    },
    {
      "epoch": 0.055125917112829666,
      "grad_norm": 1.5581916570663452,
      "learning_rate": 3.213736842105263e-05,
      "loss": 0.0934,
      "step": 139
    },
    {
      "epoch": 0.05552250644457664,
      "grad_norm": 1.3080987930297852,
      "learning_rate": 3.161052631578947e-05,
      "loss": 0.0814,
      "step": 140
    },
    {
      "epoch": 0.05591909577632362,
      "grad_norm": 2.823137044906616,
      "learning_rate": 3.108368421052632e-05,
      "loss": 0.4453,
      "step": 141
    },
    {
      "epoch": 0.05631568510807059,
      "grad_norm": 0.21419651806354523,
      "learning_rate": 3.0556842105263156e-05,
      "loss": 0.0073,
      "step": 142
    },
    {
      "epoch": 0.05671227443981757,
      "grad_norm": 0.22324123978614807,
      "learning_rate": 3.0029999999999995e-05,
      "loss": 0.0051,
      "step": 143
    },
    {
      "epoch": 0.057108863771564544,
      "grad_norm": 0.2457597404718399,
      "learning_rate": 2.9503157894736838e-05,
      "loss": 0.0057,
      "step": 144
    },
    {
      "epoch": 0.057505453103311524,
      "grad_norm": 3.4714629650115967,
      "learning_rate": 2.8976315789473684e-05,
      "loss": 0.5372,
      "step": 145
    },
    {
      "epoch": 0.0579020424350585,
      "grad_norm": 0.028041277080774307,
      "learning_rate": 2.8449473684210523e-05,
      "loss": 0.0009,
      "step": 146
    },
    {
      "epoch": 0.05829863176680547,
      "grad_norm": 1.089476466178894,
      "learning_rate": 2.7922631578947366e-05,
      "loss": 0.0318,
      "step": 147
    },
    {
      "epoch": 0.05869522109855245,
      "grad_norm": 2.4500927925109863,
      "learning_rate": 2.7395789473684212e-05,
      "loss": 0.4794,
      "step": 148
    },
    {
      "epoch": 0.05909181043029942,
      "grad_norm": 0.0174604132771492,
      "learning_rate": 2.686894736842105e-05,
      "loss": 0.0006,
      "step": 149
    },
    {
      "epoch": 0.0594883997620464,
      "grad_norm": 2.791302442550659,
      "learning_rate": 2.634210526315789e-05,
      "loss": 0.4109,
      "step": 150
    },
    {
      "epoch": 0.0594883997620464,
      "eval_loss": 0.23389868438243866,
      "eval_runtime": 47.3967,
      "eval_samples_per_second": 22.407,
      "eval_steps_per_second": 5.612,
      "step": 150
    },
    {
      "epoch": 0.059884989093793375,
      "grad_norm": 4.477522373199463,
      "learning_rate": 2.5815263157894736e-05,
      "loss": 0.5733,
      "step": 151
    },
    {
      "epoch": 0.060281578425540355,
      "grad_norm": 3.334212303161621,
      "learning_rate": 2.528842105263158e-05,
      "loss": 0.3337,
      "step": 152
    },
    {
      "epoch": 0.06067816775728733,
      "grad_norm": 2.710191011428833,
      "learning_rate": 2.4761578947368418e-05,
      "loss": 0.1571,
      "step": 153
    },
    {
      "epoch": 0.06107475708903431,
      "grad_norm": 1.1137654781341553,
      "learning_rate": 2.423473684210526e-05,
      "loss": 0.1697,
      "step": 154
    },
    {
      "epoch": 0.06147134642078128,
      "grad_norm": 0.505480945110321,
      "learning_rate": 2.3707894736842103e-05,
      "loss": 0.0237,
      "step": 155
    },
    {
      "epoch": 0.061867935752528254,
      "grad_norm": 1.122707724571228,
      "learning_rate": 2.3181052631578946e-05,
      "loss": 0.0803,
      "step": 156
    },
    {
      "epoch": 0.062264525084275234,
      "grad_norm": 0.782159149646759,
      "learning_rate": 2.265421052631579e-05,
      "loss": 0.063,
      "step": 157
    },
    {
      "epoch": 0.0626611144160222,
      "grad_norm": 2.235445499420166,
      "learning_rate": 2.212736842105263e-05,
      "loss": 0.2019,
      "step": 158
    },
    {
      "epoch": 0.06305770374776919,
      "grad_norm": 3.1838295459747314,
      "learning_rate": 2.1600526315789474e-05,
      "loss": 0.3564,
      "step": 159
    },
    {
      "epoch": 0.06345429307951617,
      "grad_norm": 1.417310118675232,
      "learning_rate": 2.1073684210526313e-05,
      "loss": 0.1137,
      "step": 160
    },
    {
      "epoch": 0.06385088241126313,
      "grad_norm": 1.3451471328735352,
      "learning_rate": 2.0546842105263155e-05,
      "loss": 0.1321,
      "step": 161
    },
    {
      "epoch": 0.06424747174301011,
      "grad_norm": 3.381690740585327,
      "learning_rate": 2.002e-05,
      "loss": 0.3365,
      "step": 162
    },
    {
      "epoch": 0.06464406107475709,
      "grad_norm": 1.9907605648040771,
      "learning_rate": 1.949315789473684e-05,
      "loss": 0.1682,
      "step": 163
    },
    {
      "epoch": 0.06504065040650407,
      "grad_norm": 1.1349059343338013,
      "learning_rate": 1.8966315789473683e-05,
      "loss": 0.0735,
      "step": 164
    },
    {
      "epoch": 0.06543723973825104,
      "grad_norm": 1.4078447818756104,
      "learning_rate": 1.8439473684210522e-05,
      "loss": 0.1848,
      "step": 165
    },
    {
      "epoch": 0.06583382906999802,
      "grad_norm": 2.635918617248535,
      "learning_rate": 1.791263157894737e-05,
      "loss": 0.288,
      "step": 166
    },
    {
      "epoch": 0.066230418401745,
      "grad_norm": 2.0039432048797607,
      "learning_rate": 1.738578947368421e-05,
      "loss": 0.4102,
      "step": 167
    },
    {
      "epoch": 0.06662700773349196,
      "grad_norm": 2.923654794692993,
      "learning_rate": 1.685894736842105e-05,
      "loss": 0.8078,
      "step": 168
    },
    {
      "epoch": 0.06702359706523894,
      "grad_norm": 2.5879900455474854,
      "learning_rate": 1.6332105263157893e-05,
      "loss": 0.3793,
      "step": 169
    },
    {
      "epoch": 0.06742018639698592,
      "grad_norm": 1.7998006343841553,
      "learning_rate": 1.5805263157894735e-05,
      "loss": 0.1745,
      "step": 170
    },
    {
      "epoch": 0.0678167757287329,
      "grad_norm": 2.5468177795410156,
      "learning_rate": 1.5278421052631578e-05,
      "loss": 0.1744,
      "step": 171
    },
    {
      "epoch": 0.06821336506047987,
      "grad_norm": 2.898380994796753,
      "learning_rate": 1.4751578947368419e-05,
      "loss": 0.379,
      "step": 172
    },
    {
      "epoch": 0.06860995439222685,
      "grad_norm": 3.7562787532806396,
      "learning_rate": 1.4224736842105262e-05,
      "loss": 0.5993,
      "step": 173
    },
    {
      "epoch": 0.06900654372397383,
      "grad_norm": 1.9443773031234741,
      "learning_rate": 1.3697894736842106e-05,
      "loss": 0.1486,
      "step": 174
    },
    {
      "epoch": 0.0694031330557208,
      "grad_norm": 2.690262794494629,
      "learning_rate": 1.3171052631578945e-05,
      "loss": 0.2218,
      "step": 175
    },
    {
      "epoch": 0.06979972238746777,
      "grad_norm": 2.491971492767334,
      "learning_rate": 1.264421052631579e-05,
      "loss": 0.1893,
      "step": 176
    },
    {
      "epoch": 0.07019631171921475,
      "grad_norm": 3.372171401977539,
      "learning_rate": 1.211736842105263e-05,
      "loss": 0.5962,
      "step": 177
    },
    {
      "epoch": 0.07059290105096173,
      "grad_norm": 2.8883213996887207,
      "learning_rate": 1.1590526315789473e-05,
      "loss": 0.3897,
      "step": 178
    },
    {
      "epoch": 0.0709894903827087,
      "grad_norm": 2.1615426540374756,
      "learning_rate": 1.1063684210526316e-05,
      "loss": 0.1384,
      "step": 179
    },
    {
      "epoch": 0.07138607971445568,
      "grad_norm": 3.077579975128174,
      "learning_rate": 1.0536842105263156e-05,
      "loss": 0.6491,
      "step": 180
    },
    {
      "epoch": 0.07178266904620266,
      "grad_norm": 1.228853464126587,
      "learning_rate": 1.001e-05,
      "loss": 0.06,
      "step": 181
    },
    {
      "epoch": 0.07217925837794964,
      "grad_norm": 1.8013077974319458,
      "learning_rate": 9.483157894736842e-06,
      "loss": 0.1734,
      "step": 182
    },
    {
      "epoch": 0.0725758477096966,
      "grad_norm": 0.8816391229629517,
      "learning_rate": 8.956315789473684e-06,
      "loss": 0.0388,
      "step": 183
    },
    {
      "epoch": 0.07297243704144359,
      "grad_norm": 1.7668546438217163,
      "learning_rate": 8.429473684210525e-06,
      "loss": 0.217,
      "step": 184
    },
    {
      "epoch": 0.07336902637319057,
      "grad_norm": 3.179753541946411,
      "learning_rate": 7.902631578947368e-06,
      "loss": 0.28,
      "step": 185
    },
    {
      "epoch": 0.07376561570493753,
      "grad_norm": 3.8374545574188232,
      "learning_rate": 7.3757894736842095e-06,
      "loss": 0.4457,
      "step": 186
    },
    {
      "epoch": 0.07416220503668451,
      "grad_norm": 3.593108892440796,
      "learning_rate": 6.848947368421053e-06,
      "loss": 1.1263,
      "step": 187
    },
    {
      "epoch": 0.07455879436843149,
      "grad_norm": 3.4441983699798584,
      "learning_rate": 6.322105263157895e-06,
      "loss": 0.4889,
      "step": 188
    },
    {
      "epoch": 0.07495538370017847,
      "grad_norm": 2.782144546508789,
      "learning_rate": 5.7952631578947365e-06,
      "loss": 0.3363,
      "step": 189
    },
    {
      "epoch": 0.07535197303192544,
      "grad_norm": 0.27459532022476196,
      "learning_rate": 5.268421052631578e-06,
      "loss": 0.0108,
      "step": 190
    },
    {
      "epoch": 0.07574856236367242,
      "grad_norm": 2.1841888427734375,
      "learning_rate": 4.741578947368421e-06,
      "loss": 0.3438,
      "step": 191
    },
    {
      "epoch": 0.0761451516954194,
      "grad_norm": 0.02204926311969757,
      "learning_rate": 4.2147368421052626e-06,
      "loss": 0.0007,
      "step": 192
    },
    {
      "epoch": 0.07654174102716636,
      "grad_norm": 1.9425251483917236,
      "learning_rate": 3.6878947368421047e-06,
      "loss": 0.2348,
      "step": 193
    },
    {
      "epoch": 0.07693833035891334,
      "grad_norm": 0.04739471897482872,
      "learning_rate": 3.1610526315789474e-06,
      "loss": 0.0012,
      "step": 194
    },
    {
      "epoch": 0.07733491969066032,
      "grad_norm": 0.06455172598361969,
      "learning_rate": 2.634210526315789e-06,
      "loss": 0.0017,
      "step": 195
    },
    {
      "epoch": 0.0777315090224073,
      "grad_norm": 3.44230318069458,
      "learning_rate": 2.1073684210526313e-06,
      "loss": 0.4222,
      "step": 196
    },
    {
      "epoch": 0.07812809835415427,
      "grad_norm": 2.755849838256836,
      "learning_rate": 1.5805263157894737e-06,
      "loss": 0.4239,
      "step": 197
    },
    {
      "epoch": 0.07852468768590125,
      "grad_norm": 1.2250853776931763,
      "learning_rate": 1.0536842105263156e-06,
      "loss": 0.0493,
      "step": 198
    },
    {
      "epoch": 0.07892127701764823,
      "grad_norm": 2.685635566711426,
      "learning_rate": 5.268421052631578e-07,
      "loss": 0.3463,
      "step": 199
    },
    {
      "epoch": 0.07931786634939521,
      "grad_norm": 4.368965148925781,
      "learning_rate": 0.0,
      "loss": 0.6687,
      "step": 200
    },
    {
      "epoch": 0.07931786634939521,
      "eval_loss": 0.20430698990821838,
      "eval_runtime": 47.6165,
      "eval_samples_per_second": 22.303,
      "eval_steps_per_second": 5.586,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.39474174476288e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}