{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9990224828934506,
  "global_step": 1533,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 0.0002998043052837573, "loss": 9.8511, "step": 1},
    {"epoch": 0.0, "learning_rate": 0.0002998043052837573, "loss": 6.188, "step": 2},
    {"epoch": 0.01, "learning_rate": 0.00029960861056751466, "loss": 5.9422, "step": 3},
    {"epoch": 0.01, "learning_rate": 0.000299412915851272, "loss": 4.4474, "step": 4},
    {"epoch": 0.01, "learning_rate": 0.00029921722113502935, "loss": 3.7152, "step": 5},
    {"epoch": 0.01, "learning_rate": 0.00029902152641878666, "loss": 3.7503, "step": 6},
    {"epoch": 0.01, "learning_rate": 0.000298825831702544, "loss": 3.443, "step": 7},
    {"epoch": 0.02, "learning_rate": 0.00029863013698630135, "loss": 3.4837, "step": 8},
    {"epoch": 0.02, "learning_rate": 0.00029843444227005867, "loss": 3.4489, "step": 9},
    {"epoch": 0.02, "learning_rate": 0.000298238747553816, "loss": 3.3525, "step": 10},
    {"epoch": 0.02, "learning_rate": 0.00029804305283757335, "loss": 3.3712, "step": 11},
    {"epoch": 0.02, "learning_rate": 0.00029784735812133067, "loss": 3.3725, "step": 12},
    {"epoch": 0.03, "learning_rate": 0.00029765166340508804, "loss": 3.3204, "step": 13},
    {"epoch": 0.03, "learning_rate": 0.00029745596868884536, "loss": 3.3487, "step": 14},
    {"epoch": 0.03, "learning_rate": 0.0002972602739726027, "loss": 3.3468, "step": 15},
    {"epoch": 0.03, "learning_rate": 0.00029706457925636004, "loss": 3.3251, "step": 16},
    {"epoch": 0.03, "learning_rate": 0.0002968688845401174, "loss": 3.3036, "step": 17},
    {"epoch": 0.04, "learning_rate": 0.00029667318982387473, "loss": 3.3156, "step": 18},
    {"epoch": 0.04, "learning_rate": 0.00029647749510763204, "loss": 3.3083, "step": 19},
    {"epoch": 0.04, "learning_rate": 0.0002962818003913894, "loss": 3.3087, "step": 20},
    {"epoch": 0.04, "learning_rate": 0.00029608610567514673, "loss": 3.3111, "step": 21},
    {"epoch": 0.04, "learning_rate": 0.00029589041095890405, "loss": 3.3252, "step": 22},
    {"epoch": 0.04, "learning_rate": 0.0002956947162426614, "loss": 3.3096, "step": 23},
    {"epoch": 0.05, "learning_rate": 0.00029549902152641873, "loss": 3.3336, "step": 24},
    {"epoch": 0.05, "learning_rate": 0.0002953033268101761, "loss": 3.3167, "step": 25},
    {"epoch": 0.05, "learning_rate": 0.0002951076320939334, "loss": 3.2763, "step": 26},
    {"epoch": 0.05, "learning_rate": 0.0002949119373776908, "loss": 3.3215, "step": 27},
    {"epoch": 0.05, "learning_rate": 0.0002947162426614481, "loss": 3.3226, "step": 28},
    {"epoch": 0.06, "learning_rate": 0.0002945205479452055, "loss": 3.3105, "step": 29},
    {"epoch": 0.06, "learning_rate": 0.0002943248532289628, "loss": 3.3021, "step": 30},
    {"epoch": 0.06, "learning_rate": 0.0002941291585127201, "loss": 3.2943, "step": 31},
    {"epoch": 0.06, "learning_rate": 0.0002939334637964774, "loss": 3.3157, "step": 32},
    {"epoch": 0.06, "learning_rate": 0.0002937377690802348, "loss": 3.2878, "step": 33},
    {"epoch": 0.07, "learning_rate": 0.0002935420743639921, "loss": 3.3081, "step": 34},
    {"epoch": 0.07, "learning_rate": 0.0002933463796477495, "loss": 3.2917, "step": 35},
    {"epoch": 0.07, "learning_rate": 0.0002931506849315068, "loss": 3.2955, "step": 36},
    {"epoch": 0.07, "learning_rate": 0.00029295499021526417, "loss": 3.3038, "step": 37},
    {"epoch": 0.07, "learning_rate": 0.0002927592954990215, "loss": 3.2788, "step": 38},
    {"epoch": 0.08, "learning_rate": 0.00029256360078277886, "loss": 3.3037, "step": 39},
    {"epoch": 0.08, "learning_rate": 0.00029236790606653617, "loss": 3.2877, "step": 40},
    {"epoch": 0.08, "learning_rate": 0.00029217221135029354, "loss": 3.2931, "step": 41},
    {"epoch": 0.08, "learning_rate": 0.00029197651663405086, "loss": 3.3072, "step": 42},
    {"epoch": 0.08, "learning_rate": 0.0002917808219178082, "loss": 3.3041, "step": 43},
    {"epoch": 0.09, "learning_rate": 0.0002915851272015655, "loss": 3.3031, "step": 44},
    {"epoch": 0.09, "learning_rate": 0.00029138943248532286, "loss": 3.3022, "step": 45},
    {"epoch": 0.09, "learning_rate": 0.0002911937377690802, "loss": 3.2758, "step": 46},
    {"epoch": 0.09, "learning_rate": 0.00029099804305283755, "loss": 3.3018, "step": 47},
    {"epoch": 0.09, "learning_rate": 0.00029080234833659486, "loss": 3.2911, "step": 48},
    {"epoch": 0.1, "learning_rate": 0.00029060665362035223, "loss": 3.3105, "step": 49},
    {"epoch": 0.1, "learning_rate": 0.00029041095890410955, "loss": 3.3076, "step": 50},
    {"epoch": 0.1, "learning_rate": 0.0002902152641878669, "loss": 3.2728, "step": 51},
    {"epoch": 0.1, "learning_rate": 0.00029001956947162424, "loss": 3.2984, "step": 52},
    {"epoch": 0.1, "learning_rate": 0.00028982387475538155, "loss": 3.2977, "step": 53},
    {"epoch": 0.11, "learning_rate": 0.0002896281800391389, "loss": 3.2912, "step": 54},
    {"epoch": 0.11, "learning_rate": 0.00028943248532289624, "loss": 3.2878, "step": 55},
    {"epoch": 0.11, "learning_rate": 0.00028923679060665356, "loss": 3.2831, "step": 56},
    {"epoch": 0.11, "learning_rate": 0.0002890410958904109, "loss": 3.291, "step": 57},
    {"epoch": 0.11, "learning_rate": 0.00028884540117416824, "loss": 3.3009, "step": 58},
    {"epoch": 0.12, "learning_rate": 0.0002886497064579256, "loss": 3.3219, "step": 59},
    {"epoch": 0.12, "learning_rate": 0.00028845401174168293, "loss": 3.2959, "step": 60},
    {"epoch": 0.12, "learning_rate": 0.0002882583170254403, "loss": 3.2987, "step": 61},
    {"epoch": 0.12, "learning_rate": 0.0002880626223091976, "loss": 3.3099, "step": 62},
    {"epoch": 0.12, "learning_rate": 0.000287866927592955, "loss": 3.3113, "step": 63},
    {"epoch": 0.13, "learning_rate": 0.0002876712328767123, "loss": 3.2871, "step": 64},
    {"epoch": 0.13, "learning_rate": 0.0002874755381604696, "loss": 3.3086, "step": 65},
    {"epoch": 0.13, "learning_rate": 0.000287279843444227, "loss": 3.2915, "step": 66},
    {"epoch": 0.13, "learning_rate": 0.0002870841487279843, "loss": 3.3023, "step": 67},
    {"epoch": 0.13, "learning_rate": 0.0002868884540117416, "loss": 3.2963, "step": 68},
    {"epoch": 0.13, "learning_rate": 0.000286692759295499, "loss": 3.3007, "step": 69},
    {"epoch": 0.14, "learning_rate": 0.0002864970645792563, "loss": 3.298, "step": 70},
    {"epoch": 0.14, "learning_rate": 0.0002863013698630137, "loss": 3.3059, "step": 71},
    {"epoch": 0.14, "learning_rate": 0.000286105675146771, "loss": 3.2833, "step": 72},
    {"epoch": 0.14, "learning_rate": 0.00028590998043052837, "loss": 3.2876, "step": 73},
    {"epoch": 0.14, "learning_rate": 0.0002857142857142857, "loss": 3.3049, "step": 74},
    {"epoch": 0.15, "learning_rate": 0.00028551859099804305, "loss": 3.2997, "step": 75},
    {"epoch": 0.15, "learning_rate": 0.00028532289628180037, "loss": 3.2983, "step": 76},
    {"epoch": 0.15, "learning_rate": 0.0002851272015655577, "loss": 3.2836, "step": 77},
    {"epoch": 0.15, "learning_rate": 0.00028493150684931505, "loss": 3.2893, "step": 78},
    {"epoch": 0.15, "learning_rate": 0.00028473581213307237, "loss": 3.2929, "step": 79},
    {"epoch": 0.16, "learning_rate": 0.0002845401174168297, "loss": 3.2806, "step": 80},
    {"epoch": 0.16, "learning_rate": 0.00028434442270058706, "loss": 3.2935, "step": 81},
    {"epoch": 0.16, "learning_rate": 0.0002841487279843444, "loss": 3.3072, "step": 82},
    {"epoch": 0.16, "learning_rate": 0.00028395303326810174, "loss": 3.3108, "step": 83},
    {"epoch": 0.16, "learning_rate": 0.00028375733855185906, "loss": 3.2932, "step": 84},
    {"epoch": 0.17, "learning_rate": 0.00028356164383561643, "loss": 3.2835, "step": 85},
    {"epoch": 0.17, "learning_rate": 0.00028336594911937375, "loss": 3.3251, "step": 86},
    {"epoch": 0.17, "learning_rate": 0.0002831702544031311, "loss": 3.2801, "step": 87},
    {"epoch": 0.17, "learning_rate": 0.00028297455968688843, "loss": 3.2857, "step": 88},
    {"epoch": 0.17, "learning_rate": 0.00028277886497064575, "loss": 3.3075, "step": 89},
    {"epoch": 0.18, "learning_rate": 0.00028258317025440307, "loss": 3.2917, "step": 90},
    {"epoch": 0.18, "learning_rate": 0.00028238747553816044, "loss": 3.2721, "step": 91},
    {"epoch": 0.18, "learning_rate": 0.00028219178082191775, "loss": 3.2839, "step": 92},
    {"epoch": 0.18, "learning_rate": 0.0002819960861056751, "loss": 3.3037, "step": 93},
    {"epoch": 0.18, "learning_rate": 0.00028180039138943244, "loss": 3.2903, "step": 94},
    {"epoch": 0.19, "learning_rate": 0.0002816046966731898, "loss": 3.283, "step": 95},
    {"epoch": 0.19, "learning_rate": 0.0002814090019569471, "loss": 3.2963, "step": 96},
    {"epoch": 0.19, "learning_rate": 0.0002812133072407045, "loss": 3.3007, "step": 97},
    {"epoch": 0.19, "learning_rate": 0.0002810176125244618, "loss": 3.2914, "step": 98},
    {"epoch": 0.19, "learning_rate": 0.00028082191780821913, "loss": 3.3052, "step": 99},
    {"epoch": 0.2, "learning_rate": 0.0002806262230919765, "loss": 3.308, "step": 100},
    {"epoch": 0.2, "learning_rate": 0.0002804305283757338, "loss": 3.3233, "step": 101},
    {"epoch": 0.2, "learning_rate": 0.00028023483365949113, "loss": 3.2813, "step": 102},
    {"epoch": 0.2, "learning_rate": 0.0002800391389432485, "loss": 3.2777, "step": 103},
    {"epoch": 0.2, "learning_rate": 0.0002798434442270058, "loss": 3.29, "step": 104},
    {"epoch": 0.21, "learning_rate": 0.0002796477495107632, "loss": 3.2916, "step": 105},
    {"epoch": 0.21, "learning_rate": 0.0002794520547945205, "loss": 3.2815, "step": 106},
    {"epoch": 0.21, "learning_rate": 0.0002792563600782779, "loss": 3.2872, "step": 107},
    {"epoch": 0.21, "learning_rate": 0.0002790606653620352, "loss": 3.3095, "step": 108},
    {"epoch": 0.21, "learning_rate": 0.00027886497064579256, "loss": 3.3155, "step": 109},
    {"epoch": 0.22, "learning_rate": 0.0002786692759295499, "loss": 3.3012, "step": 110},
    {"epoch": 0.22, "learning_rate": 0.0002784735812133072, "loss": 3.2934, "step": 111},
    {"epoch": 0.22, "learning_rate": 0.00027827788649706456, "loss": 3.3186, "step": 112},
    {"epoch": 0.22, "learning_rate": 0.0002780821917808219, "loss": 3.2776, "step": 113},
    {"epoch": 0.22, "learning_rate": 0.0002778864970645792, "loss": 3.3155, "step": 114},
    {"epoch": 0.22, "learning_rate": 0.00027769080234833657, "loss": 3.3051, "step": 115},
    {"epoch": 0.23, "learning_rate": 0.0002774951076320939, "loss": 3.3012, "step": 116},
    {"epoch": 0.23, "learning_rate": 0.00027729941291585125, "loss": 3.2965, "step": 117},
    {"epoch": 0.23, "learning_rate": 0.00027710371819960857, "loss": 3.31, "step": 118},
    {"epoch": 0.23, "learning_rate": 0.00027690802348336594, "loss": 3.2916, "step": 119},
    {"epoch": 0.23, "learning_rate": 0.00027671232876712326, "loss": 3.2975, "step": 120},
    {"epoch": 0.24, "learning_rate": 0.0002765166340508806, "loss": 3.2767, "step": 121},
    {"epoch": 0.24, "learning_rate": 0.00027632093933463794, "loss": 3.2795, "step": 122},
    {"epoch": 0.24, "learning_rate": 0.00027612524461839526, "loss": 3.2931, "step": 123},
    {"epoch": 0.24, "learning_rate": 0.00027592954990215263, "loss": 3.2788, "step": 124},
    {"epoch": 0.24, "learning_rate": 0.00027573385518590995, "loss": 3.2713, "step": 125},
    {"epoch": 0.25, "learning_rate": 0.00027553816046966726, "loss": 3.2781, "step": 126},
    {"epoch": 0.25, "learning_rate": 0.00027534246575342463, "loss": 3.2881, "step": 127},
    {"epoch": 0.25, "learning_rate": 0.00027514677103718195, "loss": 3.2577, "step": 128},
    {"epoch": 0.25, "learning_rate": 0.0002749510763209393, "loss": 3.2739, "step": 129},
    {"epoch": 0.25, "learning_rate": 0.00027475538160469663, "loss": 3.277, "step": 130},
    {"epoch": 0.26, "learning_rate": 0.000274559686888454, "loss": 3.2712, "step": 131},
    {"epoch": 0.26, "learning_rate": 0.0002743639921722113, "loss": 3.2791, "step": 132},
    {"epoch": 0.26, "learning_rate": 0.0002741682974559687, "loss": 3.2689, "step": 133},
    {"epoch": 0.26, "learning_rate": 0.000273972602739726, "loss": 3.266, "step": 134},
    {"epoch": 0.26, "learning_rate": 0.0002737769080234833, "loss": 3.2763, "step": 135},
    {"epoch": 0.27, "learning_rate": 0.00027358121330724064, "loss": 3.2733, "step": 136},
    {"epoch": 0.27, "learning_rate": 0.000273385518590998, "loss": 3.2581, "step": 137},
    {"epoch": 0.27, "learning_rate": 0.0002731898238747553, "loss": 3.2686, "step": 138},
    {"epoch": 0.27, "learning_rate": 0.0002729941291585127, "loss": 3.262, "step": 139},
    {"epoch": 0.27, "learning_rate": 0.00027279843444227, "loss": 3.2614, "step": 140},
    {"epoch": 0.28, "learning_rate": 0.0002726027397260274, "loss": 3.2648, "step": 141},
    {"epoch": 0.28, "learning_rate": 0.0002724070450097847, "loss": 3.264, "step": 142},
    {"epoch": 0.28, "learning_rate": 0.00027221135029354207, "loss": 3.2752, "step": 143},
    {"epoch": 0.28, "learning_rate": 0.0002720156555772994, "loss": 3.2459, "step": 144},
    {"epoch": 0.28, "learning_rate": 0.0002718199608610567, "loss": 3.2722, "step": 145},
    {"epoch": 0.29, "learning_rate": 0.0002716242661448141, "loss": 3.2584, "step": 146},
    {"epoch": 0.29, "learning_rate": 0.0002714285714285714, "loss": 3.2534, "step": 147},
    {"epoch": 0.29, "learning_rate": 0.0002712328767123287, "loss": 3.2674, "step": 148},
    {"epoch": 0.29, "learning_rate": 0.0002710371819960861, "loss": 3.2833, "step": 149},
    {"epoch": 0.29, "learning_rate": 0.0002708414872798434, "loss": 3.2681, "step": 150},
    {"epoch": 0.3, "learning_rate": 0.00027064579256360076, "loss": 3.2882, "step": 151},
    {"epoch": 0.3, "learning_rate": 0.0002704500978473581, "loss": 3.2487, "step": 152},
    {"epoch": 0.3, "learning_rate": 0.00027025440313111545, "loss": 3.2686, "step": 153},
    {"epoch": 0.3, "learning_rate": 0.00027005870841487277, "loss": 3.2715, "step": 154},
    {"epoch": 0.3, "learning_rate": 0.00026986301369863014, "loss": 3.3318, "step": 155},
    {"epoch": 0.3, "learning_rate": 0.00026966731898238745, "loss": 3.2894, "step": 156},
    {"epoch": 0.31, "learning_rate": 0.00026947162426614477, "loss": 3.2855, "step": 157},
    {"epoch": 0.31, "learning_rate": 0.00026927592954990214, "loss": 3.2884, "step": 158},
    {"epoch": 0.31, "learning_rate": 0.00026908023483365945, "loss": 3.2857, "step": 159},
    {"epoch": 0.31, "learning_rate": 0.00026888454011741677, "loss": 3.2585, "step": 160},
    {"epoch": 0.31, "learning_rate": 0.00026868884540117414, "loss": 3.2767, "step": 161},
    {"epoch": 0.32, "learning_rate": 0.00026849315068493146, "loss": 3.2683, "step": 162},
    {"epoch": 0.32, "learning_rate": 0.00026829745596868883, "loss": 3.278, "step": 163},
    {"epoch": 0.32, "learning_rate": 0.00026810176125244614, "loss": 3.2791, "step": 164},
    {"epoch": 0.32, "learning_rate": 0.0002679060665362035, "loss": 3.2493, "step": 165},
    {"epoch": 0.32, "learning_rate": 0.00026771037181996083, "loss": 3.2582, "step": 166},
    {"epoch": 0.33, "learning_rate": 0.0002675146771037182, "loss": 3.2674, "step": 167},
    {"epoch": 0.33, "learning_rate": 0.0002673189823874755, "loss": 3.2644, "step": 168},
    {"epoch": 0.33, "learning_rate": 0.00026712328767123283, "loss": 3.275, "step": 169},
    {"epoch": 0.33, "learning_rate": 0.0002669275929549902, "loss": 3.261, "step": 170},
    {"epoch": 0.33, "learning_rate": 0.0002667318982387475, "loss": 3.2672, "step": 171},
    {"epoch": 0.34, "learning_rate": 0.00026653620352250484, "loss": 3.2667, "step": 172},
    {"epoch": 0.34, "learning_rate": 0.0002663405088062622, "loss": 3.2546, "step": 173},
    {"epoch": 0.34, "learning_rate": 0.0002661448140900195, "loss": 3.2809, "step": 174},
    {"epoch": 0.34, "learning_rate": 0.0002659491193737769, "loss": 3.2547, "step": 175},
    {"epoch": 0.34, "learning_rate": 0.0002657534246575342, "loss": 3.267, "step": 176},
    {"epoch": 0.35, "learning_rate": 0.0002655577299412916, "loss": 3.2613, "step": 177},
    {"epoch": 0.35, "learning_rate": 0.0002653620352250489, "loss": 3.2727, "step": 178},
    {"epoch": 0.35, "learning_rate": 0.00026516634050880627, "loss": 3.2651, "step": 179},
    {"epoch": 0.35, "learning_rate": 0.0002649706457925636, "loss": 3.2562, "step": 180},
    {"epoch": 0.35, "learning_rate": 0.0002647749510763209, "loss": 3.2423, "step": 181},
    {"epoch": 0.36, "learning_rate": 0.0002645792563600782, "loss": 3.2899, "step": 182},
    {"epoch": 0.36, "learning_rate": 0.0002643835616438356, "loss": 3.2654, "step": 183},
    {"epoch": 0.36, "learning_rate": 0.0002641878669275929, "loss": 3.2669, "step": 184},
    {"epoch": 0.36, "learning_rate": 0.00026399217221135027, "loss": 3.2416, "step": 185},
    {"epoch": 0.36, "learning_rate": 0.0002637964774951076, "loss": 3.2704, "step": 186},
    {"epoch": 0.37, "learning_rate": 0.00026360078277886496, "loss": 3.2871, "step": 187},
    {"epoch": 0.37, "learning_rate": 0.0002634050880626223, "loss": 3.2606, "step": 188},
    {"epoch": 0.37, "learning_rate": 0.00026320939334637964, "loss": 3.2691, "step": 189},
    {"epoch": 0.37, "learning_rate": 0.00026301369863013696, "loss": 3.2654, "step": 190},
    {"epoch": 0.37, "learning_rate": 0.0002628180039138943, "loss": 3.2508, "step": 191},
    {"epoch": 0.38, "learning_rate": 0.00026262230919765165, "loss": 3.2768, "step": 192},
    {"epoch": 0.38, "learning_rate": 0.00026242661448140896, "loss": 3.2809, "step": 193},
    {"epoch": 0.38, "learning_rate": 0.0002622309197651663, "loss": 3.2623, "step": 194},
    {"epoch": 0.38, "learning_rate": 0.00026203522504892365, "loss": 3.2536, "step": 195},
    {"epoch": 0.38, "learning_rate": 0.00026183953033268097, "loss": 3.2727, "step": 196},
    {"epoch": 0.39, "learning_rate": 0.00026164383561643834, "loss": 3.2631, "step": 197},
    {"epoch": 0.39, "learning_rate": 0.00026144814090019565, "loss": 3.2742, "step": 198},
    {"epoch": 0.39, "learning_rate": 0.000261252446183953, "loss": 3.2635, "step": 199},
    {"epoch": 0.39, "learning_rate": 0.00026105675146771034, "loss": 3.2664, "step": 200},
    {"epoch": 0.39, "learning_rate": 0.0002608610567514677, "loss": 3.2646, "step": 201},
    {"epoch": 0.39, "learning_rate": 0.000260665362035225, "loss": 3.2695, "step": 202},
    {"epoch": 0.4, "learning_rate": 0.00026046966731898234, "loss": 3.2475, "step": 203},
    {"epoch": 0.4, "learning_rate": 0.0002602739726027397, "loss": 3.2742, "step": 204},
    {"epoch": 0.4, "learning_rate": 0.00026007827788649703, "loss": 3.2562, "step": 205},
    {"epoch": 0.4, "learning_rate": 0.00025988258317025435, "loss": 3.2738, "step": 206},
    {"epoch": 0.4, "learning_rate": 0.0002596868884540117, "loss": 3.241, "step": 207},
    {"epoch": 0.41, "learning_rate": 0.00025949119373776903, "loss": 3.2863, "step": 208},
    {"epoch": 0.41, "learning_rate": 0.0002592954990215264, "loss": 3.2579, "step": 209},
    {"epoch": 0.41, "learning_rate": 0.0002590998043052837, "loss": 3.2495, "step": 210},
    {"epoch": 0.41, "learning_rate": 0.0002589041095890411, "loss": 3.2385, "step": 211},
    {"epoch": 0.41, "learning_rate": 0.0002587084148727984, "loss": 3.2534, "step": 212},
    {"epoch": 0.42, "learning_rate": 0.0002585127201565558, "loss": 3.272, "step": 213},
    {"epoch": 0.42, "learning_rate": 0.0002583170254403131, "loss": 3.2582, "step": 214},
    {"epoch": 0.42, "learning_rate": 0.0002581213307240704, "loss": 3.2588, "step": 215},
    {"epoch": 0.42, "learning_rate": 0.0002579256360078278, "loss": 3.2652, "step": 216},
    {"epoch": 0.42, "learning_rate": 0.0002577299412915851, "loss": 3.2526, "step": 217},
    {"epoch": 0.43, "learning_rate": 0.0002575342465753424, "loss": 3.2574, "step": 218},
    {"epoch": 0.43, "learning_rate": 0.0002573385518590998, "loss": 3.2686, "step": 219},
    {"epoch": 0.43, "learning_rate": 0.0002571428571428571, "loss": 3.2537, "step": 220},
    {"epoch": 0.43, "learning_rate": 0.00025694716242661447, "loss": 3.2528, "step": 221},
    {"epoch": 0.43, "learning_rate": 0.0002567514677103718, "loss": 3.2461, "step": 222},
    {"epoch": 0.44, "learning_rate": 0.00025655577299412915, "loss": 3.2619, "step": 223},
    {"epoch": 0.44, "learning_rate": 0.00025636007827788647, "loss": 3.2562, "step": 224},
    {"epoch": 0.44, "learning_rate": 0.00025616438356164384, "loss": 3.2308, "step": 225},
    {"epoch": 0.44, "learning_rate": 0.00025596868884540116, "loss": 3.2509, "step": 226},
    {"epoch": 0.44, "learning_rate": 0.0002557729941291585, "loss": 3.245, "step": 227},
    {"epoch": 0.45, "learning_rate": 0.0002555772994129158, "loss": 3.2481, "step": 228},
    {"epoch": 0.45, "learning_rate": 0.00025538160469667316, "loss": 3.2704, "step": 229},
    {"epoch": 0.45, "learning_rate": 0.0002551859099804305, "loss": 3.2524, "step": 230},
    {"epoch": 0.45, "learning_rate": 0.00025499021526418785, "loss": 3.2566, "step": 231},
    {"epoch": 0.45, "learning_rate": 0.00025479452054794516, "loss": 3.2582, "step": 232},
    {"epoch": 0.46, "learning_rate": 0.00025459882583170253, "loss": 3.2517, "step": 233},
    {"epoch": 0.46, "learning_rate": 0.00025440313111545985, "loss": 3.2534, "step": 234},
    {"epoch": 0.46, "learning_rate": 0.0002542074363992172, "loss": 3.2599, "step": 235},
    {"epoch": 0.46, "learning_rate": 0.00025401174168297454, "loss": 3.2448, "step": 236},
    {"epoch": 0.46, "learning_rate": 0.00025381604696673185, "loss": 3.2577, "step": 237},
    {"epoch": 0.47, "learning_rate": 0.0002536203522504892, "loss": 3.2621, "step": 238},
    {"epoch": 0.47, "learning_rate": 0.00025342465753424654, "loss": 3.2591, "step": 239},
    {"epoch": 0.47, "learning_rate": 0.00025322896281800385, "loss": 3.2394, "step": 240},
    {"epoch": 0.47, "learning_rate": 0.0002530332681017612, "loss": 3.2334, "step": 241},
    {"epoch": 0.47, "learning_rate": 0.00025283757338551854, "loss": 3.2331, "step": 242},
    {"epoch": 0.48, "learning_rate": 0.0002526418786692759, "loss": 3.2516, "step": 243},
    {"epoch": 0.48, "learning_rate": 0.00025244618395303323, "loss": 3.2424, "step": 244},
    {"epoch": 0.48, "learning_rate": 0.0002522504892367906, "loss": 3.2745, "step": 245},
    {"epoch": 0.48, "learning_rate": 0.0002520547945205479, "loss": 3.2458, "step": 246},
    {"epoch": 0.48, "learning_rate": 0.0002518590998043053, "loss": 3.2672, "step": 247},
    {"epoch": 0.48, "learning_rate": 0.0002516634050880626, "loss": 3.2674, "step": 248},
    {"epoch": 0.49, "learning_rate": 0.0002514677103718199, "loss": 3.2412, "step": 249},
    {"epoch": 0.49, "learning_rate": 0.0002512720156555773, "loss": 3.2619, "step": 250},
    {"epoch": 0.49, "learning_rate": 0.0002510763209393346, "loss": 3.2398, "step": 251},
    {"epoch": 0.49, "learning_rate": 0.0002508806262230919, "loss": 3.3514, "step": 252},
    {"epoch": 0.49, "learning_rate": 0.0002506849315068493, "loss": 3.241, "step": 253},
    {"epoch": 0.5, "learning_rate": 0.0002504892367906066, "loss": 3.2831, "step": 254},
    {"epoch": 0.5, "learning_rate": 0.000250293542074364, "loss": 3.2722, "step": 255},
    {"epoch": 0.5, "learning_rate": 0.0002500978473581213, "loss": 3.2489, "step": 256},
    {"epoch": 0.5, "learning_rate": 0.00024990215264187866, "loss": 3.2612, "step": 257},
    {"epoch": 0.5, "learning_rate": 0.000249706457925636, "loss": 3.2677, "step": 258},
    {"epoch": 0.51, "learning_rate": 0.00024951076320939335, "loss": 3.2593, "step": 259},
    {"epoch": 0.51, "learning_rate": 0.00024931506849315067, "loss": 3.2472, "step": 260},
    {"epoch": 0.51, "learning_rate": 0.000249119373776908, "loss": 3.2613, "step": 261},
    {"epoch": 0.51, "learning_rate": 0.00024892367906066535, "loss": 3.2534, "step": 262},
    {"epoch": 0.51, "learning_rate": 0.00024872798434442267, "loss": 3.2596, "step": 263},
    {"epoch": 0.52, "learning_rate": 0.00024853228962818, "loss": 3.2533, "step": 264},
    {"epoch": 0.52, "learning_rate": 0.00024833659491193736, "loss": 3.2543, "step": 265},
    {"epoch": 0.52, "learning_rate": 0.00024814090019569467, "loss": 3.2532, "step": 266},
    {"epoch": 0.52, "learning_rate": 0.00024794520547945204, "loss": 3.2553, "step": 267},
    {"epoch": 0.52, "learning_rate": 0.00024774951076320936, "loss": 3.2615, "step": 268},
    {"epoch": 0.53, "learning_rate": 0.00024755381604696673, "loss": 3.2719, "step": 269},
    {"epoch": 0.53, "learning_rate": 0.00024735812133072404, "loss": 3.2568, "step": 270},
    {"epoch": 0.53, "learning_rate": 0.0002471624266144814, "loss": 3.2807, "step": 271},
    {"epoch": 0.53, "learning_rate": 0.00024696673189823873, "loss": 3.2515, "step": 272},
    {"epoch": 0.53, "learning_rate": 0.00024677103718199605, "loss": 3.252, "step": 273},
    {"epoch": 0.54, "learning_rate": 0.00024657534246575336, "loss": 3.2526, "step": 274},
    {"epoch": 0.54, "learning_rate": 0.00024637964774951073, "loss": 3.2635, "step": 275},
    {"epoch": 0.54, "learning_rate": 0.00024618395303326805, "loss": 3.2602, "step": 276},
    {"epoch": 0.54, "learning_rate": 0.0002459882583170254, "loss": 3.2795, "step": 277},
    {"epoch": 0.54, "learning_rate": 0.00024579256360078274, "loss": 3.2548, "step": 278},
    {"epoch": 0.55, "learning_rate": 0.0002455968688845401, "loss": 3.2573, "step": 279},
    {"epoch": 0.55, "learning_rate": 0.0002454011741682974, "loss": 3.2685, "step": 280},
    {"epoch": 0.55, "learning_rate": 0.0002452054794520548, "loss": 3.2407, "step": 281},
    {"epoch": 0.55, "learning_rate": 0.0002450097847358121, "loss": 3.2391, "step": 282},
    {"epoch": 0.55, "learning_rate": 0.0002448140900195694, "loss": 3.261, "step": 283},
    {"epoch": 0.56, "learning_rate": 0.0002446183953033268, "loss": 3.2429, "step": 284},
    {"epoch": 0.56, "learning_rate": 0.0002444227005870841, "loss": 3.2453, "step": 285},
    {"epoch": 0.56, "learning_rate": 0.00024422700587084143, "loss": 3.2567, "step": 286},
    {"epoch": 0.56, "learning_rate": 0.0002440313111545988, "loss": 3.2556, "step": 287},
    {"epoch": 0.56, "learning_rate": 0.00024383561643835614, "loss": 3.2319, "step": 288},
    {"epoch": 0.57, "learning_rate": 0.00024363992172211349, "loss": 3.2526, "step": 289},
    {"epoch": 0.57, "learning_rate": 0.0002434442270058708, "loss": 3.2126, "step": 290},
    {"epoch": 0.57, "learning_rate": 0.00024324853228962817, "loss": 3.2325, "step": 291},
    {"epoch": 0.57, "learning_rate": 0.0002430528375733855, "loss": 3.2457, "step": 292},
    {"epoch": 0.57, "learning_rate": 0.00024285714285714283, "loss": 3.2599, "step": 293},
    {"epoch": 0.57, "learning_rate": 0.00024266144814090015, "loss": 3.2293, "step": 294},
    {"epoch": 0.58, "learning_rate": 0.00024246575342465752, "loss": 3.2366, "step": 295},
    {"epoch": 0.58, "learning_rate": 0.00024227005870841483, "loss": 3.2429, "step": 296},
    {"epoch": 0.58, "learning_rate": 0.0002420743639921722, "loss": 3.2486, "step": 297},
    {"epoch": 0.58, "learning_rate": 0.00024187866927592952, "loss": 3.2709, "step": 298},
    {"epoch": 0.58, "learning_rate": 0.00024168297455968686, "loss": 3.2296, "step": 299},
    {"epoch": 0.59, "learning_rate": 0.00024148727984344418, "loss": 3.2423, "step": 300},
    {"epoch": 0.59, "learning_rate": 0.00024129158512720155, "loss": 3.2379, "step": 301},
    {"epoch": 0.59, "learning_rate": 0.00024109589041095887, "loss": 3.2257, "step": 302},
    {"epoch": 0.59, "learning_rate": 0.00024090019569471624, "loss": 3.2138, "step": 303},
    {"epoch": 0.59, "learning_rate": 0.00024070450097847355, "loss": 3.2305, "step": 304},
    {"epoch": 0.6, "learning_rate": 0.0002405088062622309, "loss": 3.2308, "step": 305},
    {"epoch": 0.6, "learning_rate": 0.0002403131115459882, "loss": 3.2321, "step": 306},
    {"epoch": 0.6, "learning_rate": 0.00024011741682974558, "loss": 3.235, "step": 307},
    {"epoch": 0.6, "learning_rate": 0.0002399217221135029, "loss": 3.2562, "step": 308},
    {"epoch": 0.6, "learning_rate": 0.00023972602739726024, "loss": 3.2388, "step": 309},
    {"epoch": 0.61, "learning_rate": 0.0002395303326810176, "loss": 3.2424, "step": 310},
    {"epoch": 0.61, "learning_rate": 0.00023933463796477493, "loss": 3.2445, "step": 311},
    {"epoch": 0.61, "learning_rate": 0.00023913894324853225, "loss": 3.2203, "step": 312},
    {"epoch": 0.61, "learning_rate": 0.00023894324853228962, "loss": 3.2501, "step": 313},
    {"epoch": 0.61, "learning_rate": 0.00023874755381604693, "loss": 3.2524, "step": 314},
    {"epoch": 0.62, "learning_rate": 0.00023855185909980428, "loss": 3.2162, "step": 315},
    {"epoch": 0.62, "learning_rate": 0.00023835616438356162, "loss": 3.2458, "step": 316},
    {"epoch": 0.62, "learning_rate": 0.00023816046966731896, "loss": 3.2323, "step": 317},
    {"epoch": 0.62, "learning_rate": 0.00023796477495107628, "loss": 3.2198, "step": 318},
    {"epoch": 0.62, "learning_rate": 0.00023776908023483365, "loss": 3.2279, "step": 319},
    {"epoch": 0.63, "learning_rate": 0.00023757338551859097, "loss": 3.2285, "step": 320},
    {"epoch": 0.63, "learning_rate": 0.0002373776908023483, "loss": 3.2181, "step": 321},
    {"epoch": 0.63, "learning_rate": 0.00023718199608610565, "loss": 3.2484, "step": 322},
    {"epoch": 0.63, "learning_rate": 0.000236986301369863, "loss": 3.2226, "step": 323},
    {"epoch": 0.63, "learning_rate": 0.0002367906066536203, "loss": 3.2449, "step": 324},
    {"epoch": 0.64, "learning_rate": 0.00023659491193737768, "loss": 3.214, "step": 325},
    {"epoch": 0.64, "learning_rate": 0.000236399217221135, "loss": 3.2303, "step": 326},
    {"epoch": 0.64, "learning_rate": 0.00023620352250489234, "loss": 3.2398, "step": 327},
    {"epoch": 0.64, "learning_rate": 0.00023600782778864968, "loss": 3.23, "step": 328},
    {"epoch": 0.64, "learning_rate": 0.00023581213307240703, "loss": 3.2279, "step": 329},
    {"epoch": 0.65, "learning_rate": 0.00023561643835616434, "loss": 3.2459, "step": 330},
    {"epoch": 0.65, "learning_rate": 0.00023542074363992171, "loss": 3.231, "step": 331},
    {"epoch": 0.65, "learning_rate": 0.00023522504892367903, "loss": 3.231, "step": 332},
    {"epoch": 0.65, "learning_rate": 0.00023502935420743637, "loss": 3.2127, "step": 333},
    {"epoch": 0.65, "learning_rate": 0.00023483365949119372, "loss": 3.2426, "step": 334},
    {"epoch": 0.65, "learning_rate": 0.00023463796477495106, "loss": 3.2341, "step": 335},
    {"epoch": 0.66, "learning_rate": 0.00023444227005870838, "loss": 3.2154, "step": 336},
    {"epoch": 0.66, "learning_rate": 0.00023424657534246575, "loss": 3.2525, "step": 337},
    {"epoch": 0.66, "learning_rate": 0.00023405088062622306, "loss": 3.226, "step": 338},
    {"epoch": 0.66, "learning_rate": 0.0002338551859099804, "loss": 3.2392, "step": 339},
    {"epoch": 0.66, "learning_rate": 0.00023365949119373772, "loss": 3.2265, "step": 340},
    {"epoch": 0.67, "learning_rate": 0.0002334637964774951, "loss": 3.2227, "step": 341},
    {"epoch": 0.67, "learning_rate": 0.0002332681017612524, "loss": 3.2645, "step": 342},
    {"epoch": 0.67, "learning_rate": 0.00023307240704500978, "loss": 3.2254, "step": 343},
    {"epoch": 0.67, "learning_rate": 0.0002328767123287671, "loss": 3.2208, "step": 344},
    {"epoch": 0.67, "learning_rate": 0.00023268101761252444, "loss": 3.2552, "step": 345},
    {"epoch": 0.68, "learning_rate": 0.00023248532289628176, "loss": 3.2221, "step": 346},
    {"epoch": 0.68, "learning_rate": 0.00023228962818003913, "loss": 3.2311, "step": 347},
    {"epoch": 0.68, "learning_rate": 0.00023209393346379644, "loss": 3.2324, "step": 348},
    {"epoch": 0.68, "learning_rate": 0.0002318982387475538, "loss": 3.231, "step": 349},
    {"epoch": 0.68, "learning_rate": 0.00023170254403131113, "loss": 3.2104, "step": 350},
    {"epoch": 0.69, "learning_rate": 0.00023150684931506847, "loss": 3.2188, "step": 351},
    {"epoch": 0.69, "learning_rate": 0.0002313111545988258, "loss": 3.2336, "step": 352},
    {"epoch": 0.69, "learning_rate": 0.00023111545988258316, "loss": 3.2231, "step": 353},
    {"epoch": 0.69, "learning_rate": 0.00023091976516634047, "loss": 3.2291, "step": 354},
    {"epoch": 0.69, "learning_rate": 0.00023072407045009782, "loss": 3.2345, "step": 355},
    {"epoch": 0.7, "learning_rate": 0.00023052837573385516, "loss": 3.2295, "step": 356},
    {"epoch": 0.7, "learning_rate": 0.0002303326810176125, "loss": 3.2328, "step": 357},
    {"epoch": 0.7, "learning_rate": 0.00023013698630136982, "loss": 3.2326, "step": 358},
    {"epoch": 0.7, "learning_rate": 0.0002299412915851272, "loss": 3.2417, "step": 359},
    {"epoch": 0.7, "learning_rate": 0.0002297455968688845, "loss": 3.2101, "step": 360},
    {"epoch": 0.71, "learning_rate": 0.00022954990215264185, "loss": 3.2424, "step": 361},
    {"epoch": 0.71, "learning_rate": 0.0002293542074363992, "loss": 3.2306, "step": 362},
    {"epoch": 0.71, "learning_rate": 0.00022915851272015654, "loss": 3.2162, "step": 363},
    {"epoch": 0.71, "learning_rate": 0.00022896281800391385, "loss": 3.2224, "step": 364},
    {"epoch": 0.71, "learning_rate": 0.00022876712328767122, "loss": 3.2394, "step": 365},
    {"epoch": 0.72, "learning_rate": 0.00022857142857142854, "loss": 3.2171, "step": 366},
    {"epoch": 0.72, "learning_rate": 0.00022837573385518588, "loss": 3.231, "step": 367},
    {"epoch": 0.72, "learning_rate": 0.00022818003913894323, "loss": 3.2285, "step": 368},
    {"epoch": 0.72, "learning_rate": 0.00022798434442270057, "loss": 3.2165, "step": 369},
    {"epoch": 0.72, "learning_rate": 0.00022778864970645789, "loss": 3.2229, "step": 370},
    {"epoch": 0.73, "learning_rate": 0.00022759295499021526, "loss": 3.2362, "step": 371},
    {"epoch": 0.73, "learning_rate": 0.00022739726027397257, "loss": 3.2053, "step": 372},
    {"epoch": 0.73, "learning_rate": 0.00022720156555772992, "loss": 3.225, "step": 373},
    {"epoch": 0.73, "learning_rate": 0.00022700587084148726, "loss": 3.2213, "step": 374},
    {"epoch": 0.73, "learning_rate": 0.0002268101761252446, "loss": 3.2369, "step": 375},
    {"epoch": 0.74, "learning_rate": 0.00022661448140900192, "loss": 3.2185, "step": 376},
    {"epoch": 0.74, "learning_rate": 0.0002264187866927593, "loss": 3.2339, "step": 377},
    {"epoch": 0.74, "learning_rate": 0.0002262230919765166, "loss": 3.2277, "step": 378},
    {"epoch": 0.74, "learning_rate": 0.00022602739726027395, "loss": 3.2195, "step": 379},
    {"epoch": 0.74, "learning_rate": 0.0002258317025440313, "loss": 3.2055, "step": 380},
    {"epoch": 0.74, "learning_rate": 0.00022563600782778863, "loss": 3.2305, "step": 381},
    {"epoch": 0.75, "learning_rate": 0.00022544031311154595, "loss": 3.2258, "step": 382},
    {"epoch": 0.75, "learning_rate": 0.00022524461839530332, "loss": 3.2236, "step": 383},
    {"epoch": 0.75, "learning_rate": 0.00022504892367906064, "loss": 3.2307, "step": 384},
    {"epoch": 0.75, "learning_rate": 0.00022485322896281798, "loss": 3.227, "step": 385},
    {"epoch": 0.75, "learning_rate": 0.0002246575342465753, "loss": 3.2127, "step": 386},
    {"epoch": 0.76, "learning_rate": 0.00022446183953033267, "loss": 3.2243, "step": 387},
    {"epoch": 0.76, "learning_rate": 0.00022426614481408998, "loss": 3.2343, "step": 388},
    {"epoch": 0.76, "learning_rate": 0.00022407045009784735, "loss": 3.2228, "step": 389},
    {"epoch": 0.76, "learning_rate": 0.00022387475538160467, "loss": 3.214, "step": 390},
    {"epoch": 0.76, "learning_rate": 0.00022367906066536201, "loss": 3.2134, "step": 391},
    {"epoch": 0.77, "learning_rate": 0.00022348336594911933, "loss": 3.2233, "step": 392},
    {"epoch": 0.77, "learning_rate": 0.0002232876712328767, "loss": 3.2006, "step": 393},
    {"epoch": 0.77, "learning_rate": 0.00022309197651663402, "loss": 3.2294, "step": 394},
    {"epoch": 0.77, "learning_rate": 0.0002228962818003914, "loss": 3.2246, "step": 395},
    {"epoch": 0.77, "learning_rate": 0.0002227005870841487, "loss": 3.2249, "step": 396},
    {"epoch": 0.78, "learning_rate": 0.00022250489236790605, "loss": 3.2249, "step": 397},
    {"epoch": 0.78, "learning_rate": 0.00022230919765166336, "loss": 3.2321, "step": 398},
    {"epoch": 0.78, "learning_rate": 0.00022211350293542073, "loss": 3.2351, "step": 399},
    {"epoch": 0.78, "learning_rate": 0.00022191780821917805, "loss": 3.2277, "step": 400},
    {"epoch": 0.78, "learning_rate": 0.0002217221135029354, "loss": 3.2224, "step": 401},
    {"epoch": 0.79, "learning_rate": 0.00022152641878669274, "loss": 3.2254, "step": 402},
    {"epoch": 0.79, "learning_rate": 0.00022133072407045008, "loss": 3.2131, "step": 403},
    {"epoch": 0.79, "learning_rate": 0.0002211350293542074, "loss": 3.2256, "step": 404},
    {"epoch": 0.79, "learning_rate": 0.00022093933463796477, "loss": 3.2157, "step": 405},
    {"epoch": 0.79, "learning_rate": 0.00022074363992172208, "loss": 3.2321, "step": 406},
    {"epoch": 0.8, "learning_rate": 0.00022054794520547942, "loss": 3.2162, "step": 407},
    {"epoch": 0.8, "learning_rate": 0.00022035225048923677, "loss": 3.2012, "step": 408},
    {"epoch": 0.8, "learning_rate": 0.0002201565557729941, "loss": 3.2206, "step": 409},
    {"epoch": 0.8, "learning_rate": 0.00021996086105675143, "loss": 3.2201, "step": 410},
    {"epoch": 0.8, "learning_rate": 0.0002197651663405088, "loss": 3.2085, "step": 411},
    {"epoch": 0.81, "learning_rate": 0.00021956947162426611, "loss": 3.2232, "step": 412},
    {"epoch": 0.81, "learning_rate": 0.00021937377690802346, "loss": 3.2345, "step": 413},
    {"epoch": 0.81, "learning_rate": 0.0002191780821917808, "loss": 3.2241, "step": 414},
    {"epoch": 0.81, "learning_rate": 0.00021898238747553814, "loss": 3.2347, "step": 415},
    {"epoch": 0.81, "learning_rate": 0.00021878669275929546, "loss": 3.2207, "step": 416},
    {"epoch": 0.82, "learning_rate": 0.00021859099804305283, "loss": 3.2173, "step": 417},
    {"epoch": 0.82, "learning_rate": 0.00021839530332681015, "loss": 3.2202, "step": 418},
    {"epoch": 0.82, "learning_rate": 0.0002181996086105675, "loss": 3.218, "step": 419},
    {"epoch": 0.82, "learning_rate": 0.00021800391389432483, "loss": 3.211, "step": 420},
    {"epoch": 0.82, "learning_rate": 0.00021780821917808218, "loss": 3.2094, "step": 421},
    {"epoch": 0.83, "learning_rate": 0.0002176125244618395, "loss": 3.2192, "step": 422},
    {"epoch": 0.83, "learning_rate": 0.00021741682974559686, "loss": 3.2214, "step": 423},
    {"epoch": 0.83, "learning_rate": 0.00021722113502935418, "loss": 3.2346, "step": 424},
    {"epoch": 0.83, "learning_rate": 0.00021702544031311152, "loss": 3.2299, "step": 425},
    {"epoch": 0.83, "learning_rate": 0.00021682974559686887, "loss": 3.2275, "step": 426},
    {"epoch": 0.83, "learning_rate": 0.0002166340508806262, "loss": 3.2017, "step": 427},
    {"epoch": 0.84, "learning_rate": 0.00021643835616438353, "loss": 3.2083, "step": 428},
    {"epoch": 0.84, "learning_rate": 0.0002162426614481409, "loss": 3.2247, "step": 429},
    {"epoch": 0.84, "learning_rate": 0.0002160469667318982, "loss": 3.211, "step": 430},
    {"epoch": 0.84, "learning_rate": 0.00021585127201565556, "loss": 3.2229, "step": 431},
    {"epoch": 0.84, "learning_rate": 0.00021565557729941287, "loss": 3.197, "step": 432},
    {"epoch": 0.85, "learning_rate": 0.00021545988258317024, "loss": 3.2035, "step": 433},
    {"epoch": 0.85, "learning_rate": 0.00021526418786692756, "loss": 3.2118, "step": 434},
    {"epoch": 0.85, "learning_rate": 0.00021506849315068493, "loss": 3.2077, "step": 435},
    {"epoch": 0.85, "learning_rate": 0.00021487279843444224, "loss": 3.215, "step": 436},
    {"epoch": 0.85, "learning_rate": 0.0002146771037181996, "loss": 3.2145, "step": 437},
    {"epoch": 0.86, "learning_rate": 0.0002144814090019569, "loss": 3.1987, "step": 438},
    {"epoch": 0.86, "learning_rate": 0.00021428571428571427, "loss": 3.2123, "step": 439},
    {"epoch": 0.86, "learning_rate": 0.0002140900195694716, "loss": 3.1901, "step": 440},
    {"epoch": 0.86, "learning_rate": 0.00021389432485322896, "loss": 3.1919, "step": 441},
    {"epoch": 0.86, "learning_rate": 0.00021369863013698628, "loss": 3.2068, "step": 442},
    {"epoch": 0.87, "learning_rate": 0.00021350293542074362, "loss": 3.2125, "step": 443},
    {"epoch": 0.87, "learning_rate": 0.00021330724070450094, "loss": 3.1952, "step": 444},
    {"epoch": 0.87, "learning_rate": 0.0002131115459882583, "loss": 3.2172, "step": 445},
    {"epoch": 0.87, "learning_rate": 0.00021291585127201562, "loss": 3.2029, "step": 446},
    {"epoch": 0.87, "learning_rate": 0.00021272015655577297, "loss": 3.197, "step": 447},
    {"epoch": 0.88, "learning_rate": 0.0002125244618395303, "loss": 3.1921, "step": 448},
    {"epoch": 0.88, "learning_rate": 0.00021232876712328765, "loss": 3.1898, "step": 449},
    {"epoch": 0.88, "learning_rate": 0.00021213307240704497, "loss": 3.201, "step": 450},
    {"epoch": 0.88, "learning_rate": 0.00021193737769080234, "loss": 3.2142, "step": 451},
    {"epoch": 0.88, "learning_rate": 0.00021174168297455966, "loss": 3.2015, "step": 452},
    {"epoch": 0.89, "learning_rate": 0.000211545988258317, "loss": 3.1952, "step": 453},
    {"epoch": 0.89, "learning_rate": 0.00021135029354207434, "loss": 3.2191, "step": 454},
    {"epoch": 0.89, "learning_rate": 0.00021115459882583169, "loss": 3.2111, "step": 455},
    {"epoch": 0.89, "learning_rate": 0.000210958904109589, "loss": 3.1956, "step": 456},
    {"epoch": 0.89, "learning_rate": 0.00021076320939334637, "loss": 3.1995, "step": 457},
    {"epoch": 0.9, "learning_rate": 0.0002105675146771037, "loss": 3.1866, "step": 458},
    {"epoch": 0.9, "learning_rate": 0.00021037181996086103, "loss": 3.2022, "step": 459},
    {"epoch": 0.9, "learning_rate": 0.00021017612524461838, "loss": 3.197, "step": 460},
    {"epoch": 0.9, "learning_rate": 0.00020998043052837572, "loss": 3.1843, "step": 461},
    {"epoch": 0.9, "learning_rate": 0.00020978473581213303, "loss": 3.202, "step": 462},
    {"epoch": 0.91, "learning_rate": 0.0002095890410958904, "loss": 3.1863, "step": 463},
    {"epoch": 0.91, "learning_rate": 0.00020939334637964772, "loss": 3.2066, "step": 464},
    {"epoch": 0.91, "learning_rate": 0.00020919765166340506, "loss": 3.217, "step": 465},
    {"epoch": 0.91, "learning_rate": 0.0002090019569471624, "loss": 3.204, "step": 466},
    {"epoch": 0.91, "learning_rate": 0.00020880626223091975, "loss": 3.2038, "step": 467},
    {"epoch": 0.91, "learning_rate": 0.00020861056751467707, "loss": 3.2124, "step": 468},
    {"epoch": 0.92, "learning_rate": 0.00020841487279843444, "loss": 3.1845, "step": 469},
    {"epoch": 0.92, "learning_rate": 0.00020821917808219175, "loss": 3.2088, "step": 470},
    {"epoch": 0.92, "learning_rate": 0.0002080234833659491, "loss": 3.2019, "step": 471},
    {"epoch": 0.92, "learning_rate": 0.00020782778864970644, "loss": 3.1932, "step": 472},
    {"epoch": 0.92, "learning_rate": 0.00020763209393346378, "loss": 3.1972, "step": 473},
    {"epoch": 0.93, "learning_rate": 0.0002074363992172211, "loss": 3.1964, "step": 474},
    {"epoch": 0.93, "learning_rate": 0.00020724070450097847, "loss": 3.197, "step": 475},
    {"epoch": 0.93, "learning_rate": 0.0002070450097847358, "loss": 3.1952, "step": 476},
    {"epoch": 0.93, "learning_rate": 0.00020684931506849313, "loss": 3.1906, "step": 477},
    {"epoch": 0.93, "learning_rate": 0.00020665362035225045, "loss": 3.1882, "step": 478},
    {"epoch": 0.94, "learning_rate": 0.00020645792563600782, "loss": 3.2031, "step": 479},
    {"epoch": 0.94, "learning_rate": 0.00020626223091976513, "loss": 3.1884, "step": 480},
    {"epoch": 0.94, "learning_rate": 0.0002060665362035225, "loss": 3.1871, "step": 481},
    {"epoch": 0.94, "learning_rate": 0.00020587084148727982, "loss": 3.1905, "step": 482},
    {"epoch": 0.94, "learning_rate": 0.00020567514677103716, "loss": 3.1984, "step": 483},
    {"epoch": 0.95, "learning_rate": 0.00020547945205479448, "loss": 3.1861, "step": 484},
    {"epoch": 0.95, "learning_rate": 0.00020528375733855185, "loss": 3.2009, "step": 485},
    {"epoch": 0.95, "learning_rate": 0.00020508806262230917, "loss": 3.1891, "step": 486},
    {"epoch": 0.95, "learning_rate": 0.00020489236790606654, "loss": 3.1748, "step": 487},
    {"epoch": 0.95, "learning_rate": 0.00020469667318982385, "loss": 3.2082, "step": 488},
    {"epoch": 0.96, "learning_rate": 0.0002045009784735812, "loss": 3.2025, "step": 489},
    {"epoch": 0.96, "learning_rate": 0.0002043052837573385, "loss": 3.186, "step": 490},
    {"epoch": 0.96, "learning_rate": 0.00020410958904109588, "loss": 3.1866, "step": 491},
    {"epoch": 0.96, "learning_rate": 0.0002039138943248532, "loss": 3.1825, "step": 492},
    {"epoch": 0.96, "learning_rate": 0.00020371819960861057, "loss": 3.1905, "step": 493},
    {"epoch": 0.97, "learning_rate": 0.00020352250489236788, "loss": 3.1809, "step": 494},
    {"epoch": 0.97, "learning_rate": 0.00020332681017612523, "loss": 3.1974, "step": 495},
    {"epoch": 0.97, "learning_rate": 0.00020313111545988254, "loss": 3.2088, "step": 496},
    {"epoch": 0.97, "learning_rate": 0.00020293542074363991, "loss": 3.1773, "step": 497},
    {"epoch": 0.97, "learning_rate": 0.00020273972602739723, "loss": 3.2, "step": 498},
    {"epoch": 0.98, "learning_rate": 0.00020254403131115457, "loss": 3.1715, "step": 499},
    {"epoch": 0.98, "learning_rate": 0.00020234833659491192, "loss": 3.1883, "step": 500},
    {"epoch": 0.98, "learning_rate": 0.00020215264187866926, "loss": 3.1853, "step": 501},
    {"epoch": 0.98, "learning_rate": 0.00020195694716242658, "loss": 3.1999, "step": 502},
    {"epoch": 0.98, "learning_rate": 0.00020176125244618395, "loss": 3.1896, "step": 503},
    {"epoch": 0.99, "learning_rate": 0.00020156555772994126, "loss": 3.1796, "step": 504},
    {"epoch": 0.99, "learning_rate": 0.0002013698630136986, "loss": 3.1942, "step": 505},
    {"epoch": 0.99, "learning_rate": 0.00020117416829745595, "loss": 3.2231, "step": 506},
    {"epoch": 0.99, "learning_rate": 0.0002009784735812133, "loss": 3.1886, "step": 507},
    {"epoch": 0.99, "learning_rate": 0.0002007827788649706, "loss": 3.1945, "step": 508},
    {"epoch": 1.0, "learning_rate": 0.00020058708414872798, "loss": 3.1898, "step": 509},
    {"epoch": 1.0, "learning_rate": 0.0002003913894324853, "loss": 3.1947, "step": 510},
    {"epoch": 1.0, "learning_rate": 0.00020019569471624264, "loss": 3.1883, "step": 511},
    {"epoch": 1.0, "eval_loss": 3.519240379333496, "eval_runtime": 508.4632, "eval_samples_per_second": 31.135, "eval_steps_per_second": 0.071, "eval_wer": 1.0, "step": 511},
    {"epoch": 1.0, "learning_rate": 0.00019999999999999998, "loss": 4.7961, "step": 512},
    {"epoch": 1.0, "learning_rate": 0.00019980430528375733, "loss": 3.1885, "step": 513},
    {"epoch": 1.01, "learning_rate": 0.00019960861056751464, "loss": 3.1757, "step": 514},
    {"epoch": 1.01, "learning_rate": 0.000199412915851272, "loss": 3.202, "step": 515},
    {"epoch": 1.01, "learning_rate": 0.00019921722113502933, "loss": 3.1831, "step": 516},
    {"epoch": 1.01, "learning_rate": 0.00019902152641878667, "loss": 3.201, "step": 517},
    {"epoch": 1.01, "learning_rate": 0.00019882583170254402, "loss": 3.175, "step": 518},
    {"epoch": 1.02, "learning_rate": 0.00019863013698630136, "loss": 3.1899, "step": 519},
    {"epoch": 1.02, "learning_rate": 0.00019843444227005867, "loss": 3.1941, "step": 520},
    {"epoch": 1.02, "learning_rate": 0.00019823874755381604, "loss": 3.1991, "step": 521},
    {"epoch": 1.02, "learning_rate": 0.00019804305283757336, "loss": 3.1991, "step": 522},
    {"epoch": 1.02, "learning_rate": 0.0001978473581213307, "loss": 3.1906, "step": 523},
    {"epoch": 1.03, "learning_rate": 0.00019765166340508805, "loss": 3.1671, "step": 524},
    {"epoch": 1.03, "learning_rate": 0.0001974559686888454, "loss": 3.1912, "step": 525},
    {"epoch": 1.03, "learning_rate": 0.0001972602739726027, "loss": 3.1836, "step": 526},
    {"epoch": 1.03, "learning_rate": 0.00019706457925636008, "loss": 3.1905, "step": 527},
    {"epoch": 1.03, "learning_rate": 0.0001968688845401174, "loss": 3.1853, "step": 528},
    {"epoch": 1.04, "learning_rate": 0.00019667318982387474, "loss": 3.2102, "step": 529},
    {"epoch": 1.04, "learning_rate": 0.00019647749510763205, "loss": 3.1815, "step": 530},
    {"epoch": 1.04, "learning_rate": 0.00019628180039138942, "loss": 3.1788, "step": 531},
    {"epoch": 1.04, "learning_rate": 0.00019608610567514674, "loss": 3.1824, "step": 532},
    {"epoch": 1.04, "learning_rate": 0.0001958904109589041, "loss": 3.1827, "step": 533},
    {"epoch": 1.04, "learning_rate": 0.00019569471624266143, "loss": 3.1924, "step": 534},
    {"epoch": 1.05, "learning_rate": 0.00019549902152641877, "loss": 3.177, "step": 535},
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019530332681017609, |
|
"loss": 3.1616, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019510763209393346, |
|
"loss": 3.1888, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019491193737769077, |
|
"loss": 3.1857, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019471624266144814, |
|
"loss": 3.1935, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019452054794520546, |
|
"loss": 3.1786, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001943248532289628, |
|
"loss": 3.1816, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019412915851272012, |
|
"loss": 3.1987, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001939334637964775, |
|
"loss": 3.1934, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001937377690802348, |
|
"loss": 3.1769, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019354207436399215, |
|
"loss": 3.1844, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001933463796477495, |
|
"loss": 3.2072, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019315068493150683, |
|
"loss": 3.2114, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019295499021526415, |
|
"loss": 3.2147, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019275929549902152, |
|
"loss": 3.2072, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019256360078277884, |
|
"loss": 3.179, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019236790606653618, |
|
"loss": 3.1849, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019217221135029352, |
|
"loss": 3.1866, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019197651663405087, |
|
"loss": 3.1894, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019178082191780818, |
|
"loss": 3.1924, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019158512720156555, |
|
"loss": 3.1798, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019138943248532287, |
|
"loss": 3.1698, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019119373776908021, |
|
"loss": 3.1712, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019099804305283756, |
|
"loss": 3.1818, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001908023483365949, |
|
"loss": 3.178, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019060665362035222, |
|
"loss": 3.1869, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001904109589041096, |
|
"loss": 3.1863, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001902152641878669, |
|
"loss": 3.1727, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019001956947162425, |
|
"loss": 3.1704, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001898238747553816, |
|
"loss": 3.1992, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018962818003913893, |
|
"loss": 3.1775, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018943248532289625, |
|
"loss": 3.1862, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018923679060665362, |
|
"loss": 3.1678, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018904109589041094, |
|
"loss": 3.1795, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018884540117416828, |
|
"loss": 3.1739, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018864970645792562, |
|
"loss": 3.1721, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018845401174168297, |
|
"loss": 3.1792, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018825831702544028, |
|
"loss": 3.1794, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018806262230919765, |
|
"loss": 3.1725, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018786692759295497, |
|
"loss": 3.183, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001876712328767123, |
|
"loss": 3.1853, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018747553816046963, |
|
"loss": 3.1672, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.000187279843444227, |
|
"loss": 3.159, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018708414872798431, |
|
"loss": 3.1886, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018688845401174168, |
|
"loss": 3.1689, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.000186692759295499, |
|
"loss": 3.1761, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018649706457925634, |
|
"loss": 3.1779, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018630136986301366, |
|
"loss": 3.1857, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018610567514677103, |
|
"loss": 3.1752, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018590998043052835, |
|
"loss": 3.1783, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 3.1647, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018551859099804303, |
|
"loss": 3.1541, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018532289628180038, |
|
"loss": 3.1607, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001851272015655577, |
|
"loss": 3.1729, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018493150684931506, |
|
"loss": 3.172, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018473581213307238, |
|
"loss": 3.1645, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018454011741682972, |
|
"loss": 3.1598, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018434442270058707, |
|
"loss": 3.1866, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001841487279843444, |
|
"loss": 3.179, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018395303326810173, |
|
"loss": 3.1719, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001837573385518591, |
|
"loss": 3.1766, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001835616438356164, |
|
"loss": 3.1781, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018336594911937376, |
|
"loss": 3.1735, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001831702544031311, |
|
"loss": 3.1646, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018297455968688844, |
|
"loss": 3.1759, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018277886497064576, |
|
"loss": 3.1729, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018258317025440313, |
|
"loss": 3.1831, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018238747553816044, |
|
"loss": 3.1864, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001821917808219178, |
|
"loss": 3.1831, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018199608610567513, |
|
"loss": 3.1806, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018180039138943247, |
|
"loss": 3.1729, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0001816046966731898, |
|
"loss": 3.1798, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018140900195694716, |
|
"loss": 3.1612, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018121330724070448, |
|
"loss": 3.1533, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018101761252446182, |
|
"loss": 3.1838, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018082191780821916, |
|
"loss": 3.1636, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001806262230919765, |
|
"loss": 3.1589, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018043052837573382, |
|
"loss": 3.1738, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001802348336594912, |
|
"loss": 3.2014, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001800391389432485, |
|
"loss": 3.1662, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00017984344422700585, |
|
"loss": 3.1626, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001796477495107632, |
|
"loss": 3.1654, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017945205479452054, |
|
"loss": 3.1632, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017925636007827786, |
|
"loss": 3.1679, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017906066536203523, |
|
"loss": 3.1821, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017886497064579254, |
|
"loss": 3.1765, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017866927592954989, |
|
"loss": 3.185, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001784735812133072, |
|
"loss": 3.1564, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017827788649706457, |
|
"loss": 3.1747, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001780821917808219, |
|
"loss": 3.1589, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017788649706457926, |
|
"loss": 3.1813, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017769080234833658, |
|
"loss": 3.1688, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017749510763209392, |
|
"loss": 3.1721, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017729941291585123, |
|
"loss": 3.1574, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001771037181996086, |
|
"loss": 3.1615, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017690802348336592, |
|
"loss": 3.1536, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001767123287671233, |
|
"loss": 3.1602, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001765166340508806, |
|
"loss": 3.18, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017632093933463795, |
|
"loss": 3.1716, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017612524461839527, |
|
"loss": 3.1635, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017592954990215264, |
|
"loss": 3.1576, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017573385518590995, |
|
"loss": 3.1521, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001755381604696673, |
|
"loss": 3.1763, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017534246575342464, |
|
"loss": 3.1736, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017514677103718198, |
|
"loss": 3.1842, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001749510763209393, |
|
"loss": 3.155, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017475538160469667, |
|
"loss": 3.1671, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.000174559686888454, |
|
"loss": 3.1791, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017436399217221133, |
|
"loss": 3.1427, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017416829745596867, |
|
"loss": 3.1859, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017397260273972602, |
|
"loss": 3.183, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017377690802348333, |
|
"loss": 3.1634, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001735812133072407, |
|
"loss": 3.1799, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017338551859099802, |
|
"loss": 3.1703, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017318982387475536, |
|
"loss": 3.1582, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001729941291585127, |
|
"loss": 3.1662, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017279843444227005, |
|
"loss": 3.1642, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017260273972602737, |
|
"loss": 3.173, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017240704500978474, |
|
"loss": 3.168, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017221135029354205, |
|
"loss": 3.1578, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001720156555772994, |
|
"loss": 3.1685, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017181996086105674, |
|
"loss": 3.1567, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017162426614481408, |
|
"loss": 3.1639, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001714285714285714, |
|
"loss": 3.1586, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017123287671232877, |
|
"loss": 3.1552, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017103718199608608, |
|
"loss": 3.153, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017084148727984343, |
|
"loss": 3.1729, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017064579256360077, |
|
"loss": 3.1465, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017045009784735811, |
|
"loss": 3.168, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017025440313111543, |
|
"loss": 3.1802, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0001700587084148728, |
|
"loss": 3.1603, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00016986301369863012, |
|
"loss": 3.1563, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00016966731898238746, |
|
"loss": 3.1637, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016947162426614478, |
|
"loss": 3.1618, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016927592954990215, |
|
"loss": 3.1485, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016908023483365946, |
|
"loss": 3.1536, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016888454011741683, |
|
"loss": 3.1656, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016868884540117415, |
|
"loss": 3.1689, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001684931506849315, |
|
"loss": 3.1441, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001682974559686888, |
|
"loss": 3.1692, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016810176125244618, |
|
"loss": 3.1777, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001679060665362035, |
|
"loss": 3.1608, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016771037181996087, |
|
"loss": 3.1557, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016751467710371818, |
|
"loss": 3.1526, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016731898238747553, |
|
"loss": 3.1393, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016712328767123284, |
|
"loss": 3.165, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001669275929549902, |
|
"loss": 3.1819, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016673189823874753, |
|
"loss": 3.1709, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016653620352250487, |
|
"loss": 3.166, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016634050880626222, |
|
"loss": 3.1599, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016614481409001956, |
|
"loss": 3.1628, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016594911937377687, |
|
"loss": 3.1642, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016575342465753425, |
|
"loss": 3.1734, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016555772994129156, |
|
"loss": 3.1581, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001653620352250489, |
|
"loss": 3.1575, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016516634050880625, |
|
"loss": 3.1557, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001649706457925636, |
|
"loss": 3.15, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001647749510763209, |
|
"loss": 3.1475, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016457925636007828, |
|
"loss": 3.1371, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001643835616438356, |
|
"loss": 3.1573, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016418786692759294, |
|
"loss": 3.1629, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016399217221135028, |
|
"loss": 3.1715, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016379647749510762, |
|
"loss": 3.1557, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016360078277886494, |
|
"loss": 3.1765, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001634050880626223, |
|
"loss": 3.1585, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016320939334637963, |
|
"loss": 3.186, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016301369863013697, |
|
"loss": 3.1744, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001628180039138943, |
|
"loss": 3.1756, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016262230919765166, |
|
"loss": 3.1623, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016242661448140897, |
|
"loss": 3.1626, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016223091976516634, |
|
"loss": 3.1468, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016203522504892366, |
|
"loss": 3.1574, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.000161839530332681, |
|
"loss": 3.1872, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016164383561643835, |
|
"loss": 3.1618, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001614481409001957, |
|
"loss": 3.1654, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.000161252446183953, |
|
"loss": 3.1765, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016105675146771038, |
|
"loss": 3.1534, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001608610567514677, |
|
"loss": 3.1434, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016066536203522504, |
|
"loss": 3.1704, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016046966731898235, |
|
"loss": 3.1692, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016027397260273972, |
|
"loss": 3.1699, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016007827788649704, |
|
"loss": 3.1624, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001598825831702544, |
|
"loss": 3.1482, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00015968688845401172, |
|
"loss": 3.1488, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015949119373776907, |
|
"loss": 3.1766, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015929549902152638, |
|
"loss": 3.1617, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015909980430528375, |
|
"loss": 3.1672, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015890410958904107, |
|
"loss": 3.1481, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00015870841487279844, |
|
"loss": 3.1539, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015851272015655576, |
|
"loss": 3.1513, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001583170254403131, |
|
"loss": 3.1677, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015812133072407042, |
|
"loss": 3.1623, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001579256360078278, |
|
"loss": 3.1733, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001577299412915851, |
|
"loss": 3.1467, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015753424657534245, |
|
"loss": 3.1513, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001573385518590998, |
|
"loss": 3.1497, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015714285714285713, |
|
"loss": 3.159, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015694716242661445, |
|
"loss": 3.1563, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015675146771037182, |
|
"loss": 3.1581, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015655577299412914, |
|
"loss": 3.133, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015636007827788648, |
|
"loss": 3.145, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015616438356164382, |
|
"loss": 3.1565, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015596868884540117, |
|
"loss": 3.1405, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015577299412915848, |
|
"loss": 3.1518, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015557729941291585, |
|
"loss": 3.1659, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015538160469667317, |
|
"loss": 3.1323, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001551859099804305, |
|
"loss": 3.1682, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015499021526418785, |
|
"loss": 3.1342, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001547945205479452, |
|
"loss": 3.1752, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015459882583170251, |
|
"loss": 3.156, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015440313111545988, |
|
"loss": 3.1592, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001542074363992172, |
|
"loss": 3.1566, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015401174168297454, |
|
"loss": 3.1442, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001538160469667319, |
|
"loss": 3.1806, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015362035225048923, |
|
"loss": 3.1548, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015342465753424655, |
|
"loss": 3.1635, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015322896281800392, |
|
"loss": 3.1544, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015303326810176123, |
|
"loss": 3.1547, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015283757338551858, |
|
"loss": 3.1348, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015264187866927592, |
|
"loss": 3.1474, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015244618395303326, |
|
"loss": 3.16, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015225048923679058, |
|
"loss": 3.1442, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015205479452054795, |
|
"loss": 3.1437, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015185909980430527, |
|
"loss": 3.1491, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001516634050880626, |
|
"loss": 3.1727, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015146771037181993, |
|
"loss": 3.1545, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001512720156555773, |
|
"loss": 3.1386, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001510763209393346, |
|
"loss": 3.128, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015088062622309198, |
|
"loss": 3.1668, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001506849315068493, |
|
"loss": 3.1344, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015048923679060664, |
|
"loss": 3.1301, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015029354207436396, |
|
"loss": 3.1607, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015009784735812133, |
|
"loss": 3.1406, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00014990215264187864, |
|
"loss": 3.141, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.000149706457925636, |
|
"loss": 3.1406, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014951076320939333, |
|
"loss": 3.1468, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014931506849315067, |
|
"loss": 3.1326, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.000149119373776908, |
|
"loss": 3.1391, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014892367906066533, |
|
"loss": 3.1678, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014872798434442268, |
|
"loss": 3.1543, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014853228962818002, |
|
"loss": 3.1469, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014833659491193736, |
|
"loss": 3.1584, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001481409001956947, |
|
"loss": 3.1335, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014794520547945202, |
|
"loss": 3.1468, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014774951076320937, |
|
"loss": 3.1406, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001475538160469667, |
|
"loss": 3.1297, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014735812133072405, |
|
"loss": 3.1374, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001471624266144814, |
|
"loss": 3.1366, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001469667318982387, |
|
"loss": 3.1248, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014677103718199606, |
|
"loss": 3.1382, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001465753424657534, |
|
"loss": 3.1226, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014637964774951074, |
|
"loss": 3.1489, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014618395303326809, |
|
"loss": 3.1356, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014598825831702543, |
|
"loss": 3.129, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014579256360078275, |
|
"loss": 3.1357, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0001455968688845401, |
|
"loss": 3.1262, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014540117416829743, |
|
"loss": 3.1393, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014520547945205478, |
|
"loss": 3.1368, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014500978473581212, |
|
"loss": 3.1179, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014481409001956946, |
|
"loss": 3.1203, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014461839530332678, |
|
"loss": 3.1377, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014442270058708412, |
|
"loss": 3.1352, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014422700587084146, |
|
"loss": 3.1483, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001440313111545988, |
|
"loss": 3.1367, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014383561643835615, |
|
"loss": 3.1331, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001436399217221135, |
|
"loss": 3.1125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001434442270058708, |
|
"loss": 3.1554, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014324853228962815, |
|
"loss": 3.1296, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0001430528375733855, |
|
"loss": 3.1257, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014285714285714284, |
|
"loss": 3.1232, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014266144814090018, |
|
"loss": 3.1271, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014246575342465753, |
|
"loss": 3.1358, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014227005870841484, |
|
"loss": 3.1306, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001420743639921722, |
|
"loss": 3.1541, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014187866927592953, |
|
"loss": 3.1495, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00014168297455968687, |
|
"loss": 3.1358, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014148727984344422, |
|
"loss": 3.1495, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014129158512720153, |
|
"loss": 3.1224, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014109589041095888, |
|
"loss": 3.1549, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014090019569471622, |
|
"loss": 3.1578, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014070450097847356, |
|
"loss": 3.1453, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001405088062622309, |
|
"loss": 3.1538, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014031311154598825, |
|
"loss": 3.139, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014011741682974557, |
|
"loss": 3.1142, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001399217221135029, |
|
"loss": 3.1151, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00013972602739726025, |
|
"loss": 3.1302, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001395303326810176, |
|
"loss": 3.1555, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013933463796477494, |
|
"loss": 3.1429, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013913894324853228, |
|
"loss": 3.1318, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001389432485322896, |
|
"loss": 3.1529, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00013874755381604694, |
|
"loss": 3.1258, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013855185909980428, |
|
"loss": 3.1296, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013835616438356163, |
|
"loss": 3.1461, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013816046966731897, |
|
"loss": 3.1311, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013796477495107631, |
|
"loss": 3.1395, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013776908023483363, |
|
"loss": 3.1253, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013757338551859097, |
|
"loss": 3.1341, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013737769080234832, |
|
"loss": 3.1389, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013718199608610566, |
|
"loss": 3.1301, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.000136986301369863, |
|
"loss": 3.125, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013679060665362032, |
|
"loss": 3.127, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013659491193737766, |
|
"loss": 3.1212, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.000136399217221135, |
|
"loss": 3.1257, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013620352250489235, |
|
"loss": 3.1402, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0001360078277886497, |
|
"loss": 3.1311, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013581213307240704, |
|
"loss": 3.1327, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013561643835616435, |
|
"loss": 3.1322, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001354207436399217, |
|
"loss": 3.1303, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013522504892367904, |
|
"loss": 3.1196, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013502935420743638, |
|
"loss": 3.1346, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013483365949119373, |
|
"loss": 3.1205, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013463796477495107, |
|
"loss": 3.1428, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013444227005870839, |
|
"loss": 3.1473, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013424657534246573, |
|
"loss": 3.1177, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013405088062622307, |
|
"loss": 3.128, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013385518590998042, |
|
"loss": 3.162, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013365949119373776, |
|
"loss": 3.1314, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001334637964774951, |
|
"loss": 3.1364, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013326810176125242, |
|
"loss": 3.1359, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013307240704500976, |
|
"loss": 3.1367, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001328767123287671, |
|
"loss": 3.1389, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013268101761252445, |
|
"loss": 3.1369, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001324853228962818, |
|
"loss": 3.1663, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001322896281800391, |
|
"loss": 3.1757, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013209393346379645, |
|
"loss": 3.1359, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001318982387475538, |
|
"loss": 3.1262, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00013170254403131114, |
|
"loss": 3.145, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013150684931506848, |
|
"loss": 3.1185, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013131115459882582, |
|
"loss": 3.1311, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013111545988258314, |
|
"loss": 3.1238, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013091976516634048, |
|
"loss": 3.1349, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013072407045009783, |
|
"loss": 3.1431, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013052837573385517, |
|
"loss": 3.1335, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001303326810176125, |
|
"loss": 3.1396, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013013698630136986, |
|
"loss": 3.1351, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00012994129158512717, |
|
"loss": 3.1457, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00012974559686888452, |
|
"loss": 3.124, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012954990215264186, |
|
"loss": 3.127, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001293542074363992, |
|
"loss": 3.1555, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00012915851272015655, |
|
"loss": 3.1196, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001289628180039139, |
|
"loss": 3.1233, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001287671232876712, |
|
"loss": 3.1462, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012857142857142855, |
|
"loss": 3.1208, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001283757338551859, |
|
"loss": 3.1295, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012818003913894324, |
|
"loss": 3.1144, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012798434442270058, |
|
"loss": 3.1134, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001277886497064579, |
|
"loss": 3.1273, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012759295499021524, |
|
"loss": 3.1219, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012739726027397258, |
|
"loss": 3.124, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012720156555772992, |
|
"loss": 3.1346, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012700587084148727, |
|
"loss": 3.1182, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001268101761252446, |
|
"loss": 3.1364, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012661448140900193, |
|
"loss": 3.1302, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012641878669275927, |
|
"loss": 3.1325, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012622309197651661, |
|
"loss": 3.1439, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012602739726027396, |
|
"loss": 3.1263, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001258317025440313, |
|
"loss": 3.1286, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012563600782778864, |
|
"loss": 3.1035, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012544031311154596, |
|
"loss": 3.1113, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001252446183953033, |
|
"loss": 3.1172, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012504892367906065, |
|
"loss": 3.147, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.000124853228962818, |
|
"loss": 3.1284, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012465753424657533, |
|
"loss": 3.1333, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012446183953033268, |
|
"loss": 3.1376, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012426614481409, |
|
"loss": 3.1115, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012407045009784734, |
|
"loss": 3.1494, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012387475538160468, |
|
"loss": 3.1442, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012367906066536202, |
|
"loss": 3.1194, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012348336594911937, |
|
"loss": 3.1343, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012328767123287668, |
|
"loss": 3.1355, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012309197651663403, |
|
"loss": 3.1238, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012289628180039137, |
|
"loss": 3.111, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001227005870841487, |
|
"loss": 3.1149, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012250489236790606, |
|
"loss": 3.1266, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001223091976516634, |
|
"loss": 3.1254, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012211350293542071, |
|
"loss": 3.1104, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012191780821917807, |
|
"loss": 3.1247, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001217221135029354, |
|
"loss": 3.1289, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012152641878669274, |
|
"loss": 3.1161, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012133072407045007, |
|
"loss": 3.1296, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012113502935420742, |
|
"loss": 3.1252, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012093933463796476, |
|
"loss": 3.1422, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012074363992172209, |
|
"loss": 3.1013, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012054794520547943, |
|
"loss": 3.1349, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012035225048923678, |
|
"loss": 3.1256, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001201565557729941, |
|
"loss": 3.1272, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00011996086105675145, |
|
"loss": 3.1389, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001197651663405088, |
|
"loss": 3.1222, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011956947162426612, |
|
"loss": 3.122, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011937377690802347, |
|
"loss": 3.1322, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011917808219178081, |
|
"loss": 3.1271, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011898238747553814, |
|
"loss": 3.126, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011878669275929548, |
|
"loss": 3.1209, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011859099804305283, |
|
"loss": 3.1359, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011839530332681016, |
|
"loss": 3.1275, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001181996086105675, |
|
"loss": 3.1173, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011800391389432484, |
|
"loss": 3.1109, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011780821917808217, |
|
"loss": 3.1367, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011761252446183952, |
|
"loss": 3.1124, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011741682974559686, |
|
"loss": 3.1189, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011722113502935419, |
|
"loss": 3.1114, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011702544031311153, |
|
"loss": 3.1296, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011682974559686886, |
|
"loss": 3.1141, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001166340508806262, |
|
"loss": 3.1284, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011643835616438355, |
|
"loss": 3.1284, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011624266144814088, |
|
"loss": 3.1153, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011604696673189822, |
|
"loss": 3.1182, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011585127201565556, |
|
"loss": 3.1171, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001156555772994129, |
|
"loss": 3.1289, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011545988258317024, |
|
"loss": 3.1143, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011526418786692758, |
|
"loss": 3.1236, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011506849315068491, |
|
"loss": 3.1242, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011487279843444225, |
|
"loss": 3.1404, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001146771037181996, |
|
"loss": 3.1173, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011448140900195693, |
|
"loss": 3.1406, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011428571428571427, |
|
"loss": 3.1177, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011409001956947161, |
|
"loss": 3.1103, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011389432485322894, |
|
"loss": 3.128, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011369863013698629, |
|
"loss": 3.1282, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011350293542074363, |
|
"loss": 3.1315, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011330724070450096, |
|
"loss": 3.1205, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001131115459882583, |
|
"loss": 3.1464, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011291585127201565, |
|
"loss": 3.135, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011272015655577298, |
|
"loss": 3.1292, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011252446183953032, |
|
"loss": 3.1267, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011232876712328765, |
|
"loss": 3.1131, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011213307240704499, |
|
"loss": 3.128, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011193737769080234, |
|
"loss": 3.1223, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011174168297455966, |
|
"loss": 3.1283, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011154598825831701, |
|
"loss": 3.1163, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011135029354207435, |
|
"loss": 3.1357, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011115459882583168, |
|
"loss": 3.1144, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011095890410958902, |
|
"loss": 3.1232, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011076320939334637, |
|
"loss": 3.1239, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001105675146771037, |
|
"loss": 3.1255, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011037181996086104, |
|
"loss": 3.0973, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011017612524461838, |
|
"loss": 3.1072, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00010998043052837571, |
|
"loss": 3.1245, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00010978473581213306, |
|
"loss": 3.1314, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0001095890410958904, |
|
"loss": 3.1118, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010939334637964773, |
|
"loss": 3.1211, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010919765166340507, |
|
"loss": 3.1143, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010900195694716242, |
|
"loss": 3.1095, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010880626223091975, |
|
"loss": 3.1202, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010861056751467709, |
|
"loss": 3.1172, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010841487279843443, |
|
"loss": 3.1, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010821917808219176, |
|
"loss": 3.1008, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001080234833659491, |
|
"loss": 3.1302, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010782778864970644, |
|
"loss": 3.1089, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010763209393346378, |
|
"loss": 3.0935, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010743639921722112, |
|
"loss": 3.1132, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010724070450097845, |
|
"loss": 3.1217, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001070450097847358, |
|
"loss": 3.1082, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010684931506849314, |
|
"loss": 3.1156, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010665362035225047, |
|
"loss": 3.1278, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010645792563600781, |
|
"loss": 3.1095, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010626223091976516, |
|
"loss": 3.1043, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010606653620352248, |
|
"loss": 3.1249, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010587084148727983, |
|
"loss": 3.1214, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010567514677103717, |
|
"loss": 3.1249, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001054794520547945, |
|
"loss": 3.1, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010528375733855184, |
|
"loss": 3.1339, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010508806262230919, |
|
"loss": 3.1316, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010489236790606652, |
|
"loss": 3.1167, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010469667318982386, |
|
"loss": 3.1114, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0001045009784735812, |
|
"loss": 3.1149, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010430528375733853, |
|
"loss": 3.124, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010410958904109588, |
|
"loss": 3.1216, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010391389432485322, |
|
"loss": 3.1094, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010371819960861055, |
|
"loss": 3.1256, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001035225048923679, |
|
"loss": 3.1146, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010332681017612522, |
|
"loss": 3.1392, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010313111545988257, |
|
"loss": 3.106, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010293542074363991, |
|
"loss": 3.1158, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010273972602739724, |
|
"loss": 3.1201, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010254403131115458, |
|
"loss": 3.131, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010234833659491193, |
|
"loss": 3.1173, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010215264187866926, |
|
"loss": 3.1042, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001019569471624266, |
|
"loss": 3.1043, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010176125244618394, |
|
"loss": 3.1047, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010156555772994127, |
|
"loss": 3.1142, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010136986301369862, |
|
"loss": 3.1218, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010117416829745596, |
|
"loss": 3.1069, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010097847358121329, |
|
"loss": 3.1048, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010078277886497063, |
|
"loss": 3.1196, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010058708414872797, |
|
"loss": 3.1062, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001003913894324853, |
|
"loss": 3.109, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010019569471624265, |
|
"loss": 3.1097, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 11.071310043334961, |
|
"eval_runtime": 512.791, |
|
"eval_samples_per_second": 30.872, |
|
"eval_steps_per_second": 0.07, |
|
"eval_wer": 0.9999636220770339, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999999999999999e-05, |
|
"loss": 4.6471, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.980430528375732e-05, |
|
"loss": 3.1252, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.960861056751466e-05, |
|
"loss": 3.0992, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.941291585127201e-05, |
|
"loss": 3.1093, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.921722113502934e-05, |
|
"loss": 3.1178, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.902152641878668e-05, |
|
"loss": 3.1203, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.882583170254402e-05, |
|
"loss": 3.1096, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.863013698630135e-05, |
|
"loss": 3.1118, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.84344422700587e-05, |
|
"loss": 3.0968, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.823874755381603e-05, |
|
"loss": 3.116, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.804305283757337e-05, |
|
"loss": 3.0984, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.784735812133071e-05, |
|
"loss": 3.1049, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.765166340508804e-05, |
|
"loss": 3.0993, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.745596868884539e-05, |
|
"loss": 3.1017, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.726027397260273e-05, |
|
"loss": 3.1103, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.706457925636006e-05, |
|
"loss": 3.1097, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.68688845401174e-05, |
|
"loss": 3.1288, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.667318982387475e-05, |
|
"loss": 3.118, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.647749510763208e-05, |
|
"loss": 3.1166, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.628180039138942e-05, |
|
"loss": 3.106, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.608610567514676e-05, |
|
"loss": 3.0979, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.589041095890409e-05, |
|
"loss": 3.1159, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.569471624266144e-05, |
|
"loss": 3.1071, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.549902152641878e-05, |
|
"loss": 3.0906, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.530332681017611e-05, |
|
"loss": 3.1089, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.510763209393345e-05, |
|
"loss": 3.1008, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.49119373776908e-05, |
|
"loss": 3.1032, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.471624266144812e-05, |
|
"loss": 3.0994, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.452054794520547e-05, |
|
"loss": 3.093, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.432485322896281e-05, |
|
"loss": 3.1114, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.412915851272014e-05, |
|
"loss": 3.0827, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.393346379647748e-05, |
|
"loss": 3.1041, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.373776908023481e-05, |
|
"loss": 3.1068, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.354207436399216e-05, |
|
"loss": 3.0941, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.33463796477495e-05, |
|
"loss": 3.1066, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.315068493150683e-05, |
|
"loss": 3.1036, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.295499021526417e-05, |
|
"loss": 3.1035, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.275929549902152e-05, |
|
"loss": 3.0722, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.256360078277885e-05, |
|
"loss": 3.1077, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.236790606653619e-05, |
|
"loss": 3.0914, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.217221135029353e-05, |
|
"loss": 3.0951, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.197651663405086e-05, |
|
"loss": 3.104, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.17808219178082e-05, |
|
"loss": 3.1262, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.158512720156555e-05, |
|
"loss": 3.0933, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.138943248532288e-05, |
|
"loss": 3.1002, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.119373776908022e-05, |
|
"loss": 3.1073, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.099804305283757e-05, |
|
"loss": 3.0895, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.08023483365949e-05, |
|
"loss": 3.0823, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.060665362035224e-05, |
|
"loss": 3.0955, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.041095890410958e-05, |
|
"loss": 3.1231, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.021526418786691e-05, |
|
"loss": 3.1017, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.001956947162426e-05, |
|
"loss": 3.103, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 8.98238747553816e-05, |
|
"loss": 3.0829, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.962818003913893e-05, |
|
"loss": 3.0942, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.943248532289627e-05, |
|
"loss": 3.0983, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.92367906066536e-05, |
|
"loss": 3.0937, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.904109589041094e-05, |
|
"loss": 3.0885, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.884540117416829e-05, |
|
"loss": 3.0858, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.864970645792562e-05, |
|
"loss": 3.121, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.845401174168296e-05, |
|
"loss": 3.1038, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.82583170254403e-05, |
|
"loss": 3.0919, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.806262230919763e-05, |
|
"loss": 3.1125, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.786692759295498e-05, |
|
"loss": 3.1011, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.767123287671232e-05, |
|
"loss": 3.1193, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.747553816046965e-05, |
|
"loss": 3.0789, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.7279843444227e-05, |
|
"loss": 3.1006, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.708414872798434e-05, |
|
"loss": 3.1077, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.688845401174167e-05, |
|
"loss": 3.0825, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.669275929549901e-05, |
|
"loss": 3.0824, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.649706457925635e-05, |
|
"loss": 3.1055, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.630136986301368e-05, |
|
"loss": 3.0984, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.610567514677103e-05, |
|
"loss": 3.0951, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.590998043052837e-05, |
|
"loss": 3.1181, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.57142857142857e-05, |
|
"loss": 3.1022, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.551859099804304e-05, |
|
"loss": 3.0843, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.532289628180039e-05, |
|
"loss": 3.0949, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.512720156555772e-05, |
|
"loss": 3.1284, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.493150684931506e-05, |
|
"loss": 3.0959, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.473581213307239e-05, |
|
"loss": 3.0898, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.454011741682973e-05, |
|
"loss": 3.0951, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.434442270058707e-05, |
|
"loss": 3.1127, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.41487279843444e-05, |
|
"loss": 3.0788, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.395303326810175e-05, |
|
"loss": 3.0924, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.375733855185909e-05, |
|
"loss": 3.0868, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.356164383561642e-05, |
|
"loss": 3.0955, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.336594911937376e-05, |
|
"loss": 3.0779, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.317025440313111e-05, |
|
"loss": 3.0863, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.297455968688844e-05, |
|
"loss": 3.0776, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.277886497064578e-05, |
|
"loss": 3.0884, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.258317025440312e-05, |
|
"loss": 3.0996, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.238747553816045e-05, |
|
"loss": 3.0983, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.21917808219178e-05, |
|
"loss": 3.078, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.199608610567514e-05, |
|
"loss": 3.0982, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.180039138943247e-05, |
|
"loss": 3.1144, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.160469667318981e-05, |
|
"loss": 3.0973, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.140900195694716e-05, |
|
"loss": 3.0844, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.121330724070449e-05, |
|
"loss": 3.0951, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.101761252446183e-05, |
|
"loss": 3.0945, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.082191780821917e-05, |
|
"loss": 3.0839, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.06262230919765e-05, |
|
"loss": 3.0876, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.043052837573385e-05, |
|
"loss": 3.0903, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.023483365949118e-05, |
|
"loss": 3.0901, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.003913894324852e-05, |
|
"loss": 3.0799, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 7.984344422700586e-05, |
|
"loss": 3.0929, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.964774951076319e-05, |
|
"loss": 3.1026, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.945205479452054e-05, |
|
"loss": 3.0934, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.925636007827788e-05, |
|
"loss": 3.0998, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.906066536203521e-05, |
|
"loss": 3.0951, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.886497064579255e-05, |
|
"loss": 3.1021, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.86692759295499e-05, |
|
"loss": 3.0904, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.847358121330722e-05, |
|
"loss": 3.0769, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.827788649706457e-05, |
|
"loss": 3.0951, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.808219178082191e-05, |
|
"loss": 3.0719, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.788649706457924e-05, |
|
"loss": 3.0889, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.769080234833658e-05, |
|
"loss": 3.0789, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.749510763209393e-05, |
|
"loss": 3.0973, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.729941291585126e-05, |
|
"loss": 3.0977, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.71037181996086e-05, |
|
"loss": 3.0884, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.690802348336594e-05, |
|
"loss": 3.0909, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.671232876712327e-05, |
|
"loss": 3.0774, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.651663405088062e-05, |
|
"loss": 3.0874, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.632093933463796e-05, |
|
"loss": 3.0765, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.612524461839529e-05, |
|
"loss": 3.0828, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.592954990215263e-05, |
|
"loss": 3.0936, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.573385518590996e-05, |
|
"loss": 3.0783, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.55381604696673e-05, |
|
"loss": 3.0877, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.534246575342465e-05, |
|
"loss": 3.0906, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.514677103718198e-05, |
|
"loss": 3.0953, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.495107632093932e-05, |
|
"loss": 3.0831, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.475538160469667e-05, |
|
"loss": 3.0823, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.4559686888454e-05, |
|
"loss": 3.063, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.436399217221134e-05, |
|
"loss": 3.1071, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.416829745596868e-05, |
|
"loss": 3.0938, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.397260273972601e-05, |
|
"loss": 3.1188, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.377690802348336e-05, |
|
"loss": 3.0981, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.35812133072407e-05, |
|
"loss": 3.0792, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.338551859099803e-05, |
|
"loss": 3.0976, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.318982387475537e-05, |
|
"loss": 3.0991, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.299412915851271e-05, |
|
"loss": 3.1003, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.279843444227004e-05, |
|
"loss": 3.0793, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.260273972602739e-05, |
|
"loss": 3.0891, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.240704500978473e-05, |
|
"loss": 3.0808, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.221135029354206e-05, |
|
"loss": 3.08, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.20156555772994e-05, |
|
"loss": 3.0784, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.181996086105675e-05, |
|
"loss": 3.0652, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.162426614481408e-05, |
|
"loss": 3.0888, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.142857142857142e-05, |
|
"loss": 3.0916, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.123287671232876e-05, |
|
"loss": 3.084, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.10371819960861e-05, |
|
"loss": 3.0874, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.084148727984344e-05, |
|
"loss": 3.0831, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.064579256360077e-05, |
|
"loss": 3.0965, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.045009784735811e-05, |
|
"loss": 3.0963, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.025440313111545e-05, |
|
"loss": 3.1057, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.005870841487278e-05, |
|
"loss": 3.0931, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.986301369863013e-05, |
|
"loss": 3.0869, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.966731898238747e-05, |
|
"loss": 3.0825, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.94716242661448e-05, |
|
"loss": 3.1187, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.927592954990214e-05, |
|
"loss": 3.0852, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.908023483365949e-05, |
|
"loss": 3.0945, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.888454011741682e-05, |
|
"loss": 3.0845, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.868884540117416e-05, |
|
"loss": 3.0884, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.84931506849315e-05, |
|
"loss": 3.0952, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.829745596868883e-05, |
|
"loss": 3.0746, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.810176125244617e-05, |
|
"loss": 3.0781, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.790606653620352e-05, |
|
"loss": 3.0805, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.771037181996085e-05, |
|
"loss": 3.0887, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.751467710371819e-05, |
|
"loss": 3.0738, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.731898238747553e-05, |
|
"loss": 3.1072, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.712328767123286e-05, |
|
"loss": 3.0933, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.692759295499021e-05, |
|
"loss": 3.0806, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.673189823874755e-05, |
|
"loss": 3.0914, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.653620352250488e-05, |
|
"loss": 3.0833, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.634050880626222e-05, |
|
"loss": 3.0953, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.614481409001955e-05, |
|
"loss": 3.0787, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.59491193737769e-05, |
|
"loss": 3.0927, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.575342465753424e-05, |
|
"loss": 3.0778, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.555772994129157e-05, |
|
"loss": 3.1037, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.536203522504891e-05, |
|
"loss": 3.0948, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.516634050880626e-05, |
|
"loss": 3.0799, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.497064579256359e-05, |
|
"loss": 3.0921, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.477495107632093e-05, |
|
"loss": 3.0831, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.457925636007827e-05, |
|
"loss": 3.0792, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.43835616438356e-05, |
|
"loss": 3.0563, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.418786692759295e-05, |
|
"loss": 3.0839, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.399217221135029e-05, |
|
"loss": 3.074, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.379647749510762e-05, |
|
"loss": 3.081, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.360078277886496e-05, |
|
"loss": 3.094, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.34050880626223e-05, |
|
"loss": 3.088, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.320939334637964e-05, |
|
"loss": 3.0876, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.301369863013698e-05, |
|
"loss": 3.0858, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.281800391389432e-05, |
|
"loss": 3.0721, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.262230919765165e-05, |
|
"loss": 3.0885, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.2426614481409e-05, |
|
"loss": 3.0734, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.223091976516634e-05, |
|
"loss": 3.0742, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.203522504892367e-05, |
|
"loss": 3.0683, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.183953033268101e-05, |
|
"loss": 3.0922, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.164383561643834e-05, |
|
"loss": 3.0714, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.144814090019568e-05, |
|
"loss": 3.0796, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.125244618395303e-05, |
|
"loss": 3.0699, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.105675146771036e-05, |
|
"loss": 3.0954, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.08610567514677e-05, |
|
"loss": 3.0947, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.066536203522504e-05, |
|
"loss": 3.1089, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.046966731898238e-05, |
|
"loss": 3.0642, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.027397260273972e-05, |
|
"loss": 3.0729, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.007827788649705e-05, |
|
"loss": 3.0939, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.98825831702544e-05, |
|
"loss": 3.0843, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.968688845401173e-05, |
|
"loss": 3.0808, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.949119373776907e-05, |
|
"loss": 3.0813, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.929549902152641e-05, |
|
"loss": 3.0939, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.909980430528375e-05, |
|
"loss": 3.106, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.8904109589041086e-05, |
|
"loss": 3.0975, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5.870841487279843e-05, |
|
"loss": 3.0838, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.8512720156555766e-05, |
|
"loss": 3.0846, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.83170254403131e-05, |
|
"loss": 3.0797, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.812133072407044e-05, |
|
"loss": 3.0816, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.792563600782778e-05, |
|
"loss": 3.0816, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.772994129158512e-05, |
|
"loss": 3.0732, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.7534246575342455e-05, |
|
"loss": 3.0737, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.73385518590998e-05, |
|
"loss": 3.065, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.7142857142857135e-05, |
|
"loss": 3.0787, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.694716242661447e-05, |
|
"loss": 3.0977, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.6751467710371815e-05, |
|
"loss": 3.0921, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.655577299412915e-05, |
|
"loss": 3.0748, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.636007827788649e-05, |
|
"loss": 3.0966, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.6164383561643824e-05, |
|
"loss": 3.0837, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.596868884540117e-05, |
|
"loss": 3.087, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.5772994129158504e-05, |
|
"loss": 3.082, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.557729941291584e-05, |
|
"loss": 3.0773, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.5381604696673184e-05, |
|
"loss": 3.0805, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.518590998043052e-05, |
|
"loss": 3.0782, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.499021526418786e-05, |
|
"loss": 3.0895, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.47945205479452e-05, |
|
"loss": 3.0899, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.459882583170254e-05, |
|
"loss": 3.0689, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.440313111545987e-05, |
|
"loss": 3.0842, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.4207436399217217e-05, |
|
"loss": 3.0907, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.401174168297455e-05, |
|
"loss": 3.0873, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.381604696673189e-05, |
|
"loss": 3.0894, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3620352250489226e-05, |
|
"loss": 3.0787, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.342465753424657e-05, |
|
"loss": 3.0702, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.3228962818003906e-05, |
|
"loss": 3.1048, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.303326810176124e-05, |
|
"loss": 3.0777, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.2837573385518586e-05, |
|
"loss": 3.0874, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.264187866927592e-05, |
|
"loss": 3.0819, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.244618395303326e-05, |
|
"loss": 3.0687, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.22504892367906e-05, |
|
"loss": 3.0667, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.205479452054794e-05, |
|
"loss": 3.0798, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.1859099804305275e-05, |
|
"loss": 3.0666, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.166340508806261e-05, |
|
"loss": 3.083, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.1467710371819955e-05, |
|
"loss": 3.0885, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.127201565557729e-05, |
|
"loss": 3.1091, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.107632093933463e-05, |
|
"loss": 3.0811, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.088062622309197e-05, |
|
"loss": 3.08, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.068493150684931e-05, |
|
"loss": 3.0862, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0489236790606644e-05, |
|
"loss": 3.0712, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.029354207436399e-05, |
|
"loss": 3.0791, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.0097847358121324e-05, |
|
"loss": 3.0695, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.990215264187866e-05, |
|
"loss": 3.0748, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.9706457925636004e-05, |
|
"loss": 3.0672, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.951076320939334e-05, |
|
"loss": 3.0857, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.931506849315068e-05, |
|
"loss": 3.0796, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.911937377690801e-05, |
|
"loss": 3.0754, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.8923679060665357e-05, |
|
"loss": 3.0985, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.872798434442269e-05, |
|
"loss": 3.0685, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.853228962818003e-05, |
|
"loss": 3.0901, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.833659491193737e-05, |
|
"loss": 3.0864, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.814090019569471e-05, |
|
"loss": 3.0777, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.7945205479452046e-05, |
|
"loss": 3.0787, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.774951076320939e-05, |
|
"loss": 3.0945, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.7553816046966726e-05, |
|
"loss": 3.1029, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.735812133072406e-05, |
|
"loss": 3.0755, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.7162426614481406e-05, |
|
"loss": 3.0828, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.696673189823874e-05, |
|
"loss": 3.0773, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.677103718199608e-05, |
|
"loss": 3.0886, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.6575342465753415e-05, |
|
"loss": 3.0616, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.637964774951076e-05, |
|
"loss": 3.112, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.6183953033268095e-05, |
|
"loss": 3.0566, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.598825831702543e-05, |
|
"loss": 3.0837, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.5792563600782775e-05, |
|
"loss": 3.0935, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.559686888454011e-05, |
|
"loss": 3.0689, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.540117416829745e-05, |
|
"loss": 3.0551, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.520547945205479e-05, |
|
"loss": 3.076, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.500978473581213e-05, |
|
"loss": 3.0757, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.4814090019569464e-05, |
|
"loss": 3.0753, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.46183953033268e-05, |
|
"loss": 3.0839, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.4422700587084144e-05, |
|
"loss": 3.0787, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.422700587084148e-05, |
|
"loss": 3.0667, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.403131115459882e-05, |
|
"loss": 3.0868, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.383561643835616e-05, |
|
"loss": 3.0668, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.36399217221135e-05, |
|
"loss": 3.0727, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.344422700587083e-05, |
|
"loss": 3.0775, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.3248532289628176e-05, |
|
"loss": 3.1012, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.305283757338551e-05, |
|
"loss": 3.0824, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.285714285714285e-05, |
|
"loss": 3.081, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.266144814090019e-05, |
|
"loss": 3.0902, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.246575342465753e-05, |
|
"loss": 3.0717, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.2270058708414866e-05, |
|
"loss": 3.1039, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.20743639921722e-05, |
|
"loss": 3.0871, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.1878669275929546e-05, |
|
"loss": 3.0765, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.168297455968688e-05, |
|
"loss": 3.0723, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.148727984344422e-05, |
|
"loss": 3.0852, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.129158512720156e-05, |
|
"loss": 3.0862, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.10958904109589e-05, |
|
"loss": 3.0799, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.0900195694716235e-05, |
|
"loss": 3.0794, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.070450097847358e-05, |
|
"loss": 3.0801, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.0508806262230915e-05, |
|
"loss": 3.0738, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.031311154598825e-05, |
|
"loss": 3.0657, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.011741682974559e-05, |
|
"loss": 3.0888, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.992172211350293e-05, |
|
"loss": 3.0913, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.972602739726027e-05, |
|
"loss": 3.0793, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.9530332681017604e-05, |
|
"loss": 3.0803, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.933463796477495e-05, |
|
"loss": 3.072, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.9138943248532284e-05, |
|
"loss": 3.1062, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.894324853228962e-05, |
|
"loss": 3.0677, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.8747553816046964e-05, |
|
"loss": 3.0691, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.85518590998043e-05, |
|
"loss": 3.0733, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.835616438356164e-05, |
|
"loss": 3.0785, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.816046966731898e-05, |
|
"loss": 3.0713, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.7964774951076317e-05, |
|
"loss": 3.0738, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.776908023483365e-05, |
|
"loss": 3.0579, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.757338551859099e-05, |
|
"loss": 3.0828, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.737769080234833e-05, |
|
"loss": 3.0829, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.718199608610567e-05, |
|
"loss": 3.0813, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.6986301369863006e-05, |
|
"loss": 3.0789, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.679060665362035e-05, |
|
"loss": 3.0724, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.6594911937377686e-05, |
|
"loss": 3.0742, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.639921722113502e-05, |
|
"loss": 3.07, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.6203522504892366e-05, |
|
"loss": 3.1018, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.60078277886497e-05, |
|
"loss": 3.102, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.581213307240704e-05, |
|
"loss": 3.0771, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.561643835616438e-05, |
|
"loss": 3.0765, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.542074363992172e-05, |
|
"loss": 3.086, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.5225048923679055e-05, |
|
"loss": 3.092, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.502935420743639e-05, |
|
"loss": 3.0907, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.4833659491193735e-05, |
|
"loss": 3.0756, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.463796477495107e-05, |
|
"loss": 3.0839, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.444227005870841e-05, |
|
"loss": 3.0753, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.424657534246575e-05, |
|
"loss": 3.0714, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.405088062622309e-05, |
|
"loss": 3.0728, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.3855185909980424e-05, |
|
"loss": 3.0777, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.365949119373777e-05, |
|
"loss": 3.0667, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.3463796477495104e-05, |
|
"loss": 3.0796, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.326810176125244e-05, |
|
"loss": 3.0698, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.307240704500978e-05, |
|
"loss": 3.0719, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.287671232876712e-05, |
|
"loss": 3.0711, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.268101761252446e-05, |
|
"loss": 3.0869, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.248532289628179e-05, |
|
"loss": 3.0937, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.2289628180039136e-05, |
|
"loss": 3.0653, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.209393346379647e-05, |
|
"loss": 3.0683, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.189823874755381e-05, |
|
"loss": 3.0893, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.170254403131115e-05, |
|
"loss": 3.0647, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.150684931506849e-05, |
|
"loss": 3.0788, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.1311154598825826e-05, |
|
"loss": 3.0813, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.111545988258317e-05, |
|
"loss": 3.0601, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.0919765166340506e-05, |
|
"loss": 3.0741, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.072407045009784e-05, |
|
"loss": 3.0684, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.052837573385518e-05, |
|
"loss": 3.0627, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.033268101761252e-05, |
|
"loss": 3.0901, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.013698630136986e-05, |
|
"loss": 3.0768, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.99412915851272e-05, |
|
"loss": 3.0755, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.9745596868884535e-05, |
|
"loss": 3.0837, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9549902152641875e-05, |
|
"loss": 3.0699, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.9354207436399215e-05, |
|
"loss": 3.0797, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.915851272015655e-05, |
|
"loss": 3.0603, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.896281800391389e-05, |
|
"loss": 3.0857, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.8767123287671228e-05, |
|
"loss": 3.0896, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8571428571428567e-05, |
|
"loss": 3.0757, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8375733855185907e-05, |
|
"loss": 3.0854, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8180039138943244e-05, |
|
"loss": 3.097, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.7984344422700584e-05, |
|
"loss": 3.0782, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.778864970645792e-05, |
|
"loss": 3.0538, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.759295499021526e-05, |
|
"loss": 3.061, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.73972602739726e-05, |
|
"loss": 3.0633, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7201565557729937e-05, |
|
"loss": 3.0649, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.7005870841487277e-05, |
|
"loss": 3.0662, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.6810176125244613e-05, |
|
"loss": 3.0493, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6614481409001953e-05, |
|
"loss": 3.0856, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.6418786692759293e-05, |
|
"loss": 3.0655, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.622309197651663e-05, |
|
"loss": 3.0681, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.602739726027397e-05, |
|
"loss": 3.0607, |
|
"step": 1401 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5831702544031306e-05, |
|
"loss": 3.0934, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5636007827788646e-05, |
|
"loss": 3.075, |
|
"step": 1403 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5440313111545986e-05, |
|
"loss": 3.0954, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5244618395303322e-05, |
|
"loss": 3.0619, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5048923679060662e-05, |
|
"loss": 3.0723, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.4853228962818002e-05, |
|
"loss": 3.0663, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.465753424657534e-05, |
|
"loss": 3.0554, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4461839530332678e-05, |
|
"loss": 3.0691, |
|
"step": 1409 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4266144814090015e-05, |
|
"loss": 3.0708, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.4070450097847355e-05, |
|
"loss": 3.0729, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.3874755381604695e-05, |
|
"loss": 3.0599, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.367906066536203e-05, |
|
"loss": 3.0671, |
|
"step": 1413 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.348336594911937e-05, |
|
"loss": 3.0763, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3287671232876708e-05, |
|
"loss": 3.0725, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3091976516634047e-05, |
|
"loss": 3.0855, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2896281800391387e-05, |
|
"loss": 3.0451, |
|
"step": 1417 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.2700587084148724e-05, |
|
"loss": 3.0603, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.2504892367906064e-05, |
|
"loss": 3.0644, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.23091976516634e-05, |
|
"loss": 3.0869, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.211350293542074e-05, |
|
"loss": 3.0638, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.191780821917808e-05, |
|
"loss": 3.0608, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1722113502935417e-05, |
|
"loss": 3.0477, |
|
"step": 1423 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1526418786692757e-05, |
|
"loss": 3.0687, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1330724070450096e-05, |
|
"loss": 3.0685, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1135029354207433e-05, |
|
"loss": 3.0634, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.0939334637964773e-05, |
|
"loss": 3.0752, |
|
"step": 1427 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.074363992172211e-05, |
|
"loss": 3.0728, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.054794520547945e-05, |
|
"loss": 3.0551, |
|
"step": 1429 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.035225048923679e-05, |
|
"loss": 3.063, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0156555772994126e-05, |
|
"loss": 3.0545, |
|
"step": 1431 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9960861056751466e-05, |
|
"loss": 3.0742, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.9765166340508802e-05, |
|
"loss": 3.0602, |
|
"step": 1433 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9569471624266142e-05, |
|
"loss": 3.0536, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.9373776908023482e-05, |
|
"loss": 3.084, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.917808219178082e-05, |
|
"loss": 3.0582, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8982387475538158e-05, |
|
"loss": 3.0698, |
|
"step": 1437 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8786692759295495e-05, |
|
"loss": 3.053, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8590998043052835e-05, |
|
"loss": 3.0486, |
|
"step": 1439 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.8395303326810175e-05, |
|
"loss": 3.0623, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.819960861056751e-05, |
|
"loss": 3.0892, |
|
"step": 1441 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.800391389432485e-05, |
|
"loss": 3.0567, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.780821917808219e-05, |
|
"loss": 3.064, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7612524461839527e-05, |
|
"loss": 3.0547, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7416829745596867e-05, |
|
"loss": 3.0642, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7221135029354204e-05, |
|
"loss": 3.0613, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.7025440313111544e-05, |
|
"loss": 3.0615, |
|
"step": 1447 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6829745596868884e-05, |
|
"loss": 3.0612, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.663405088062622e-05, |
|
"loss": 3.0518, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.643835616438356e-05, |
|
"loss": 3.0547, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6242661448140897e-05, |
|
"loss": 3.049, |
|
"step": 1451 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6046966731898236e-05, |
|
"loss": 3.0518, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5851272015655576e-05, |
|
"loss": 3.0503, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.5655577299412913e-05, |
|
"loss": 3.0655, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5459882583170253e-05, |
|
"loss": 3.0711, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.526418786692759e-05, |
|
"loss": 3.062, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.506849315068493e-05, |
|
"loss": 3.0557, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4872798434442267e-05, |
|
"loss": 3.0654, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.4677103718199607e-05, |
|
"loss": 3.0582, |
|
"step": 1459 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4481409001956946e-05, |
|
"loss": 3.0748, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4285714285714284e-05, |
|
"loss": 3.0422, |
|
"step": 1461 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.4090019569471622e-05, |
|
"loss": 3.0655, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.389432485322896e-05, |
|
"loss": 3.0531, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.36986301369863e-05, |
|
"loss": 3.0632, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3502935420743638e-05, |
|
"loss": 3.0606, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3307240704500976e-05, |
|
"loss": 3.0665, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.3111545988258315e-05, |
|
"loss": 3.0723, |
|
"step": 1467 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2915851272015653e-05, |
|
"loss": 3.0579, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.2720156555772993e-05, |
|
"loss": 3.0634, |
|
"step": 1469 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2524461839530331e-05, |
|
"loss": 3.0561, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.232876712328767e-05, |
|
"loss": 3.0639, |
|
"step": 1471 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.2133072407045007e-05, |
|
"loss": 3.0578, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1937377690802347e-05, |
|
"loss": 3.0509, |
|
"step": 1473 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1741682974559686e-05, |
|
"loss": 3.0622, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1545988258317024e-05, |
|
"loss": 3.0732, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.1350293542074362e-05, |
|
"loss": 3.0526, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.11545988258317e-05, |
|
"loss": 3.0411, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.095890410958904e-05, |
|
"loss": 3.0522, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.0763209393346378e-05, |
|
"loss": 3.0538, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0567514677103716e-05, |
|
"loss": 3.0508, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0371819960861055e-05, |
|
"loss": 3.0542, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0176125244618395e-05, |
|
"loss": 3.0544, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.980430528375733e-06, |
|
"loss": 3.0592, |
|
"step": 1483 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.784735812133071e-06, |
|
"loss": 3.0523, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.58904109589041e-06, |
|
"loss": 3.0467, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.393346379647747e-06, |
|
"loss": 3.0587, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.197651663405087e-06, |
|
"loss": 3.0535, |
|
"step": 1487 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.001956947162426e-06, |
|
"loss": 3.0468, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.806262230919764e-06, |
|
"loss": 3.0594, |
|
"step": 1489 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.610567514677102e-06, |
|
"loss": 3.0614, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.414872798434442e-06, |
|
"loss": 3.0695, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.21917808219178e-06, |
|
"loss": 3.0522, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.023483365949118e-06, |
|
"loss": 3.0282, |
|
"step": 1493 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.827788649706456e-06, |
|
"loss": 3.0886, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.632093933463795e-06, |
|
"loss": 3.0477, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.436399217221134e-06, |
|
"loss": 3.0401, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.240704500978473e-06, |
|
"loss": 3.0552, |
|
"step": 1497 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.045009784735811e-06, |
|
"loss": 3.0469, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.84931506849315e-06, |
|
"loss": 3.043, |
|
"step": 1499 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.653620352250488e-06, |
|
"loss": 3.0467, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.4579256360078264e-06, |
|
"loss": 3.0634, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.2622309197651655e-06, |
|
"loss": 3.0546, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 6.066536203522504e-06, |
|
"loss": 3.0741, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.870841487279843e-06, |
|
"loss": 3.0711, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.675146771037181e-06, |
|
"loss": 3.0573, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.47945205479452e-06, |
|
"loss": 3.0528, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.283757338551858e-06, |
|
"loss": 3.0352, |
|
"step": 1507 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.088062622309197e-06, |
|
"loss": 3.0307, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.8923679060665355e-06, |
|
"loss": 3.0397, |
|
"step": 1509 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.696673189823874e-06, |
|
"loss": 3.0631, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.500978473581213e-06, |
|
"loss": 3.0502, |
|
"step": 1511 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.305283757338551e-06, |
|
"loss": 3.0575, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.10958904109589e-06, |
|
"loss": 3.0654, |
|
"step": 1513 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.913894324853228e-06, |
|
"loss": 3.0512, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.718199608610567e-06, |
|
"loss": 3.0493, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.5225048923679055e-06, |
|
"loss": 3.0607, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.326810176125244e-06, |
|
"loss": 3.0561, |
|
"step": 1517 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.1311154598825827e-06, |
|
"loss": 3.0601, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.9354207436399214e-06, |
|
"loss": 3.0527, |
|
"step": 1519 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.73972602739726e-06, |
|
"loss": 3.0516, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.5440313111545986e-06, |
|
"loss": 3.0704, |
|
"step": 1521 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.348336594911937e-06, |
|
"loss": 3.0557, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.1526418786692755e-06, |
|
"loss": 3.0465, |
|
"step": 1523 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.956947162426614e-06, |
|
"loss": 3.06, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.7612524461839527e-06, |
|
"loss": 3.0631, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5655577299412914e-06, |
|
"loss": 3.0715, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.36986301369863e-06, |
|
"loss": 3.0547, |
|
"step": 1527 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.1741682974559684e-06, |
|
"loss": 3.0642, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.78473581213307e-07, |
|
"loss": 3.0562, |
|
"step": 1529 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.827788649706457e-07, |
|
"loss": 3.0795, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.870841487279842e-07, |
|
"loss": 3.0513, |
|
"step": 1531 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.9138943248532284e-07, |
|
"loss": 3.0782, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.9569471624266142e-07, |
|
"loss": 3.0541, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 14.038721084594727, |
|
"eval_runtime": 520.0539, |
|
"eval_samples_per_second": 30.441, |
|
"eval_steps_per_second": 0.069, |
|
"eval_wer": 0.9999636220770339, |
|
"step": 1533 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1533, |
|
"total_flos": 4.180774836907213e+19, |
|
"train_loss": 3.1717406544744464, |
|
"train_runtime": 43485.0285, |
|
"train_samples_per_second": 31.616, |
|
"train_steps_per_second": 0.035 |
|
} |
|
], |
|
"max_steps": 1533, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.180774836907213e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|