{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 1000, "global_step": 11619, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.3029259896729775e-10, "logits/chosen": -3.9100074768066406, "logits/rejected": -4.447928428649902, "logps/chosen": -252.016845703125, "logps/rejected": -298.87518310546875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.302925989672977e-09, "logits/chosen": -5.264148235321045, "logits/rejected": -4.749191761016846, "logps/chosen": -704.3628540039062, "logps/rejected": -532.2970581054688, "loss": 0.697, "rewards/accuracies": 0.3888888955116272, "rewards/chosen": -0.0018743880791589618, "rewards/margins": -0.004106739070266485, "rewards/rejected": 0.0022323497105389833, "step": 10 }, { "epoch": 0.01, "learning_rate": 8.605851979345954e-09, "logits/chosen": -5.4348464012146, "logits/rejected": -4.959498882293701, "logps/chosen": -699.098388671875, "logps/rejected": -476.25018310546875, "loss": 0.6916, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.006323958281427622, "rewards/margins": 0.00015197992615867406, "rewards/rejected": 0.0061719780787825584, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.2908777969018932e-08, "logits/chosen": -5.242746829986572, "logits/rejected": -5.211531162261963, "logps/chosen": -525.1812744140625, "logps/rejected": -423.4251403808594, "loss": 0.6925, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.003160216612741351, "rewards/margins": -0.0022907473612576723, "rewards/rejected": -0.0008694697171449661, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.7211703958691908e-08, "logits/chosen": -5.131052017211914, "logits/rejected": -4.265780925750732, "logps/chosen": -661.372314453125, "logps/rejected": -430.10528564453125, "loss": 0.6967, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0079353591427207, "rewards/margins": -0.00991396140307188, "rewards/rejected": 0.0019786027260124683, "step": 40 }, { "epoch": 0.01, "learning_rate": 2.1514629948364887e-08, "logits/chosen": -5.017221927642822, "logits/rejected": -5.080911159515381, "logps/chosen": -700.57470703125, "logps/rejected": -517.5032348632812, "loss": 0.6928, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.009642349556088448, "rewards/margins": 0.0066725024953484535, "rewards/rejected": 0.002969847060739994, "step": 50 }, { "epoch": 0.02, "learning_rate": 2.5817555938037863e-08, "logits/chosen": -4.963813304901123, "logits/rejected": -5.011960983276367, "logps/chosen": -555.6843872070312, "logps/rejected": -501.48193359375, "loss": 0.693, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.005733240395784378, "rewards/margins": -0.00958978570997715, "rewards/rejected": 0.0038565441500395536, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.012048192771084e-08, "logits/chosen": -5.372871398925781, "logits/rejected": -5.036227226257324, "logps/chosen": -683.734619140625, "logps/rejected": -468.33074951171875, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.007056492380797863, "rewards/margins": 0.008552687242627144, "rewards/rejected": -0.0014961964916437864, "step": 70 }, { "epoch": 0.02, "learning_rate": 3.4423407917383815e-08, "logits/chosen": -4.818124294281006, "logits/rejected": -4.8408002853393555, "logps/chosen": -626.4202880859375, "logps/rejected": -468.92657470703125, "loss": 0.6986, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": 0.0017438624054193497, "rewards/margins": -0.003411166835576296, "rewards/rejected": 0.0051550292409956455, "step": 80 }, { "epoch": 0.02, "learning_rate": 3.8726333907056795e-08, "logits/chosen": -5.138584136962891, "logits/rejected": -5.125112533569336, "logps/chosen": -515.315185546875, "logps/rejected": -433.69207763671875, "loss": 0.693, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.00419414509087801, "rewards/margins": -0.0020610261708498, "rewards/rejected": -0.002133118687197566, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.3029259896729774e-08, "logits/chosen": -5.1805315017700195, "logits/rejected": -4.352316379547119, "logps/chosen": -593.7911987304688, "logps/rejected": -424.0450744628906, "loss": 0.6917, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.0024153776466846466, "rewards/margins": -0.003693290753290057, "rewards/rejected": 0.0012779124081134796, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.7332185886402753e-08, "logits/chosen": -4.997444152832031, "logits/rejected": -4.79990291595459, "logps/chosen": -564.8049926757812, "logps/rejected": -494.8258361816406, "loss": 0.6939, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.004837861750274897, "rewards/margins": 0.0014233009424060583, "rewards/rejected": -0.006261162459850311, "step": 110 }, { "epoch": 0.03, "learning_rate": 5.1635111876075726e-08, "logits/chosen": -5.0655717849731445, "logits/rejected": -4.671490669250488, "logps/chosen": -580.5496826171875, "logps/rejected": -467.583984375, "loss": 0.6929, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.007152697537094355, "rewards/margins": -0.0011967445025220513, "rewards/rejected": -0.005955953150987625, "step": 120 }, { "epoch": 0.03, "learning_rate": 5.593803786574871e-08, "logits/chosen": -5.139309406280518, "logits/rejected": -4.639881134033203, "logps/chosen": -599.8736572265625, "logps/rejected": -448.46820068359375, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": 0.0030009676702320576, "rewards/margins": 0.00471294391900301, "rewards/rejected": -0.0017119761323556304, "step": 130 }, { "epoch": 0.04, "learning_rate": 6.024096385542168e-08, "logits/chosen": -5.270911693572998, "logits/rejected": -4.335195064544678, "logps/chosen": -568.5928955078125, "logps/rejected": -407.9732666015625, "loss": 0.6891, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0030436916276812553, "rewards/margins": 0.007372210267931223, "rewards/rejected": -0.010415898635983467, "step": 140 }, { "epoch": 0.04, "learning_rate": 6.454388984509466e-08, "logits/chosen": -5.026791572570801, "logits/rejected": -4.740239143371582, "logps/chosen": -584.68408203125, "logps/rejected": -476.5711975097656, "loss": 0.6935, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.005011760629713535, "rewards/margins": -0.000551621604245156, "rewards/rejected": 0.005563382990658283, "step": 150 }, { "epoch": 0.04, "learning_rate": 6.884681583476763e-08, "logits/chosen": -5.271963119506836, "logits/rejected": -4.849347114562988, "logps/chosen": -647.335205078125, "logps/rejected": -521.4801025390625, "loss": 0.6905, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.009549209848046303, "rewards/margins": 0.011885453946888447, "rewards/rejected": -0.0023362443316727877, "step": 160 }, { "epoch": 0.04, "learning_rate": 7.314974182444061e-08, "logits/chosen": -4.946420192718506, "logits/rejected": -5.07125997543335, "logps/chosen": -649.4849853515625, "logps/rejected": -453.10638427734375, "loss": 0.6884, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00495701190084219, "rewards/margins": 0.009024125523865223, "rewards/rejected": -0.0040671140886843204, "step": 170 }, { "epoch": 0.05, "learning_rate": 7.745266781411359e-08, "logits/chosen": -5.0091233253479, "logits/rejected": -4.591073989868164, "logps/chosen": -633.8282470703125, "logps/rejected": -495.9832458496094, "loss": 0.6942, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.002488089492544532, "rewards/margins": -0.0028180344961583614, "rewards/rejected": 0.00032994779758155346, "step": 180 }, { "epoch": 0.05, "learning_rate": 8.175559380378658e-08, "logits/chosen": -5.426260948181152, "logits/rejected": -4.911390781402588, "logps/chosen": -546.738525390625, "logps/rejected": -343.8888244628906, "loss": 0.6907, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.008861453272402287, "rewards/margins": 0.0008963894215412438, "rewards/rejected": -0.009757841005921364, "step": 190 }, { "epoch": 0.05, "learning_rate": 8.605851979345955e-08, "logits/chosen": -4.779931545257568, "logits/rejected": -4.913551330566406, "logps/chosen": -481.3622131347656, "logps/rejected": -475.091064453125, "loss": 0.6955, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.012473881244659424, "rewards/margins": -0.014269264414906502, "rewards/rejected": 0.001795382471755147, "step": 200 }, { "epoch": 0.05, "learning_rate": 9.036144578313253e-08, "logits/chosen": -5.050047874450684, "logits/rejected": -4.261329174041748, "logps/chosen": -637.3734130859375, "logps/rejected": -505.302734375, "loss": 0.6919, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00872829370200634, "rewards/margins": 0.013085213489830494, "rewards/rejected": -0.004356918856501579, "step": 210 }, { "epoch": 0.06, "learning_rate": 9.466437177280551e-08, "logits/chosen": -5.3743133544921875, "logits/rejected": -5.062119007110596, "logps/chosen": -581.7989501953125, "logps/rejected": -458.2098693847656, "loss": 0.6863, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.009550997987389565, "rewards/margins": 0.021547481417655945, "rewards/rejected": -0.01199648343026638, "step": 220 }, { "epoch": 0.06, "learning_rate": 9.896729776247847e-08, "logits/chosen": -5.207685947418213, "logits/rejected": -4.478488445281982, "logps/chosen": -637.5294189453125, "logps/rejected": -453.7583923339844, "loss": 0.6887, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.001260512275621295, "rewards/margins": 0.010398800484836102, "rewards/rejected": -0.011659312061965466, "step": 230 }, { "epoch": 0.06, "learning_rate": 1.0327022375215145e-07, "logits/chosen": -5.5133514404296875, "logits/rejected": -4.980105400085449, "logps/chosen": -543.03076171875, "logps/rejected": -449.3726501464844, "loss": 0.693, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.0019137380877509713, "rewards/margins": 0.0013157512294128537, "rewards/rejected": 0.0005979869747534394, "step": 240 }, { "epoch": 0.06, "learning_rate": 1.0757314974182443e-07, "logits/chosen": -5.122113227844238, "logits/rejected": -5.055177211761475, "logps/chosen": -639.8763427734375, "logps/rejected": -499.72674560546875, "loss": 0.6918, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.005455931182950735, "rewards/margins": -0.007514602039009333, "rewards/rejected": 0.0020586701575666666, "step": 250 }, { "epoch": 0.07, "learning_rate": 1.1187607573149742e-07, "logits/chosen": -4.911314487457275, "logits/rejected": -5.1233673095703125, "logps/chosen": -572.2269287109375, "logps/rejected": -467.08544921875, "loss": 0.6853, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0021181937772780657, "rewards/margins": 0.013793589547276497, "rewards/rejected": -0.01167539693415165, "step": 260 }, { "epoch": 0.07, "learning_rate": 1.1617900172117039e-07, "logits/chosen": -5.0716118812561035, "logits/rejected": -4.77467679977417, "logps/chosen": -679.3989868164062, "logps/rejected": -484.687255859375, "loss": 0.6851, "rewards/accuracies": 0.5625, "rewards/chosen": -0.00423796009272337, "rewards/margins": 0.0044207083992660046, "rewards/rejected": -0.008658669888973236, "step": 270 }, { "epoch": 0.07, "learning_rate": 1.2048192771084337e-07, "logits/chosen": -5.279174327850342, "logits/rejected": -5.328460693359375, "logps/chosen": -659.2606811523438, "logps/rejected": -561.7323608398438, "loss": 0.6866, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.009951446205377579, "rewards/margins": 0.024267423897981644, "rewards/rejected": -0.014315979555249214, "step": 280 }, { "epoch": 0.07, "learning_rate": 1.2478485370051635e-07, "logits/chosen": -5.280118942260742, "logits/rejected": -4.600610733032227, "logps/chosen": -553.9241333007812, "logps/rejected": -406.5719909667969, "loss": 0.6849, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0010459672193974257, "rewards/margins": 0.014745978638529778, "rewards/rejected": -0.013700013048946857, "step": 290 }, { "epoch": 0.08, "learning_rate": 1.2908777969018933e-07, "logits/chosen": -5.575888156890869, "logits/rejected": -5.102967262268066, "logps/chosen": -614.0731201171875, "logps/rejected": -444.968017578125, "loss": 0.6826, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.00569062540307641, "rewards/margins": 0.017860982567071915, "rewards/rejected": -0.012170357629656792, "step": 300 }, { "epoch": 0.08, "learning_rate": 1.333907056798623e-07, "logits/chosen": -5.246956825256348, "logits/rejected": -4.5507917404174805, "logps/chosen": -693.5358276367188, "logps/rejected": -474.76904296875, "loss": 0.6802, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.020677082240581512, "rewards/margins": 0.03504311665892601, "rewards/rejected": -0.014366035349667072, "step": 310 }, { "epoch": 0.08, "learning_rate": 1.3769363166953526e-07, "logits/chosen": -4.9823760986328125, "logits/rejected": -4.622619152069092, "logps/chosen": -617.7606201171875, "logps/rejected": -437.1983337402344, "loss": 0.6776, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.011038780212402344, "rewards/margins": 0.04015212133526802, "rewards/rejected": -0.02911333367228508, "step": 320 }, { "epoch": 0.09, "learning_rate": 1.4199655765920827e-07, "logits/chosen": -5.331151485443115, "logits/rejected": -4.492539882659912, "logps/chosen": -613.4076538085938, "logps/rejected": -419.6726989746094, "loss": 0.675, "rewards/accuracies": 0.625, "rewards/chosen": 0.01295288186520338, "rewards/margins": 0.044692836701869965, "rewards/rejected": -0.03173995390534401, "step": 330 }, { "epoch": 0.09, "learning_rate": 1.4629948364888122e-07, "logits/chosen": -5.355683326721191, "logits/rejected": -4.948823928833008, "logps/chosen": -601.9366455078125, "logps/rejected": -457.1575622558594, "loss": 0.6777, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.011964460834860802, "rewards/margins": 0.0282148364931345, "rewards/rejected": -0.016250377520918846, "step": 340 }, { "epoch": 0.09, "learning_rate": 1.5060240963855423e-07, "logits/chosen": -5.347403049468994, "logits/rejected": -4.604220390319824, "logps/chosen": -474.96728515625, "logps/rejected": -383.0309143066406, "loss": 0.6761, "rewards/accuracies": 0.5625, "rewards/chosen": -0.008309592492878437, "rewards/margins": 0.022612642496824265, "rewards/rejected": -0.030922239646315575, "step": 350 }, { "epoch": 0.09, "learning_rate": 1.5490533562822718e-07, "logits/chosen": -5.091513633728027, "logits/rejected": -4.543365001678467, "logps/chosen": -623.7210693359375, "logps/rejected": -465.08453369140625, "loss": 0.6767, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.016020171344280243, "rewards/margins": 0.04768504947423935, "rewards/rejected": -0.031664878129959106, "step": 360 }, { "epoch": 0.1, "learning_rate": 1.5920826161790016e-07, "logits/chosen": -5.261569023132324, "logits/rejected": -4.886566162109375, "logps/chosen": -579.731201171875, "logps/rejected": -466.47998046875, "loss": 0.6807, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.002947400091215968, "rewards/margins": 0.026230674237012863, "rewards/rejected": -0.023283271118998528, "step": 370 }, { "epoch": 0.1, "learning_rate": 1.6351118760757316e-07, "logits/chosen": -5.399057388305664, "logits/rejected": -5.102800369262695, "logps/chosen": -624.6961059570312, "logps/rejected": -495.3890075683594, "loss": 0.67, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.01617654599249363, "rewards/margins": 0.04926614835858345, "rewards/rejected": -0.03308960422873497, "step": 380 }, { "epoch": 0.1, "learning_rate": 1.6781411359724612e-07, "logits/chosen": -5.009361267089844, "logits/rejected": -4.941693305969238, "logps/chosen": -595.1302490234375, "logps/rejected": -417.4217224121094, "loss": 0.6699, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.020007852464914322, "rewards/margins": 0.050657860934734344, "rewards/rejected": -0.030650001019239426, "step": 390 }, { "epoch": 0.1, "learning_rate": 1.721170395869191e-07, "logits/chosen": -5.084206581115723, "logits/rejected": -4.509620189666748, "logps/chosen": -602.6290283203125, "logps/rejected": -386.221435546875, "loss": 0.6662, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.013614082708954811, "rewards/margins": 0.07596451044082642, "rewards/rejected": -0.06235043331980705, "step": 400 }, { "epoch": 0.11, "learning_rate": 1.7641996557659208e-07, "logits/chosen": -4.932361602783203, "logits/rejected": -4.9008402824401855, "logps/chosen": -530.425537109375, "logps/rejected": -484.12506103515625, "loss": 0.6732, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.02846016362309456, "rewards/margins": 0.027621466666460037, "rewards/rejected": -0.056081630289554596, "step": 410 }, { "epoch": 0.11, "learning_rate": 1.8072289156626505e-07, "logits/chosen": -4.652538299560547, "logits/rejected": -4.855567932128906, "logps/chosen": -501.2976989746094, "logps/rejected": -446.33892822265625, "loss": 0.6744, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.01377080101519823, "rewards/margins": 0.034459188580513, "rewards/rejected": -0.04822998493909836, "step": 420 }, { "epoch": 0.11, "learning_rate": 1.85025817555938e-07, "logits/chosen": -4.975879669189453, "logits/rejected": -4.7896833419799805, "logps/chosen": -516.0547485351562, "logps/rejected": -411.4002380371094, "loss": 0.6593, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.01589583232998848, "rewards/margins": 0.06809919327497482, "rewards/rejected": -0.0839950293302536, "step": 430 }, { "epoch": 0.11, "learning_rate": 1.8932874354561101e-07, "logits/chosen": -5.023136138916016, "logits/rejected": -4.507846832275391, "logps/chosen": -614.4235229492188, "logps/rejected": -451.65130615234375, "loss": 0.6582, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.023505011573433876, "rewards/margins": 0.052045732736587524, "rewards/rejected": -0.07555074244737625, "step": 440 }, { "epoch": 0.12, "learning_rate": 1.93631669535284e-07, "logits/chosen": -4.85958194732666, "logits/rejected": -4.196021556854248, "logps/chosen": -684.1866455078125, "logps/rejected": -565.8687744140625, "loss": 0.6647, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.01664821431040764, "rewards/margins": 0.06094440817832947, "rewards/rejected": -0.044296182692050934, "step": 450 }, { "epoch": 0.12, "learning_rate": 1.9793459552495695e-07, "logits/chosen": -5.081407070159912, "logits/rejected": -4.934262275695801, "logps/chosen": -499.976806640625, "logps/rejected": -417.374267578125, "loss": 0.6655, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.005431560333818197, "rewards/margins": 0.07330936193466187, "rewards/rejected": -0.0787409245967865, "step": 460 }, { "epoch": 0.12, "learning_rate": 2.0223752151462995e-07, "logits/chosen": -5.068818092346191, "logits/rejected": -4.470884799957275, "logps/chosen": -651.0631713867188, "logps/rejected": -452.13397216796875, "loss": 0.6459, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.023576775565743446, "rewards/margins": 0.12001647055149078, "rewards/rejected": -0.09643969684839249, "step": 470 }, { "epoch": 0.12, "learning_rate": 2.065404475043029e-07, "logits/chosen": -5.4202375411987305, "logits/rejected": -4.706531524658203, "logps/chosen": -633.048583984375, "logps/rejected": -444.3077697753906, "loss": 0.6693, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.017587199807167053, "rewards/margins": 0.06189751625061035, "rewards/rejected": -0.0794847160577774, "step": 480 }, { "epoch": 0.13, "learning_rate": 2.108433734939759e-07, "logits/chosen": -5.162226676940918, "logits/rejected": -4.947068214416504, "logps/chosen": -540.6724243164062, "logps/rejected": -472.81219482421875, "loss": 0.6524, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.009432580322027206, "rewards/margins": 0.0895482748746872, "rewards/rejected": -0.0989808589220047, "step": 490 }, { "epoch": 0.13, "learning_rate": 2.1514629948364886e-07, "logits/chosen": -5.013847827911377, "logits/rejected": -4.832370281219482, "logps/chosen": -609.6827392578125, "logps/rejected": -463.8045349121094, "loss": 0.6664, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.01732155866920948, "rewards/margins": 0.07921649515628815, "rewards/rejected": -0.09653805941343307, "step": 500 }, { "epoch": 0.13, "learning_rate": 2.1944922547332184e-07, "logits/chosen": -5.125462055206299, "logits/rejected": -4.30678653717041, "logps/chosen": -640.7669677734375, "logps/rejected": -462.31964111328125, "loss": 0.6481, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.020301353186368942, "rewards/margins": 0.15210309624671936, "rewards/rejected": -0.1318017542362213, "step": 510 }, { "epoch": 0.13, "learning_rate": 2.2375215146299485e-07, "logits/chosen": -5.18137264251709, "logits/rejected": -4.867682933807373, "logps/chosen": -635.4566650390625, "logps/rejected": -463.09619140625, "loss": 0.6411, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.02704542875289917, "rewards/margins": 0.11829884350299835, "rewards/rejected": -0.09125340729951859, "step": 520 }, { "epoch": 0.14, "learning_rate": 2.280550774526678e-07, "logits/chosen": -4.986373424530029, "logits/rejected": -5.041856288909912, "logps/chosen": -556.9791259765625, "logps/rejected": -443.550537109375, "loss": 0.6512, "rewards/accuracies": 0.5625, "rewards/chosen": -0.012650929391384125, "rewards/margins": 0.09012730419635773, "rewards/rejected": -0.10277823358774185, "step": 530 }, { "epoch": 0.14, "learning_rate": 2.3235800344234078e-07, "logits/chosen": -5.180631160736084, "logits/rejected": -4.831335544586182, "logps/chosen": -674.0233154296875, "logps/rejected": -497.614990234375, "loss": 0.6322, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.0431937500834465, "rewards/margins": 0.16998907923698425, "rewards/rejected": -0.12679533660411835, "step": 540 }, { "epoch": 0.14, "learning_rate": 2.3666092943201376e-07, "logits/chosen": -5.200675010681152, "logits/rejected": -5.022038459777832, "logps/chosen": -533.6292114257812, "logps/rejected": -420.7462463378906, "loss": 0.6482, "rewards/accuracies": 0.625, "rewards/chosen": -0.03946402668952942, "rewards/margins": 0.12643857300281525, "rewards/rejected": -0.16590259969234467, "step": 550 }, { "epoch": 0.14, "learning_rate": 2.4096385542168674e-07, "logits/chosen": -5.245135307312012, "logits/rejected": -4.6082282066345215, "logps/chosen": -707.181640625, "logps/rejected": -419.15216064453125, "loss": 0.6387, "rewards/accuracies": 0.6875, "rewards/chosen": 0.033398307859897614, "rewards/margins": 0.1732049286365509, "rewards/rejected": -0.13980664312839508, "step": 560 }, { "epoch": 0.15, "learning_rate": 2.452667814113597e-07, "logits/chosen": -5.241088390350342, "logits/rejected": -4.676846504211426, "logps/chosen": -540.9588623046875, "logps/rejected": -410.1190490722656, "loss": 0.6431, "rewards/accuracies": 0.5625, "rewards/chosen": -0.02844971977174282, "rewards/margins": 0.11144000291824341, "rewards/rejected": -0.13988973200321198, "step": 570 }, { "epoch": 0.15, "learning_rate": 2.495697074010327e-07, "logits/chosen": -5.343123435974121, "logits/rejected": -4.407637596130371, "logps/chosen": -621.5379638671875, "logps/rejected": -492.9173889160156, "loss": 0.6191, "rewards/accuracies": 0.6875, "rewards/chosen": 0.030505608767271042, "rewards/margins": 0.2197607010602951, "rewards/rejected": -0.18925510346889496, "step": 580 }, { "epoch": 0.15, "learning_rate": 2.538726333907057e-07, "logits/chosen": -4.829524040222168, "logits/rejected": -4.568309783935547, "logps/chosen": -636.0474243164062, "logps/rejected": -511.0184631347656, "loss": 0.637, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.01987128145992756, "rewards/margins": 0.17215164005756378, "rewards/rejected": -0.15228036046028137, "step": 590 }, { "epoch": 0.15, "learning_rate": 2.5817555938037866e-07, "logits/chosen": -5.312628269195557, "logits/rejected": -4.903040885925293, "logps/chosen": -570.6448364257812, "logps/rejected": -457.3438415527344, "loss": 0.654, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08383025974035263, "rewards/margins": 0.0945521667599678, "rewards/rejected": -0.17838242650032043, "step": 600 }, { "epoch": 0.16, "learning_rate": 2.624784853700516e-07, "logits/chosen": -5.0792460441589355, "logits/rejected": -4.825201988220215, "logps/chosen": -646.8591918945312, "logps/rejected": -503.1688537597656, "loss": 0.65, "rewards/accuracies": 0.5625, "rewards/chosen": -0.029812108725309372, "rewards/margins": 0.13648934662342072, "rewards/rejected": -0.16630145907402039, "step": 610 }, { "epoch": 0.16, "learning_rate": 2.667814113597246e-07, "logits/chosen": -4.850094795227051, "logits/rejected": -4.8681488037109375, "logps/chosen": -595.0391845703125, "logps/rejected": -407.6862487792969, "loss": 0.6141, "rewards/accuracies": 0.75, "rewards/chosen": 0.021272307261824608, "rewards/margins": 0.18665993213653564, "rewards/rejected": -0.1653876155614853, "step": 620 }, { "epoch": 0.16, "learning_rate": 2.7108433734939757e-07, "logits/chosen": -5.162228107452393, "logits/rejected": -4.516819477081299, "logps/chosen": -606.5585327148438, "logps/rejected": -421.19671630859375, "loss": 0.6021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0406644307076931, "rewards/margins": 0.2607409954071045, "rewards/rejected": -0.3014054596424103, "step": 630 }, { "epoch": 0.17, "learning_rate": 2.753872633390705e-07, "logits/chosen": -4.838536739349365, "logits/rejected": -4.416684627532959, "logps/chosen": -580.0470581054688, "logps/rejected": -443.6226501464844, "loss": 0.6172, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07994841784238815, "rewards/margins": 0.18127188086509705, "rewards/rejected": -0.2612202763557434, "step": 640 }, { "epoch": 0.17, "learning_rate": 2.7969018932874353e-07, "logits/chosen": -5.0308518409729, "logits/rejected": -4.490294933319092, "logps/chosen": -564.5159912109375, "logps/rejected": -440.820556640625, "loss": 0.6122, "rewards/accuracies": 0.6875, "rewards/chosen": -0.035964034497737885, "rewards/margins": 0.2013992965221405, "rewards/rejected": -0.23736336827278137, "step": 650 }, { "epoch": 0.17, "learning_rate": 2.8399311531841653e-07, "logits/chosen": -5.254231929779053, "logits/rejected": -4.740448951721191, "logps/chosen": -571.9439697265625, "logps/rejected": -442.2288513183594, "loss": 0.6172, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.025833938270807266, "rewards/margins": 0.2455829679965973, "rewards/rejected": -0.21974897384643555, "step": 660 }, { "epoch": 0.17, "learning_rate": 2.882960413080895e-07, "logits/chosen": -4.6655120849609375, "logits/rejected": -4.829866886138916, "logps/chosen": -570.6885986328125, "logps/rejected": -474.15008544921875, "loss": 0.5971, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.038882963359355927, "rewards/margins": 0.2375839650630951, "rewards/rejected": -0.2764669358730316, "step": 670 }, { "epoch": 0.18, "learning_rate": 2.9259896729776244e-07, "logits/chosen": -5.491471290588379, "logits/rejected": -4.807618618011475, "logps/chosen": -647.5139770507812, "logps/rejected": -512.3443603515625, "loss": 0.6158, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.009458506479859352, "rewards/margins": 0.23128172755241394, "rewards/rejected": -0.24074025452136993, "step": 680 }, { "epoch": 0.18, "learning_rate": 2.9690189328743545e-07, "logits/chosen": -4.826086044311523, "logits/rejected": -4.812520980834961, "logps/chosen": -473.51141357421875, "logps/rejected": -461.0011291503906, "loss": 0.6209, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07645494490861893, "rewards/margins": 0.24685971438884735, "rewards/rejected": -0.3233146667480469, "step": 690 }, { "epoch": 0.18, "learning_rate": 3.0120481927710845e-07, "logits/chosen": -4.973363876342773, "logits/rejected": -4.975986003875732, "logps/chosen": -562.2603759765625, "logps/rejected": -456.67327880859375, "loss": 0.636, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09778499603271484, "rewards/margins": 0.16045483946800232, "rewards/rejected": -0.25823986530303955, "step": 700 }, { "epoch": 0.18, "learning_rate": 3.055077452667814e-07, "logits/chosen": -5.574841022491455, "logits/rejected": -5.209827423095703, "logps/chosen": -562.5537109375, "logps/rejected": -389.59869384765625, "loss": 0.6263, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07525424659252167, "rewards/margins": 0.3166554272174835, "rewards/rejected": -0.3919096887111664, "step": 710 }, { "epoch": 0.19, "learning_rate": 3.0981067125645436e-07, "logits/chosen": -4.69359827041626, "logits/rejected": -4.146351337432861, "logps/chosen": -605.7339477539062, "logps/rejected": -454.13397216796875, "loss": 0.5902, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03304797783493996, "rewards/margins": 0.32626834511756897, "rewards/rejected": -0.359316349029541, "step": 720 }, { "epoch": 0.19, "learning_rate": 3.1411359724612736e-07, "logits/chosen": -5.225559234619141, "logits/rejected": -4.429507255554199, "logps/chosen": -627.7376708984375, "logps/rejected": -419.525146484375, "loss": 0.6215, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05344909429550171, "rewards/margins": 0.31202349066734314, "rewards/rejected": -0.36547261476516724, "step": 730 }, { "epoch": 0.19, "learning_rate": 3.184165232358003e-07, "logits/chosen": -5.087578773498535, "logits/rejected": -4.8127851486206055, "logps/chosen": -590.3135375976562, "logps/rejected": -429.77557373046875, "loss": 0.6176, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07023955881595612, "rewards/margins": 0.21368876099586487, "rewards/rejected": -0.2839283049106598, "step": 740 }, { "epoch": 0.19, "learning_rate": 3.2271944922547327e-07, "logits/chosen": -4.641721725463867, "logits/rejected": -4.649771690368652, "logps/chosen": -591.5061645507812, "logps/rejected": -428.98297119140625, "loss": 0.6226, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.012526154518127441, "rewards/margins": 0.28393253684043884, "rewards/rejected": -0.2714063823223114, "step": 750 }, { "epoch": 0.2, "learning_rate": 3.2702237521514633e-07, "logits/chosen": -5.011282920837402, "logits/rejected": -5.0765380859375, "logps/chosen": -572.6627197265625, "logps/rejected": -453.070068359375, "loss": 0.618, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11206988990306854, "rewards/margins": 0.22098691761493683, "rewards/rejected": -0.33305680751800537, "step": 760 }, { "epoch": 0.2, "learning_rate": 3.313253012048193e-07, "logits/chosen": -5.301586151123047, "logits/rejected": -4.606153964996338, "logps/chosen": -672.5757446289062, "logps/rejected": -414.6512756347656, "loss": 0.625, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05622037500143051, "rewards/margins": 0.4378294050693512, "rewards/rejected": -0.49404972791671753, "step": 770 }, { "epoch": 0.2, "learning_rate": 3.3562822719449223e-07, "logits/chosen": -4.665999889373779, "logits/rejected": -4.793671607971191, "logps/chosen": -568.8703002929688, "logps/rejected": -446.510498046875, "loss": 0.6014, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11369004100561142, "rewards/margins": 0.25903743505477905, "rewards/rejected": -0.37272748351097107, "step": 780 }, { "epoch": 0.2, "learning_rate": 3.399311531841652e-07, "logits/chosen": -5.279812812805176, "logits/rejected": -4.487301349639893, "logps/chosen": -575.8890991210938, "logps/rejected": -417.63507080078125, "loss": 0.6147, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.021513447165489197, "rewards/margins": 0.3190038800239563, "rewards/rejected": -0.3405173420906067, "step": 790 }, { "epoch": 0.21, "learning_rate": 3.442340791738382e-07, "logits/chosen": -5.0749921798706055, "logits/rejected": -5.074210166931152, "logps/chosen": -621.884033203125, "logps/rejected": -516.0531005859375, "loss": 0.6316, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.07369761168956757, "rewards/margins": 0.25912362337112427, "rewards/rejected": -0.33282119035720825, "step": 800 }, { "epoch": 0.21, "learning_rate": 3.4853700516351115e-07, "logits/chosen": -4.786762237548828, "logits/rejected": -4.678177833557129, "logps/chosen": -658.0843505859375, "logps/rejected": -434.8324279785156, "loss": 0.5719, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.07689875364303589, "rewards/margins": 0.32996606826782227, "rewards/rejected": -0.40686479210853577, "step": 810 }, { "epoch": 0.21, "learning_rate": 3.5283993115318415e-07, "logits/chosen": -5.138358116149902, "logits/rejected": -4.517680644989014, "logps/chosen": -616.4362182617188, "logps/rejected": -427.05908203125, "loss": 0.5866, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10288667678833008, "rewards/margins": 0.3249581456184387, "rewards/rejected": -0.4278448224067688, "step": 820 }, { "epoch": 0.21, "learning_rate": 3.5714285714285716e-07, "logits/chosen": -4.970725059509277, "logits/rejected": -4.3715057373046875, "logps/chosen": -532.6653442382812, "logps/rejected": -393.58294677734375, "loss": 0.5884, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.14502236247062683, "rewards/margins": 0.2357177734375, "rewards/rejected": -0.38074007630348206, "step": 830 }, { "epoch": 0.22, "learning_rate": 3.614457831325301e-07, "logits/chosen": -4.913840293884277, "logits/rejected": -4.5594096183776855, "logps/chosen": -636.6441650390625, "logps/rejected": -482.0730895996094, "loss": 0.6103, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0632687360048294, "rewards/margins": 0.30106136202812195, "rewards/rejected": -0.36433011293411255, "step": 840 }, { "epoch": 0.22, "learning_rate": 3.6574870912220306e-07, "logits/chosen": -4.8611297607421875, "logits/rejected": -4.113354682922363, "logps/chosen": -553.498779296875, "logps/rejected": -414.3341369628906, "loss": 0.5832, "rewards/accuracies": 0.75, "rewards/chosen": -0.041005976498126984, "rewards/margins": 0.4031026363372803, "rewards/rejected": -0.4441085755825043, "step": 850 }, { "epoch": 0.22, "learning_rate": 3.70051635111876e-07, "logits/chosen": -4.950971603393555, "logits/rejected": -4.543227672576904, "logps/chosen": -549.2433471679688, "logps/rejected": -408.465087890625, "loss": 0.575, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09344466775655746, "rewards/margins": 0.3862845301628113, "rewards/rejected": -0.47972923517227173, "step": 860 }, { "epoch": 0.22, "learning_rate": 3.743545611015491e-07, "logits/chosen": -4.399587631225586, "logits/rejected": -4.545784950256348, "logps/chosen": -635.6307373046875, "logps/rejected": -534.0, "loss": 0.5893, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07762257754802704, "rewards/margins": 0.28970277309417725, "rewards/rejected": -0.3673253655433655, "step": 870 }, { "epoch": 0.23, "learning_rate": 3.7865748709122203e-07, "logits/chosen": -4.8872199058532715, "logits/rejected": -5.181689739227295, "logps/chosen": -575.9188842773438, "logps/rejected": -479.09600830078125, "loss": 0.5906, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03143126890063286, "rewards/margins": 0.4315270483493805, "rewards/rejected": -0.46295833587646484, "step": 880 }, { "epoch": 0.23, "learning_rate": 3.82960413080895e-07, "logits/chosen": -4.957703113555908, "logits/rejected": -4.7108378410339355, "logps/chosen": -560.40283203125, "logps/rejected": -471.11236572265625, "loss": 0.6106, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19514957070350647, "rewards/margins": 0.22792847454547882, "rewards/rejected": -0.4230780005455017, "step": 890 }, { "epoch": 0.23, "learning_rate": 3.87263339070568e-07, "logits/chosen": -5.072232246398926, "logits/rejected": -5.009909629821777, "logps/chosen": -622.2591552734375, "logps/rejected": -571.1608276367188, "loss": 0.6263, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.08355264365673065, "rewards/margins": 0.2895086407661438, "rewards/rejected": -0.37306129932403564, "step": 900 }, { "epoch": 0.23, "learning_rate": 3.9156626506024094e-07, "logits/chosen": -4.684813499450684, "logits/rejected": -4.5081610679626465, "logps/chosen": -545.4139404296875, "logps/rejected": -463.1116638183594, "loss": 0.6355, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.16347023844718933, "rewards/margins": 0.23229245841503143, "rewards/rejected": -0.39576268196105957, "step": 910 }, { "epoch": 0.24, "learning_rate": 3.958691910499139e-07, "logits/chosen": -4.592351913452148, "logits/rejected": -4.436278820037842, "logps/chosen": -622.434326171875, "logps/rejected": -484.0205078125, "loss": 0.6185, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11474698781967163, "rewards/margins": 0.32441645860671997, "rewards/rejected": -0.4391634464263916, "step": 920 }, { "epoch": 0.24, "learning_rate": 4.001721170395869e-07, "logits/chosen": -4.976946830749512, "logits/rejected": -4.344501495361328, "logps/chosen": -695.0062255859375, "logps/rejected": -434.45263671875, "loss": 0.5515, "rewards/accuracies": 0.6875, "rewards/chosen": 0.001315800822339952, "rewards/margins": 0.4282153248786926, "rewards/rejected": -0.4268995225429535, "step": 930 }, { "epoch": 0.24, "learning_rate": 4.044750430292599e-07, "logits/chosen": -4.906943321228027, "logits/rejected": -5.033685684204102, "logps/chosen": -572.1927490234375, "logps/rejected": -370.6805725097656, "loss": 0.6094, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.21344825625419617, "rewards/margins": 0.2808108925819397, "rewards/rejected": -0.4942591190338135, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.0877796901893286e-07, "logits/chosen": -4.9864373207092285, "logits/rejected": -4.49121618270874, "logps/chosen": -552.1973876953125, "logps/rejected": -453.93408203125, "loss": 0.5944, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1496225893497467, "rewards/margins": 0.3243640959262848, "rewards/rejected": -0.4739866852760315, "step": 950 }, { "epoch": 0.25, "learning_rate": 4.130808950086058e-07, "logits/chosen": -5.242043495178223, "logits/rejected": -5.088315010070801, "logps/chosen": -506.19378662109375, "logps/rejected": -428.0276794433594, "loss": 0.6154, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.23521193861961365, "rewards/margins": 0.23233680427074432, "rewards/rejected": -0.46754875779151917, "step": 960 }, { "epoch": 0.25, "learning_rate": 4.173838209982788e-07, "logits/chosen": -4.680005073547363, "logits/rejected": -4.595992565155029, "logps/chosen": -612.6997680664062, "logps/rejected": -466.02117919921875, "loss": 0.6023, "rewards/accuracies": 0.625, "rewards/chosen": -0.06499350816011429, "rewards/margins": 0.2926154136657715, "rewards/rejected": -0.35760894417762756, "step": 970 }, { "epoch": 0.25, "learning_rate": 4.216867469879518e-07, "logits/chosen": -4.720999240875244, "logits/rejected": -4.412631034851074, "logps/chosen": -640.3150634765625, "logps/rejected": -461.47430419921875, "loss": 0.6048, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.10517321527004242, "rewards/margins": 0.289271742105484, "rewards/rejected": -0.3944449722766876, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.259896729776248e-07, "logits/chosen": -4.786055564880371, "logits/rejected": -4.760524749755859, "logps/chosen": -603.1826171875, "logps/rejected": -434.2313537597656, "loss": 0.5971, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07815855741500854, "rewards/margins": 0.4959312975406647, "rewards/rejected": -0.5740898847579956, "step": 990 }, { "epoch": 0.26, "learning_rate": 4.3029259896729773e-07, "logits/chosen": -5.295985698699951, "logits/rejected": -4.818538665771484, "logps/chosen": -609.6526489257812, "logps/rejected": -530.8301391601562, "loss": 0.6549, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.18690188229084015, "rewards/margins": 0.09692970663309097, "rewards/rejected": -0.28383156657218933, "step": 1000 }, { "epoch": 0.26, "eval_logits/chosen": -5.0340986251831055, "eval_logits/rejected": -4.741002082824707, "eval_logps/chosen": -589.4749755859375, "eval_logps/rejected": -447.39031982421875, "eval_loss": 0.6036846041679382, "eval_rewards/accuracies": 0.6549999713897705, "eval_rewards/chosen": -0.1205490380525589, "eval_rewards/margins": 0.3644300401210785, "eval_rewards/rejected": -0.48497912287712097, "eval_runtime": 103.8827, "eval_samples_per_second": 19.252, "eval_steps_per_second": 1.203, "step": 1000 }, { "epoch": 0.26, "learning_rate": 4.3459552495697073e-07, "logits/chosen": -5.082704544067383, "logits/rejected": -4.902986526489258, "logps/chosen": -588.8089599609375, "logps/rejected": -428.0037536621094, "loss": 0.6221, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2024601697921753, "rewards/margins": 0.29414016008377075, "rewards/rejected": -0.49660032987594604, "step": 1010 }, { "epoch": 0.26, "learning_rate": 4.388984509466437e-07, "logits/chosen": -5.197500228881836, "logits/rejected": -4.793361186981201, "logps/chosen": -511.00262451171875, "logps/rejected": -448.6803283691406, "loss": 0.5853, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18908776342868805, "rewards/margins": 0.38477399945259094, "rewards/rejected": -0.5738617181777954, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.4320137693631664e-07, "logits/chosen": -4.766043663024902, "logits/rejected": -4.384085655212402, "logps/chosen": -585.555908203125, "logps/rejected": -467.23175048828125, "loss": 0.5957, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.2213415801525116, "rewards/margins": 0.21962162852287292, "rewards/rejected": -0.44096317887306213, "step": 1030 }, { "epoch": 0.27, "learning_rate": 4.475043029259897e-07, "logits/chosen": -4.47346830368042, "logits/rejected": -4.354211807250977, "logps/chosen": -555.2332763671875, "logps/rejected": -404.460693359375, "loss": 0.619, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.15883851051330566, "rewards/margins": 0.3275062143802643, "rewards/rejected": -0.48634472489356995, "step": 1040 }, { "epoch": 0.27, "learning_rate": 4.5180722891566265e-07, "logits/chosen": -5.188126564025879, "logits/rejected": -4.5927934646606445, "logps/chosen": -599.3787231445312, "logps/rejected": -386.72027587890625, "loss": 0.5727, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1270747184753418, "rewards/margins": 0.4735950529575348, "rewards/rejected": -0.6006697416305542, "step": 1050 }, { "epoch": 0.27, "learning_rate": 4.561101549053356e-07, "logits/chosen": -4.899298191070557, "logits/rejected": -4.807165145874023, "logps/chosen": -556.6817626953125, "logps/rejected": -470.33746337890625, "loss": 0.6341, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.19248482584953308, "rewards/margins": 0.22806823253631592, "rewards/rejected": -0.420553058385849, "step": 1060 }, { "epoch": 0.28, "learning_rate": 4.6041308089500856e-07, "logits/chosen": -5.198217868804932, "logits/rejected": -4.8544416427612305, "logps/chosen": -586.0979614257812, "logps/rejected": -457.1890563964844, "loss": 0.6556, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11423883587121964, "rewards/margins": 0.24916431307792664, "rewards/rejected": -0.3634031414985657, "step": 1070 }, { "epoch": 0.28, "learning_rate": 4.6471600688468156e-07, "logits/chosen": -5.037923336029053, "logits/rejected": -4.839613914489746, "logps/chosen": -554.1761474609375, "logps/rejected": -450.00714111328125, "loss": 0.6057, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1732582151889801, "rewards/margins": 0.2999359965324402, "rewards/rejected": -0.4731942117214203, "step": 1080 }, { "epoch": 0.28, "learning_rate": 4.6901893287435457e-07, "logits/chosen": -5.050728797912598, "logits/rejected": -4.86162805557251, "logps/chosen": -599.9119873046875, "logps/rejected": -531.1978149414062, "loss": 0.5896, "rewards/accuracies": 0.625, "rewards/chosen": 0.005447372794151306, "rewards/margins": 0.41118186712265015, "rewards/rejected": -0.4057345390319824, "step": 1090 }, { "epoch": 0.28, "learning_rate": 4.733218588640275e-07, "logits/chosen": -4.689888954162598, "logits/rejected": -4.732975959777832, "logps/chosen": -675.5143432617188, "logps/rejected": -486.61895751953125, "loss": 0.5748, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.034835100173950195, "rewards/margins": 0.46927565336227417, "rewards/rejected": -0.4344405233860016, "step": 1100 }, { "epoch": 0.29, "learning_rate": 4.776247848537005e-07, "logits/chosen": -4.877932071685791, "logits/rejected": -4.432870388031006, "logps/chosen": -635.0615234375, "logps/rejected": -507.43109130859375, "loss": 0.6242, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.06741355359554291, "rewards/margins": 0.2793946862220764, "rewards/rejected": -0.3468082845211029, "step": 1110 }, { "epoch": 0.29, "learning_rate": 4.819277108433735e-07, "logits/chosen": -5.052710056304932, "logits/rejected": -4.742690086364746, "logps/chosen": -613.1746215820312, "logps/rejected": -467.1844787597656, "loss": 0.5696, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.044348932802677155, "rewards/margins": 0.4402552545070648, "rewards/rejected": -0.39590635895729065, "step": 1120 }, { "epoch": 0.29, "learning_rate": 4.862306368330465e-07, "logits/chosen": -5.028913974761963, "logits/rejected": -4.801323890686035, "logps/chosen": -559.740966796875, "logps/rejected": -455.66876220703125, "loss": 0.604, "rewards/accuracies": 0.625, "rewards/chosen": -0.12539169192314148, "rewards/margins": 0.2681279182434082, "rewards/rejected": -0.3935196101665497, "step": 1130 }, { "epoch": 0.29, "learning_rate": 4.905335628227194e-07, "logits/chosen": -5.308072090148926, "logits/rejected": -4.882647514343262, "logps/chosen": -541.5811767578125, "logps/rejected": -456.3338928222656, "loss": 0.5921, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.14173582196235657, "rewards/margins": 0.36959391832351685, "rewards/rejected": -0.5113297700881958, "step": 1140 }, { "epoch": 0.3, "learning_rate": 4.948364888123924e-07, "logits/chosen": -4.7569990158081055, "logits/rejected": -4.340147495269775, "logps/chosen": -608.4178466796875, "logps/rejected": -410.1001892089844, "loss": 0.5889, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.08064558357000351, "rewards/margins": 0.5151132345199585, "rewards/rejected": -0.434467613697052, "step": 1150 }, { "epoch": 0.3, "learning_rate": 4.991394148020654e-07, "logits/chosen": -5.009119510650635, "logits/rejected": -4.440273761749268, "logps/chosen": -566.9505615234375, "logps/rejected": -414.0314025878906, "loss": 0.5797, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04594480246305466, "rewards/margins": 0.33115842938423157, "rewards/rejected": -0.3771032691001892, "step": 1160 }, { "epoch": 0.3, "learning_rate": 4.9961748111313e-07, "logits/chosen": -4.765492916107178, "logits/rejected": -4.444553852081299, "logps/chosen": -538.9769287109375, "logps/rejected": -497.11102294921875, "loss": 0.615, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.08430640399456024, "rewards/margins": 0.3298020660877228, "rewards/rejected": -0.4141085147857666, "step": 1170 }, { "epoch": 0.3, "learning_rate": 4.991393325045423e-07, "logits/chosen": -4.715311050415039, "logits/rejected": -5.062254905700684, "logps/chosen": -618.7896728515625, "logps/rejected": -528.5611572265625, "loss": 0.6008, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03303016349673271, "rewards/margins": 0.32118791341781616, "rewards/rejected": -0.28815776109695435, "step": 1180 }, { "epoch": 0.31, "learning_rate": 4.986611838959548e-07, "logits/chosen": -4.830372333526611, "logits/rejected": -4.5263776779174805, "logps/chosen": -577.4282836914062, "logps/rejected": -447.62432861328125, "loss": 0.6209, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01167289912700653, "rewards/margins": 0.4259801506996155, "rewards/rejected": -0.41430726647377014, "step": 1190 }, { "epoch": 0.31, "learning_rate": 4.981830352873673e-07, "logits/chosen": -4.7512030601501465, "logits/rejected": -4.839972019195557, "logps/chosen": -498.96295166015625, "logps/rejected": -422.81439208984375, "loss": 0.622, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12069588899612427, "rewards/margins": 0.2791539430618286, "rewards/rejected": -0.3998498320579529, "step": 1200 }, { "epoch": 0.31, "learning_rate": 4.977048866787798e-07, "logits/chosen": -4.871579647064209, "logits/rejected": -4.446122169494629, "logps/chosen": -721.882080078125, "logps/rejected": -512.2247314453125, "loss": 0.5767, "rewards/accuracies": 0.6875, "rewards/chosen": 0.18909777700901031, "rewards/margins": 0.45188307762145996, "rewards/rejected": -0.26278528571128845, "step": 1210 }, { "epoch": 0.32, "learning_rate": 4.972267380701922e-07, "logits/chosen": -4.585555076599121, "logits/rejected": -4.21342134475708, "logps/chosen": -597.9484252929688, "logps/rejected": -423.23638916015625, "loss": 0.6391, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0421050600707531, "rewards/margins": 0.3286398649215698, "rewards/rejected": -0.370744913816452, "step": 1220 }, { "epoch": 0.32, "learning_rate": 4.967485894616046e-07, "logits/chosen": -5.246591567993164, "logits/rejected": -4.655137538909912, "logps/chosen": -553.0552978515625, "logps/rejected": -469.4759826660156, "loss": 0.6103, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07931146770715714, "rewards/margins": 0.36592429876327515, "rewards/rejected": -0.286612868309021, "step": 1230 }, { "epoch": 0.32, "learning_rate": 4.962704408530171e-07, "logits/chosen": -5.055230140686035, "logits/rejected": -4.490811824798584, "logps/chosen": -556.0390625, "logps/rejected": -486.98419189453125, "loss": 0.6207, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.022427845746278763, "rewards/margins": 0.283773809671402, "rewards/rejected": -0.26134592294692993, "step": 1240 }, { "epoch": 0.32, "learning_rate": 4.957922922444295e-07, "logits/chosen": -5.387645244598389, "logits/rejected": -4.382160663604736, "logps/chosen": -589.1954956054688, "logps/rejected": -399.63287353515625, "loss": 0.5778, "rewards/accuracies": 0.6875, "rewards/chosen": 0.06765327602624893, "rewards/margins": 0.39525410532951355, "rewards/rejected": -0.32760077714920044, "step": 1250 }, { "epoch": 0.33, "learning_rate": 4.95314143635842e-07, "logits/chosen": -4.914328098297119, "logits/rejected": -4.508675575256348, "logps/chosen": -552.3641357421875, "logps/rejected": -402.50494384765625, "loss": 0.5843, "rewards/accuracies": 0.6875, "rewards/chosen": 0.10642577707767487, "rewards/margins": 0.4470345079898834, "rewards/rejected": -0.34060871601104736, "step": 1260 }, { "epoch": 0.33, "learning_rate": 4.948359950272544e-07, "logits/chosen": -4.932180881500244, "logits/rejected": -4.962239742279053, "logps/chosen": -521.2985229492188, "logps/rejected": -385.5546569824219, "loss": 0.5864, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.030282627791166306, "rewards/margins": 0.4476016163825989, "rewards/rejected": -0.4173189699649811, "step": 1270 }, { "epoch": 0.33, "learning_rate": 4.94357846418667e-07, "logits/chosen": -4.778327465057373, "logits/rejected": -4.716105937957764, "logps/chosen": -558.6226806640625, "logps/rejected": -384.1117858886719, "loss": 0.5674, "rewards/accuracies": 0.75, "rewards/chosen": 0.04330116882920265, "rewards/margins": 0.5348688960075378, "rewards/rejected": -0.49156779050827026, "step": 1280 }, { "epoch": 0.33, "learning_rate": 4.938796978100794e-07, "logits/chosen": -4.990681171417236, "logits/rejected": -4.8291425704956055, "logps/chosen": -568.8840942382812, "logps/rejected": -433.78076171875, "loss": 0.6167, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.06168697401881218, "rewards/margins": 0.35727956891059875, "rewards/rejected": -0.4189665913581848, "step": 1290 }, { "epoch": 0.34, "learning_rate": 4.934015492014918e-07, "logits/chosen": -5.141441345214844, "logits/rejected": -4.886346817016602, "logps/chosen": -483.84490966796875, "logps/rejected": -391.74029541015625, "loss": 0.58, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08020296692848206, "rewards/margins": 0.3480849862098694, "rewards/rejected": -0.42828798294067383, "step": 1300 }, { "epoch": 0.34, "learning_rate": 4.929234005929043e-07, "logits/chosen": -4.757034778594971, "logits/rejected": -4.431621551513672, "logps/chosen": -718.2557373046875, "logps/rejected": -508.9732971191406, "loss": 0.5673, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.13823214173316956, "rewards/margins": 0.4940420985221863, "rewards/rejected": -0.3558099567890167, "step": 1310 }, { "epoch": 0.34, "learning_rate": 4.924452519843167e-07, "logits/chosen": -5.435358047485352, "logits/rejected": -4.611025810241699, "logps/chosen": -628.8683471679688, "logps/rejected": -458.1544494628906, "loss": 0.5972, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.02066761627793312, "rewards/margins": 0.4707714915275574, "rewards/rejected": -0.45010384917259216, "step": 1320 }, { "epoch": 0.34, "learning_rate": 4.919671033757292e-07, "logits/chosen": -4.888759613037109, "logits/rejected": -4.751501560211182, "logps/chosen": -523.3485717773438, "logps/rejected": -479.13055419921875, "loss": 0.6379, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09673294425010681, "rewards/margins": 0.19956344366073608, "rewards/rejected": -0.2962964177131653, "step": 1330 }, { "epoch": 0.35, "learning_rate": 4.914889547671416e-07, "logits/chosen": -4.824790954589844, "logits/rejected": -5.042952060699463, "logps/chosen": -484.4859313964844, "logps/rejected": -514.9080200195312, "loss": 0.6032, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.020964890718460083, "rewards/margins": 0.30249106884002686, "rewards/rejected": -0.2815261781215668, "step": 1340 }, { "epoch": 0.35, "learning_rate": 4.91010806158554e-07, "logits/chosen": -5.325839996337891, "logits/rejected": -5.266266822814941, "logps/chosen": -576.068115234375, "logps/rejected": -414.4125061035156, "loss": 0.5514, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10221197456121445, "rewards/margins": 0.4299249053001404, "rewards/rejected": -0.5321369171142578, "step": 1350 }, { "epoch": 0.35, "learning_rate": 4.905326575499665e-07, "logits/chosen": -4.991552829742432, "logits/rejected": -4.639280796051025, "logps/chosen": -623.694580078125, "logps/rejected": -538.5532836914062, "loss": 0.6117, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.009182782843708992, "rewards/margins": 0.3545888364315033, "rewards/rejected": -0.36377161741256714, "step": 1360 }, { "epoch": 0.35, "learning_rate": 4.90054508941379e-07, "logits/chosen": -5.049736499786377, "logits/rejected": -5.0194244384765625, "logps/chosen": -712.1754150390625, "logps/rejected": -509.8062438964844, "loss": 0.5686, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.11003635823726654, "rewards/margins": 0.5030516386032104, "rewards/rejected": -0.3930152356624603, "step": 1370 }, { "epoch": 0.36, "learning_rate": 4.895763603327915e-07, "logits/chosen": -4.881339073181152, "logits/rejected": -4.5782790184021, "logps/chosen": -613.0987548828125, "logps/rejected": -448.47515869140625, "loss": 0.5931, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.011247800663113594, "rewards/margins": 0.3751145005226135, "rewards/rejected": -0.36386674642562866, "step": 1380 }, { "epoch": 0.36, "learning_rate": 4.890982117242038e-07, "logits/chosen": -5.164661884307861, "logits/rejected": -4.483150959014893, "logps/chosen": -645.5859375, "logps/rejected": -458.2242736816406, "loss": 0.5634, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.07793205231428146, "rewards/margins": 0.6242166757583618, "rewards/rejected": -0.546284556388855, "step": 1390 }, { "epoch": 0.36, "learning_rate": 4.886200631156163e-07, "logits/chosen": -5.292607307434082, "logits/rejected": -4.605780601501465, "logps/chosen": -643.26806640625, "logps/rejected": -498.0909118652344, "loss": 0.6274, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.06935273110866547, "rewards/margins": 0.4534047245979309, "rewards/rejected": -0.38405194878578186, "step": 1400 }, { "epoch": 0.36, "learning_rate": 4.881419145070288e-07, "logits/chosen": -5.07328462600708, "logits/rejected": -4.675717353820801, "logps/chosen": -493.72113037109375, "logps/rejected": -378.90643310546875, "loss": 0.5348, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.11141601949930191, "rewards/margins": 0.4773848056793213, "rewards/rejected": -0.5888007879257202, "step": 1410 }, { "epoch": 0.37, "learning_rate": 4.876637658984412e-07, "logits/chosen": -5.045197486877441, "logits/rejected": -4.835489749908447, "logps/chosen": -597.692626953125, "logps/rejected": -440.42755126953125, "loss": 0.5887, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05393511801958084, "rewards/margins": 0.407756507396698, "rewards/rejected": -0.46169161796569824, "step": 1420 }, { "epoch": 0.37, "learning_rate": 4.871856172898537e-07, "logits/chosen": -5.153180122375488, "logits/rejected": -4.6454267501831055, "logps/chosen": -501.3744201660156, "logps/rejected": -457.33843994140625, "loss": 0.5705, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.2535322606563568, "rewards/margins": 0.4022219181060791, "rewards/rejected": -0.6557540893554688, "step": 1430 }, { "epoch": 0.37, "learning_rate": 4.867074686812661e-07, "logits/chosen": -5.147744178771973, "logits/rejected": -4.829898357391357, "logps/chosen": -677.5922241210938, "logps/rejected": -535.0986938476562, "loss": 0.5677, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.0457867830991745, "rewards/margins": 0.5567488074302673, "rewards/rejected": -0.5109620094299316, "step": 1440 }, { "epoch": 0.37, "learning_rate": 4.862293200726786e-07, "logits/chosen": -4.852294445037842, "logits/rejected": -4.373203754425049, "logps/chosen": -601.892822265625, "logps/rejected": -494.73828125, "loss": 0.5945, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1383473426103592, "rewards/margins": 0.40635594725608826, "rewards/rejected": -0.5447033643722534, "step": 1450 }, { "epoch": 0.38, "learning_rate": 4.85751171464091e-07, "logits/chosen": -5.208996295928955, "logits/rejected": -4.987357139587402, "logps/chosen": -717.9088745117188, "logps/rejected": -576.4735107421875, "loss": 0.6022, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.032208070158958435, "rewards/margins": 0.5289193391799927, "rewards/rejected": -0.49671119451522827, "step": 1460 }, { "epoch": 0.38, "learning_rate": 4.852730228555035e-07, "logits/chosen": -4.818337440490723, "logits/rejected": -5.261691570281982, "logps/chosen": -681.8942260742188, "logps/rejected": -590.8587646484375, "loss": 0.6075, "rewards/accuracies": 0.6875, "rewards/chosen": 0.06512711942195892, "rewards/margins": 0.5188490748405457, "rewards/rejected": -0.45372194051742554, "step": 1470 }, { "epoch": 0.38, "learning_rate": 4.847948742469159e-07, "logits/chosen": -5.2047295570373535, "logits/rejected": -4.865410804748535, "logps/chosen": -535.0823974609375, "logps/rejected": -413.605712890625, "loss": 0.5782, "rewards/accuracies": 0.6875, "rewards/chosen": 0.033472996205091476, "rewards/margins": 0.5626699328422546, "rewards/rejected": -0.5291970372200012, "step": 1480 }, { "epoch": 0.38, "learning_rate": 4.843167256383283e-07, "logits/chosen": -4.93236780166626, "logits/rejected": -5.007393836975098, "logps/chosen": -532.4141845703125, "logps/rejected": -474.51580810546875, "loss": 0.5696, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.013444359414279461, "rewards/margins": 0.4651102125644684, "rewards/rejected": -0.45166587829589844, "step": 1490 }, { "epoch": 0.39, "learning_rate": 4.838385770297408e-07, "logits/chosen": -4.653264045715332, "logits/rejected": -4.575729846954346, "logps/chosen": -618.524169921875, "logps/rejected": -493.82818603515625, "loss": 0.5903, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.024376193061470985, "rewards/margins": 0.4582807123661041, "rewards/rejected": -0.4339044988155365, "step": 1500 }, { "epoch": 0.39, "learning_rate": 4.833604284211533e-07, "logits/chosen": -5.2753214836120605, "logits/rejected": -4.768768787384033, "logps/chosen": -640.6016845703125, "logps/rejected": -463.935546875, "loss": 0.6067, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.047711290419101715, "rewards/margins": 0.40629157423973083, "rewards/rejected": -0.45400285720825195, "step": 1510 }, { "epoch": 0.39, "learning_rate": 4.828822798125657e-07, "logits/chosen": -5.111083030700684, "logits/rejected": -4.9775261878967285, "logps/chosen": -672.8522338867188, "logps/rejected": -531.0037841796875, "loss": 0.5641, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.035914450883865356, "rewards/margins": 0.5853785276412964, "rewards/rejected": -0.5494641065597534, "step": 1520 }, { "epoch": 0.4, "learning_rate": 4.824041312039781e-07, "logits/chosen": -4.77971887588501, "logits/rejected": -4.605884552001953, "logps/chosen": -591.8624877929688, "logps/rejected": -454.6383361816406, "loss": 0.6218, "rewards/accuracies": 0.625, "rewards/chosen": -0.11807356774806976, "rewards/margins": 0.3544183075428009, "rewards/rejected": -0.4724918305873871, "step": 1530 }, { "epoch": 0.4, "learning_rate": 4.819259825953906e-07, "logits/chosen": -4.968782901763916, "logits/rejected": -4.560141086578369, "logps/chosen": -650.2317504882812, "logps/rejected": -520.6522216796875, "loss": 0.6115, "rewards/accuracies": 0.625, "rewards/chosen": 0.04171306639909744, "rewards/margins": 0.31967923045158386, "rewards/rejected": -0.277966171503067, "step": 1540 }, { "epoch": 0.4, "learning_rate": 4.814478339868031e-07, "logits/chosen": -4.788421630859375, "logits/rejected": -4.790238380432129, "logps/chosen": -747.4111328125, "logps/rejected": -542.8977661132812, "loss": 0.5478, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.04947546124458313, "rewards/margins": 0.6173168420791626, "rewards/rejected": -0.5678414702415466, "step": 1550 }, { "epoch": 0.4, "learning_rate": 4.809696853782155e-07, "logits/chosen": -4.941649436950684, "logits/rejected": -4.966439247131348, "logps/chosen": -533.7326049804688, "logps/rejected": -406.86181640625, "loss": 0.6796, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11540485918521881, "rewards/margins": 0.3179125189781189, "rewards/rejected": -0.4333174228668213, "step": 1560 }, { "epoch": 0.41, "learning_rate": 4.80491536769628e-07, "logits/chosen": -5.168390274047852, "logits/rejected": -4.495237827301025, "logps/chosen": -648.8936157226562, "logps/rejected": -461.6477966308594, "loss": 0.6304, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.06425861269235611, "rewards/margins": 0.4375528395175934, "rewards/rejected": -0.37329426407814026, "step": 1570 }, { "epoch": 0.41, "learning_rate": 4.800133881610405e-07, "logits/chosen": -5.522204399108887, "logits/rejected": -4.817874431610107, "logps/chosen": -693.7101440429688, "logps/rejected": -519.9754638671875, "loss": 0.5892, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.17888271808624268, "rewards/margins": 0.500820517539978, "rewards/rejected": -0.3219378590583801, "step": 1580 }, { "epoch": 0.41, "learning_rate": 4.795352395524529e-07, "logits/chosen": -5.114903450012207, "logits/rejected": -4.787779331207275, "logps/chosen": -518.8203125, "logps/rejected": -396.5616455078125, "loss": 0.5776, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.011896096169948578, "rewards/margins": 0.46847033500671387, "rewards/rejected": -0.4565742611885071, "step": 1590 }, { "epoch": 0.41, "learning_rate": 4.790570909438653e-07, "logits/chosen": -4.888575553894043, "logits/rejected": -4.566725254058838, "logps/chosen": -582.5068359375, "logps/rejected": -454.74200439453125, "loss": 0.558, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.037431735545396805, "rewards/margins": 0.3389297127723694, "rewards/rejected": -0.3014979362487793, "step": 1600 }, { "epoch": 0.42, "learning_rate": 4.785789423352778e-07, "logits/chosen": -4.886668682098389, "logits/rejected": -4.733044624328613, "logps/chosen": -602.0886840820312, "logps/rejected": -576.2024536132812, "loss": 0.6087, "rewards/accuracies": 0.625, "rewards/chosen": 0.04170609638094902, "rewards/margins": 0.3814540207386017, "rewards/rejected": -0.33974793553352356, "step": 1610 }, { "epoch": 0.42, "learning_rate": 4.781007937266903e-07, "logits/chosen": -5.160344123840332, "logits/rejected": -4.892527103424072, "logps/chosen": -576.2536010742188, "logps/rejected": -465.459228515625, "loss": 0.5742, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0582873709499836, "rewards/margins": 0.4916876256465912, "rewards/rejected": -0.43340030312538147, "step": 1620 }, { "epoch": 0.42, "learning_rate": 4.776226451181027e-07, "logits/chosen": -4.755358695983887, "logits/rejected": -4.80374002456665, "logps/chosen": -599.2727661132812, "logps/rejected": -518.6896362304688, "loss": 0.5897, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.02418546751141548, "rewards/margins": 0.41175928711891174, "rewards/rejected": -0.387573778629303, "step": 1630 }, { "epoch": 0.42, "learning_rate": 4.771444965095152e-07, "logits/chosen": -5.202331066131592, "logits/rejected": -4.229248046875, "logps/chosen": -623.6382446289062, "logps/rejected": -469.1851501464844, "loss": 0.5923, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.009784695692360401, "rewards/margins": 0.3049367070198059, "rewards/rejected": -0.31472140550613403, "step": 1640 }, { "epoch": 0.43, "learning_rate": 4.7666634790092757e-07, "logits/chosen": -4.667879581451416, "logits/rejected": -4.893617630004883, "logps/chosen": -537.7037353515625, "logps/rejected": -432.49285888671875, "loss": 0.5783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04092567041516304, "rewards/margins": 0.43990960717201233, "rewards/rejected": -0.3989838659763336, "step": 1650 }, { "epoch": 0.43, "learning_rate": 4.7618819929234004e-07, "logits/chosen": -5.438266754150391, "logits/rejected": -5.0233049392700195, "logps/chosen": -577.8635864257812, "logps/rejected": -448.3584899902344, "loss": 0.6148, "rewards/accuracies": 0.625, "rewards/chosen": 0.04020633175969124, "rewards/margins": 0.36865144968032837, "rewards/rejected": -0.3284451365470886, "step": 1660 }, { "epoch": 0.43, "learning_rate": 4.7571005068375246e-07, "logits/chosen": -5.224853038787842, "logits/rejected": -4.777334690093994, "logps/chosen": -526.3955078125, "logps/rejected": -411.90814208984375, "loss": 0.5954, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.012989042326807976, "rewards/margins": 0.34829622507095337, "rewards/rejected": -0.3612852096557617, "step": 1670 }, { "epoch": 0.43, "learning_rate": 4.7523190207516493e-07, "logits/chosen": -5.143417835235596, "logits/rejected": -5.1173906326293945, "logps/chosen": -549.1804809570312, "logps/rejected": -434.93341064453125, "loss": 0.5751, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.18350481986999512, "rewards/margins": 0.5477018356323242, "rewards/rejected": -0.3641970157623291, "step": 1680 }, { "epoch": 0.44, "learning_rate": 4.7475375346657735e-07, "logits/chosen": -4.970638751983643, "logits/rejected": -4.455883026123047, "logps/chosen": -583.8162231445312, "logps/rejected": -445.88427734375, "loss": 0.5722, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1354554146528244, "rewards/margins": 0.42278558015823364, "rewards/rejected": -0.28733018040657043, "step": 1690 }, { "epoch": 0.44, "learning_rate": 4.742756048579898e-07, "logits/chosen": -5.1649580001831055, "logits/rejected": -5.0130534172058105, "logps/chosen": -526.5348510742188, "logps/rejected": -451.66729736328125, "loss": 0.612, "rewards/accuracies": 0.6875, "rewards/chosen": -0.022843575105071068, "rewards/margins": 0.3903067708015442, "rewards/rejected": -0.4131503701210022, "step": 1700 }, { "epoch": 0.44, "learning_rate": 4.737974562494023e-07, "logits/chosen": -4.902640342712402, "logits/rejected": -4.7199506759643555, "logps/chosen": -611.7781982421875, "logps/rejected": -419.04736328125, "loss": 0.6166, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.08467775583267212, "rewards/margins": 0.42187872529029846, "rewards/rejected": -0.33720093965530396, "step": 1710 }, { "epoch": 0.44, "learning_rate": 4.733193076408147e-07, "logits/chosen": -5.4099297523498535, "logits/rejected": -5.182432174682617, "logps/chosen": -657.6848754882812, "logps/rejected": -512.6647338867188, "loss": 0.5949, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.028319314122200012, "rewards/margins": 0.4173654615879059, "rewards/rejected": -0.3890461325645447, "step": 1720 }, { "epoch": 0.45, "learning_rate": 4.7284115903222723e-07, "logits/chosen": -4.78986120223999, "logits/rejected": -4.7402262687683105, "logps/chosen": -653.3038330078125, "logps/rejected": -423.7054748535156, "loss": 0.5301, "rewards/accuracies": 0.75, "rewards/chosen": 0.1847321093082428, "rewards/margins": 0.620894730091095, "rewards/rejected": -0.43616271018981934, "step": 1730 }, { "epoch": 0.45, "learning_rate": 4.7236301042363965e-07, "logits/chosen": -5.122916221618652, "logits/rejected": -4.116336345672607, "logps/chosen": -563.029296875, "logps/rejected": -380.94915771484375, "loss": 0.6101, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02583288587629795, "rewards/margins": 0.4564831852912903, "rewards/rejected": -0.4823160767555237, "step": 1740 }, { "epoch": 0.45, "learning_rate": 4.718848618150521e-07, "logits/chosen": -5.015693664550781, "logits/rejected": -4.740150451660156, "logps/chosen": -672.177490234375, "logps/rejected": -519.8388061523438, "loss": 0.5646, "rewards/accuracies": 0.6875, "rewards/chosen": 0.13952165842056274, "rewards/margins": 0.4552972912788391, "rewards/rejected": -0.31577563285827637, "step": 1750 }, { "epoch": 0.45, "learning_rate": 4.7140671320646454e-07, "logits/chosen": -5.230753421783447, "logits/rejected": -5.070100784301758, "logps/chosen": -616.0086669921875, "logps/rejected": -467.143798828125, "loss": 0.5709, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.023017430678009987, "rewards/margins": 0.5468194484710693, "rewards/rejected": -0.5698368549346924, "step": 1760 }, { "epoch": 0.46, "learning_rate": 4.70928564597877e-07, "logits/chosen": -5.074960708618164, "logits/rejected": -4.861626148223877, "logps/chosen": -627.98095703125, "logps/rejected": -493.4373474121094, "loss": 0.5918, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.031408268958330154, "rewards/margins": 0.39496278762817383, "rewards/rejected": -0.3635544776916504, "step": 1770 }, { "epoch": 0.46, "learning_rate": 4.704504159892895e-07, "logits/chosen": -5.100302696228027, "logits/rejected": -5.032088279724121, "logps/chosen": -519.7884521484375, "logps/rejected": -483.19842529296875, "loss": 0.6443, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.04851038008928299, "rewards/margins": 0.23194298148155212, "rewards/rejected": -0.2804533839225769, "step": 1780 }, { "epoch": 0.46, "learning_rate": 4.699722673807019e-07, "logits/chosen": -5.186885833740234, "logits/rejected": -4.846810817718506, "logps/chosen": -588.336669921875, "logps/rejected": -460.0287170410156, "loss": 0.6102, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.07058558613061905, "rewards/margins": 0.4554968774318695, "rewards/rejected": -0.38491126894950867, "step": 1790 }, { "epoch": 0.46, "learning_rate": 4.694941187721144e-07, "logits/chosen": -5.332368850708008, "logits/rejected": -4.599387168884277, "logps/chosen": -584.2872314453125, "logps/rejected": -424.4054260253906, "loss": 0.5911, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.004210652317851782, "rewards/margins": 0.4804952144622803, "rewards/rejected": -0.4762845039367676, "step": 1800 }, { "epoch": 0.47, "learning_rate": 4.690159701635268e-07, "logits/chosen": -5.299590110778809, "logits/rejected": -4.6746110916137695, "logps/chosen": -567.9857177734375, "logps/rejected": -415.1913146972656, "loss": 0.5837, "rewards/accuracies": 0.6875, "rewards/chosen": 0.10574845969676971, "rewards/margins": 0.4555068910121918, "rewards/rejected": -0.34975841641426086, "step": 1810 }, { "epoch": 0.47, "learning_rate": 4.6853782155493927e-07, "logits/chosen": -5.19783878326416, "logits/rejected": -4.544588565826416, "logps/chosen": -663.893798828125, "logps/rejected": -516.3880004882812, "loss": 0.5401, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.10326318442821503, "rewards/margins": 0.7140646576881409, "rewards/rejected": -0.6108014583587646, "step": 1820 }, { "epoch": 0.47, "learning_rate": 4.680596729463517e-07, "logits/chosen": -5.295289516448975, "logits/rejected": -4.985527992248535, "logps/chosen": -652.5789794921875, "logps/rejected": -464.1044006347656, "loss": 0.5364, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07100570201873779, "rewards/margins": 0.5409476161003113, "rewards/rejected": -0.4699419140815735, "step": 1830 }, { "epoch": 0.48, "learning_rate": 4.6758152433776416e-07, "logits/chosen": -4.77842378616333, "logits/rejected": -4.93510627746582, "logps/chosen": -514.6129760742188, "logps/rejected": -440.177978515625, "loss": 0.5967, "rewards/accuracies": 0.625, "rewards/chosen": -0.12085689604282379, "rewards/margins": 0.3902212679386139, "rewards/rejected": -0.5110781788825989, "step": 1840 }, { "epoch": 0.48, "learning_rate": 4.6710337572917663e-07, "logits/chosen": -5.214171409606934, "logits/rejected": -4.956891059875488, "logps/chosen": -625.2315673828125, "logps/rejected": -472.40484619140625, "loss": 0.5317, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.19639793038368225, "rewards/margins": 0.7369641661643982, "rewards/rejected": -0.5405662655830383, "step": 1850 }, { "epoch": 0.48, "learning_rate": 4.6662522712058905e-07, "logits/chosen": -5.335050582885742, "logits/rejected": -5.153223514556885, "logps/chosen": -544.6256713867188, "logps/rejected": -404.7955627441406, "loss": 0.5423, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.010306475684046745, "rewards/margins": 0.5395177602767944, "rewards/rejected": -0.5498241782188416, "step": 1860 }, { "epoch": 0.48, "learning_rate": 4.661470785120015e-07, "logits/chosen": -5.226727485656738, "logits/rejected": -5.10861873626709, "logps/chosen": -517.5654296875, "logps/rejected": -434.6197204589844, "loss": 0.5589, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.005301390774548054, "rewards/margins": 0.4559910297393799, "rewards/rejected": -0.4506896138191223, "step": 1870 }, { "epoch": 0.49, "learning_rate": 4.6566892990341394e-07, "logits/chosen": -4.913724422454834, "logits/rejected": -4.459155082702637, "logps/chosen": -522.3629760742188, "logps/rejected": -373.4664611816406, "loss": 0.5962, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10803339630365372, "rewards/margins": 0.4539359509944916, "rewards/rejected": -0.5619693994522095, "step": 1880 }, { "epoch": 0.49, "learning_rate": 4.651907812948264e-07, "logits/chosen": -5.0315961837768555, "logits/rejected": -4.9058403968811035, "logps/chosen": -523.3820190429688, "logps/rejected": -423.2005920410156, "loss": 0.5879, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08888228237628937, "rewards/margins": 0.5093055963516235, "rewards/rejected": -0.5981878042221069, "step": 1890 }, { "epoch": 0.49, "learning_rate": 4.6471263268623883e-07, "logits/chosen": -5.0136213302612305, "logits/rejected": -5.4461140632629395, "logps/chosen": -566.8765869140625, "logps/rejected": -480.1028747558594, "loss": 0.5819, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.02095361426472664, "rewards/margins": 0.40233659744262695, "rewards/rejected": -0.4232902526855469, "step": 1900 }, { "epoch": 0.49, "learning_rate": 4.642344840776513e-07, "logits/chosen": -5.042818546295166, "logits/rejected": -4.753702640533447, "logps/chosen": -616.3328247070312, "logps/rejected": -434.3390197753906, "loss": 0.5832, "rewards/accuracies": 0.75, "rewards/chosen": 0.06760098785161972, "rewards/margins": 0.6479200720787048, "rewards/rejected": -0.5803190469741821, "step": 1910 }, { "epoch": 0.5, "learning_rate": 4.637563354690638e-07, "logits/chosen": -5.4794020652771, "logits/rejected": -4.66071891784668, "logps/chosen": -683.9405517578125, "logps/rejected": -507.86041259765625, "loss": 0.5506, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.10601425170898438, "rewards/margins": 0.7859491109848022, "rewards/rejected": -0.6799348592758179, "step": 1920 }, { "epoch": 0.5, "learning_rate": 4.632781868604762e-07, "logits/chosen": -4.775867462158203, "logits/rejected": -5.155367374420166, "logps/chosen": -580.7766723632812, "logps/rejected": -535.8526000976562, "loss": 0.6106, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.030436743050813675, "rewards/margins": 0.4172954559326172, "rewards/rejected": -0.44773221015930176, "step": 1930 }, { "epoch": 0.5, "learning_rate": 4.6280003825188867e-07, "logits/chosen": -4.869858741760254, "logits/rejected": -4.925829887390137, "logps/chosen": -548.210205078125, "logps/rejected": -410.78741455078125, "loss": 0.5537, "rewards/accuracies": 0.75, "rewards/chosen": 0.0692625641822815, "rewards/margins": 0.5936160087585449, "rewards/rejected": -0.524353563785553, "step": 1940 }, { "epoch": 0.5, "learning_rate": 4.623218896433011e-07, "logits/chosen": -5.081202507019043, "logits/rejected": -4.51247501373291, "logps/chosen": -572.0816650390625, "logps/rejected": -390.0577087402344, "loss": 0.5801, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.006283854134380817, "rewards/margins": 0.5745267271995544, "rewards/rejected": -0.580810546875, "step": 1950 }, { "epoch": 0.51, "learning_rate": 4.6184374103471356e-07, "logits/chosen": -5.26422643661499, "logits/rejected": -4.4895429611206055, "logps/chosen": -663.4690551757812, "logps/rejected": -533.2389526367188, "loss": 0.5917, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.012002618052065372, "rewards/margins": 0.43768301606178284, "rewards/rejected": -0.449685662984848, "step": 1960 }, { "epoch": 0.51, "learning_rate": 4.61365592426126e-07, "logits/chosen": -5.206971168518066, "logits/rejected": -4.818270206451416, "logps/chosen": -573.86572265625, "logps/rejected": -416.52935791015625, "loss": 0.5554, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.11328685283660889, "rewards/margins": 0.6511291265487671, "rewards/rejected": -0.5378423929214478, "step": 1970 }, { "epoch": 0.51, "learning_rate": 4.6088744381753845e-07, "logits/chosen": -5.283269882202148, "logits/rejected": -4.621107578277588, "logps/chosen": -561.1148681640625, "logps/rejected": -428.116943359375, "loss": 0.5839, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.04665320739150047, "rewards/margins": 0.5302501916885376, "rewards/rejected": -0.5769033432006836, "step": 1980 }, { "epoch": 0.51, "learning_rate": 4.604092952089509e-07, "logits/chosen": -4.56699275970459, "logits/rejected": -4.727753639221191, "logps/chosen": -531.111328125, "logps/rejected": -465.26556396484375, "loss": 0.593, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.004327183589339256, "rewards/margins": 0.48399001359939575, "rewards/rejected": -0.4883171617984772, "step": 1990 }, { "epoch": 0.52, "learning_rate": 4.5993114660036334e-07, "logits/chosen": -5.3489179611206055, "logits/rejected": -4.768999099731445, "logps/chosen": -674.1377563476562, "logps/rejected": -438.13653564453125, "loss": 0.5349, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.12123505026102066, "rewards/margins": 0.6675950288772583, "rewards/rejected": -0.5463599562644958, "step": 2000 }, { "epoch": 0.52, "eval_logits/chosen": -5.146279335021973, "eval_logits/rejected": -4.8644866943359375, "eval_logps/chosen": -588.3950805664062, "eval_logps/rejected": -447.6207580566406, "eval_loss": 0.577851414680481, "eval_rewards/accuracies": 0.6769999861717224, "eval_rewards/chosen": -0.01255122758448124, "eval_rewards/margins": 0.49547502398490906, "eval_rewards/rejected": -0.5080263018608093, "eval_runtime": 104.3598, "eval_samples_per_second": 19.164, "eval_steps_per_second": 1.198, "step": 2000 }, { "epoch": 0.52, "learning_rate": 4.594529979917758e-07, "logits/chosen": -4.754180431365967, "logits/rejected": -4.150341033935547, "logps/chosen": -574.4520874023438, "logps/rejected": -465.10791015625, "loss": 0.6135, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07928381115198135, "rewards/margins": 0.3342432975769043, "rewards/rejected": -0.41352710127830505, "step": 2010 }, { "epoch": 0.52, "learning_rate": 4.589748493831883e-07, "logits/chosen": -4.955104827880859, "logits/rejected": -4.670805931091309, "logps/chosen": -639.5658569335938, "logps/rejected": -511.34271240234375, "loss": 0.6201, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.008691489696502686, "rewards/margins": 0.3600427508354187, "rewards/rejected": -0.3687342405319214, "step": 2020 }, { "epoch": 0.52, "learning_rate": 4.5849670077460075e-07, "logits/chosen": -5.191245079040527, "logits/rejected": -5.079578876495361, "logps/chosen": -585.6170654296875, "logps/rejected": -500.0289611816406, "loss": 0.5704, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.033618904650211334, "rewards/margins": 0.4694100022315979, "rewards/rejected": -0.4357910752296448, "step": 2030 }, { "epoch": 0.53, "learning_rate": 4.5801855216601317e-07, "logits/chosen": -5.175076484680176, "logits/rejected": -4.710196495056152, "logps/chosen": -624.3074340820312, "logps/rejected": -427.0894470214844, "loss": 0.5522, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.08164355158805847, "rewards/margins": 0.638940691947937, "rewards/rejected": -0.5572971701622009, "step": 2040 }, { "epoch": 0.53, "learning_rate": 4.5754040355742564e-07, "logits/chosen": -5.308157444000244, "logits/rejected": -5.369725704193115, "logps/chosen": -531.7825927734375, "logps/rejected": -429.5498046875, "loss": 0.5644, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.012336271815001965, "rewards/margins": 0.6197837591171265, "rewards/rejected": -0.6074475646018982, "step": 2050 }, { "epoch": 0.53, "learning_rate": 4.570622549488381e-07, "logits/chosen": -4.8568243980407715, "logits/rejected": -4.481594085693359, "logps/chosen": -627.6360473632812, "logps/rejected": -506.6661682128906, "loss": 0.5643, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.10641437768936157, "rewards/margins": 0.5086201429367065, "rewards/rejected": -0.40220585465431213, "step": 2060 }, { "epoch": 0.53, "learning_rate": 4.5658410634025054e-07, "logits/chosen": -4.818617820739746, "logits/rejected": -4.45701789855957, "logps/chosen": -575.7567749023438, "logps/rejected": -413.98016357421875, "loss": 0.6105, "rewards/accuracies": 0.6875, "rewards/chosen": 0.08569834381341934, "rewards/margins": 0.6819712519645691, "rewards/rejected": -0.5962729454040527, "step": 2070 }, { "epoch": 0.54, "learning_rate": 4.56105957731663e-07, "logits/chosen": -5.195056915283203, "logits/rejected": -4.765292167663574, "logps/chosen": -653.9817504882812, "logps/rejected": -476.4908752441406, "loss": 0.6513, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.09224192053079605, "rewards/margins": 0.32308563590049744, "rewards/rejected": -0.4153275489807129, "step": 2080 }, { "epoch": 0.54, "learning_rate": 4.556278091230754e-07, "logits/chosen": -5.061344146728516, "logits/rejected": -4.931196689605713, "logps/chosen": -549.254638671875, "logps/rejected": -429.085693359375, "loss": 0.5875, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.14451441168785095, "rewards/margins": 0.355307400226593, "rewards/rejected": -0.49982184171676636, "step": 2090 }, { "epoch": 0.54, "learning_rate": 4.551496605144879e-07, "logits/chosen": -4.914762496948242, "logits/rejected": -4.701096057891846, "logps/chosen": -431.8565368652344, "logps/rejected": -339.8495788574219, "loss": 0.5705, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.00781789980828762, "rewards/margins": 0.4883212447166443, "rewards/rejected": -0.4961392283439636, "step": 2100 }, { "epoch": 0.54, "learning_rate": 4.546715119059003e-07, "logits/chosen": -5.280637741088867, "logits/rejected": -4.564585208892822, "logps/chosen": -553.3424072265625, "logps/rejected": -459.9766540527344, "loss": 0.6036, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.07073311507701874, "rewards/margins": 0.4561789035797119, "rewards/rejected": -0.385445773601532, "step": 2110 }, { "epoch": 0.55, "learning_rate": 4.541933632973128e-07, "logits/chosen": -5.428469181060791, "logits/rejected": -5.297779560089111, "logps/chosen": -593.0921020507812, "logps/rejected": -437.1546936035156, "loss": 0.5856, "rewards/accuracies": 0.625, "rewards/chosen": 0.05655141919851303, "rewards/margins": 0.3987019956111908, "rewards/rejected": -0.34215062856674194, "step": 2120 }, { "epoch": 0.55, "learning_rate": 4.5371521468872526e-07, "logits/chosen": -5.267939567565918, "logits/rejected": -4.6556172370910645, "logps/chosen": -570.8902587890625, "logps/rejected": -433.583984375, "loss": 0.6492, "rewards/accuracies": 0.6875, "rewards/chosen": 0.12111033499240875, "rewards/margins": 0.4809940457344055, "rewards/rejected": -0.3598836660385132, "step": 2130 }, { "epoch": 0.55, "learning_rate": 4.532370660801377e-07, "logits/chosen": -5.3341145515441895, "logits/rejected": -4.604385852813721, "logps/chosen": -650.091552734375, "logps/rejected": -481.7166442871094, "loss": 0.5709, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.19286799430847168, "rewards/margins": 0.6457473039627075, "rewards/rejected": -0.45287925004959106, "step": 2140 }, { "epoch": 0.56, "learning_rate": 4.5275891747155015e-07, "logits/chosen": -5.059699535369873, "logits/rejected": -5.347445487976074, "logps/chosen": -596.0984497070312, "logps/rejected": -466.853759765625, "loss": 0.5152, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.12193659693002701, "rewards/margins": 0.682623565196991, "rewards/rejected": -0.5606869459152222, "step": 2150 }, { "epoch": 0.56, "learning_rate": 4.5228076886296257e-07, "logits/chosen": -5.061795234680176, "logits/rejected": -4.743279933929443, "logps/chosen": -600.1107177734375, "logps/rejected": -419.19696044921875, "loss": 0.5613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.06328261643648148, "rewards/margins": 0.497763454914093, "rewards/rejected": -0.43448084592819214, "step": 2160 }, { "epoch": 0.56, "learning_rate": 4.5180262025437504e-07, "logits/chosen": -5.278221607208252, "logits/rejected": -4.951820373535156, "logps/chosen": -552.786865234375, "logps/rejected": -404.4030456542969, "loss": 0.5571, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.043316375464200974, "rewards/margins": 0.4411965310573578, "rewards/rejected": -0.3978801369667053, "step": 2170 }, { "epoch": 0.56, "learning_rate": 4.5132447164578746e-07, "logits/chosen": -4.717682838439941, "logits/rejected": -4.954085350036621, "logps/chosen": -511.45318603515625, "logps/rejected": -422.9784240722656, "loss": 0.6055, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.010040628723800182, "rewards/margins": 0.3973018229007721, "rewards/rejected": -0.40734249353408813, "step": 2180 }, { "epoch": 0.57, "learning_rate": 4.5084632303719993e-07, "logits/chosen": -5.219690799713135, "logits/rejected": -4.692103862762451, "logps/chosen": -676.1569213867188, "logps/rejected": -543.9534912109375, "loss": 0.6024, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.035268086940050125, "rewards/margins": 0.46811848878860474, "rewards/rejected": -0.4328504502773285, "step": 2190 }, { "epoch": 0.57, "learning_rate": 4.503681744286124e-07, "logits/chosen": -4.987466812133789, "logits/rejected": -5.086739540100098, "logps/chosen": -607.033203125, "logps/rejected": -515.29248046875, "loss": 0.5527, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.13088318705558777, "rewards/margins": 0.5619222521781921, "rewards/rejected": -0.431039035320282, "step": 2200 }, { "epoch": 0.57, "learning_rate": 4.498900258200248e-07, "logits/chosen": -5.376193523406982, "logits/rejected": -4.793856620788574, "logps/chosen": -614.5145263671875, "logps/rejected": -479.49285888671875, "loss": 0.5603, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.11086299270391464, "rewards/margins": 0.5475805997848511, "rewards/rejected": -0.43671759963035583, "step": 2210 }, { "epoch": 0.57, "learning_rate": 4.494118772114373e-07, "logits/chosen": -5.337400436401367, "logits/rejected": -4.9215803146362305, "logps/chosen": -606.2073974609375, "logps/rejected": -475.0830993652344, "loss": 0.5862, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.013604672625660896, "rewards/margins": 0.49430006742477417, "rewards/rejected": -0.5079048275947571, "step": 2220 }, { "epoch": 0.58, "learning_rate": 4.489337286028497e-07, "logits/chosen": -5.110570430755615, "logits/rejected": -4.919083595275879, "logps/chosen": -586.9144897460938, "logps/rejected": -441.62823486328125, "loss": 0.5421, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.05038571357727051, "rewards/margins": 0.6594342589378357, "rewards/rejected": -0.6090484857559204, "step": 2230 }, { "epoch": 0.58, "learning_rate": 4.484555799942622e-07, "logits/chosen": -5.581158638000488, "logits/rejected": -4.832554817199707, "logps/chosen": -568.5726318359375, "logps/rejected": -442.3194885253906, "loss": 0.5899, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.09931375086307526, "rewards/margins": 0.5159778594970703, "rewards/rejected": -0.41666412353515625, "step": 2240 }, { "epoch": 0.58, "learning_rate": 4.479774313856746e-07, "logits/chosen": -4.789618968963623, "logits/rejected": -4.601677417755127, "logps/chosen": -667.779052734375, "logps/rejected": -495.64935302734375, "loss": 0.5648, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1361313909292221, "rewards/margins": 0.7125908136367798, "rewards/rejected": -0.5764594078063965, "step": 2250 }, { "epoch": 0.58, "learning_rate": 4.474992827770871e-07, "logits/chosen": -4.940236568450928, "logits/rejected": -4.563622951507568, "logps/chosen": -604.3670654296875, "logps/rejected": -406.8453674316406, "loss": 0.5738, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.11580381542444229, "rewards/margins": 0.591064453125, "rewards/rejected": -0.47526073455810547, "step": 2260 }, { "epoch": 0.59, "learning_rate": 4.4702113416849955e-07, "logits/chosen": -5.279782295227051, "logits/rejected": -4.814539909362793, "logps/chosen": -634.1673583984375, "logps/rejected": -520.6923828125, "loss": 0.5593, "rewards/accuracies": 0.6875, "rewards/chosen": 0.020117606967687607, "rewards/margins": 0.5627638101577759, "rewards/rejected": -0.5426462292671204, "step": 2270 }, { "epoch": 0.59, "learning_rate": 4.4654298555991197e-07, "logits/chosen": -5.039707183837891, "logits/rejected": -4.6643171310424805, "logps/chosen": -641.8600463867188, "logps/rejected": -522.2177734375, "loss": 0.6402, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.021140437573194504, "rewards/margins": 0.3078760802745819, "rewards/rejected": -0.3290165066719055, "step": 2280 }, { "epoch": 0.59, "learning_rate": 4.4606483695132444e-07, "logits/chosen": -5.054386615753174, "logits/rejected": -4.7289557456970215, "logps/chosen": -707.6024169921875, "logps/rejected": -492.3511657714844, "loss": 0.5141, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.10909013450145721, "rewards/margins": 0.8125289082527161, "rewards/rejected": -0.7034388184547424, "step": 2290 }, { "epoch": 0.59, "learning_rate": 4.4558668834273686e-07, "logits/chosen": -5.343094825744629, "logits/rejected": -4.963892936706543, "logps/chosen": -605.5589599609375, "logps/rejected": -440.63897705078125, "loss": 0.6232, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03654870018362999, "rewards/margins": 0.3890681862831116, "rewards/rejected": -0.42561691999435425, "step": 2300 }, { "epoch": 0.6, "learning_rate": 4.451085397341494e-07, "logits/chosen": -5.292557716369629, "logits/rejected": -5.246009826660156, "logps/chosen": -527.5159912109375, "logps/rejected": -468.10162353515625, "loss": 0.5339, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.10577945411205292, "rewards/margins": 0.5268454551696777, "rewards/rejected": -0.4210661053657532, "step": 2310 }, { "epoch": 0.6, "learning_rate": 4.446303911255618e-07, "logits/chosen": -5.212244987487793, "logits/rejected": -5.010140419006348, "logps/chosen": -579.4832153320312, "logps/rejected": -558.919921875, "loss": 0.5736, "rewards/accuracies": 0.625, "rewards/chosen": -0.01966727152466774, "rewards/margins": 0.47932949662208557, "rewards/rejected": -0.4989967942237854, "step": 2320 }, { "epoch": 0.6, "learning_rate": 4.441522425169743e-07, "logits/chosen": -5.097935676574707, "logits/rejected": -4.7646636962890625, "logps/chosen": -484.53253173828125, "logps/rejected": -456.3199157714844, "loss": 0.5616, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08460819721221924, "rewards/margins": 0.4856942594051361, "rewards/rejected": -0.570302426815033, "step": 2330 }, { "epoch": 0.6, "learning_rate": 4.4367409390838675e-07, "logits/chosen": -5.351646900177002, "logits/rejected": -5.018394470214844, "logps/chosen": -694.3348388671875, "logps/rejected": -494.0359802246094, "loss": 0.5416, "rewards/accuracies": 0.6875, "rewards/chosen": 0.14900526404380798, "rewards/margins": 0.5575164556503296, "rewards/rejected": -0.4085111618041992, "step": 2340 }, { "epoch": 0.61, "learning_rate": 4.4319594529979916e-07, "logits/chosen": -5.152003288269043, "logits/rejected": -5.216752052307129, "logps/chosen": -547.4890747070312, "logps/rejected": -401.4117126464844, "loss": 0.5792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.029167702421545982, "rewards/margins": 0.5511584877967834, "rewards/rejected": -0.5803261995315552, "step": 2350 }, { "epoch": 0.61, "learning_rate": 4.4271779669121164e-07, "logits/chosen": -5.249228000640869, "logits/rejected": -4.706865310668945, "logps/chosen": -600.8203125, "logps/rejected": -423.93670654296875, "loss": 0.5735, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.05275494605302811, "rewards/margins": 0.5897740721702576, "rewards/rejected": -0.5370191335678101, "step": 2360 }, { "epoch": 0.61, "learning_rate": 4.4223964808262406e-07, "logits/chosen": -4.767922401428223, "logits/rejected": -5.0720133781433105, "logps/chosen": -621.3831787109375, "logps/rejected": -562.7306518554688, "loss": 0.5681, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.009325778111815453, "rewards/margins": 0.5207979679107666, "rewards/rejected": -0.511472225189209, "step": 2370 }, { "epoch": 0.61, "learning_rate": 4.4176149947403653e-07, "logits/chosen": -5.388358116149902, "logits/rejected": -4.941030979156494, "logps/chosen": -541.927001953125, "logps/rejected": -409.4505920410156, "loss": 0.5842, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.024573199450969696, "rewards/margins": 0.5250213146209717, "rewards/rejected": -0.549594521522522, "step": 2380 }, { "epoch": 0.62, "learning_rate": 4.41283350865449e-07, "logits/chosen": -5.258152484893799, "logits/rejected": -5.060222148895264, "logps/chosen": -488.45037841796875, "logps/rejected": -373.4148254394531, "loss": 0.632, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.16966131329536438, "rewards/margins": 0.25001776218414307, "rewards/rejected": -0.41967910528182983, "step": 2390 }, { "epoch": 0.62, "learning_rate": 4.408052022568614e-07, "logits/chosen": -4.757939338684082, "logits/rejected": -5.3442559242248535, "logps/chosen": -520.8878784179688, "logps/rejected": -414.75701904296875, "loss": 0.5729, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.06950821727514267, "rewards/margins": 0.47495198249816895, "rewards/rejected": -0.5444601774215698, "step": 2400 }, { "epoch": 0.62, "learning_rate": 4.403270536482739e-07, "logits/chosen": -5.058515548706055, "logits/rejected": -5.151968002319336, "logps/chosen": -581.0386352539062, "logps/rejected": -451.7078552246094, "loss": 0.5297, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.21057648956775665, "rewards/margins": 0.6704424023628235, "rewards/rejected": -0.45986586809158325, "step": 2410 }, { "epoch": 0.62, "learning_rate": 4.398489050396863e-07, "logits/chosen": -4.98227071762085, "logits/rejected": -4.883034706115723, "logps/chosen": -540.2955322265625, "logps/rejected": -457.1280212402344, "loss": 0.6322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.002387347863987088, "rewards/margins": 0.4342457354068756, "rewards/rejected": -0.4366331100463867, "step": 2420 }, { "epoch": 0.63, "learning_rate": 4.393707564310988e-07, "logits/chosen": -5.257428169250488, "logits/rejected": -4.899024486541748, "logps/chosen": -628.1483764648438, "logps/rejected": -472.5105895996094, "loss": 0.5593, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.44423383474349976, "rewards/margins": 0.8926936388015747, "rewards/rejected": -0.4484596848487854, "step": 2430 }, { "epoch": 0.63, "learning_rate": 4.388926078225112e-07, "logits/chosen": -5.083817481994629, "logits/rejected": -5.1455583572387695, "logps/chosen": -481.34686279296875, "logps/rejected": -406.1684265136719, "loss": 0.5764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.08702026307582855, "rewards/margins": 0.49637556076049805, "rewards/rejected": -0.4093553125858307, "step": 2440 }, { "epoch": 0.63, "learning_rate": 4.3841445921392367e-07, "logits/chosen": -5.213073253631592, "logits/rejected": -5.218255519866943, "logps/chosen": -663.9601440429688, "logps/rejected": -575.1884155273438, "loss": 0.5946, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.006611700169742107, "rewards/margins": 0.6055362224578857, "rewards/rejected": -0.612147867679596, "step": 2450 }, { "epoch": 0.64, "learning_rate": 4.3793631060533614e-07, "logits/chosen": -5.283349514007568, "logits/rejected": -4.799742698669434, "logps/chosen": -641.492431640625, "logps/rejected": -468.0411071777344, "loss": 0.5254, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.11620601266622543, "rewards/margins": 0.6509004831314087, "rewards/rejected": -0.534694492816925, "step": 2460 }, { "epoch": 0.64, "learning_rate": 4.3745816199674856e-07, "logits/chosen": -4.862331390380859, "logits/rejected": -4.658215045928955, "logps/chosen": -645.322998046875, "logps/rejected": -485.50982666015625, "loss": 0.5628, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08128894120454788, "rewards/margins": 0.5460609197616577, "rewards/rejected": -0.46477198600769043, "step": 2470 }, { "epoch": 0.64, "learning_rate": 4.3698001338816103e-07, "logits/chosen": -5.355715751647949, "logits/rejected": -5.029547214508057, "logps/chosen": -735.3001708984375, "logps/rejected": -513.8319091796875, "loss": 0.5621, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.014288777485489845, "rewards/margins": 0.5232454538345337, "rewards/rejected": -0.5089566707611084, "step": 2480 }, { "epoch": 0.64, "learning_rate": 4.3650186477957345e-07, "logits/chosen": -5.127038955688477, "logits/rejected": -4.877999305725098, "logps/chosen": -509.0348205566406, "logps/rejected": -408.6392517089844, "loss": 0.5653, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.10238923132419586, "rewards/margins": 0.5134610533714294, "rewards/rejected": -0.6158502697944641, "step": 2490 }, { "epoch": 0.65, "learning_rate": 4.360237161709859e-07, "logits/chosen": -5.23773193359375, "logits/rejected": -4.782685279846191, "logps/chosen": -633.52294921875, "logps/rejected": -483.8202209472656, "loss": 0.5239, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.06435808539390564, "rewards/margins": 0.6964777112007141, "rewards/rejected": -0.6321195363998413, "step": 2500 }, { "epoch": 0.65, "learning_rate": 4.3554556756239834e-07, "logits/chosen": -5.154562950134277, "logits/rejected": -5.084676742553711, "logps/chosen": -614.1409912109375, "logps/rejected": -492.91973876953125, "loss": 0.5857, "rewards/accuracies": 0.6875, "rewards/chosen": 0.06166275218129158, "rewards/margins": 0.48271411657333374, "rewards/rejected": -0.4210514426231384, "step": 2510 }, { "epoch": 0.65, "learning_rate": 4.350674189538108e-07, "logits/chosen": -5.181066989898682, "logits/rejected": -4.905048370361328, "logps/chosen": -588.6788940429688, "logps/rejected": -445.88226318359375, "loss": 0.5517, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.15706638991832733, "rewards/margins": 0.5878459215164185, "rewards/rejected": -0.4307796061038971, "step": 2520 }, { "epoch": 0.65, "learning_rate": 4.345892703452233e-07, "logits/chosen": -5.12363862991333, "logits/rejected": -5.1981658935546875, "logps/chosen": -546.7200927734375, "logps/rejected": -410.0215759277344, "loss": 0.5705, "rewards/accuracies": 0.6875, "rewards/chosen": -0.10428951680660248, "rewards/margins": 0.46206268668174744, "rewards/rejected": -0.5663522481918335, "step": 2530 }, { "epoch": 0.66, "learning_rate": 4.341111217366357e-07, "logits/chosen": -5.098029136657715, "logits/rejected": -4.901499271392822, "logps/chosen": -546.7400512695312, "logps/rejected": -462.34417724609375, "loss": 0.5732, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0035857378970831633, "rewards/margins": 0.3745895028114319, "rewards/rejected": -0.37100377678871155, "step": 2540 }, { "epoch": 0.66, "learning_rate": 4.336329731280482e-07, "logits/chosen": -5.2484846115112305, "logits/rejected": -5.317324638366699, "logps/chosen": -579.0795288085938, "logps/rejected": -476.16619873046875, "loss": 0.6002, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03629463165998459, "rewards/margins": 0.4470502734184265, "rewards/rejected": -0.4107556939125061, "step": 2550 }, { "epoch": 0.66, "learning_rate": 4.331548245194606e-07, "logits/chosen": -5.290572643280029, "logits/rejected": -4.632424831390381, "logps/chosen": -610.1676025390625, "logps/rejected": -496.96728515625, "loss": 0.584, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.02544744871556759, "rewards/margins": 0.4205760955810547, "rewards/rejected": -0.446023553609848, "step": 2560 }, { "epoch": 0.66, "learning_rate": 4.3267667591087307e-07, "logits/chosen": -5.304609298706055, "logits/rejected": -4.632178783416748, "logps/chosen": -606.6671752929688, "logps/rejected": -458.6371154785156, "loss": 0.5872, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.15550640225410461, "rewards/margins": 0.5943509936332703, "rewards/rejected": -0.43884459137916565, "step": 2570 }, { "epoch": 0.67, "learning_rate": 4.321985273022855e-07, "logits/chosen": -5.085798263549805, "logits/rejected": -4.466156959533691, "logps/chosen": -630.9932861328125, "logps/rejected": -482.27960205078125, "loss": 0.5955, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.2371097058057785, "rewards/margins": 0.6256157159805298, "rewards/rejected": -0.38850611448287964, "step": 2580 }, { "epoch": 0.67, "learning_rate": 4.3172037869369796e-07, "logits/chosen": -5.178996562957764, "logits/rejected": -5.110491752624512, "logps/chosen": -527.7030029296875, "logps/rejected": -391.350830078125, "loss": 0.552, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0023458979558199644, "rewards/margins": 0.5923684239387512, "rewards/rejected": -0.5947142839431763, "step": 2590 }, { "epoch": 0.67, "learning_rate": 4.312422300851105e-07, "logits/chosen": -4.998152732849121, "logits/rejected": -4.799966812133789, "logps/chosen": -653.7890014648438, "logps/rejected": -429.3960876464844, "loss": 0.5339, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.17085520923137665, "rewards/margins": 0.7086859941482544, "rewards/rejected": -0.5378308296203613, "step": 2600 }, { "epoch": 0.67, "learning_rate": 4.307640814765229e-07, "logits/chosen": -5.151551246643066, "logits/rejected": -5.047919273376465, "logps/chosen": -636.4434814453125, "logps/rejected": -528.4822387695312, "loss": 0.5185, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.11411406844854355, "rewards/margins": 0.6535225510597229, "rewards/rejected": -0.5394085049629211, "step": 2610 }, { "epoch": 0.68, "learning_rate": 4.302859328679354e-07, "logits/chosen": -5.412592887878418, "logits/rejected": -5.088252067565918, "logps/chosen": -589.2025146484375, "logps/rejected": -419.9964904785156, "loss": 0.5467, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.047360360622406006, "rewards/margins": 0.6108908653259277, "rewards/rejected": -0.5635305643081665, "step": 2620 }, { "epoch": 0.68, "learning_rate": 4.298077842593478e-07, "logits/chosen": -5.005528450012207, "logits/rejected": -5.104848384857178, "logps/chosen": -590.3175659179688, "logps/rejected": -529.538330078125, "loss": 0.5938, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01627901755273342, "rewards/margins": 0.35472917556762695, "rewards/rejected": -0.3384501338005066, "step": 2630 }, { "epoch": 0.68, "learning_rate": 4.2932963565076027e-07, "logits/chosen": -5.282013893127441, "logits/rejected": -4.624232292175293, "logps/chosen": -681.4449462890625, "logps/rejected": -452.9990234375, "loss": 0.5831, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.19735820591449738, "rewards/margins": 0.6560162901878357, "rewards/rejected": -0.4586581289768219, "step": 2640 }, { "epoch": 0.68, "learning_rate": 4.288514870421727e-07, "logits/chosen": -5.1208882331848145, "logits/rejected": -4.809333801269531, "logps/chosen": -674.2571411132812, "logps/rejected": -565.6589965820312, "loss": 0.5939, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.061952896416187286, "rewards/margins": 0.3939959406852722, "rewards/rejected": -0.33204299211502075, "step": 2650 }, { "epoch": 0.69, "learning_rate": 4.2837333843358516e-07, "logits/chosen": -5.15143346786499, "logits/rejected": -5.11556339263916, "logps/chosen": -543.9790649414062, "logps/rejected": -435.9480895996094, "loss": 0.5838, "rewards/accuracies": 0.6875, "rewards/chosen": 0.014995163306593895, "rewards/margins": 0.5286285281181335, "rewards/rejected": -0.5136333703994751, "step": 2660 }, { "epoch": 0.69, "learning_rate": 4.2789518982499763e-07, "logits/chosen": -5.0635576248168945, "logits/rejected": -4.468456268310547, "logps/chosen": -619.7725219726562, "logps/rejected": -465.1572265625, "loss": 0.5577, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.09982754290103912, "rewards/margins": 0.5600365400314331, "rewards/rejected": -0.4602089524269104, "step": 2670 }, { "epoch": 0.69, "learning_rate": 4.2741704121641005e-07, "logits/chosen": -5.118125915527344, "logits/rejected": -4.619821071624756, "logps/chosen": -624.4805297851562, "logps/rejected": -439.234130859375, "loss": 0.5374, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.08643849194049835, "rewards/margins": 0.6423428058624268, "rewards/rejected": -0.5559043884277344, "step": 2680 }, { "epoch": 0.69, "learning_rate": 4.269388926078225e-07, "logits/chosen": -4.754753589630127, "logits/rejected": -4.942806720733643, "logps/chosen": -572.637939453125, "logps/rejected": -463.81317138671875, "loss": 0.6018, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.04292984679341316, "rewards/margins": 0.5146417021751404, "rewards/rejected": -0.47171181440353394, "step": 2690 }, { "epoch": 0.7, "learning_rate": 4.2646074399923494e-07, "logits/chosen": -5.543126583099365, "logits/rejected": -4.9846978187561035, "logps/chosen": -624.9405517578125, "logps/rejected": -406.91064453125, "loss": 0.6281, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.03104713000357151, "rewards/margins": 0.4832213819026947, "rewards/rejected": -0.45217418670654297, "step": 2700 }, { "epoch": 0.7, "learning_rate": 4.259825953906474e-07, "logits/chosen": -4.872697830200195, "logits/rejected": -4.875360012054443, "logps/chosen": -765.48291015625, "logps/rejected": -527.1544189453125, "loss": 0.5603, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.28182464838027954, "rewards/margins": 0.543058454990387, "rewards/rejected": -0.2612338066101074, "step": 2710 }, { "epoch": 0.7, "learning_rate": 4.2550444678205983e-07, "logits/chosen": -5.342970848083496, "logits/rejected": -5.159431457519531, "logps/chosen": -604.1205444335938, "logps/rejected": -539.5831298828125, "loss": 0.596, "rewards/accuracies": 0.6875, "rewards/chosen": 0.05859052389860153, "rewards/margins": 0.42630401253700256, "rewards/rejected": -0.3677135109901428, "step": 2720 }, { "epoch": 0.7, "learning_rate": 4.250262981734723e-07, "logits/chosen": -5.323681831359863, "logits/rejected": -5.074403285980225, "logps/chosen": -564.4434814453125, "logps/rejected": -443.217529296875, "loss": 0.5987, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.00460275262594223, "rewards/margins": 0.49006548523902893, "rewards/rejected": -0.494668185710907, "step": 2730 }, { "epoch": 0.71, "learning_rate": 4.2454814956488477e-07, "logits/chosen": -5.244222164154053, "logits/rejected": -4.809430122375488, "logps/chosen": -545.374755859375, "logps/rejected": -498.7403869628906, "loss": 0.587, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.09813254326581955, "rewards/margins": 0.500663697719574, "rewards/rejected": -0.40253114700317383, "step": 2740 }, { "epoch": 0.71, "learning_rate": 4.240700009562972e-07, "logits/chosen": -5.24301290512085, "logits/rejected": -4.502610683441162, "logps/chosen": -507.8064880371094, "logps/rejected": -392.7529602050781, "loss": 0.5811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08730830252170563, "rewards/margins": 0.6093380451202393, "rewards/rejected": -0.5220298171043396, "step": 2750 }, { "epoch": 0.71, "learning_rate": 4.2359185234770966e-07, "logits/chosen": -5.391790390014648, "logits/rejected": -5.305670738220215, "logps/chosen": -614.653076171875, "logps/rejected": -445.8578186035156, "loss": 0.5622, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.12220600992441177, "rewards/margins": 0.6899962425231934, "rewards/rejected": -0.5677902102470398, "step": 2760 }, { "epoch": 0.72, "learning_rate": 4.231137037391221e-07, "logits/chosen": -5.461574077606201, "logits/rejected": -4.739812850952148, "logps/chosen": -676.278564453125, "logps/rejected": -461.943115234375, "loss": 0.5377, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.1469791680574417, "rewards/margins": 0.6236746311187744, "rewards/rejected": -0.4766954481601715, "step": 2770 }, { "epoch": 0.72, "learning_rate": 4.2263555513053455e-07, "logits/chosen": -5.332207679748535, "logits/rejected": -5.063170909881592, "logps/chosen": -570.134033203125, "logps/rejected": -415.6617126464844, "loss": 0.6087, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006456410977989435, "rewards/margins": 0.5630040764808655, "rewards/rejected": -0.5565477013587952, "step": 2780 }, { "epoch": 0.72, "learning_rate": 4.2215740652194697e-07, "logits/chosen": -5.260512351989746, "logits/rejected": -4.700934410095215, "logps/chosen": -658.4105224609375, "logps/rejected": -433.4822692871094, "loss": 0.5981, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.09287049621343613, "rewards/margins": 0.4560677409172058, "rewards/rejected": -0.36319732666015625, "step": 2790 }, { "epoch": 0.72, "learning_rate": 4.2167925791335944e-07, "logits/chosen": -5.402337074279785, "logits/rejected": -4.409715175628662, "logps/chosen": -606.4774169921875, "logps/rejected": -428.6263122558594, "loss": 0.6064, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08765562623739243, "rewards/margins": 0.4665617346763611, "rewards/rejected": -0.5542173981666565, "step": 2800 }, { "epoch": 0.73, "learning_rate": 4.212011093047719e-07, "logits/chosen": -5.534334659576416, "logits/rejected": -5.277988910675049, "logps/chosen": -478.5332946777344, "logps/rejected": -412.6917419433594, "loss": 0.5534, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.052935890853405, "rewards/margins": 0.4880947172641754, "rewards/rejected": -0.4351588785648346, "step": 2810 }, { "epoch": 0.73, "learning_rate": 4.2072296069618434e-07, "logits/chosen": -5.379619598388672, "logits/rejected": -4.82396125793457, "logps/chosen": -631.8505859375, "logps/rejected": -428.802001953125, "loss": 0.5099, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.07568497955799103, "rewards/margins": 0.6363910436630249, "rewards/rejected": -0.5607060790061951, "step": 2820 }, { "epoch": 0.73, "learning_rate": 4.202448120875968e-07, "logits/chosen": -4.930148124694824, "logits/rejected": -4.7937421798706055, "logps/chosen": -554.1145629882812, "logps/rejected": -440.90948486328125, "loss": 0.5844, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0248244795948267, "rewards/margins": 0.6005952954292297, "rewards/rejected": -0.6254197955131531, "step": 2830 }, { "epoch": 0.73, "learning_rate": 4.197666634790092e-07, "logits/chosen": -4.495527267456055, "logits/rejected": -4.895731449127197, "logps/chosen": -609.6029052734375, "logps/rejected": -476.82598876953125, "loss": 0.6812, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.12072763592004776, "rewards/margins": 0.5799800753593445, "rewards/rejected": -0.45925241708755493, "step": 2840 }, { "epoch": 0.74, "learning_rate": 4.192885148704217e-07, "logits/chosen": -5.073362827301025, "logits/rejected": -4.713034629821777, "logps/chosen": -629.5406494140625, "logps/rejected": -438.50787353515625, "loss": 0.5991, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.06325655430555344, "rewards/margins": 0.5452449321746826, "rewards/rejected": -0.4819883704185486, "step": 2850 }, { "epoch": 0.74, "learning_rate": 4.188103662618341e-07, "logits/chosen": -5.6294121742248535, "logits/rejected": -5.268061637878418, "logps/chosen": -638.394287109375, "logps/rejected": -552.1012573242188, "loss": 0.5857, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.17535817623138428, "rewards/margins": 0.4183408319950104, "rewards/rejected": -0.2429826259613037, "step": 2860 }, { "epoch": 0.74, "learning_rate": 4.183322176532466e-07, "logits/chosen": -5.095371723175049, "logits/rejected": -4.656705856323242, "logps/chosen": -547.9163818359375, "logps/rejected": -500.2245178222656, "loss": 0.6091, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.011780640110373497, "rewards/margins": 0.3403190076351166, "rewards/rejected": -0.3285383880138397, "step": 2870 }, { "epoch": 0.74, "learning_rate": 4.1785406904465906e-07, "logits/chosen": -5.546196460723877, "logits/rejected": -5.05028772354126, "logps/chosen": -724.4466552734375, "logps/rejected": -562.9118041992188, "loss": 0.6215, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.1469508856534958, "rewards/margins": 0.40469813346862793, "rewards/rejected": -0.25774726271629333, "step": 2880 }, { "epoch": 0.75, "learning_rate": 4.173759204360715e-07, "logits/chosen": -5.0136494636535645, "logits/rejected": -5.298929691314697, "logps/chosen": -580.34716796875, "logps/rejected": -530.3228759765625, "loss": 0.5588, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.19141240417957306, "rewards/margins": 0.4811982214450836, "rewards/rejected": -0.289785772562027, "step": 2890 }, { "epoch": 0.75, "learning_rate": 4.16897771827484e-07, "logits/chosen": -5.307171821594238, "logits/rejected": -4.836307048797607, "logps/chosen": -532.7025146484375, "logps/rejected": -390.014404296875, "loss": 0.5645, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.023624179884791374, "rewards/margins": 0.4527716636657715, "rewards/rejected": -0.42914748191833496, "step": 2900 }, { "epoch": 0.75, "learning_rate": 4.164196232188964e-07, "logits/chosen": -5.260418891906738, "logits/rejected": -4.739095211029053, "logps/chosen": -643.0469970703125, "logps/rejected": -582.5897216796875, "loss": 0.5932, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.2287939339876175, "rewards/margins": 0.31034284830093384, "rewards/rejected": -0.08154893666505814, "step": 2910 }, { "epoch": 0.75, "learning_rate": 4.159414746103089e-07, "logits/chosen": -5.171687126159668, "logits/rejected": -5.0958123207092285, "logps/chosen": -577.0113525390625, "logps/rejected": -439.89373779296875, "loss": 0.566, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.09370137006044388, "rewards/margins": 0.43432050943374634, "rewards/rejected": -0.34061914682388306, "step": 2920 }, { "epoch": 0.76, "learning_rate": 4.154633260017213e-07, "logits/chosen": -5.269101142883301, "logits/rejected": -5.018292427062988, "logps/chosen": -614.4996337890625, "logps/rejected": -438.61163330078125, "loss": 0.6241, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.16350527107715607, "rewards/margins": 0.6221408247947693, "rewards/rejected": -0.4586355686187744, "step": 2930 }, { "epoch": 0.76, "learning_rate": 4.149851773931338e-07, "logits/chosen": -5.238752841949463, "logits/rejected": -4.451853275299072, "logps/chosen": -622.5341796875, "logps/rejected": -430.9085998535156, "loss": 0.5544, "rewards/accuracies": 0.6875, "rewards/chosen": 0.18675033748149872, "rewards/margins": 0.6911321878433228, "rewards/rejected": -0.5043817758560181, "step": 2940 }, { "epoch": 0.76, "learning_rate": 4.1450702878454626e-07, "logits/chosen": -5.0749335289001465, "logits/rejected": -4.6726603507995605, "logps/chosen": -594.6048583984375, "logps/rejected": -462.0939025878906, "loss": 0.4935, "rewards/accuracies": 0.75, "rewards/chosen": 0.08386943489313126, "rewards/margins": 0.6739980578422546, "rewards/rejected": -0.5901286005973816, "step": 2950 }, { "epoch": 0.76, "learning_rate": 4.140288801759587e-07, "logits/chosen": -5.0508198738098145, "logits/rejected": -4.796426296234131, "logps/chosen": -566.7437744140625, "logps/rejected": -404.6668395996094, "loss": 0.5218, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.12706801295280457, "rewards/margins": 0.6414644718170166, "rewards/rejected": -0.5143964290618896, "step": 2960 }, { "epoch": 0.77, "learning_rate": 4.1355073156737115e-07, "logits/chosen": -5.477901935577393, "logits/rejected": -5.117199420928955, "logps/chosen": -502.60076904296875, "logps/rejected": -361.73394775390625, "loss": 0.5849, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03738931566476822, "rewards/margins": 0.5929213762283325, "rewards/rejected": -0.6303106546401978, "step": 2970 }, { "epoch": 0.77, "learning_rate": 4.1307258295878357e-07, "logits/chosen": -5.271552085876465, "logits/rejected": -5.006416320800781, "logps/chosen": -520.046875, "logps/rejected": -431.5006408691406, "loss": 0.626, "rewards/accuracies": 0.625, "rewards/chosen": 0.06680258363485336, "rewards/margins": 0.35946890711784363, "rewards/rejected": -0.29266637563705444, "step": 2980 }, { "epoch": 0.77, "learning_rate": 4.1259443435019604e-07, "logits/chosen": -5.3752031326293945, "logits/rejected": -4.671482086181641, "logps/chosen": -619.343994140625, "logps/rejected": -395.8807678222656, "loss": 0.5111, "rewards/accuracies": 0.75, "rewards/chosen": 0.23466840386390686, "rewards/margins": 0.7433062791824341, "rewards/rejected": -0.5086379051208496, "step": 2990 }, { "epoch": 0.77, "learning_rate": 4.1211628574160846e-07, "logits/chosen": -5.176268577575684, "logits/rejected": -4.916962623596191, "logps/chosen": -573.7061767578125, "logps/rejected": -485.3478088378906, "loss": 0.6029, "rewards/accuracies": 0.6875, "rewards/chosen": -0.01826908253133297, "rewards/margins": 0.48557138442993164, "rewards/rejected": -0.503840446472168, "step": 3000 }, { "epoch": 0.77, "eval_logits/chosen": -5.291067600250244, "eval_logits/rejected": -5.001623630523682, "eval_logps/chosen": -587.367431640625, "eval_logps/rejected": -447.17669677734375, "eval_loss": 0.5656965374946594, "eval_rewards/accuracies": 0.6899999976158142, "eval_rewards/chosen": 0.09021059423685074, "eval_rewards/margins": 0.5538293719291687, "eval_rewards/rejected": -0.46361878514289856, "eval_runtime": 103.3941, "eval_samples_per_second": 19.343, "eval_steps_per_second": 1.209, "step": 3000 }, { "epoch": 0.78, "learning_rate": 4.1163813713302093e-07, "logits/chosen": -5.215742111206055, "logits/rejected": -5.146059513092041, "logps/chosen": -506.08575439453125, "logps/rejected": -488.49090576171875, "loss": 0.5856, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.17541763186454773, "rewards/margins": 0.3880845308303833, "rewards/rejected": -0.5635021924972534, "step": 3010 }, { "epoch": 0.78, "learning_rate": 4.111599885244334e-07, "logits/chosen": -5.6388139724731445, "logits/rejected": -5.1542534828186035, "logps/chosen": -577.5958251953125, "logps/rejected": -501.6172790527344, "loss": 0.5773, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.1911018043756485, "rewards/margins": 0.7097854018211365, "rewards/rejected": -0.5186835527420044, "step": 3020 }, { "epoch": 0.78, "learning_rate": 4.106818399158458e-07, "logits/chosen": -5.420843601226807, "logits/rejected": -5.42513370513916, "logps/chosen": -580.3556518554688, "logps/rejected": -503.9964904785156, "loss": 0.5799, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06382683664560318, "rewards/margins": 0.4019174575805664, "rewards/rejected": -0.4657443165779114, "step": 3030 }, { "epoch": 0.78, "learning_rate": 4.102036913072583e-07, "logits/chosen": -4.878137111663818, "logits/rejected": -4.751458168029785, "logps/chosen": -619.8956298828125, "logps/rejected": -518.23583984375, "loss": 0.5784, "rewards/accuracies": 0.6875, "rewards/chosen": 0.10956134647130966, "rewards/margins": 0.5051667094230652, "rewards/rejected": -0.3956053853034973, "step": 3040 }, { "epoch": 0.79, "learning_rate": 4.097255426986707e-07, "logits/chosen": -5.039268493652344, "logits/rejected": -5.087874412536621, "logps/chosen": -627.4754028320312, "logps/rejected": -494.14263916015625, "loss": 0.5875, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.09773634374141693, "rewards/margins": 0.467266321182251, "rewards/rejected": -0.3695300221443176, "step": 3050 }, { "epoch": 0.79, "learning_rate": 4.092473940900832e-07, "logits/chosen": -4.751810550689697, "logits/rejected": -4.720302581787109, "logps/chosen": -576.5935668945312, "logps/rejected": -475.9479064941406, "loss": 0.4863, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.11184986680746078, "rewards/margins": 0.6622871160507202, "rewards/rejected": -0.5504372119903564, "step": 3060 }, { "epoch": 0.79, "learning_rate": 4.087692454814956e-07, "logits/chosen": -5.326827049255371, "logits/rejected": -5.414839744567871, "logps/chosen": -594.60888671875, "logps/rejected": -454.94354248046875, "loss": 0.6069, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.13708633184432983, "rewards/margins": 0.3661026954650879, "rewards/rejected": -0.22901634871959686, "step": 3070 }, { "epoch": 0.8, "learning_rate": 4.082910968729081e-07, "logits/chosen": -5.3411407470703125, "logits/rejected": -5.260460376739502, "logps/chosen": -588.062255859375, "logps/rejected": -505.5140686035156, "loss": 0.5393, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.12168978154659271, "rewards/margins": 0.5858784317970276, "rewards/rejected": -0.46418866515159607, "step": 3080 }, { "epoch": 0.8, "learning_rate": 4.0781294826432055e-07, "logits/chosen": -5.426017761230469, "logits/rejected": -4.831254482269287, "logps/chosen": -649.7779541015625, "logps/rejected": -485.07958984375, "loss": 0.5924, "rewards/accuracies": 0.6875, "rewards/chosen": 0.1721142679452896, "rewards/margins": 0.5111185908317566, "rewards/rejected": -0.3390043079853058, "step": 3090 }, { "epoch": 0.8, "learning_rate": 4.0733479965573296e-07, "logits/chosen": -5.224730491638184, "logits/rejected": -4.780400276184082, "logps/chosen": -677.8843383789062, "logps/rejected": -478.52447509765625, "loss": 0.5245, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.29468682408332825, "rewards/margins": 0.6371022462844849, "rewards/rejected": -0.34241533279418945, "step": 3100 }, { "epoch": 0.8, "learning_rate": 4.0685665104714544e-07, "logits/chosen": -5.200541019439697, "logits/rejected": -4.742873668670654, "logps/chosen": -597.3360595703125, "logps/rejected": -453.43206787109375, "loss": 0.5461, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.11665183305740356, "rewards/margins": 0.5804598331451416, "rewards/rejected": -0.46380797028541565, "step": 3110 }, { "epoch": 0.81, "learning_rate": 4.0637850243855786e-07, "logits/chosen": -5.212822437286377, "logits/rejected": -5.664287567138672, "logps/chosen": -499.10015869140625, "logps/rejected": -436.43402099609375, "loss": 0.6142, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.06559032201766968, "rewards/margins": 0.5376947522163391, "rewards/rejected": -0.4721044600009918, "step": 3120 }, { "epoch": 0.81, "learning_rate": 4.0590035382997033e-07, "logits/chosen": -5.357455253601074, "logits/rejected": -5.125237464904785, "logps/chosen": -533.3345336914062, "logps/rejected": -399.5873718261719, "loss": 0.5427, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.18078884482383728, "rewards/margins": 0.5445336103439331, "rewards/rejected": -0.3637447655200958, "step": 3130 }, { "epoch": 0.81, "learning_rate": 4.0542220522138275e-07, "logits/chosen": -5.3046875, "logits/rejected": -5.285113334655762, "logps/chosen": -583.47314453125, "logps/rejected": -441.50732421875, "loss": 0.5207, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.16723354160785675, "rewards/margins": 0.7531067132949829, "rewards/rejected": -0.5858731865882874, "step": 3140 }, { "epoch": 0.81, "learning_rate": 4.049440566127952e-07, "logits/chosen": -5.276063442230225, "logits/rejected": -4.917383193969727, "logps/chosen": -659.81396484375, "logps/rejected": -435.8348083496094, "loss": 0.5574, "rewards/accuracies": 0.75, "rewards/chosen": 0.08334384113550186, "rewards/margins": 0.5911930799484253, "rewards/rejected": -0.5078492164611816, "step": 3150 }, { "epoch": 0.82, "learning_rate": 4.044659080042077e-07, "logits/chosen": -5.172604560852051, "logits/rejected": -5.455114364624023, "logps/chosen": -556.4915771484375, "logps/rejected": -478.5448303222656, "loss": 0.5597, "rewards/accuracies": 0.6875, "rewards/chosen": 0.09295444190502167, "rewards/margins": 0.520017683506012, "rewards/rejected": -0.4270631670951843, "step": 3160 }, { "epoch": 0.82, "learning_rate": 4.039877593956201e-07, "logits/chosen": -5.282176971435547, "logits/rejected": -5.041538715362549, "logps/chosen": -652.8903198242188, "logps/rejected": -540.4581298828125, "loss": 0.558, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.2103293240070343, "rewards/margins": 0.5398227572441101, "rewards/rejected": -0.3294934630393982, "step": 3170 }, { "epoch": 0.82, "learning_rate": 4.035096107870326e-07, "logits/chosen": -5.283825874328613, "logits/rejected": -5.0444440841674805, "logps/chosen": -588.059814453125, "logps/rejected": -434.1261291503906, "loss": 0.5202, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.22718743979930878, "rewards/margins": 0.8015328645706177, "rewards/rejected": -0.5743454098701477, "step": 3180 }, { "epoch": 0.82, "learning_rate": 4.03031462178445e-07, "logits/chosen": -5.355008125305176, "logits/rejected": -5.30274772644043, "logps/chosen": -567.9971923828125, "logps/rejected": -475.2953186035156, "loss": 0.5687, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.1294597089290619, "rewards/margins": 0.5204909443855286, "rewards/rejected": -0.3910312056541443, "step": 3190 }, { "epoch": 0.83, "learning_rate": 4.025533135698575e-07, "logits/chosen": -5.282181739807129, "logits/rejected": -4.946359634399414, "logps/chosen": -609.3970947265625, "logps/rejected": -458.1299743652344, "loss": 0.5508, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.10371197760105133, "rewards/margins": 0.632253885269165, "rewards/rejected": -0.5285419225692749, "step": 3200 }, { "epoch": 0.83, "learning_rate": 4.0207516496126994e-07, "logits/chosen": -5.554284572601318, "logits/rejected": -5.293402671813965, "logps/chosen": -586.3056640625, "logps/rejected": -445.1898498535156, "loss": 0.5347, "rewards/accuracies": 0.625, "rewards/chosen": 0.021643463522195816, "rewards/margins": 0.5490753650665283, "rewards/rejected": -0.527431845664978, "step": 3210 }, { "epoch": 0.83, "learning_rate": 4.015970163526824e-07, "logits/chosen": -5.104782581329346, "logits/rejected": -4.941196441650391, "logps/chosen": -595.008056640625, "logps/rejected": -497.55731201171875, "loss": 0.5489, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.030893569812178612, "rewards/margins": 0.6206479668617249, "rewards/rejected": -0.5897544622421265, "step": 3220 }, { "epoch": 0.83, "learning_rate": 4.011188677440949e-07, "logits/chosen": -5.112905979156494, "logits/rejected": -5.0945024490356445, "logps/chosen": -645.4614868164062, "logps/rejected": -447.26385498046875, "loss": 0.5387, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1866869032382965, "rewards/margins": 0.6226974725723267, "rewards/rejected": -0.43601053953170776, "step": 3230 }, { "epoch": 0.84, "learning_rate": 4.006407191355073e-07, "logits/chosen": -5.72912073135376, "logits/rejected": -4.7377400398254395, "logps/chosen": -569.7928466796875, "logps/rejected": -433.04473876953125, "loss": 0.5124, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.002315019490197301, "rewards/margins": 0.7724419832229614, "rewards/rejected": -0.7747569680213928, "step": 3240 }, { "epoch": 0.84, "learning_rate": 4.001625705269198e-07, "logits/chosen": -5.565192699432373, "logits/rejected": -5.363757133483887, "logps/chosen": -577.1485595703125, "logps/rejected": -435.05450439453125, "loss": 0.5609, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.016137491911649704, "rewards/margins": 0.610801100730896, "rewards/rejected": -0.5946635603904724, "step": 3250 }, { "epoch": 0.84, "learning_rate": 3.996844219183322e-07, "logits/chosen": -5.158976078033447, "logits/rejected": -5.161699295043945, "logps/chosen": -577.6820678710938, "logps/rejected": -517.661865234375, "loss": 0.6045, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.006545053329318762, "rewards/margins": 0.5447898507118225, "rewards/rejected": -0.5382447242736816, "step": 3260 }, { "epoch": 0.84, "learning_rate": 3.9920627330974467e-07, "logits/chosen": -5.302914142608643, "logits/rejected": -4.925368309020996, "logps/chosen": -692.4310302734375, "logps/rejected": -483.87713623046875, "loss": 0.5456, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.13608720898628235, "rewards/margins": 0.74052894115448, "rewards/rejected": -0.60444176197052, "step": 3270 }, { "epoch": 0.85, "learning_rate": 3.987281247011571e-07, "logits/chosen": -5.044970512390137, "logits/rejected": -4.995760917663574, "logps/chosen": -695.349609375, "logps/rejected": -528.565673828125, "loss": 0.5453, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.17353370785713196, "rewards/margins": 0.5904036164283752, "rewards/rejected": -0.41686996817588806, "step": 3280 }, { "epoch": 0.85, "learning_rate": 3.9824997609256956e-07, "logits/chosen": -5.238536357879639, "logits/rejected": -5.269024848937988, "logps/chosen": -537.1624755859375, "logps/rejected": -456.67218017578125, "loss": 0.5776, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.1374790221452713, "rewards/margins": 0.6452633738517761, "rewards/rejected": -0.5077843070030212, "step": 3290 }, { "epoch": 0.85, "learning_rate": 3.9777182748398203e-07, "logits/chosen": -5.2962422370910645, "logits/rejected": -4.564726829528809, "logps/chosen": -690.3709716796875, "logps/rejected": -445.2132873535156, "loss": 0.5358, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.27778685092926025, "rewards/margins": 0.8879708051681519, "rewards/rejected": -0.6101840734481812, "step": 3300 }, { "epoch": 0.85, "learning_rate": 3.9729367887539445e-07, "logits/chosen": -5.214041709899902, "logits/rejected": -4.955563545227051, "logps/chosen": -590.9525756835938, "logps/rejected": -429.01043701171875, "loss": 0.5661, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1285330355167389, "rewards/margins": 0.6235883831977844, "rewards/rejected": -0.49505525827407837, "step": 3310 }, { "epoch": 0.86, "learning_rate": 3.968155302668069e-07, "logits/chosen": -5.092454433441162, "logits/rejected": -5.117671966552734, "logps/chosen": -692.7261962890625, "logps/rejected": -527.7227783203125, "loss": 0.5194, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.24313250184059143, "rewards/margins": 0.8606869578361511, "rewards/rejected": -0.6175544857978821, "step": 3320 }, { "epoch": 0.86, "learning_rate": 3.9633738165821934e-07, "logits/chosen": -5.074185848236084, "logits/rejected": -5.004903316497803, "logps/chosen": -618.8615112304688, "logps/rejected": -457.2530212402344, "loss": 0.6063, "rewards/accuracies": 0.75, "rewards/chosen": 0.08944591134786606, "rewards/margins": 0.624073326587677, "rewards/rejected": -0.5346274971961975, "step": 3330 }, { "epoch": 0.86, "learning_rate": 3.958592330496318e-07, "logits/chosen": -4.96697473526001, "logits/rejected": -4.719120025634766, "logps/chosen": -608.1307373046875, "logps/rejected": -474.03778076171875, "loss": 0.6032, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.1688225269317627, "rewards/margins": 0.5783975124359131, "rewards/rejected": -0.4095749258995056, "step": 3340 }, { "epoch": 0.86, "learning_rate": 3.9538108444104423e-07, "logits/chosen": -5.370732307434082, "logits/rejected": -4.946127891540527, "logps/chosen": -543.9302978515625, "logps/rejected": -426.31243896484375, "loss": 0.5546, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.05867813900113106, "rewards/margins": 0.5858211517333984, "rewards/rejected": -0.5271430015563965, "step": 3350 }, { "epoch": 0.87, "learning_rate": 3.949029358324567e-07, "logits/chosen": -4.619930267333984, "logits/rejected": -5.037390232086182, "logps/chosen": -601.0657958984375, "logps/rejected": -546.6511840820312, "loss": 0.641, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0011930428445339203, "rewards/margins": 0.4250010848045349, "rewards/rejected": -0.42380809783935547, "step": 3360 }, { "epoch": 0.87, "learning_rate": 3.944247872238692e-07, "logits/chosen": -5.475283622741699, "logits/rejected": -5.184762001037598, "logps/chosen": -652.035888671875, "logps/rejected": -542.4512939453125, "loss": 0.5624, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.16885468363761902, "rewards/margins": 0.5627483129501343, "rewards/rejected": -0.3938937187194824, "step": 3370 }, { "epoch": 0.87, "learning_rate": 3.939466386152816e-07, "logits/chosen": -5.5554914474487305, "logits/rejected": -5.070727348327637, "logps/chosen": -604.1060791015625, "logps/rejected": -451.76544189453125, "loss": 0.5391, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.2279389351606369, "rewards/margins": 0.6537092924118042, "rewards/rejected": -0.42577043175697327, "step": 3380 }, { "epoch": 0.88, "learning_rate": 3.9346849000669407e-07, "logits/chosen": -5.199848651885986, "logits/rejected": -4.806522846221924, "logps/chosen": -603.6668701171875, "logps/rejected": -479.3272399902344, "loss": 0.6126, "rewards/accuracies": 0.625, "rewards/chosen": -0.11634206771850586, "rewards/margins": 0.35443344712257385, "rewards/rejected": -0.47077545523643494, "step": 3390 }, { "epoch": 0.88, "learning_rate": 3.929903413981065e-07, "logits/chosen": -5.338006019592285, "logits/rejected": -4.938784599304199, "logps/chosen": -629.5792236328125, "logps/rejected": -439.2228088378906, "loss": 0.5716, "rewards/accuracies": 0.5625, "rewards/chosen": 0.1593635231256485, "rewards/margins": 0.5300758481025696, "rewards/rejected": -0.3707122504711151, "step": 3400 }, { "epoch": 0.88, "learning_rate": 3.9251219278951896e-07, "logits/chosen": -5.303593158721924, "logits/rejected": -4.528509140014648, "logps/chosen": -628.5130615234375, "logps/rejected": -462.95745849609375, "loss": 0.5531, "rewards/accuracies": 0.6875, "rewards/chosen": 0.26164278388023376, "rewards/margins": 0.5934808850288391, "rewards/rejected": -0.33183804154396057, "step": 3410 }, { "epoch": 0.88, "learning_rate": 3.920340441809314e-07, "logits/chosen": -5.394709587097168, "logits/rejected": -4.674399375915527, "logps/chosen": -617.9784545898438, "logps/rejected": -455.641845703125, "loss": 0.5438, "rewards/accuracies": 0.8125, "rewards/chosen": 0.07891926169395447, "rewards/margins": 0.712009847164154, "rewards/rejected": -0.6330905556678772, "step": 3420 }, { "epoch": 0.89, "learning_rate": 3.9155589557234385e-07, "logits/chosen": -5.34127950668335, "logits/rejected": -5.147403717041016, "logps/chosen": -639.702880859375, "logps/rejected": -480.78167724609375, "loss": 0.549, "rewards/accuracies": 0.75, "rewards/chosen": 0.1886538714170456, "rewards/margins": 0.6698323488235474, "rewards/rejected": -0.4811784625053406, "step": 3430 }, { "epoch": 0.89, "learning_rate": 3.910777469637563e-07, "logits/chosen": -5.079847812652588, "logits/rejected": -5.393969535827637, "logps/chosen": -524.0960693359375, "logps/rejected": -541.1182250976562, "loss": 0.6556, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.1010889783501625, "rewards/margins": 0.33704736828804016, "rewards/rejected": -0.23595838248729706, "step": 3440 }, { "epoch": 0.89, "learning_rate": 3.9059959835516874e-07, "logits/chosen": -5.178534507751465, "logits/rejected": -4.723514080047607, "logps/chosen": -591.4950561523438, "logps/rejected": -472.28936767578125, "loss": 0.5581, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.10051153600215912, "rewards/margins": 0.5078757405281067, "rewards/rejected": -0.4073641300201416, "step": 3450 }, { "epoch": 0.89, "learning_rate": 3.901214497465812e-07, "logits/chosen": -5.057511329650879, "logits/rejected": -4.958263874053955, "logps/chosen": -688.9708251953125, "logps/rejected": -507.01422119140625, "loss": 0.5601, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.22021882236003876, "rewards/margins": 0.5900182127952576, "rewards/rejected": -0.3697994351387024, "step": 3460 }, { "epoch": 0.9, "learning_rate": 3.8964330113799363e-07, "logits/chosen": -5.364045143127441, "logits/rejected": -4.883383750915527, "logps/chosen": -681.688720703125, "logps/rejected": -499.9970703125, "loss": 0.5921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.21592795848846436, "rewards/margins": 0.6274440884590149, "rewards/rejected": -0.41151610016822815, "step": 3470 }, { "epoch": 0.9, "learning_rate": 3.891651525294061e-07, "logits/chosen": -5.276228904724121, "logits/rejected": -5.2895121574401855, "logps/chosen": -614.3947143554688, "logps/rejected": -475.6708984375, "loss": 0.5556, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.10407014936208725, "rewards/margins": 0.7473887205123901, "rewards/rejected": -0.6433185338973999, "step": 3480 }, { "epoch": 0.9, "learning_rate": 3.8868700392081857e-07, "logits/chosen": -5.489828586578369, "logits/rejected": -4.80216646194458, "logps/chosen": -562.1911010742188, "logps/rejected": -470.89288330078125, "loss": 0.5681, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.05695943906903267, "rewards/margins": 0.4966852068901062, "rewards/rejected": -0.439725786447525, "step": 3490 }, { "epoch": 0.9, "learning_rate": 3.8820885531223104e-07, "logits/chosen": -4.996386528015137, "logits/rejected": -5.114541530609131, "logps/chosen": -578.0485229492188, "logps/rejected": -488.44598388671875, "loss": 0.5561, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.14161017537117004, "rewards/margins": 0.5712050199508667, "rewards/rejected": -0.42959481477737427, "step": 3500 }, { "epoch": 0.91, "learning_rate": 3.877307067036435e-07, "logits/chosen": -5.676054954528809, "logits/rejected": -5.241856575012207, "logps/chosen": -583.5451049804688, "logps/rejected": -433.76727294921875, "loss": 0.5806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.11528865993022919, "rewards/margins": 0.5856851935386658, "rewards/rejected": -0.4703964591026306, "step": 3510 }, { "epoch": 0.91, "learning_rate": 3.8725255809505593e-07, "logits/chosen": -5.222023963928223, "logits/rejected": -5.025261878967285, "logps/chosen": -628.2757568359375, "logps/rejected": -505.69781494140625, "loss": 0.5492, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.10591794550418854, "rewards/margins": 0.5322853326797485, "rewards/rejected": -0.4263673722743988, "step": 3520 }, { "epoch": 0.91, "learning_rate": 3.867744094864684e-07, "logits/chosen": -5.408780097961426, "logits/rejected": -5.114502906799316, "logps/chosen": -593.4816284179688, "logps/rejected": -454.79119873046875, "loss": 0.5489, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.16902071237564087, "rewards/margins": 0.5994380116462708, "rewards/rejected": -0.43041735887527466, "step": 3530 }, { "epoch": 0.91, "learning_rate": 3.862962608778808e-07, "logits/chosen": -5.284912586212158, "logits/rejected": -4.729750156402588, "logps/chosen": -557.8255004882812, "logps/rejected": -450.35345458984375, "loss": 0.5511, "rewards/accuracies": 0.6875, "rewards/chosen": 0.09686243534088135, "rewards/margins": 0.5724712610244751, "rewards/rejected": -0.4756087362766266, "step": 3540 }, { "epoch": 0.92, "learning_rate": 3.858181122692933e-07, "logits/chosen": -5.413939476013184, "logits/rejected": -4.913063049316406, "logps/chosen": -595.08642578125, "logps/rejected": -509.8008728027344, "loss": 0.5994, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.08552693575620651, "rewards/margins": 0.3935163915157318, "rewards/rejected": -0.3079894483089447, "step": 3550 }, { "epoch": 0.92, "learning_rate": 3.853399636607057e-07, "logits/chosen": -5.225398540496826, "logits/rejected": -4.823004722595215, "logps/chosen": -515.8453369140625, "logps/rejected": -437.70196533203125, "loss": 0.6004, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.007819155231118202, "rewards/margins": 0.5654599070549011, "rewards/rejected": -0.5576408505439758, "step": 3560 }, { "epoch": 0.92, "learning_rate": 3.848618150521182e-07, "logits/chosen": -5.227397918701172, "logits/rejected": -5.073920249938965, "logps/chosen": -605.6839599609375, "logps/rejected": -466.0731506347656, "loss": 0.5601, "rewards/accuracies": 0.6875, "rewards/chosen": 0.12342868745326996, "rewards/margins": 0.5423129796981812, "rewards/rejected": -0.4188843369483948, "step": 3570 }, { "epoch": 0.92, "learning_rate": 3.8438366644353066e-07, "logits/chosen": -5.428784370422363, "logits/rejected": -5.07532262802124, "logps/chosen": -635.8427124023438, "logps/rejected": -445.77197265625, "loss": 0.58, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.22827443480491638, "rewards/margins": 0.6386140584945679, "rewards/rejected": -0.41033944487571716, "step": 3580 }, { "epoch": 0.93, "learning_rate": 3.839055178349431e-07, "logits/chosen": -5.210691452026367, "logits/rejected": -5.022581100463867, "logps/chosen": -536.1075439453125, "logps/rejected": -418.1961364746094, "loss": 0.5324, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.009803907945752144, "rewards/margins": 0.5435864329338074, "rewards/rejected": -0.553390383720398, "step": 3590 }, { "epoch": 0.93, "learning_rate": 3.8342736922635555e-07, "logits/chosen": -5.1452317237854, "logits/rejected": -4.737178802490234, "logps/chosen": -499.33154296875, "logps/rejected": -425.4757385253906, "loss": 0.5993, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08329282701015472, "rewards/margins": 0.3500189781188965, "rewards/rejected": -0.43331179022789, "step": 3600 }, { "epoch": 0.93, "learning_rate": 3.8294922061776797e-07, "logits/chosen": -5.234314918518066, "logits/rejected": -5.322993278503418, "logps/chosen": -484.77154541015625, "logps/rejected": -416.435546875, "loss": 0.5598, "rewards/accuracies": 0.6875, "rewards/chosen": 0.11932976543903351, "rewards/margins": 0.5840135812759399, "rewards/rejected": -0.46468386054039, "step": 3610 }, { "epoch": 0.93, "learning_rate": 3.8247107200918044e-07, "logits/chosen": -5.635892391204834, "logits/rejected": -5.254748344421387, "logps/chosen": -519.7491455078125, "logps/rejected": -443.24298095703125, "loss": 0.6004, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02981175109744072, "rewards/margins": 0.47477102279663086, "rewards/rejected": -0.44495922327041626, "step": 3620 }, { "epoch": 0.94, "learning_rate": 3.819929234005929e-07, "logits/chosen": -5.363807678222656, "logits/rejected": -4.731595516204834, "logps/chosen": -557.9256591796875, "logps/rejected": -396.948486328125, "loss": 0.5833, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.04855251684784889, "rewards/margins": 0.5061682462692261, "rewards/rejected": -0.4576157033443451, "step": 3630 }, { "epoch": 0.94, "learning_rate": 3.8151477479200533e-07, "logits/chosen": -5.554810523986816, "logits/rejected": -4.834834098815918, "logps/chosen": -597.5949096679688, "logps/rejected": -388.9228515625, "loss": 0.5879, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.09443709254264832, "rewards/margins": 0.6906223297119141, "rewards/rejected": -0.5961852073669434, "step": 3640 }, { "epoch": 0.94, "learning_rate": 3.810366261834178e-07, "logits/chosen": -5.370425224304199, "logits/rejected": -4.596127986907959, "logps/chosen": -625.6978759765625, "logps/rejected": -417.177001953125, "loss": 0.5554, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.15079176425933838, "rewards/margins": 0.6242234706878662, "rewards/rejected": -0.4734317362308502, "step": 3650 }, { "epoch": 0.95, "learning_rate": 3.805584775748302e-07, "logits/chosen": -4.957612037658691, "logits/rejected": -5.150192737579346, "logps/chosen": -538.17724609375, "logps/rejected": -488.86846923828125, "loss": 0.5296, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.20287001132965088, "rewards/margins": 0.6681716442108154, "rewards/rejected": -0.4653015732765198, "step": 3660 }, { "epoch": 0.95, "learning_rate": 3.800803289662427e-07, "logits/chosen": -5.350467681884766, "logits/rejected": -4.754372596740723, "logps/chosen": -665.8916625976562, "logps/rejected": -488.57415771484375, "loss": 0.6094, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.06955035775899887, "rewards/margins": 0.47966670989990234, "rewards/rejected": -0.4101163446903229, "step": 3670 }, { "epoch": 0.95, "learning_rate": 3.796021803576551e-07, "logits/chosen": -5.416325569152832, "logits/rejected": -5.334246635437012, "logps/chosen": -625.2135620117188, "logps/rejected": -527.814208984375, "loss": 0.5526, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.05157012864947319, "rewards/margins": 0.5192713737487793, "rewards/rejected": -0.4677012860774994, "step": 3680 }, { "epoch": 0.95, "learning_rate": 3.791240317490676e-07, "logits/chosen": -5.26553201675415, "logits/rejected": -4.729050636291504, "logps/chosen": -566.4998779296875, "logps/rejected": -367.4771728515625, "loss": 0.5432, "rewards/accuracies": 0.75, "rewards/chosen": 0.26523613929748535, "rewards/margins": 0.7287276983261108, "rewards/rejected": -0.46349161863327026, "step": 3690 }, { "epoch": 0.96, "learning_rate": 3.7864588314048006e-07, "logits/chosen": -5.016963958740234, "logits/rejected": -5.020668983459473, "logps/chosen": -531.586669921875, "logps/rejected": -465.846923828125, "loss": 0.5837, "rewards/accuracies": 0.75, "rewards/chosen": 0.09061409533023834, "rewards/margins": 0.7055541276931763, "rewards/rejected": -0.6149401068687439, "step": 3700 }, { "epoch": 0.96, "learning_rate": 3.781677345318925e-07, "logits/chosen": -5.192749977111816, "logits/rejected": -5.0426812171936035, "logps/chosen": -572.2974243164062, "logps/rejected": -500.06170654296875, "loss": 0.5215, "rewards/accuracies": 0.75, "rewards/chosen": 0.18328741192817688, "rewards/margins": 0.6538041234016418, "rewards/rejected": -0.4705166816711426, "step": 3710 }, { "epoch": 0.96, "learning_rate": 3.7768958592330495e-07, "logits/chosen": -5.25262975692749, "logits/rejected": -4.654170036315918, "logps/chosen": -524.9263916015625, "logps/rejected": -419.98394775390625, "loss": 0.5451, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02605457976460457, "rewards/margins": 0.6331348419189453, "rewards/rejected": -0.6591894626617432, "step": 3720 }, { "epoch": 0.96, "learning_rate": 3.7721143731471737e-07, "logits/chosen": -5.781769752502441, "logits/rejected": -4.743674278259277, "logps/chosen": -580.6263427734375, "logps/rejected": -404.5472106933594, "loss": 0.5834, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.0012973040575161576, "rewards/margins": 0.39890122413635254, "rewards/rejected": -0.39760392904281616, "step": 3730 }, { "epoch": 0.97, "learning_rate": 3.7673328870612984e-07, "logits/chosen": -5.383538722991943, "logits/rejected": -4.931005954742432, "logps/chosen": -672.62255859375, "logps/rejected": -450.92669677734375, "loss": 0.5558, "rewards/accuracies": 0.75, "rewards/chosen": 0.13410404324531555, "rewards/margins": 0.6388300657272339, "rewards/rejected": -0.5047260522842407, "step": 3740 }, { "epoch": 0.97, "learning_rate": 3.7625514009754226e-07, "logits/chosen": -5.123068332672119, "logits/rejected": -4.85269832611084, "logps/chosen": -636.0330810546875, "logps/rejected": -450.9974060058594, "loss": 0.5793, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.055124081671237946, "rewards/margins": 0.501170814037323, "rewards/rejected": -0.4460466802120209, "step": 3750 }, { "epoch": 0.97, "learning_rate": 3.7577699148895473e-07, "logits/chosen": -5.056084632873535, "logits/rejected": -5.145336627960205, "logps/chosen": -597.5616455078125, "logps/rejected": -529.8365478515625, "loss": 0.582, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.0722968727350235, "rewards/margins": 0.4726008474826813, "rewards/rejected": -0.40030398964881897, "step": 3760 }, { "epoch": 0.97, "learning_rate": 3.7529884288036725e-07, "logits/chosen": -5.07494592666626, "logits/rejected": -5.292084693908691, "logps/chosen": -566.7371826171875, "logps/rejected": -493.9866638183594, "loss": 0.5714, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.011423865333199501, "rewards/margins": 0.46510928869247437, "rewards/rejected": -0.4765331745147705, "step": 3770 }, { "epoch": 0.98, "learning_rate": 3.7482069427177967e-07, "logits/chosen": -5.253510475158691, "logits/rejected": -4.945786952972412, "logps/chosen": -625.378662109375, "logps/rejected": -435.16357421875, "loss": 0.5835, "rewards/accuracies": 0.6875, "rewards/chosen": 0.21430380642414093, "rewards/margins": 0.6482641696929932, "rewards/rejected": -0.4339603781700134, "step": 3780 }, { "epoch": 0.98, "learning_rate": 3.7434254566319215e-07, "logits/chosen": -4.915932655334473, "logits/rejected": -4.489306926727295, "logps/chosen": -642.9627685546875, "logps/rejected": -466.03778076171875, "loss": 0.5884, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.1101498231291771, "rewards/margins": 0.6005399227142334, "rewards/rejected": -0.49039021134376526, "step": 3790 }, { "epoch": 0.98, "learning_rate": 3.7386439705460456e-07, "logits/chosen": -5.35344934463501, "logits/rejected": -4.786173343658447, "logps/chosen": -617.6554565429688, "logps/rejected": -431.93182373046875, "loss": 0.5198, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17522576451301575, "rewards/margins": 0.758629322052002, "rewards/rejected": -0.5834035277366638, "step": 3800 }, { "epoch": 0.98, "learning_rate": 3.7338624844601704e-07, "logits/chosen": -5.351283073425293, "logits/rejected": -5.241473197937012, "logps/chosen": -545.7350463867188, "logps/rejected": -436.51275634765625, "loss": 0.5698, "rewards/accuracies": 0.625, "rewards/chosen": 0.02534036710858345, "rewards/margins": 0.4086519777774811, "rewards/rejected": -0.3833116292953491, "step": 3810 }, { "epoch": 0.99, "learning_rate": 3.7290809983742945e-07, "logits/chosen": -4.974503517150879, "logits/rejected": -5.014293670654297, "logps/chosen": -613.312744140625, "logps/rejected": -507.64910888671875, "loss": 0.5164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.125700443983078, "rewards/margins": 0.6090840697288513, "rewards/rejected": -0.4833836555480957, "step": 3820 }, { "epoch": 0.99, "learning_rate": 3.7242995122884193e-07, "logits/chosen": -4.954161643981934, "logits/rejected": -4.5992279052734375, "logps/chosen": -642.2354736328125, "logps/rejected": -467.0572204589844, "loss": 0.5867, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.1267567276954651, "rewards/margins": 0.5006474256515503, "rewards/rejected": -0.3738906979560852, "step": 3830 }, { "epoch": 0.99, "learning_rate": 3.719518026202544e-07, "logits/chosen": -5.680801868438721, "logits/rejected": -5.667754173278809, "logps/chosen": -538.3419799804688, "logps/rejected": -464.53082275390625, "loss": 0.5402, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.021589960902929306, "rewards/margins": 0.5832234621047974, "rewards/rejected": -0.5616335272789001, "step": 3840 }, { "epoch": 0.99, "learning_rate": 3.714736540116668e-07, "logits/chosen": -5.239333629608154, "logits/rejected": -5.385666847229004, "logps/chosen": -614.7886962890625, "logps/rejected": -476.4855041503906, "loss": 0.5349, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.18404516577720642, "rewards/margins": 0.6751847267150879, "rewards/rejected": -0.49113956093788147, "step": 3850 }, { "epoch": 1.0, "learning_rate": 3.709955054030793e-07, "logits/chosen": -5.351391792297363, "logits/rejected": -4.923891544342041, "logps/chosen": -578.4031982421875, "logps/rejected": -399.3257141113281, "loss": 0.5316, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.03805885836482048, "rewards/margins": 0.5348460674285889, "rewards/rejected": -0.5729048848152161, "step": 3860 }, { "epoch": 1.0, "learning_rate": 3.705173567944917e-07, "logits/chosen": -5.187680721282959, "logits/rejected": -4.8878655433654785, "logps/chosen": -552.7688598632812, "logps/rejected": -492.65740966796875, "loss": 0.5469, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03300458937883377, "rewards/margins": 0.5194489359855652, "rewards/rejected": -0.5524536371231079, "step": 3870 }, { "epoch": 1.0, "learning_rate": 3.700392081859042e-07, "logits/chosen": -5.047120094299316, "logits/rejected": -4.787654399871826, "logps/chosen": -593.0324096679688, "logps/rejected": -507.2745056152344, "loss": 0.514, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.1752258837223053, "rewards/margins": 0.5565892457962036, "rewards/rejected": -0.3813634514808655, "step": 3880 }, { "epoch": 1.0, "learning_rate": 3.695610595773166e-07, "logits/chosen": -5.436816215515137, "logits/rejected": -5.12469482421875, "logps/chosen": -562.8422241210938, "logps/rejected": -432.401123046875, "loss": 0.4973, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17080990970134735, "rewards/margins": 0.7559109926223755, "rewards/rejected": -0.5851010084152222, "step": 3890 }, { "epoch": 1.01, "learning_rate": 3.6908291096872907e-07, "logits/chosen": -5.309493064880371, "logits/rejected": -5.009668350219727, "logps/chosen": -554.4617919921875, "logps/rejected": -408.71026611328125, "loss": 0.4416, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.30683451890945435, "rewards/margins": 0.8760702013969421, "rewards/rejected": -0.5692356824874878, "step": 3900 }, { "epoch": 1.01, "learning_rate": 3.6860476236014154e-07, "logits/chosen": -5.419289588928223, "logits/rejected": -5.23163366317749, "logps/chosen": -502.24468994140625, "logps/rejected": -453.9246520996094, "loss": 0.479, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.08522473275661469, "rewards/margins": 0.785680890083313, "rewards/rejected": -0.7004560828208923, "step": 3910 }, { "epoch": 1.01, "learning_rate": 3.6812661375155396e-07, "logits/chosen": -5.3849101066589355, "logits/rejected": -4.762551784515381, "logps/chosen": -588.3854370117188, "logps/rejected": -475.16058349609375, "loss": 0.5187, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.16400203108787537, "rewards/margins": 0.8451210856437683, "rewards/rejected": -0.6811191439628601, "step": 3920 }, { "epoch": 1.01, "learning_rate": 3.6764846514296643e-07, "logits/chosen": -4.9725799560546875, "logits/rejected": -4.636596202850342, "logps/chosen": -531.9058837890625, "logps/rejected": -430.31549072265625, "loss": 0.5533, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.2137695848941803, "rewards/margins": 0.7480888962745667, "rewards/rejected": -0.534319281578064, "step": 3930 }, { "epoch": 1.02, "learning_rate": 3.6717031653437885e-07, "logits/chosen": -5.10056209564209, "logits/rejected": -5.0999016761779785, "logps/chosen": -577.2899780273438, "logps/rejected": -479.6837463378906, "loss": 0.5018, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.2947807013988495, "rewards/margins": 0.9313607215881348, "rewards/rejected": -0.6365799903869629, "step": 3940 }, { "epoch": 1.02, "learning_rate": 3.666921679257913e-07, "logits/chosen": -5.439792156219482, "logits/rejected": -5.197112083435059, "logps/chosen": -526.4550170898438, "logps/rejected": -427.547607421875, "loss": 0.4819, "rewards/accuracies": 0.75, "rewards/chosen": 0.19371092319488525, "rewards/margins": 0.7507823705673218, "rewards/rejected": -0.5570713877677917, "step": 3950 }, { "epoch": 1.02, "learning_rate": 3.6621401931720374e-07, "logits/chosen": -5.115017890930176, "logits/rejected": -5.374227523803711, "logps/chosen": -482.77081298828125, "logps/rejected": -450.49041748046875, "loss": 0.5182, "rewards/accuracies": 0.75, "rewards/chosen": 0.11968159675598145, "rewards/margins": 0.7507710456848145, "rewards/rejected": -0.6310895085334778, "step": 3960 }, { "epoch": 1.03, "learning_rate": 3.657358707086162e-07, "logits/chosen": -5.213345527648926, "logits/rejected": -5.579896450042725, "logps/chosen": -555.4581909179688, "logps/rejected": -458.87548828125, "loss": 0.5172, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.2391648292541504, "rewards/margins": 0.7623635530471802, "rewards/rejected": -0.5231987237930298, "step": 3970 }, { "epoch": 1.03, "learning_rate": 3.652577221000287e-07, "logits/chosen": -5.180166244506836, "logits/rejected": -5.453112602233887, "logps/chosen": -572.6932373046875, "logps/rejected": -521.3741455078125, "loss": 0.4672, "rewards/accuracies": 0.75, "rewards/chosen": 0.24902081489562988, "rewards/margins": 0.7214894890785217, "rewards/rejected": -0.47246870398521423, "step": 3980 }, { "epoch": 1.03, "learning_rate": 3.647795734914411e-07, "logits/chosen": -5.3087053298950195, "logits/rejected": -4.639279365539551, "logps/chosen": -539.4674072265625, "logps/rejected": -405.54193115234375, "loss": 0.4896, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.0515243299305439, "rewards/margins": 0.657561182975769, "rewards/rejected": -0.7090855836868286, "step": 3990 }, { "epoch": 1.03, "learning_rate": 3.643014248828536e-07, "logits/chosen": -5.403778553009033, "logits/rejected": -5.24149227142334, "logps/chosen": -603.8173828125, "logps/rejected": -517.2132568359375, "loss": 0.5273, "rewards/accuracies": 0.75, "rewards/chosen": 0.21675805747509003, "rewards/margins": 0.6856445074081421, "rewards/rejected": -0.4688865542411804, "step": 4000 }, { "epoch": 1.03, "eval_logits/chosen": -5.289242267608643, "eval_logits/rejected": -4.997243881225586, "eval_logps/chosen": -587.7738037109375, "eval_logps/rejected": -447.9891357421875, "eval_loss": 0.5595789551734924, "eval_rewards/accuracies": 0.7039999961853027, "eval_rewards/chosen": 0.04957044497132301, "eval_rewards/margins": 0.5944339632987976, "eval_rewards/rejected": -0.5448634624481201, "eval_runtime": 103.7848, "eval_samples_per_second": 19.271, "eval_steps_per_second": 1.204, "step": 4000 }, { "epoch": 1.04, "learning_rate": 3.63823276274266e-07, "logits/chosen": -5.143495082855225, "logits/rejected": -4.942715167999268, "logps/chosen": -617.0081787109375, "logps/rejected": -486.1180725097656, "loss": 0.4755, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1605353057384491, "rewards/margins": 0.8345960378646851, "rewards/rejected": -0.6740607619285583, "step": 4010 }, { "epoch": 1.04, "learning_rate": 3.6334512766567847e-07, "logits/chosen": -5.42038631439209, "logits/rejected": -4.5594072341918945, "logps/chosen": -586.0281372070312, "logps/rejected": -479.0580139160156, "loss": 0.436, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2582244277000427, "rewards/margins": 0.9715875387191772, "rewards/rejected": -0.7133631706237793, "step": 4020 }, { "epoch": 1.04, "learning_rate": 3.628669790570909e-07, "logits/chosen": -4.905037879943848, "logits/rejected": -5.122579574584961, "logps/chosen": -576.1090087890625, "logps/rejected": -504.361328125, "loss": 0.4593, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.30206626653671265, "rewards/margins": 0.8874837756156921, "rewards/rejected": -0.5854175090789795, "step": 4030 }, { "epoch": 1.04, "learning_rate": 3.6238883044850336e-07, "logits/chosen": -4.96010684967041, "logits/rejected": -4.782035827636719, "logps/chosen": -641.2225341796875, "logps/rejected": -483.170654296875, "loss": 0.5368, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.08900664001703262, "rewards/margins": 0.5529698729515076, "rewards/rejected": -0.46396327018737793, "step": 4040 }, { "epoch": 1.05, "learning_rate": 3.6191068183991583e-07, "logits/chosen": -5.221703052520752, "logits/rejected": -5.097041130065918, "logps/chosen": -653.7587890625, "logps/rejected": -457.44708251953125, "loss": 0.4515, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2498849332332611, "rewards/margins": 1.007006049156189, "rewards/rejected": -0.7571210861206055, "step": 4050 }, { "epoch": 1.05, "learning_rate": 3.6143253323132825e-07, "logits/chosen": -5.370955467224121, "logits/rejected": -4.379670143127441, "logps/chosen": -643.0932006835938, "logps/rejected": -379.33221435546875, "loss": 0.4911, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.11667200177907944, "rewards/margins": 0.7528603672981262, "rewards/rejected": -0.6361883878707886, "step": 4060 }, { "epoch": 1.05, "learning_rate": 3.609543846227408e-07, "logits/chosen": -4.9756927490234375, "logits/rejected": -5.006033897399902, "logps/chosen": -577.1456298828125, "logps/rejected": -427.2825622558594, "loss": 0.464, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.3538059890270233, "rewards/margins": 0.8790301084518433, "rewards/rejected": -0.5252242088317871, "step": 4070 }, { "epoch": 1.05, "learning_rate": 3.604762360141532e-07, "logits/chosen": -5.354995250701904, "logits/rejected": -4.680268287658691, "logps/chosen": -600.0069580078125, "logps/rejected": -457.77398681640625, "loss": 0.5183, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.24881811439990997, "rewards/margins": 0.7788727879524231, "rewards/rejected": -0.5300546884536743, "step": 4080 }, { "epoch": 1.06, "learning_rate": 3.5999808740556566e-07, "logits/chosen": -4.85617733001709, "logits/rejected": -5.062987327575684, "logps/chosen": -499.90185546875, "logps/rejected": -522.6068115234375, "loss": 0.4809, "rewards/accuracies": 0.6875, "rewards/chosen": 0.22132067382335663, "rewards/margins": 0.7045014500617981, "rewards/rejected": -0.4831807017326355, "step": 4090 }, { "epoch": 1.06, "learning_rate": 3.595199387969781e-07, "logits/chosen": -5.33019495010376, "logits/rejected": -4.81057071685791, "logps/chosen": -672.59423828125, "logps/rejected": -490.0067443847656, "loss": 0.4386, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.4544462263584137, "rewards/margins": 0.9442869424819946, "rewards/rejected": -0.48984068632125854, "step": 4100 }, { "epoch": 1.06, "learning_rate": 3.5904179018839056e-07, "logits/chosen": -5.182413101196289, "logits/rejected": -4.785430431365967, "logps/chosen": -610.5725708007812, "logps/rejected": -446.28546142578125, "loss": 0.4484, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2851361334323883, "rewards/margins": 0.8665271997451782, "rewards/rejected": -0.5813909769058228, "step": 4110 }, { "epoch": 1.06, "learning_rate": 3.5856364157980303e-07, "logits/chosen": -5.490721225738525, "logits/rejected": -5.067840099334717, "logps/chosen": -676.1891479492188, "logps/rejected": -486.19268798828125, "loss": 0.5089, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.23903104662895203, "rewards/margins": 0.8460933566093445, "rewards/rejected": -0.6070621609687805, "step": 4120 }, { "epoch": 1.07, "learning_rate": 3.5808549297121545e-07, "logits/chosen": -4.876012325286865, "logits/rejected": -4.553103923797607, "logps/chosen": -571.106689453125, "logps/rejected": -475.0506896972656, "loss": 0.4598, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.2309737205505371, "rewards/margins": 0.8034477233886719, "rewards/rejected": -0.5724740624427795, "step": 4130 }, { "epoch": 1.07, "learning_rate": 3.576073443626279e-07, "logits/chosen": -5.323142051696777, "logits/rejected": -5.076000690460205, "logps/chosen": -580.35986328125, "logps/rejected": -463.57037353515625, "loss": 0.5371, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.2327614724636078, "rewards/margins": 0.7827898263931274, "rewards/rejected": -0.5500284433364868, "step": 4140 }, { "epoch": 1.07, "learning_rate": 3.5712919575404034e-07, "logits/chosen": -5.3639116287231445, "logits/rejected": -5.327073097229004, "logps/chosen": -568.4605102539062, "logps/rejected": -499.67205810546875, "loss": 0.4953, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.239267498254776, "rewards/margins": 0.74079430103302, "rewards/rejected": -0.5015267133712769, "step": 4150 }, { "epoch": 1.07, "learning_rate": 3.566510471454528e-07, "logits/chosen": -5.4054412841796875, "logits/rejected": -4.813348293304443, "logps/chosen": -665.2691040039062, "logps/rejected": -528.4579467773438, "loss": 0.4619, "rewards/accuracies": 0.875, "rewards/chosen": 0.2826422452926636, "rewards/margins": 0.7881465554237366, "rewards/rejected": -0.5055042505264282, "step": 4160 }, { "epoch": 1.08, "learning_rate": 3.5617289853686523e-07, "logits/chosen": -4.859219074249268, "logits/rejected": -4.4022064208984375, "logps/chosen": -641.3169555664062, "logps/rejected": -426.28466796875, "loss": 0.4602, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.2993168234825134, "rewards/margins": 1.0932554006576538, "rewards/rejected": -0.7939385175704956, "step": 4170 }, { "epoch": 1.08, "learning_rate": 3.556947499282777e-07, "logits/chosen": -5.400458812713623, "logits/rejected": -5.047917366027832, "logps/chosen": -584.2467041015625, "logps/rejected": -460.8126525878906, "loss": 0.5046, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.24151258170604706, "rewards/margins": 0.7608014345169067, "rewards/rejected": -0.5192888975143433, "step": 4180 }, { "epoch": 1.08, "learning_rate": 3.5521660131969017e-07, "logits/chosen": -5.186229705810547, "logits/rejected": -4.837376594543457, "logps/chosen": -544.3914184570312, "logps/rejected": -383.8414611816406, "loss": 0.4492, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.06400472670793533, "rewards/margins": 0.6827914118766785, "rewards/rejected": -0.6187866926193237, "step": 4190 }, { "epoch": 1.08, "learning_rate": 3.547384527111026e-07, "logits/chosen": -5.1177191734313965, "logits/rejected": -4.894205093383789, "logps/chosen": -603.7515258789062, "logps/rejected": -485.48345947265625, "loss": 0.4461, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.32107067108154297, "rewards/margins": 0.9689370393753052, "rewards/rejected": -0.647866427898407, "step": 4200 }, { "epoch": 1.09, "learning_rate": 3.5426030410251506e-07, "logits/chosen": -5.047830104827881, "logits/rejected": -5.408112049102783, "logps/chosen": -549.6142578125, "logps/rejected": -425.99932861328125, "loss": 0.508, "rewards/accuracies": 0.75, "rewards/chosen": 0.1586228311061859, "rewards/margins": 0.714598536491394, "rewards/rejected": -0.5559756755828857, "step": 4210 }, { "epoch": 1.09, "learning_rate": 3.537821554939275e-07, "logits/chosen": -5.1638288497924805, "logits/rejected": -4.8847975730896, "logps/chosen": -662.5618896484375, "logps/rejected": -487.08929443359375, "loss": 0.4455, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.34987086057662964, "rewards/margins": 0.9532626271247864, "rewards/rejected": -0.6033917665481567, "step": 4220 }, { "epoch": 1.09, "learning_rate": 3.5330400688533995e-07, "logits/chosen": -5.125457763671875, "logits/rejected": -5.046628952026367, "logps/chosen": -602.9808349609375, "logps/rejected": -463.4715881347656, "loss": 0.4232, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.21779534220695496, "rewards/margins": 1.039551019668579, "rewards/rejected": -0.821755588054657, "step": 4230 }, { "epoch": 1.09, "learning_rate": 3.5282585827675237e-07, "logits/chosen": -4.800747871398926, "logits/rejected": -4.64938497543335, "logps/chosen": -567.0702514648438, "logps/rejected": -496.3629455566406, "loss": 0.4835, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.09065724164247513, "rewards/margins": 0.7069092392921448, "rewards/rejected": -0.6162520051002502, "step": 4240 }, { "epoch": 1.1, "learning_rate": 3.5234770966816484e-07, "logits/chosen": -5.346933364868164, "logits/rejected": -4.756340026855469, "logps/chosen": -675.7818603515625, "logps/rejected": -496.15753173828125, "loss": 0.4677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.39993295073509216, "rewards/margins": 0.9263052940368652, "rewards/rejected": -0.5263723134994507, "step": 4250 }, { "epoch": 1.1, "learning_rate": 3.518695610595773e-07, "logits/chosen": -4.939312934875488, "logits/rejected": -5.487679958343506, "logps/chosen": -480.0516662597656, "logps/rejected": -403.5835876464844, "loss": 0.5371, "rewards/accuracies": 0.75, "rewards/chosen": 0.07615786790847778, "rewards/margins": 0.6894338726997375, "rewards/rejected": -0.6132760047912598, "step": 4260 }, { "epoch": 1.1, "learning_rate": 3.5139141245098973e-07, "logits/chosen": -4.930965423583984, "logits/rejected": -4.9474287033081055, "logps/chosen": -566.572021484375, "logps/rejected": -494.598388671875, "loss": 0.4699, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.15772639214992523, "rewards/margins": 0.8385675549507141, "rewards/rejected": -0.6808411478996277, "step": 4270 }, { "epoch": 1.11, "learning_rate": 3.509132638424022e-07, "logits/chosen": -5.279797554016113, "logits/rejected": -4.928247928619385, "logps/chosen": -647.3078002929688, "logps/rejected": -502.96453857421875, "loss": 0.472, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.18411852419376373, "rewards/margins": 0.7476534247398376, "rewards/rejected": -0.5635348558425903, "step": 4280 }, { "epoch": 1.11, "learning_rate": 3.504351152338146e-07, "logits/chosen": -5.012953281402588, "logits/rejected": -5.094266414642334, "logps/chosen": -587.7424926757812, "logps/rejected": -460.05340576171875, "loss": 0.409, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2549847960472107, "rewards/margins": 0.9920985102653503, "rewards/rejected": -0.7371136546134949, "step": 4290 }, { "epoch": 1.11, "learning_rate": 3.499569666252271e-07, "logits/chosen": -5.233648300170898, "logits/rejected": -4.978647232055664, "logps/chosen": -611.4915771484375, "logps/rejected": -423.711669921875, "loss": 0.4798, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.1741398125886917, "rewards/margins": 0.7842845320701599, "rewards/rejected": -0.6101447343826294, "step": 4300 }, { "epoch": 1.11, "learning_rate": 3.494788180166395e-07, "logits/chosen": -5.133049011230469, "logits/rejected": -4.831203460693359, "logps/chosen": -603.1854858398438, "logps/rejected": -405.88897705078125, "loss": 0.4922, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.00951471645385027, "rewards/margins": 0.7684996724128723, "rewards/rejected": -0.7780143022537231, "step": 4310 }, { "epoch": 1.12, "learning_rate": 3.49000669408052e-07, "logits/chosen": -5.072078227996826, "logits/rejected": -5.589784622192383, "logps/chosen": -615.3328857421875, "logps/rejected": -560.7830200195312, "loss": 0.4941, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.16032442450523376, "rewards/margins": 0.7718590497970581, "rewards/rejected": -0.6115346550941467, "step": 4320 }, { "epoch": 1.12, "learning_rate": 3.4852252079946446e-07, "logits/chosen": -5.259990692138672, "logits/rejected": -5.528666973114014, "logps/chosen": -564.0557861328125, "logps/rejected": -449.3819885253906, "loss": 0.5229, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.030347129330039024, "rewards/margins": 0.5280643701553345, "rewards/rejected": -0.5584114789962769, "step": 4330 }, { "epoch": 1.12, "learning_rate": 3.480443721908769e-07, "logits/chosen": -5.264027118682861, "logits/rejected": -4.832406044006348, "logps/chosen": -584.8206787109375, "logps/rejected": -415.70623779296875, "loss": 0.4924, "rewards/accuracies": 0.75, "rewards/chosen": 0.14156243205070496, "rewards/margins": 0.7451959848403931, "rewards/rejected": -0.6036335825920105, "step": 4340 }, { "epoch": 1.12, "learning_rate": 3.4756622358228935e-07, "logits/chosen": -5.288492202758789, "logits/rejected": -4.650526523590088, "logps/chosen": -638.345703125, "logps/rejected": -481.65655517578125, "loss": 0.4505, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.29413744807243347, "rewards/margins": 0.9630177617073059, "rewards/rejected": -0.6688803434371948, "step": 4350 }, { "epoch": 1.13, "learning_rate": 3.4708807497370177e-07, "logits/chosen": -5.033524513244629, "logits/rejected": -4.954553604125977, "logps/chosen": -529.1849365234375, "logps/rejected": -426.93914794921875, "loss": 0.5172, "rewards/accuracies": 0.8125, "rewards/chosen": 0.17328806221485138, "rewards/margins": 0.6593273282051086, "rewards/rejected": -0.48603925108909607, "step": 4360 }, { "epoch": 1.13, "learning_rate": 3.466099263651143e-07, "logits/chosen": -5.517739772796631, "logits/rejected": -4.999269008636475, "logps/chosen": -471.56597900390625, "logps/rejected": -432.07879638671875, "loss": 0.4655, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.1718321591615677, "rewards/margins": 0.851507842540741, "rewards/rejected": -0.6796756982803345, "step": 4370 }, { "epoch": 1.13, "learning_rate": 3.461317777565267e-07, "logits/chosen": -5.441883563995361, "logits/rejected": -5.286880970001221, "logps/chosen": -550.421630859375, "logps/rejected": -363.0999450683594, "loss": 0.444, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.26023930311203003, "rewards/margins": 0.9786232709884644, "rewards/rejected": -0.7183840274810791, "step": 4380 }, { "epoch": 1.13, "learning_rate": 3.456536291479392e-07, "logits/chosen": -5.287563800811768, "logits/rejected": -5.175783634185791, "logps/chosen": -495.90155029296875, "logps/rejected": -499.24920654296875, "loss": 0.4804, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.01820945367217064, "rewards/margins": 0.8132980465888977, "rewards/rejected": -0.7950885891914368, "step": 4390 }, { "epoch": 1.14, "learning_rate": 3.4517548053935166e-07, "logits/chosen": -5.393795013427734, "logits/rejected": -4.457180023193359, "logps/chosen": -623.179931640625, "logps/rejected": -409.51446533203125, "loss": 0.4824, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.18180891871452332, "rewards/margins": 0.8476036190986633, "rewards/rejected": -0.6657947301864624, "step": 4400 }, { "epoch": 1.14, "learning_rate": 3.446973319307641e-07, "logits/chosen": -5.538062572479248, "logits/rejected": -5.206186771392822, "logps/chosen": -610.0150146484375, "logps/rejected": -453.5187072753906, "loss": 0.4013, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.24780626595020294, "rewards/margins": 0.8602485656738281, "rewards/rejected": -0.6124423742294312, "step": 4410 }, { "epoch": 1.14, "learning_rate": 3.4421918332217655e-07, "logits/chosen": -5.0772504806518555, "logits/rejected": -4.521323204040527, "logps/chosen": -717.0723876953125, "logps/rejected": -560.2110595703125, "loss": 0.4636, "rewards/accuracies": 0.8125, "rewards/chosen": 0.24910759925842285, "rewards/margins": 1.040990948677063, "rewards/rejected": -0.7918834090232849, "step": 4420 }, { "epoch": 1.14, "learning_rate": 3.4374103471358897e-07, "logits/chosen": -4.975174903869629, "logits/rejected": -4.821107864379883, "logps/chosen": -610.9193725585938, "logps/rejected": -520.1859130859375, "loss": 0.5036, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.08060375601053238, "rewards/margins": 0.651930034160614, "rewards/rejected": -0.5713262557983398, "step": 4430 }, { "epoch": 1.15, "learning_rate": 3.4326288610500144e-07, "logits/chosen": -5.042280197143555, "logits/rejected": -4.921197891235352, "logps/chosen": -462.96270751953125, "logps/rejected": -482.5460510253906, "loss": 0.491, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.024074465036392212, "rewards/margins": 0.6643761992454529, "rewards/rejected": -0.6403016448020935, "step": 4440 }, { "epoch": 1.15, "learning_rate": 3.4278473749641386e-07, "logits/chosen": -4.933724403381348, "logits/rejected": -4.550313949584961, "logps/chosen": -507.729248046875, "logps/rejected": -400.6444396972656, "loss": 0.4422, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1858149766921997, "rewards/margins": 0.8503168821334839, "rewards/rejected": -0.6645020246505737, "step": 4450 }, { "epoch": 1.15, "learning_rate": 3.4230658888782633e-07, "logits/chosen": -4.765952110290527, "logits/rejected": -4.965706825256348, "logps/chosen": -573.6473999023438, "logps/rejected": -467.68017578125, "loss": 0.5159, "rewards/accuracies": 0.75, "rewards/chosen": 0.04189146310091019, "rewards/margins": 0.6135638952255249, "rewards/rejected": -0.5716724991798401, "step": 4460 }, { "epoch": 1.15, "learning_rate": 3.418284402792388e-07, "logits/chosen": -5.294247150421143, "logits/rejected": -5.357491493225098, "logps/chosen": -578.1068115234375, "logps/rejected": -464.01611328125, "loss": 0.4618, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.1511380970478058, "rewards/margins": 0.940453052520752, "rewards/rejected": -0.789314866065979, "step": 4470 }, { "epoch": 1.16, "learning_rate": 3.413502916706512e-07, "logits/chosen": -5.127215385437012, "logits/rejected": -5.167799472808838, "logps/chosen": -540.5990600585938, "logps/rejected": -428.33966064453125, "loss": 0.4913, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0036907524336129427, "rewards/margins": 0.6156603097915649, "rewards/rejected": -0.6193511486053467, "step": 4480 }, { "epoch": 1.16, "learning_rate": 3.408721430620637e-07, "logits/chosen": -5.48513126373291, "logits/rejected": -4.5310797691345215, "logps/chosen": -618.5097045898438, "logps/rejected": -371.42559814453125, "loss": 0.4738, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1490391343832016, "rewards/margins": 0.8799120187759399, "rewards/rejected": -0.7308728694915771, "step": 4490 }, { "epoch": 1.16, "learning_rate": 3.403939944534761e-07, "logits/chosen": -5.202020168304443, "logits/rejected": -4.442000865936279, "logps/chosen": -657.2924194335938, "logps/rejected": -466.9344787597656, "loss": 0.4815, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.2477249801158905, "rewards/margins": 0.9403997659683228, "rewards/rejected": -0.6926747560501099, "step": 4500 }, { "epoch": 1.16, "learning_rate": 3.399158458448886e-07, "logits/chosen": -5.224976062774658, "logits/rejected": -5.242306709289551, "logps/chosen": -620.6184692382812, "logps/rejected": -498.20135498046875, "loss": 0.4963, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.15591242909431458, "rewards/margins": 0.9810744524002075, "rewards/rejected": -0.8251620531082153, "step": 4510 }, { "epoch": 1.17, "learning_rate": 3.39437697236301e-07, "logits/chosen": -5.278960227966309, "logits/rejected": -5.096070766448975, "logps/chosen": -609.2638549804688, "logps/rejected": -502.0244140625, "loss": 0.4316, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.20997580885887146, "rewards/margins": 0.9024831652641296, "rewards/rejected": -0.6925074458122253, "step": 4520 }, { "epoch": 1.17, "learning_rate": 3.3895954862771347e-07, "logits/chosen": -5.444713115692139, "logits/rejected": -4.5354719161987305, "logps/chosen": -571.8021240234375, "logps/rejected": -394.55767822265625, "loss": 0.4553, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.03537260740995407, "rewards/margins": 0.8565903902053833, "rewards/rejected": -0.8212177157402039, "step": 4530 }, { "epoch": 1.17, "learning_rate": 3.3848140001912594e-07, "logits/chosen": -5.181069374084473, "logits/rejected": -5.268179416656494, "logps/chosen": -627.7296142578125, "logps/rejected": -518.2072143554688, "loss": 0.4713, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.26309072971343994, "rewards/margins": 0.8686378598213196, "rewards/rejected": -0.6055471897125244, "step": 4540 }, { "epoch": 1.17, "learning_rate": 3.3800325141053836e-07, "logits/chosen": -5.290936470031738, "logits/rejected": -5.354273796081543, "logps/chosen": -569.967529296875, "logps/rejected": -484.37371826171875, "loss": 0.4365, "rewards/accuracies": 0.875, "rewards/chosen": 0.06716804951429367, "rewards/margins": 1.0905542373657227, "rewards/rejected": -1.0233862400054932, "step": 4550 }, { "epoch": 1.18, "learning_rate": 3.3752510280195084e-07, "logits/chosen": -5.048680305480957, "logits/rejected": -4.900692939758301, "logps/chosen": -574.344970703125, "logps/rejected": -448.35003662109375, "loss": 0.4571, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.14767317473888397, "rewards/margins": 1.080702781677246, "rewards/rejected": -0.9330295324325562, "step": 4560 }, { "epoch": 1.18, "learning_rate": 3.3704695419336325e-07, "logits/chosen": -5.0239787101745605, "logits/rejected": -5.457579135894775, "logps/chosen": -543.3385620117188, "logps/rejected": -453.62603759765625, "loss": 0.5019, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.0031506293453276157, "rewards/margins": 0.7724786996841431, "rewards/rejected": -0.7693281769752502, "step": 4570 }, { "epoch": 1.18, "learning_rate": 3.365688055847757e-07, "logits/chosen": -5.187644958496094, "logits/rejected": -4.662901878356934, "logps/chosen": -643.1417846679688, "logps/rejected": -507.64862060546875, "loss": 0.5038, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.23588129878044128, "rewards/margins": 0.9344266653060913, "rewards/rejected": -0.6985453367233276, "step": 4580 }, { "epoch": 1.19, "learning_rate": 3.3609065697618815e-07, "logits/chosen": -5.236100673675537, "logits/rejected": -5.0812578201293945, "logps/chosen": -619.9830322265625, "logps/rejected": -475.45867919921875, "loss": 0.5087, "rewards/accuracies": 0.75, "rewards/chosen": 0.007851150818169117, "rewards/margins": 0.8705449104309082, "rewards/rejected": -0.8626937866210938, "step": 4590 }, { "epoch": 1.19, "learning_rate": 3.356125083676006e-07, "logits/chosen": -5.450453758239746, "logits/rejected": -5.261104583740234, "logps/chosen": -567.9622802734375, "logps/rejected": -484.0128479003906, "loss": 0.4749, "rewards/accuracies": 0.75, "rewards/chosen": 0.14608897268772125, "rewards/margins": 0.7750371694564819, "rewards/rejected": -0.6289482116699219, "step": 4600 }, { "epoch": 1.19, "learning_rate": 3.351343597590131e-07, "logits/chosen": -5.318947792053223, "logits/rejected": -5.076251029968262, "logps/chosen": -522.8140258789062, "logps/rejected": -466.1317443847656, "loss": 0.4944, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.04090728610754013, "rewards/margins": 0.6796625256538391, "rewards/rejected": -0.7205697298049927, "step": 4610 }, { "epoch": 1.19, "learning_rate": 3.346562111504255e-07, "logits/chosen": -5.24614143371582, "logits/rejected": -5.0669403076171875, "logps/chosen": -599.6268310546875, "logps/rejected": -462.60321044921875, "loss": 0.5242, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.08528577536344528, "rewards/margins": 0.8160438537597656, "rewards/rejected": -0.7307580709457397, "step": 4620 }, { "epoch": 1.2, "learning_rate": 3.34178062541838e-07, "logits/chosen": -5.051326751708984, "logits/rejected": -4.859983921051025, "logps/chosen": -676.2175903320312, "logps/rejected": -549.7678833007812, "loss": 0.4428, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.3320830464363098, "rewards/margins": 0.943957507610321, "rewards/rejected": -0.6118744611740112, "step": 4630 }, { "epoch": 1.2, "learning_rate": 3.336999139332504e-07, "logits/chosen": -5.390644073486328, "logits/rejected": -5.2298455238342285, "logps/chosen": -695.6400146484375, "logps/rejected": -477.67462158203125, "loss": 0.4435, "rewards/accuracies": 0.8125, "rewards/chosen": 0.11209597438573837, "rewards/margins": 0.843142032623291, "rewards/rejected": -0.7310460805892944, "step": 4640 }, { "epoch": 1.2, "learning_rate": 3.3322176532466287e-07, "logits/chosen": -5.209467887878418, "logits/rejected": -4.740462303161621, "logps/chosen": -628.5418090820312, "logps/rejected": -452.6725158691406, "loss": 0.4509, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.2896882891654968, "rewards/margins": 0.8467707633972168, "rewards/rejected": -0.5570825338363647, "step": 4650 }, { "epoch": 1.2, "learning_rate": 3.327436167160753e-07, "logits/chosen": -5.205310821533203, "logits/rejected": -5.341431617736816, "logps/chosen": -543.03076171875, "logps/rejected": -441.93829345703125, "loss": 0.4929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.03931446000933647, "rewards/margins": 0.8425008058547974, "rewards/rejected": -0.8031864166259766, "step": 4660 }, { "epoch": 1.21, "learning_rate": 3.322654681074878e-07, "logits/chosen": -5.384188652038574, "logits/rejected": -4.59286642074585, "logps/chosen": -639.38134765625, "logps/rejected": -462.04339599609375, "loss": 0.4632, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.16538715362548828, "rewards/margins": 0.7829943895339966, "rewards/rejected": -0.6176071166992188, "step": 4670 }, { "epoch": 1.21, "learning_rate": 3.317873194989003e-07, "logits/chosen": -5.266841411590576, "logits/rejected": -5.101069450378418, "logps/chosen": -631.95703125, "logps/rejected": -520.5847778320312, "loss": 0.5696, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.015050624497234821, "rewards/margins": 0.6021886467933655, "rewards/rejected": -0.5871379375457764, "step": 4680 }, { "epoch": 1.21, "learning_rate": 3.313091708903127e-07, "logits/chosen": -5.410739421844482, "logits/rejected": -4.803441047668457, "logps/chosen": -614.3570556640625, "logps/rejected": -469.1231384277344, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": 0.2344636619091034, "rewards/margins": 0.8411576151847839, "rewards/rejected": -0.6066939830780029, "step": 4690 }, { "epoch": 1.21, "learning_rate": 3.308310222817252e-07, "logits/chosen": -5.258256435394287, "logits/rejected": -5.451190948486328, "logps/chosen": -576.4515380859375, "logps/rejected": -485.7108459472656, "loss": 0.4866, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.13143786787986755, "rewards/margins": 0.7280316352844238, "rewards/rejected": -0.5965937376022339, "step": 4700 }, { "epoch": 1.22, "learning_rate": 3.303528736731376e-07, "logits/chosen": -5.212939262390137, "logits/rejected": -5.142415523529053, "logps/chosen": -704.2086181640625, "logps/rejected": -526.8097534179688, "loss": 0.4914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.2733215391635895, "rewards/margins": 0.8825652003288269, "rewards/rejected": -0.609243631362915, "step": 4710 }, { "epoch": 1.22, "learning_rate": 3.2987472506455007e-07, "logits/chosen": -5.1258955001831055, "logits/rejected": -4.5399556159973145, "logps/chosen": -535.5632934570312, "logps/rejected": -395.2476501464844, "loss": 0.4483, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.09962010383605957, "rewards/margins": 0.7645028233528137, "rewards/rejected": -0.6648826599121094, "step": 4720 }, { "epoch": 1.22, "learning_rate": 3.293965764559625e-07, "logits/chosen": -5.426779747009277, "logits/rejected": -5.002619743347168, "logps/chosen": -609.389404296875, "logps/rejected": -550.8883666992188, "loss": 0.4656, "rewards/accuracies": 0.8125, "rewards/chosen": 0.23469075560569763, "rewards/margins": 0.8688748478889465, "rewards/rejected": -0.6341840028762817, "step": 4730 }, { "epoch": 1.22, "learning_rate": 3.2891842784737496e-07, "logits/chosen": -5.367763996124268, "logits/rejected": -4.751004219055176, "logps/chosen": -592.7855224609375, "logps/rejected": -401.44195556640625, "loss": 0.481, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.2908579111099243, "rewards/margins": 0.8994965553283691, "rewards/rejected": -0.6086386442184448, "step": 4740 }, { "epoch": 1.23, "learning_rate": 3.2844027923878743e-07, "logits/chosen": -5.3640851974487305, "logits/rejected": -5.4573655128479, "logps/chosen": -501.5721740722656, "logps/rejected": -392.0328369140625, "loss": 0.4647, "rewards/accuracies": 0.8125, "rewards/chosen": 0.15146663784980774, "rewards/margins": 0.7587504982948303, "rewards/rejected": -0.607283890247345, "step": 4750 }, { "epoch": 1.23, "learning_rate": 3.2796213063019985e-07, "logits/chosen": -4.7618021965026855, "logits/rejected": -5.176852703094482, "logps/chosen": -644.8971557617188, "logps/rejected": -542.431396484375, "loss": 0.4337, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.8001168370246887, "rewards/margins": 1.607272744178772, "rewards/rejected": -0.8071559071540833, "step": 4760 }, { "epoch": 1.23, "learning_rate": 3.274839820216123e-07, "logits/chosen": -5.069498538970947, "logits/rejected": -4.834755897521973, "logps/chosen": -563.3197631835938, "logps/rejected": -462.36102294921875, "loss": 0.4417, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.02548075094819069, "rewards/margins": 0.8663115501403809, "rewards/rejected": -0.8408308029174805, "step": 4770 }, { "epoch": 1.23, "learning_rate": 3.2700583341302474e-07, "logits/chosen": -5.207648277282715, "logits/rejected": -4.98382043838501, "logps/chosen": -640.4539794921875, "logps/rejected": -462.1813049316406, "loss": 0.4727, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.15569433569908142, "rewards/margins": 0.919206976890564, "rewards/rejected": -0.7635125517845154, "step": 4780 }, { "epoch": 1.24, "learning_rate": 3.265276848044372e-07, "logits/chosen": -5.182786464691162, "logits/rejected": -4.717242240905762, "logps/chosen": -529.8687133789062, "logps/rejected": -433.10955810546875, "loss": 0.4796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.04758434742689133, "rewards/margins": 0.8122583627700806, "rewards/rejected": -0.7646740674972534, "step": 4790 }, { "epoch": 1.24, "learning_rate": 3.2604953619584963e-07, "logits/chosen": -5.142504692077637, "logits/rejected": -5.2334136962890625, "logps/chosen": -510.4881896972656, "logps/rejected": -456.25933837890625, "loss": 0.4294, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1306559294462204, "rewards/margins": 0.8227289319038391, "rewards/rejected": -0.6920730471611023, "step": 4800 }, { "epoch": 1.24, "learning_rate": 3.255713875872621e-07, "logits/chosen": -5.400275230407715, "logits/rejected": -4.709908485412598, "logps/chosen": -639.8856201171875, "logps/rejected": -439.47100830078125, "loss": 0.4158, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.1783439815044403, "rewards/margins": 1.0155880451202393, "rewards/rejected": -0.8372441530227661, "step": 4810 }, { "epoch": 1.24, "learning_rate": 3.250932389786746e-07, "logits/chosen": -5.2201128005981445, "logits/rejected": -5.023993492126465, "logps/chosen": -556.1258544921875, "logps/rejected": -500.3677673339844, "loss": 0.4936, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.1758975088596344, "rewards/margins": 0.6871576309204102, "rewards/rejected": -0.5112601518630981, "step": 4820 }, { "epoch": 1.25, "learning_rate": 3.24615090370087e-07, "logits/chosen": -5.350574493408203, "logits/rejected": -5.189095973968506, "logps/chosen": -500.9693908691406, "logps/rejected": -401.7594299316406, "loss": 0.4927, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.08322947472333908, "rewards/margins": 0.8793350458145142, "rewards/rejected": -0.7961055040359497, "step": 4830 }, { "epoch": 1.25, "learning_rate": 3.2413694176149946e-07, "logits/chosen": -5.407814025878906, "logits/rejected": -4.51077938079834, "logps/chosen": -662.258056640625, "logps/rejected": -484.7787170410156, "loss": 0.4213, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.19565343856811523, "rewards/margins": 1.0335760116577148, "rewards/rejected": -0.8379226922988892, "step": 4840 }, { "epoch": 1.25, "learning_rate": 3.236587931529119e-07, "logits/chosen": -5.403429985046387, "logits/rejected": -4.764667510986328, "logps/chosen": -534.7006225585938, "logps/rejected": -448.016845703125, "loss": 0.5182, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07559207081794739, "rewards/margins": 0.7234622836112976, "rewards/rejected": -0.6478702425956726, "step": 4850 }, { "epoch": 1.25, "learning_rate": 3.2318064454432436e-07, "logits/chosen": -5.135013580322266, "logits/rejected": -5.139360427856445, "logps/chosen": -575.1212768554688, "logps/rejected": -482.5987854003906, "loss": 0.4782, "rewards/accuracies": 0.75, "rewards/chosen": 0.27274149656295776, "rewards/margins": 0.9301210641860962, "rewards/rejected": -0.6573796272277832, "step": 4860 }, { "epoch": 1.26, "learning_rate": 3.227024959357368e-07, "logits/chosen": -5.215053081512451, "logits/rejected": -5.221492767333984, "logps/chosen": -616.4022216796875, "logps/rejected": -509.933349609375, "loss": 0.4706, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.07704462856054306, "rewards/margins": 0.7564781308174133, "rewards/rejected": -0.6794334650039673, "step": 4870 }, { "epoch": 1.26, "learning_rate": 3.2222434732714925e-07, "logits/chosen": -5.301988124847412, "logits/rejected": -4.95112419128418, "logps/chosen": -655.2160034179688, "logps/rejected": -495.8883361816406, "loss": 0.4806, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.09709203243255615, "rewards/margins": 0.9167606234550476, "rewards/rejected": -0.8196685910224915, "step": 4880 }, { "epoch": 1.26, "learning_rate": 3.217461987185617e-07, "logits/chosen": -5.207809925079346, "logits/rejected": -5.073318958282471, "logps/chosen": -542.6402587890625, "logps/rejected": -458.5877990722656, "loss": 0.5036, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0012453317176550627, "rewards/margins": 0.9096211194992065, "rewards/rejected": -0.9108665585517883, "step": 4890 }, { "epoch": 1.27, "learning_rate": 3.2126805010997414e-07, "logits/chosen": -5.579865455627441, "logits/rejected": -5.33579683303833, "logps/chosen": -607.6800537109375, "logps/rejected": -456.9635314941406, "loss": 0.5092, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.09256912767887115, "rewards/margins": 0.623670220375061, "rewards/rejected": -0.5311011075973511, "step": 4900 }, { "epoch": 1.27, "learning_rate": 3.207899015013866e-07, "logits/chosen": -5.326630592346191, "logits/rejected": -5.108895778656006, "logps/chosen": -608.5619506835938, "logps/rejected": -473.565673828125, "loss": 0.4679, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.12011335790157318, "rewards/margins": 0.8105586767196655, "rewards/rejected": -0.6904453039169312, "step": 4910 }, { "epoch": 1.27, "learning_rate": 3.2031175289279903e-07, "logits/chosen": -5.19180154800415, "logits/rejected": -4.874623775482178, "logps/chosen": -712.206298828125, "logps/rejected": -480.17059326171875, "loss": 0.4264, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.26678353548049927, "rewards/margins": 1.1572002172470093, "rewards/rejected": -0.8904166221618652, "step": 4920 }, { "epoch": 1.27, "learning_rate": 3.198336042842115e-07, "logits/chosen": -5.521494388580322, "logits/rejected": -5.434743404388428, "logps/chosen": -625.2085571289062, "logps/rejected": -537.6234130859375, "loss": 0.4924, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.30856385827064514, "rewards/margins": 0.8109831809997559, "rewards/rejected": -0.5024193525314331, "step": 4930 }, { "epoch": 1.28, "learning_rate": 3.1935545567562397e-07, "logits/chosen": -5.193690299987793, "logits/rejected": -5.30068302154541, "logps/chosen": -615.994384765625, "logps/rejected": -506.31396484375, "loss": 0.4807, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.15624287724494934, "rewards/margins": 0.8963413238525391, "rewards/rejected": -0.7400984764099121, "step": 4940 }, { "epoch": 1.28, "learning_rate": 3.188773070670364e-07, "logits/chosen": -5.246696949005127, "logits/rejected": -5.202801704406738, "logps/chosen": -671.7757568359375, "logps/rejected": -600.4468994140625, "loss": 0.478, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.25630030035972595, "rewards/margins": 0.8184847831726074, "rewards/rejected": -0.5621846318244934, "step": 4950 }, { "epoch": 1.28, "learning_rate": 3.183991584584489e-07, "logits/chosen": -5.450588226318359, "logits/rejected": -4.567678451538086, "logps/chosen": -515.994140625, "logps/rejected": -400.6774597167969, "loss": 0.5301, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.047671642154455185, "rewards/margins": 0.7600115537643433, "rewards/rejected": -0.8076831698417664, "step": 4960 }, { "epoch": 1.28, "learning_rate": 3.1792100984986133e-07, "logits/chosen": -5.471573829650879, "logits/rejected": -4.921059608459473, "logps/chosen": -661.2037353515625, "logps/rejected": -453.4658203125, "loss": 0.4415, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.10398201644420624, "rewards/margins": 0.8970952033996582, "rewards/rejected": -0.793113112449646, "step": 4970 }, { "epoch": 1.29, "learning_rate": 3.174428612412738e-07, "logits/chosen": -5.29636287689209, "logits/rejected": -4.938093662261963, "logps/chosen": -550.7738647460938, "logps/rejected": -442.25238037109375, "loss": 0.4673, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.1460486799478531, "rewards/margins": 0.7587619423866272, "rewards/rejected": -0.6127133369445801, "step": 4980 }, { "epoch": 1.29, "learning_rate": 3.169647126326862e-07, "logits/chosen": -5.159952163696289, "logits/rejected": -4.928128242492676, "logps/chosen": -576.3331298828125, "logps/rejected": -422.4154357910156, "loss": 0.4443, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1925327628850937, "rewards/margins": 0.9463454484939575, "rewards/rejected": -0.7538124918937683, "step": 4990 }, { "epoch": 1.29, "learning_rate": 3.164865640240987e-07, "logits/chosen": -5.116313934326172, "logits/rejected": -5.117690563201904, "logps/chosen": -604.3567504882812, "logps/rejected": -463.65240478515625, "loss": 0.5, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.10797204822301865, "rewards/margins": 0.7236849665641785, "rewards/rejected": -0.6157129406929016, "step": 5000 }, { "epoch": 1.29, "eval_logits/chosen": -5.30470609664917, "eval_logits/rejected": -5.0107927322387695, "eval_logps/chosen": -587.684326171875, "eval_logps/rejected": -448.6505126953125, "eval_loss": 0.5557167530059814, "eval_rewards/accuracies": 0.7049999833106995, "eval_rewards/chosen": 0.058524537831544876, "eval_rewards/margins": 0.6695210337638855, "eval_rewards/rejected": -0.6109965443611145, "eval_runtime": 104.8091, "eval_samples_per_second": 19.082, "eval_steps_per_second": 1.193, "step": 5000 }, { "epoch": 1.29, "learning_rate": 3.1600841541551117e-07, "logits/chosen": -4.951615333557129, "logits/rejected": -5.115173816680908, "logps/chosen": -534.4778442382812, "logps/rejected": -470.085205078125, "loss": 0.4629, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.013290902599692345, "rewards/margins": 0.6907011270523071, "rewards/rejected": -0.6774102449417114, "step": 5010 }, { "epoch": 1.3, "learning_rate": 3.155302668069236e-07, "logits/chosen": -5.420719146728516, "logits/rejected": -5.101531028747559, "logps/chosen": -638.1929931640625, "logps/rejected": -463.12188720703125, "loss": 0.4584, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2174696922302246, "rewards/margins": 0.9184302091598511, "rewards/rejected": -0.7009605169296265, "step": 5020 }, { "epoch": 1.3, "learning_rate": 3.1505211819833606e-07, "logits/chosen": -5.4076642990112305, "logits/rejected": -5.055717468261719, "logps/chosen": -599.2544555664062, "logps/rejected": -472.71942138671875, "loss": 0.4889, "rewards/accuracies": 0.8125, "rewards/chosen": 0.18419411778450012, "rewards/margins": 0.8163158297538757, "rewards/rejected": -0.6321216821670532, "step": 5030 }, { "epoch": 1.3, "learning_rate": 3.145739695897485e-07, "logits/chosen": -5.018680572509766, "logits/rejected": -4.834623336791992, "logps/chosen": -611.9959716796875, "logps/rejected": -497.1280212402344, "loss": 0.467, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2773978114128113, "rewards/margins": 0.9364258050918579, "rewards/rejected": -0.6590279936790466, "step": 5040 }, { "epoch": 1.3, "learning_rate": 3.1409582098116095e-07, "logits/chosen": -5.10263204574585, "logits/rejected": -5.2326178550720215, "logps/chosen": -542.2662963867188, "logps/rejected": -467.2355041503906, "loss": 0.5078, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17342324554920197, "rewards/margins": 0.6955951452255249, "rewards/rejected": -0.5221718549728394, "step": 5050 }, { "epoch": 1.31, "learning_rate": 3.1361767237257337e-07, "logits/chosen": -5.663632392883301, "logits/rejected": -5.0664520263671875, "logps/chosen": -597.9359741210938, "logps/rejected": -461.3505859375, "loss": 0.4611, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.07507459074258804, "rewards/margins": 0.7427322268486023, "rewards/rejected": -0.667657732963562, "step": 5060 }, { "epoch": 1.31, "learning_rate": 3.1313952376398584e-07, "logits/chosen": -4.968114852905273, "logits/rejected": -4.232183456420898, "logps/chosen": -615.8897705078125, "logps/rejected": -424.3362731933594, "loss": 0.4933, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.129293292760849, "rewards/margins": 0.8433384895324707, "rewards/rejected": -0.7140451669692993, "step": 5070 }, { "epoch": 1.31, "learning_rate": 3.126613751553983e-07, "logits/chosen": -5.43425178527832, "logits/rejected": -4.8876190185546875, "logps/chosen": -603.66064453125, "logps/rejected": -481.7394104003906, "loss": 0.4853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1000271812081337, "rewards/margins": 0.7688115835189819, "rewards/rejected": -0.6687844395637512, "step": 5080 }, { "epoch": 1.31, "learning_rate": 3.1218322654681073e-07, "logits/chosen": -5.406708240509033, "logits/rejected": -5.260541915893555, "logps/chosen": -465.22283935546875, "logps/rejected": -425.2814025878906, "loss": 0.4835, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.16536065936088562, "rewards/margins": 0.9126476049423218, "rewards/rejected": -0.7472870349884033, "step": 5090 }, { "epoch": 1.32, "learning_rate": 3.117050779382232e-07, "logits/chosen": -4.778876781463623, "logits/rejected": -4.43536901473999, "logps/chosen": -573.4124145507812, "logps/rejected": -448.2332458496094, "loss": 0.5179, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.20030739903450012, "rewards/margins": 0.7426692843437195, "rewards/rejected": -0.5423619747161865, "step": 5100 }, { "epoch": 1.32, "learning_rate": 3.112269293296356e-07, "logits/chosen": -5.103249549865723, "logits/rejected": -5.05403995513916, "logps/chosen": -581.7435302734375, "logps/rejected": -504.354248046875, "loss": 0.528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1928052008152008, "rewards/margins": 0.7911714911460876, "rewards/rejected": -0.5983662605285645, "step": 5110 }, { "epoch": 1.32, "learning_rate": 3.107487807210481e-07, "logits/chosen": -5.064652442932129, "logits/rejected": -5.412230491638184, "logps/chosen": -650.730224609375, "logps/rejected": -556.86181640625, "loss": 0.4918, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.26175153255462646, "rewards/margins": 0.8284789323806763, "rewards/rejected": -0.566727340221405, "step": 5120 }, { "epoch": 1.32, "learning_rate": 3.102706321124605e-07, "logits/chosen": -5.027246952056885, "logits/rejected": -5.047433853149414, "logps/chosen": -580.1620483398438, "logps/rejected": -445.06292724609375, "loss": 0.5151, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.10325394570827484, "rewards/margins": 0.8308150172233582, "rewards/rejected": -0.7275611162185669, "step": 5130 }, { "epoch": 1.33, "learning_rate": 3.09792483503873e-07, "logits/chosen": -5.087599277496338, "logits/rejected": -4.841473579406738, "logps/chosen": -622.0618286132812, "logps/rejected": -491.386962890625, "loss": 0.4943, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.10958144813776016, "rewards/margins": 0.6705357432365417, "rewards/rejected": -0.5609542727470398, "step": 5140 }, { "epoch": 1.33, "learning_rate": 3.0931433489528546e-07, "logits/chosen": -5.381837844848633, "logits/rejected": -4.870347023010254, "logps/chosen": -576.2140502929688, "logps/rejected": -496.7810974121094, "loss": 0.4622, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.06123454123735428, "rewards/margins": 0.809292197227478, "rewards/rejected": -0.7480576634407043, "step": 5150 }, { "epoch": 1.33, "learning_rate": 3.088361862866979e-07, "logits/chosen": -5.598812580108643, "logits/rejected": -4.862250328063965, "logps/chosen": -600.2138671875, "logps/rejected": -442.11956787109375, "loss": 0.4546, "rewards/accuracies": 0.8125, "rewards/chosen": 0.22415533661842346, "rewards/margins": 1.0163166522979736, "rewards/rejected": -0.7921613454818726, "step": 5160 }, { "epoch": 1.33, "learning_rate": 3.0835803767811035e-07, "logits/chosen": -5.278334617614746, "logits/rejected": -4.875577926635742, "logps/chosen": -613.833984375, "logps/rejected": -512.5206909179688, "loss": 0.4623, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.2710726857185364, "rewards/margins": 0.9372037053108215, "rewards/rejected": -0.6661309599876404, "step": 5170 }, { "epoch": 1.34, "learning_rate": 3.0787988906952277e-07, "logits/chosen": -5.453562259674072, "logits/rejected": -5.090235710144043, "logps/chosen": -559.8711547851562, "logps/rejected": -410.10107421875, "loss": 0.4615, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2021828442811966, "rewards/margins": 0.9239915609359741, "rewards/rejected": -0.7218087315559387, "step": 5180 }, { "epoch": 1.34, "learning_rate": 3.0740174046093524e-07, "logits/chosen": -5.384959697723389, "logits/rejected": -4.711813926696777, "logps/chosen": -681.5294189453125, "logps/rejected": -486.88116455078125, "loss": 0.4914, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.2118859589099884, "rewards/margins": 0.8949806094169617, "rewards/rejected": -0.6830946207046509, "step": 5190 }, { "epoch": 1.34, "learning_rate": 3.0692359185234766e-07, "logits/chosen": -5.765985488891602, "logits/rejected": -5.084735870361328, "logps/chosen": -663.6946411132812, "logps/rejected": -546.7593383789062, "loss": 0.4844, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.3394524157047272, "rewards/margins": 0.8402635455131531, "rewards/rejected": -0.5008112192153931, "step": 5200 }, { "epoch": 1.35, "learning_rate": 3.0644544324376013e-07, "logits/chosen": -5.284051895141602, "logits/rejected": -4.73442268371582, "logps/chosen": -568.1106567382812, "logps/rejected": -465.56915283203125, "loss": 0.4728, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.08343572169542313, "rewards/margins": 0.7554422616958618, "rewards/rejected": -0.6720064878463745, "step": 5210 }, { "epoch": 1.35, "learning_rate": 3.059672946351726e-07, "logits/chosen": -5.261513710021973, "logits/rejected": -4.691378593444824, "logps/chosen": -577.7540283203125, "logps/rejected": -408.4040222167969, "loss": 0.5097, "rewards/accuracies": 0.625, "rewards/chosen": 0.016951410099864006, "rewards/margins": 0.6715208888053894, "rewards/rejected": -0.6545695066452026, "step": 5220 }, { "epoch": 1.35, "learning_rate": 3.05489146026585e-07, "logits/chosen": -5.683475017547607, "logits/rejected": -4.6039719581604, "logps/chosen": -631.0763549804688, "logps/rejected": -413.6827697753906, "loss": 0.4722, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2749244272708893, "rewards/margins": 0.9215471148490906, "rewards/rejected": -0.6466225981712341, "step": 5230 }, { "epoch": 1.35, "learning_rate": 3.050109974179975e-07, "logits/chosen": -4.738039493560791, "logits/rejected": -4.775629997253418, "logps/chosen": -642.6302490234375, "logps/rejected": -477.5518493652344, "loss": 0.4398, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.4070306420326233, "rewards/margins": 1.0604662895202637, "rewards/rejected": -0.6534357070922852, "step": 5240 }, { "epoch": 1.36, "learning_rate": 3.0453284880940996e-07, "logits/chosen": -4.824185371398926, "logits/rejected": -5.094993591308594, "logps/chosen": -524.2914428710938, "logps/rejected": -486.5076599121094, "loss": 0.4693, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.0016633629566058517, "rewards/margins": 0.8527435064315796, "rewards/rejected": -0.8510801196098328, "step": 5250 }, { "epoch": 1.36, "learning_rate": 3.0405470020082243e-07, "logits/chosen": -5.582604885101318, "logits/rejected": -5.167820930480957, "logps/chosen": -554.4757690429688, "logps/rejected": -427.14447021484375, "loss": 0.4152, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.24802568554878235, "rewards/margins": 0.912145733833313, "rewards/rejected": -0.664120078086853, "step": 5260 }, { "epoch": 1.36, "learning_rate": 3.0357655159223485e-07, "logits/chosen": -5.19129753112793, "logits/rejected": -5.208978652954102, "logps/chosen": -509.1600036621094, "logps/rejected": -383.28057861328125, "loss": 0.5041, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03674743324518204, "rewards/margins": 0.6876164674758911, "rewards/rejected": -0.6508690714836121, "step": 5270 }, { "epoch": 1.36, "learning_rate": 3.030984029836473e-07, "logits/chosen": -5.263606071472168, "logits/rejected": -4.829526424407959, "logps/chosen": -511.5791015625, "logps/rejected": -362.3144226074219, "loss": 0.4919, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.06631062179803848, "rewards/margins": 0.9390446543693542, "rewards/rejected": -0.8727340698242188, "step": 5280 }, { "epoch": 1.37, "learning_rate": 3.026202543750598e-07, "logits/chosen": -5.160284996032715, "logits/rejected": -4.652561187744141, "logps/chosen": -607.651611328125, "logps/rejected": -470.79248046875, "loss": 0.5084, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2653890550136566, "rewards/margins": 0.8887335658073425, "rewards/rejected": -0.6233444809913635, "step": 5290 }, { "epoch": 1.37, "learning_rate": 3.021421057664722e-07, "logits/chosen": -5.290960788726807, "logits/rejected": -5.059186935424805, "logps/chosen": -467.1153259277344, "logps/rejected": -431.36016845703125, "loss": 0.4673, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.12922930717468262, "rewards/margins": 0.7906755208969116, "rewards/rejected": -0.6614463329315186, "step": 5300 }, { "epoch": 1.37, "learning_rate": 3.016639571578847e-07, "logits/chosen": -5.166245937347412, "logits/rejected": -4.678903579711914, "logps/chosen": -533.9697875976562, "logps/rejected": -401.5321960449219, "loss": 0.4935, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.09894763678312302, "rewards/margins": 0.9352729916572571, "rewards/rejected": -0.8363253474235535, "step": 5310 }, { "epoch": 1.37, "learning_rate": 3.011858085492971e-07, "logits/chosen": -5.107386589050293, "logits/rejected": -4.389826774597168, "logps/chosen": -610.1873168945312, "logps/rejected": -457.52960205078125, "loss": 0.4683, "rewards/accuracies": 0.75, "rewards/chosen": 0.277593195438385, "rewards/margins": 0.9089527130126953, "rewards/rejected": -0.6313593983650208, "step": 5320 }, { "epoch": 1.38, "learning_rate": 3.007076599407096e-07, "logits/chosen": -5.148862838745117, "logits/rejected": -5.0272746086120605, "logps/chosen": -502.54547119140625, "logps/rejected": -451.8081970214844, "loss": 0.4874, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1653563231229782, "rewards/margins": 0.9289857149124146, "rewards/rejected": -0.7636293768882751, "step": 5330 }, { "epoch": 1.38, "learning_rate": 3.00229511332122e-07, "logits/chosen": -5.202955722808838, "logits/rejected": -4.977900505065918, "logps/chosen": -535.3466796875, "logps/rejected": -464.5777282714844, "loss": 0.5027, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.06080491468310356, "rewards/margins": 0.6496372222900391, "rewards/rejected": -0.7104421854019165, "step": 5340 }, { "epoch": 1.38, "learning_rate": 2.9975136272353447e-07, "logits/chosen": -5.237588882446289, "logits/rejected": -5.109346866607666, "logps/chosen": -593.1312255859375, "logps/rejected": -419.86376953125, "loss": 0.521, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.17286767065525055, "rewards/margins": 0.8334784507751465, "rewards/rejected": -0.6606107950210571, "step": 5350 }, { "epoch": 1.38, "learning_rate": 2.9927321411494694e-07, "logits/chosen": -5.261499881744385, "logits/rejected": -4.925136566162109, "logps/chosen": -618.867431640625, "logps/rejected": -443.95416259765625, "loss": 0.4859, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.22618035972118378, "rewards/margins": 0.9764344096183777, "rewards/rejected": -0.7502540946006775, "step": 5360 }, { "epoch": 1.39, "learning_rate": 2.9879506550635936e-07, "logits/chosen": -5.053614139556885, "logits/rejected": -4.990055561065674, "logps/chosen": -680.153076171875, "logps/rejected": -492.76885986328125, "loss": 0.4973, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.11473053693771362, "rewards/margins": 0.83479243516922, "rewards/rejected": -0.7200619578361511, "step": 5370 }, { "epoch": 1.39, "learning_rate": 2.9831691689777183e-07, "logits/chosen": -5.0526018142700195, "logits/rejected": -4.916355609893799, "logps/chosen": -606.3969116210938, "logps/rejected": -460.453369140625, "loss": 0.5063, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1508125215768814, "rewards/margins": 0.7722153067588806, "rewards/rejected": -0.6214028000831604, "step": 5380 }, { "epoch": 1.39, "learning_rate": 2.9783876828918425e-07, "logits/chosen": -5.597015857696533, "logits/rejected": -5.233231067657471, "logps/chosen": -603.4032592773438, "logps/rejected": -453.7130432128906, "loss": 0.4656, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0905846357345581, "rewards/margins": 0.9744840860366821, "rewards/rejected": -0.8838993310928345, "step": 5390 }, { "epoch": 1.39, "learning_rate": 2.973606196805967e-07, "logits/chosen": -5.69144344329834, "logits/rejected": -5.291382789611816, "logps/chosen": -536.1588134765625, "logps/rejected": -433.7110290527344, "loss": 0.5152, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.10673501342535019, "rewards/margins": 0.6479343175888062, "rewards/rejected": -0.5411993265151978, "step": 5400 }, { "epoch": 1.4, "learning_rate": 2.9688247107200914e-07, "logits/chosen": -5.024324417114258, "logits/rejected": -4.860573768615723, "logps/chosen": -664.29736328125, "logps/rejected": -459.5118713378906, "loss": 0.4676, "rewards/accuracies": 0.75, "rewards/chosen": 0.2592621147632599, "rewards/margins": 0.9031769037246704, "rewards/rejected": -0.6439147591590881, "step": 5410 }, { "epoch": 1.4, "learning_rate": 2.964043224634216e-07, "logits/chosen": -5.29131555557251, "logits/rejected": -4.9836530685424805, "logps/chosen": -524.1148681640625, "logps/rejected": -428.37451171875, "loss": 0.5187, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.007810732815414667, "rewards/margins": 0.5645586252212524, "rewards/rejected": -0.5567479133605957, "step": 5420 }, { "epoch": 1.4, "learning_rate": 2.959261738548341e-07, "logits/chosen": -5.2077226638793945, "logits/rejected": -4.752447605133057, "logps/chosen": -553.4553833007812, "logps/rejected": -393.95513916015625, "loss": 0.4771, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.05387838929891586, "rewards/margins": 0.6342180967330933, "rewards/rejected": -0.5803396105766296, "step": 5430 }, { "epoch": 1.4, "learning_rate": 2.954480252462465e-07, "logits/chosen": -5.345783233642578, "logits/rejected": -4.957459449768066, "logps/chosen": -632.7833251953125, "logps/rejected": -554.1024169921875, "loss": 0.4725, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2054729461669922, "rewards/margins": 0.8294585943222046, "rewards/rejected": -0.6239856481552124, "step": 5440 }, { "epoch": 1.41, "learning_rate": 2.94969876637659e-07, "logits/chosen": -5.311930179595947, "logits/rejected": -5.093207359313965, "logps/chosen": -622.1610107421875, "logps/rejected": -457.89801025390625, "loss": 0.4548, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1989000141620636, "rewards/margins": 1.0489680767059326, "rewards/rejected": -0.8500679135322571, "step": 5450 }, { "epoch": 1.41, "learning_rate": 2.944917280290714e-07, "logits/chosen": -5.2109174728393555, "logits/rejected": -4.535860538482666, "logps/chosen": -635.5310668945312, "logps/rejected": -439.27056884765625, "loss": 0.4687, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2593763470649719, "rewards/margins": 0.9988039135932922, "rewards/rejected": -0.7394275069236755, "step": 5460 }, { "epoch": 1.41, "learning_rate": 2.9401357942048387e-07, "logits/chosen": -5.159535884857178, "logits/rejected": -4.5238847732543945, "logps/chosen": -633.2022705078125, "logps/rejected": -431.2721252441406, "loss": 0.5269, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.08090498298406601, "rewards/margins": 0.7488837242126465, "rewards/rejected": -0.6679786443710327, "step": 5470 }, { "epoch": 1.41, "learning_rate": 2.935354308118963e-07, "logits/chosen": -5.423813819885254, "logits/rejected": -4.897356986999512, "logps/chosen": -617.12939453125, "logps/rejected": -451.7679138183594, "loss": 0.4538, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.19981808960437775, "rewards/margins": 0.9754274487495422, "rewards/rejected": -0.7756093144416809, "step": 5480 }, { "epoch": 1.42, "learning_rate": 2.9305728220330876e-07, "logits/chosen": -5.049278259277344, "logits/rejected": -4.914906978607178, "logps/chosen": -607.685546875, "logps/rejected": -459.91253662109375, "loss": 0.4559, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.26436182856559753, "rewards/margins": 0.9137558937072754, "rewards/rejected": -0.649394154548645, "step": 5490 }, { "epoch": 1.42, "learning_rate": 2.9257913359472123e-07, "logits/chosen": -5.143016815185547, "logits/rejected": -4.838575839996338, "logps/chosen": -536.3577270507812, "logps/rejected": -461.98443603515625, "loss": 0.4483, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.05272433161735535, "rewards/margins": 0.8006641268730164, "rewards/rejected": -0.7479397654533386, "step": 5500 }, { "epoch": 1.42, "learning_rate": 2.9210098498613365e-07, "logits/chosen": -5.466008186340332, "logits/rejected": -5.072146892547607, "logps/chosen": -542.8016357421875, "logps/rejected": -388.6141052246094, "loss": 0.5066, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.008307349868118763, "rewards/margins": 0.931723415851593, "rewards/rejected": -0.923416018486023, "step": 5510 }, { "epoch": 1.43, "learning_rate": 2.916228363775461e-07, "logits/chosen": -5.363007545471191, "logits/rejected": -5.133292198181152, "logps/chosen": -579.352783203125, "logps/rejected": -385.0237121582031, "loss": 0.4664, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0011771500576287508, "rewards/margins": 0.8338413238525391, "rewards/rejected": -0.8350184559822083, "step": 5520 }, { "epoch": 1.43, "learning_rate": 2.9114468776895854e-07, "logits/chosen": -5.253536224365234, "logits/rejected": -5.012035369873047, "logps/chosen": -667.5882568359375, "logps/rejected": -509.4100646972656, "loss": 0.4297, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2698845863342285, "rewards/margins": 1.018064260482788, "rewards/rejected": -0.7481794953346252, "step": 5530 }, { "epoch": 1.43, "learning_rate": 2.9066653916037106e-07, "logits/chosen": -5.374054908752441, "logits/rejected": -4.855627536773682, "logps/chosen": -620.35498046875, "logps/rejected": -442.03778076171875, "loss": 0.4252, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.15913085639476776, "rewards/margins": 1.0201072692871094, "rewards/rejected": -0.86097651720047, "step": 5540 }, { "epoch": 1.43, "learning_rate": 2.901883905517835e-07, "logits/chosen": -5.221441268920898, "logits/rejected": -4.77347469329834, "logps/chosen": -573.7438354492188, "logps/rejected": -486.37786865234375, "loss": 0.4894, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.11068983376026154, "rewards/margins": 0.7964924573898315, "rewards/rejected": -0.6858025789260864, "step": 5550 }, { "epoch": 1.44, "learning_rate": 2.8971024194319595e-07, "logits/chosen": -5.321528911590576, "logits/rejected": -5.291527271270752, "logps/chosen": -555.6034545898438, "logps/rejected": -468.3346252441406, "loss": 0.5064, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.10749177634716034, "rewards/margins": 0.6543353199958801, "rewards/rejected": -0.5468434691429138, "step": 5560 }, { "epoch": 1.44, "learning_rate": 2.8923209333460843e-07, "logits/chosen": -5.020425319671631, "logits/rejected": -5.021473407745361, "logps/chosen": -689.7359008789062, "logps/rejected": -517.6492919921875, "loss": 0.4787, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2571713328361511, "rewards/margins": 0.9119879007339478, "rewards/rejected": -0.6548165082931519, "step": 5570 }, { "epoch": 1.44, "learning_rate": 2.8875394472602085e-07, "logits/chosen": -5.014799118041992, "logits/rejected": -5.1056694984436035, "logps/chosen": -662.2682495117188, "logps/rejected": -505.5150451660156, "loss": 0.5136, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1722916215658188, "rewards/margins": 0.966114342212677, "rewards/rejected": -0.7938227653503418, "step": 5580 }, { "epoch": 1.44, "learning_rate": 2.882757961174333e-07, "logits/chosen": -5.065103530883789, "logits/rejected": -5.244524002075195, "logps/chosen": -523.4494018554688, "logps/rejected": -471.754150390625, "loss": 0.5095, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.06407101452350616, "rewards/margins": 0.5441995859146118, "rewards/rejected": -0.48012852668762207, "step": 5590 }, { "epoch": 1.45, "learning_rate": 2.8779764750884574e-07, "logits/chosen": -5.154325485229492, "logits/rejected": -5.184544563293457, "logps/chosen": -558.3209228515625, "logps/rejected": -477.3780822753906, "loss": 0.4923, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.22757764160633087, "rewards/margins": 0.927047848701477, "rewards/rejected": -0.6994701623916626, "step": 5600 }, { "epoch": 1.45, "learning_rate": 2.873194989002582e-07, "logits/chosen": -4.956672191619873, "logits/rejected": -5.014763832092285, "logps/chosen": -627.8824462890625, "logps/rejected": -485.72625732421875, "loss": 0.5464, "rewards/accuracies": 0.6875, "rewards/chosen": 0.07893381267786026, "rewards/margins": 0.668362021446228, "rewards/rejected": -0.589428186416626, "step": 5610 }, { "epoch": 1.45, "learning_rate": 2.8684135029167063e-07, "logits/chosen": -5.250463962554932, "logits/rejected": -4.949795722961426, "logps/chosen": -572.1656494140625, "logps/rejected": -461.17535400390625, "loss": 0.4699, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1957431137561798, "rewards/margins": 0.809569239616394, "rewards/rejected": -0.6138260960578918, "step": 5620 }, { "epoch": 1.45, "learning_rate": 2.863632016830831e-07, "logits/chosen": -5.111762046813965, "logits/rejected": -5.1160993576049805, "logps/chosen": -587.2557983398438, "logps/rejected": -437.3421936035156, "loss": 0.4578, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2592245936393738, "rewards/margins": 0.8441736102104187, "rewards/rejected": -0.5849489569664001, "step": 5630 }, { "epoch": 1.46, "learning_rate": 2.8588505307449557e-07, "logits/chosen": -5.289261817932129, "logits/rejected": -4.924485206604004, "logps/chosen": -644.1751708984375, "logps/rejected": -467.97021484375, "loss": 0.4337, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.28497666120529175, "rewards/margins": 0.971488356590271, "rewards/rejected": -0.6865118145942688, "step": 5640 }, { "epoch": 1.46, "learning_rate": 2.85406904465908e-07, "logits/chosen": -5.286837577819824, "logits/rejected": -4.461190223693848, "logps/chosen": -584.1656494140625, "logps/rejected": -436.67578125, "loss": 0.5131, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.13194705545902252, "rewards/margins": 0.7295104265213013, "rewards/rejected": -0.5975633263587952, "step": 5650 }, { "epoch": 1.46, "learning_rate": 2.8492875585732046e-07, "logits/chosen": -5.296660423278809, "logits/rejected": -4.5868024826049805, "logps/chosen": -637.705078125, "logps/rejected": -457.5252380371094, "loss": 0.4864, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2933860421180725, "rewards/margins": 0.8939617276191711, "rewards/rejected": -0.6005758047103882, "step": 5660 }, { "epoch": 1.46, "learning_rate": 2.844506072487329e-07, "logits/chosen": -5.507636070251465, "logits/rejected": -5.2888503074646, "logps/chosen": -582.805908203125, "logps/rejected": -453.923583984375, "loss": 0.5313, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.04933157190680504, "rewards/margins": 0.7174583673477173, "rewards/rejected": -0.6681267619132996, "step": 5670 }, { "epoch": 1.47, "learning_rate": 2.8397245864014535e-07, "logits/chosen": -4.998622894287109, "logits/rejected": -4.86914587020874, "logps/chosen": -597.6934814453125, "logps/rejected": -456.6585998535156, "loss": 0.5134, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.11511091887950897, "rewards/margins": 0.8564032316207886, "rewards/rejected": -0.7412922978401184, "step": 5680 }, { "epoch": 1.47, "learning_rate": 2.8349431003155777e-07, "logits/chosen": -5.344836235046387, "logits/rejected": -5.327383995056152, "logps/chosen": -459.65509033203125, "logps/rejected": -448.505859375, "loss": 0.5325, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.010908651165664196, "rewards/margins": 0.5302246809005737, "rewards/rejected": -0.5411332845687866, "step": 5690 }, { "epoch": 1.47, "learning_rate": 2.8301616142297024e-07, "logits/chosen": -5.281792163848877, "logits/rejected": -5.211630344390869, "logps/chosen": -535.0645141601562, "logps/rejected": -485.158203125, "loss": 0.4387, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.24729618430137634, "rewards/margins": 0.9013670086860657, "rewards/rejected": -0.6540707945823669, "step": 5700 }, { "epoch": 1.47, "learning_rate": 2.825380128143827e-07, "logits/chosen": -5.329213619232178, "logits/rejected": -4.899355888366699, "logps/chosen": -597.4769897460938, "logps/rejected": -480.318115234375, "loss": 0.4535, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19998638331890106, "rewards/margins": 0.9344824552536011, "rewards/rejected": -0.7344961762428284, "step": 5710 }, { "epoch": 1.48, "learning_rate": 2.8205986420579513e-07, "logits/chosen": -5.753786087036133, "logits/rejected": -5.130732536315918, "logps/chosen": -627.09814453125, "logps/rejected": -494.4100646972656, "loss": 0.4247, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.14391648769378662, "rewards/margins": 1.0051610469818115, "rewards/rejected": -0.8612446784973145, "step": 5720 }, { "epoch": 1.48, "learning_rate": 2.815817155972076e-07, "logits/chosen": -5.231484413146973, "logits/rejected": -4.86224889755249, "logps/chosen": -618.2662353515625, "logps/rejected": -517.1012573242188, "loss": 0.4314, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.26319602131843567, "rewards/margins": 0.88639897108078, "rewards/rejected": -0.6232029795646667, "step": 5730 }, { "epoch": 1.48, "learning_rate": 2.8110356698862e-07, "logits/chosen": -4.925096035003662, "logits/rejected": -4.588588237762451, "logps/chosen": -582.688232421875, "logps/rejected": -467.90850830078125, "loss": 0.4541, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.2703607678413391, "rewards/margins": 1.014561414718628, "rewards/rejected": -0.7442008256912231, "step": 5740 }, { "epoch": 1.48, "learning_rate": 2.806254183800325e-07, "logits/chosen": -5.30141019821167, "logits/rejected": -5.098990440368652, "logps/chosen": -551.7254028320312, "logps/rejected": -448.8724060058594, "loss": 0.5073, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.15013381838798523, "rewards/margins": 0.5967189073562622, "rewards/rejected": -0.446585088968277, "step": 5750 }, { "epoch": 1.49, "learning_rate": 2.801472697714449e-07, "logits/chosen": -5.211381435394287, "logits/rejected": -5.0231499671936035, "logps/chosen": -581.7259521484375, "logps/rejected": -440.8310546875, "loss": 0.539, "rewards/accuracies": 0.6875, "rewards/chosen": 0.056160785257816315, "rewards/margins": 0.7174104452133179, "rewards/rejected": -0.6612496376037598, "step": 5760 }, { "epoch": 1.49, "learning_rate": 2.796691211628574e-07, "logits/chosen": -4.983366966247559, "logits/rejected": -5.19180154800415, "logps/chosen": -473.90008544921875, "logps/rejected": -432.15887451171875, "loss": 0.5038, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.06687311828136444, "rewards/margins": 0.7399019598960876, "rewards/rejected": -0.6730288863182068, "step": 5770 }, { "epoch": 1.49, "learning_rate": 2.7919097255426986e-07, "logits/chosen": -5.168404579162598, "logits/rejected": -4.71201229095459, "logps/chosen": -553.3631591796875, "logps/rejected": -447.97296142578125, "loss": 0.4699, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0202735997736454, "rewards/margins": 0.8463420867919922, "rewards/rejected": -0.8260685205459595, "step": 5780 }, { "epoch": 1.49, "learning_rate": 2.787128239456823e-07, "logits/chosen": -5.536955833435059, "logits/rejected": -5.151968955993652, "logps/chosen": -582.7752075195312, "logps/rejected": -434.57281494140625, "loss": 0.4924, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.03780301287770271, "rewards/margins": 0.7694236040115356, "rewards/rejected": -0.7316205501556396, "step": 5790 }, { "epoch": 1.5, "learning_rate": 2.7823467533709475e-07, "logits/chosen": -4.957120418548584, "logits/rejected": -4.899052619934082, "logps/chosen": -537.8779296875, "logps/rejected": -425.59307861328125, "loss": 0.5121, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.14123961329460144, "rewards/margins": 0.7674670219421387, "rewards/rejected": -0.6262273192405701, "step": 5800 }, { "epoch": 1.5, "learning_rate": 2.7775652672850717e-07, "logits/chosen": -5.135714530944824, "logits/rejected": -5.094111442565918, "logps/chosen": -509.888916015625, "logps/rejected": -405.11700439453125, "loss": 0.4899, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.12924091517925262, "rewards/margins": 0.8350270390510559, "rewards/rejected": -0.7057861089706421, "step": 5810 }, { "epoch": 1.5, "learning_rate": 2.7727837811991964e-07, "logits/chosen": -5.172341346740723, "logits/rejected": -5.001723289489746, "logps/chosen": -543.4580688476562, "logps/rejected": -474.4319763183594, "loss": 0.5081, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.15545754134655, "rewards/margins": 0.7855085134506226, "rewards/rejected": -0.6300509572029114, "step": 5820 }, { "epoch": 1.51, "learning_rate": 2.7680022951133206e-07, "logits/chosen": -5.325826644897461, "logits/rejected": -5.098412990570068, "logps/chosen": -634.0623779296875, "logps/rejected": -474.9085998535156, "loss": 0.431, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.16221727430820465, "rewards/margins": 0.8316564559936523, "rewards/rejected": -0.6694391965866089, "step": 5830 }, { "epoch": 1.51, "learning_rate": 2.763220809027446e-07, "logits/chosen": -5.2836151123046875, "logits/rejected": -5.3150315284729, "logps/chosen": -600.0272216796875, "logps/rejected": -531.51123046875, "loss": 0.5131, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.020458927378058434, "rewards/margins": 0.7092123627662659, "rewards/rejected": -0.72967129945755, "step": 5840 }, { "epoch": 1.51, "learning_rate": 2.7584393229415706e-07, "logits/chosen": -5.100133419036865, "logits/rejected": -4.807437896728516, "logps/chosen": -588.0416259765625, "logps/rejected": -421.79461669921875, "loss": 0.5063, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.15671463310718536, "rewards/margins": 0.7961578369140625, "rewards/rejected": -0.6394431591033936, "step": 5850 }, { "epoch": 1.51, "learning_rate": 2.753657836855695e-07, "logits/chosen": -4.850018501281738, "logits/rejected": -4.782118797302246, "logps/chosen": -523.2684936523438, "logps/rejected": -435.30419921875, "loss": 0.4913, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1405399739742279, "rewards/margins": 0.8211439251899719, "rewards/rejected": -0.6806038618087769, "step": 5860 }, { "epoch": 1.52, "learning_rate": 2.7488763507698195e-07, "logits/chosen": -5.19276237487793, "logits/rejected": -4.835424900054932, "logps/chosen": -607.2968139648438, "logps/rejected": -482.1409606933594, "loss": 0.4812, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.12264330685138702, "rewards/margins": 0.8532178997993469, "rewards/rejected": -0.7305744886398315, "step": 5870 }, { "epoch": 1.52, "learning_rate": 2.7440948646839437e-07, "logits/chosen": -5.299439430236816, "logits/rejected": -4.764008522033691, "logps/chosen": -571.4149169921875, "logps/rejected": -419.2232971191406, "loss": 0.4806, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.13617673516273499, "rewards/margins": 0.8195874094963074, "rewards/rejected": -0.6834107637405396, "step": 5880 }, { "epoch": 1.52, "learning_rate": 2.7393133785980684e-07, "logits/chosen": -5.180103302001953, "logits/rejected": -5.524107933044434, "logps/chosen": -528.50390625, "logps/rejected": -478.69561767578125, "loss": 0.4819, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.04233068972826004, "rewards/margins": 0.7594857811927795, "rewards/rejected": -0.7171549797058105, "step": 5890 }, { "epoch": 1.52, "learning_rate": 2.7345318925121926e-07, "logits/chosen": -5.25108528137207, "logits/rejected": -5.024029731750488, "logps/chosen": -501.51251220703125, "logps/rejected": -416.06402587890625, "loss": 0.4602, "rewards/accuracies": 0.75, "rewards/chosen": 0.030466342344880104, "rewards/margins": 0.7858845591545105, "rewards/rejected": -0.7554183006286621, "step": 5900 }, { "epoch": 1.53, "learning_rate": 2.7297504064263173e-07, "logits/chosen": -5.196867942810059, "logits/rejected": -5.30721378326416, "logps/chosen": -653.5975952148438, "logps/rejected": -516.9046630859375, "loss": 0.4951, "rewards/accuracies": 0.75, "rewards/chosen": 0.16582247614860535, "rewards/margins": 0.8621706962585449, "rewards/rejected": -0.6963481307029724, "step": 5910 }, { "epoch": 1.53, "learning_rate": 2.724968920340442e-07, "logits/chosen": -5.332226753234863, "logits/rejected": -5.370752811431885, "logps/chosen": -566.7185668945312, "logps/rejected": -446.3601989746094, "loss": 0.467, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07175563275814056, "rewards/margins": 0.838473916053772, "rewards/rejected": -0.7667182683944702, "step": 5920 }, { "epoch": 1.53, "learning_rate": 2.720187434254566e-07, "logits/chosen": -5.3680219650268555, "logits/rejected": -5.064005374908447, "logps/chosen": -695.9888916015625, "logps/rejected": -456.39532470703125, "loss": 0.4438, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2869570255279541, "rewards/margins": 1.1450444459915161, "rewards/rejected": -0.8580873608589172, "step": 5930 }, { "epoch": 1.53, "learning_rate": 2.715405948168691e-07, "logits/chosen": -5.152412414550781, "logits/rejected": -5.179426670074463, "logps/chosen": -685.0109252929688, "logps/rejected": -534.4718017578125, "loss": 0.4839, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.3079465329647064, "rewards/margins": 0.7919296026229858, "rewards/rejected": -0.4839830994606018, "step": 5940 }, { "epoch": 1.54, "learning_rate": 2.710624462082815e-07, "logits/chosen": -4.971086502075195, "logits/rejected": -5.090357303619385, "logps/chosen": -532.109130859375, "logps/rejected": -386.98468017578125, "loss": 0.4708, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10805933177471161, "rewards/margins": 0.9101721048355103, "rewards/rejected": -1.0182313919067383, "step": 5950 }, { "epoch": 1.54, "learning_rate": 2.70584297599694e-07, "logits/chosen": -5.36342716217041, "logits/rejected": -4.700772762298584, "logps/chosen": -698.775634765625, "logps/rejected": -500.16119384765625, "loss": 0.4666, "rewards/accuracies": 0.75, "rewards/chosen": 0.07067932188510895, "rewards/margins": 0.9582692980766296, "rewards/rejected": -0.8875899314880371, "step": 5960 }, { "epoch": 1.54, "learning_rate": 2.701061489911064e-07, "logits/chosen": -5.0797438621521, "logits/rejected": -4.60978364944458, "logps/chosen": -584.8685913085938, "logps/rejected": -453.17303466796875, "loss": 0.5306, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.05196085572242737, "rewards/margins": 0.6554912328720093, "rewards/rejected": -0.707452118396759, "step": 5970 }, { "epoch": 1.54, "learning_rate": 2.6962800038251887e-07, "logits/chosen": -5.560000896453857, "logits/rejected": -5.112074851989746, "logps/chosen": -599.0046997070312, "logps/rejected": -408.2813720703125, "loss": 0.4654, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.16352809965610504, "rewards/margins": 0.9768415689468384, "rewards/rejected": -0.8133134841918945, "step": 5980 }, { "epoch": 1.55, "learning_rate": 2.6914985177393134e-07, "logits/chosen": -5.151548862457275, "logits/rejected": -4.297739505767822, "logps/chosen": -564.8278198242188, "logps/rejected": -445.3720703125, "loss": 0.4599, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1206878274679184, "rewards/margins": 0.9396132230758667, "rewards/rejected": -0.8189253807067871, "step": 5990 }, { "epoch": 1.55, "learning_rate": 2.6867170316534376e-07, "logits/chosen": -5.610724449157715, "logits/rejected": -5.046086311340332, "logps/chosen": -541.7059326171875, "logps/rejected": -389.84515380859375, "loss": 0.5056, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.04327429458498955, "rewards/margins": 0.8475513458251953, "rewards/rejected": -0.8042769432067871, "step": 6000 }, { "epoch": 1.55, "eval_logits/chosen": -5.2906951904296875, "eval_logits/rejected": -4.9987664222717285, "eval_logps/chosen": -588.2153930664062, "eval_logps/rejected": -449.25982666015625, "eval_loss": 0.5498782992362976, "eval_rewards/accuracies": 0.7129999995231628, "eval_rewards/chosen": 0.005415632389485836, "eval_rewards/margins": 0.6773431897163391, "eval_rewards/rejected": -0.6719275712966919, "eval_runtime": 104.056, "eval_samples_per_second": 19.22, "eval_steps_per_second": 1.201, "step": 6000 }, { "epoch": 1.55, "learning_rate": 2.6819355455675623e-07, "logits/chosen": -5.453244209289551, "logits/rejected": -5.15369987487793, "logps/chosen": -644.30126953125, "logps/rejected": -552.1677856445312, "loss": 0.4309, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.25022321939468384, "rewards/margins": 0.9541277885437012, "rewards/rejected": -0.7039045691490173, "step": 6010 }, { "epoch": 1.55, "learning_rate": 2.6771540594816865e-07, "logits/chosen": -5.7302565574646, "logits/rejected": -4.779721736907959, "logps/chosen": -573.475830078125, "logps/rejected": -388.52923583984375, "loss": 0.4531, "rewards/accuracies": 0.8125, "rewards/chosen": 0.06880221515893936, "rewards/margins": 0.7123725414276123, "rewards/rejected": -0.6435703635215759, "step": 6020 }, { "epoch": 1.56, "learning_rate": 2.672372573395811e-07, "logits/chosen": -5.661740779876709, "logits/rejected": -4.799096584320068, "logps/chosen": -559.1987915039062, "logps/rejected": -385.0419006347656, "loss": 0.5081, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.09739485383033752, "rewards/margins": 0.8209842443466187, "rewards/rejected": -0.7235893607139587, "step": 6030 }, { "epoch": 1.56, "learning_rate": 2.6675910873099354e-07, "logits/chosen": -5.171940326690674, "logits/rejected": -5.090329647064209, "logps/chosen": -581.93115234375, "logps/rejected": -496.2869567871094, "loss": 0.4609, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.11855746805667877, "rewards/margins": 0.9430662393569946, "rewards/rejected": -0.824508786201477, "step": 6040 }, { "epoch": 1.56, "learning_rate": 2.66280960122406e-07, "logits/chosen": -5.270074367523193, "logits/rejected": -4.715147495269775, "logps/chosen": -432.666259765625, "logps/rejected": -354.40679931640625, "loss": 0.4674, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2362377643585205, "rewards/margins": 0.7694851756095886, "rewards/rejected": -0.5332474708557129, "step": 6050 }, { "epoch": 1.56, "learning_rate": 2.658028115138185e-07, "logits/chosen": -4.981286525726318, "logits/rejected": -5.304615497589111, "logps/chosen": -523.1409912109375, "logps/rejected": -427.2158203125, "loss": 0.419, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.2757008671760559, "rewards/margins": 0.9259557723999023, "rewards/rejected": -0.6502547860145569, "step": 6060 }, { "epoch": 1.57, "learning_rate": 2.653246629052309e-07, "logits/chosen": -4.9419074058532715, "logits/rejected": -5.185403347015381, "logps/chosen": -612.6180419921875, "logps/rejected": -578.0643920898438, "loss": 0.5022, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.18794450163841248, "rewards/margins": 0.814724326133728, "rewards/rejected": -0.6267797946929932, "step": 6070 }, { "epoch": 1.57, "learning_rate": 2.648465142966434e-07, "logits/chosen": -4.68510103225708, "logits/rejected": -4.6957173347473145, "logps/chosen": -598.9067993164062, "logps/rejected": -521.862548828125, "loss": 0.4686, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.16604530811309814, "rewards/margins": 0.906887412071228, "rewards/rejected": -0.7408421039581299, "step": 6080 }, { "epoch": 1.57, "learning_rate": 2.643683656880558e-07, "logits/chosen": -4.772303581237793, "logits/rejected": -4.759460926055908, "logps/chosen": -721.9468383789062, "logps/rejected": -442.3534240722656, "loss": 0.4436, "rewards/accuracies": 0.8125, "rewards/chosen": 0.36118924617767334, "rewards/margins": 1.097320795059204, "rewards/rejected": -0.7361315488815308, "step": 6090 }, { "epoch": 1.58, "learning_rate": 2.6389021707946827e-07, "logits/chosen": -5.40725040435791, "logits/rejected": -4.7502851486206055, "logps/chosen": -513.9375, "logps/rejected": -408.05517578125, "loss": 0.4406, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.02200430817902088, "rewards/margins": 0.7920781970024109, "rewards/rejected": -0.7700738906860352, "step": 6100 }, { "epoch": 1.58, "learning_rate": 2.634120684708807e-07, "logits/chosen": -5.0691118240356445, "logits/rejected": -5.121496200561523, "logps/chosen": -579.9508666992188, "logps/rejected": -500.75372314453125, "loss": 0.474, "rewards/accuracies": 0.75, "rewards/chosen": 0.09940090775489807, "rewards/margins": 0.7958636283874512, "rewards/rejected": -0.6964627504348755, "step": 6110 }, { "epoch": 1.58, "learning_rate": 2.6293391986229316e-07, "logits/chosen": -5.623298645019531, "logits/rejected": -4.960019588470459, "logps/chosen": -607.1505126953125, "logps/rejected": -463.81304931640625, "loss": 0.4425, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2635231912136078, "rewards/margins": 0.9602527618408203, "rewards/rejected": -0.6967295408248901, "step": 6120 }, { "epoch": 1.58, "learning_rate": 2.624557712537057e-07, "logits/chosen": -5.534486770629883, "logits/rejected": -5.34757661819458, "logps/chosen": -632.3162841796875, "logps/rejected": -499.102783203125, "loss": 0.4733, "rewards/accuracies": 0.75, "rewards/chosen": 0.05217469483613968, "rewards/margins": 0.9407072067260742, "rewards/rejected": -0.8885326385498047, "step": 6130 }, { "epoch": 1.59, "learning_rate": 2.619776226451181e-07, "logits/chosen": -4.8669915199279785, "logits/rejected": -4.848659038543701, "logps/chosen": -583.6809692382812, "logps/rejected": -432.1404724121094, "loss": 0.4756, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.07757476717233658, "rewards/margins": 0.8153963088989258, "rewards/rejected": -0.7378215193748474, "step": 6140 }, { "epoch": 1.59, "learning_rate": 2.614994740365306e-07, "logits/chosen": -5.783295154571533, "logits/rejected": -5.073866844177246, "logps/chosen": -652.9971313476562, "logps/rejected": -467.5584411621094, "loss": 0.5117, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.19042713940143585, "rewards/margins": 0.8246679306030273, "rewards/rejected": -0.6342408657073975, "step": 6150 }, { "epoch": 1.59, "learning_rate": 2.61021325427943e-07, "logits/chosen": -5.0888214111328125, "logits/rejected": -4.6778974533081055, "logps/chosen": -637.9072265625, "logps/rejected": -469.81817626953125, "loss": 0.4635, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2662716209888458, "rewards/margins": 1.0054959058761597, "rewards/rejected": -0.7392243146896362, "step": 6160 }, { "epoch": 1.59, "learning_rate": 2.6054317681935547e-07, "logits/chosen": -4.938897609710693, "logits/rejected": -4.936993598937988, "logps/chosen": -578.4232788085938, "logps/rejected": -422.798095703125, "loss": 0.4883, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.3279651701450348, "rewards/margins": 0.9422677159309387, "rewards/rejected": -0.6143025159835815, "step": 6170 }, { "epoch": 1.6, "learning_rate": 2.600650282107679e-07, "logits/chosen": -4.972899913787842, "logits/rejected": -4.700381755828857, "logps/chosen": -515.343505859375, "logps/rejected": -372.33441162109375, "loss": 0.4616, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1548624187707901, "rewards/margins": 0.8439728021621704, "rewards/rejected": -0.6891103386878967, "step": 6180 }, { "epoch": 1.6, "learning_rate": 2.5958687960218036e-07, "logits/chosen": -5.1609930992126465, "logits/rejected": -5.191376209259033, "logps/chosen": -599.2490844726562, "logps/rejected": -488.11456298828125, "loss": 0.4525, "rewards/accuracies": 0.8125, "rewards/chosen": 0.37178951501846313, "rewards/margins": 1.0715231895446777, "rewards/rejected": -0.6997336149215698, "step": 6190 }, { "epoch": 1.6, "learning_rate": 2.5910873099359283e-07, "logits/chosen": -5.31819486618042, "logits/rejected": -4.608155250549316, "logps/chosen": -608.2944946289062, "logps/rejected": -407.66558837890625, "loss": 0.4838, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.26564979553222656, "rewards/margins": 0.816923975944519, "rewards/rejected": -0.5512741804122925, "step": 6200 }, { "epoch": 1.6, "learning_rate": 2.5863058238500525e-07, "logits/chosen": -5.0928635597229, "logits/rejected": -4.938261032104492, "logps/chosen": -636.6690063476562, "logps/rejected": -456.9441833496094, "loss": 0.4417, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1947118043899536, "rewards/margins": 1.0188127756118774, "rewards/rejected": -0.8241009712219238, "step": 6210 }, { "epoch": 1.61, "learning_rate": 2.581524337764177e-07, "logits/chosen": -4.997957229614258, "logits/rejected": -5.020618915557861, "logps/chosen": -642.6024169921875, "logps/rejected": -489.79345703125, "loss": 0.4825, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1633283942937851, "rewards/margins": 0.8822242617607117, "rewards/rejected": -0.7188957929611206, "step": 6220 }, { "epoch": 1.61, "learning_rate": 2.5767428516783014e-07, "logits/chosen": -5.288960933685303, "logits/rejected": -4.712518692016602, "logps/chosen": -617.5596923828125, "logps/rejected": -466.94598388671875, "loss": 0.5172, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.05507078766822815, "rewards/margins": 0.909329891204834, "rewards/rejected": -0.8542591333389282, "step": 6230 }, { "epoch": 1.61, "learning_rate": 2.571961365592426e-07, "logits/chosen": -5.276818752288818, "logits/rejected": -4.7398271560668945, "logps/chosen": -640.8303833007812, "logps/rejected": -441.1388244628906, "loss": 0.4579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1792055070400238, "rewards/margins": 0.9101894497871399, "rewards/rejected": -0.7309837937355042, "step": 6240 }, { "epoch": 1.61, "learning_rate": 2.567179879506551e-07, "logits/chosen": -5.096794128417969, "logits/rejected": -5.106588363647461, "logps/chosen": -620.3663940429688, "logps/rejected": -491.5909729003906, "loss": 0.4606, "rewards/accuracies": 0.75, "rewards/chosen": 0.34899502992630005, "rewards/margins": 0.9501008987426758, "rewards/rejected": -0.601105809211731, "step": 6250 }, { "epoch": 1.62, "learning_rate": 2.562398393420675e-07, "logits/chosen": -5.543417930603027, "logits/rejected": -5.054560661315918, "logps/chosen": -547.40771484375, "logps/rejected": -442.52447509765625, "loss": 0.5423, "rewards/accuracies": 0.75, "rewards/chosen": -0.07681821286678314, "rewards/margins": 0.7147814631462097, "rewards/rejected": -0.791599690914154, "step": 6260 }, { "epoch": 1.62, "learning_rate": 2.5576169073347997e-07, "logits/chosen": -5.368969440460205, "logits/rejected": -4.852042198181152, "logps/chosen": -621.84130859375, "logps/rejected": -522.9584350585938, "loss": 0.4787, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.27284741401672363, "rewards/margins": 1.0021681785583496, "rewards/rejected": -0.7293206453323364, "step": 6270 }, { "epoch": 1.62, "learning_rate": 2.552835421248924e-07, "logits/chosen": -5.053814888000488, "logits/rejected": -4.31284761428833, "logps/chosen": -633.384765625, "logps/rejected": -527.5802001953125, "loss": 0.4785, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17659424245357513, "rewards/margins": 0.9453839063644409, "rewards/rejected": -0.7687896490097046, "step": 6280 }, { "epoch": 1.62, "learning_rate": 2.5480539351630486e-07, "logits/chosen": -5.41217041015625, "logits/rejected": -4.429527282714844, "logps/chosen": -714.9429931640625, "logps/rejected": -414.08538818359375, "loss": 0.4117, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2845489978790283, "rewards/margins": 1.0826810598373413, "rewards/rejected": -0.798132061958313, "step": 6290 }, { "epoch": 1.63, "learning_rate": 2.543272449077173e-07, "logits/chosen": -5.345113754272461, "logits/rejected": -4.958364486694336, "logps/chosen": -638.6312255859375, "logps/rejected": -453.43377685546875, "loss": 0.469, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.10958293825387955, "rewards/margins": 0.9192023277282715, "rewards/rejected": -0.8096194267272949, "step": 6300 }, { "epoch": 1.63, "learning_rate": 2.5384909629912975e-07, "logits/chosen": -5.447089195251465, "logits/rejected": -4.97067928314209, "logps/chosen": -627.8514404296875, "logps/rejected": -433.92962646484375, "loss": 0.4462, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3082348704338074, "rewards/margins": 1.103125810623169, "rewards/rejected": -0.7948909401893616, "step": 6310 }, { "epoch": 1.63, "learning_rate": 2.533709476905422e-07, "logits/chosen": -5.7362961769104, "logits/rejected": -5.320249557495117, "logps/chosen": -605.8050537109375, "logps/rejected": -446.39013671875, "loss": 0.4785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0761009082198143, "rewards/margins": 0.8175575137138367, "rewards/rejected": -0.7414566278457642, "step": 6320 }, { "epoch": 1.63, "learning_rate": 2.5289279908195465e-07, "logits/chosen": -5.099646091461182, "logits/rejected": -4.92291259765625, "logps/chosen": -613.3463134765625, "logps/rejected": -489.710205078125, "loss": 0.4865, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2802828252315521, "rewards/margins": 0.90876704454422, "rewards/rejected": -0.6284841299057007, "step": 6330 }, { "epoch": 1.64, "learning_rate": 2.524146504733671e-07, "logits/chosen": -5.596928596496582, "logits/rejected": -5.073002815246582, "logps/chosen": -574.1611328125, "logps/rejected": -388.6968994140625, "loss": 0.515, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03728940337896347, "rewards/margins": 0.6715155839920044, "rewards/rejected": -0.7088049650192261, "step": 6340 }, { "epoch": 1.64, "learning_rate": 2.5193650186477954e-07, "logits/chosen": -5.336203098297119, "logits/rejected": -5.145905017852783, "logps/chosen": -575.8031616210938, "logps/rejected": -448.1175842285156, "loss": 0.4847, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.0751899927854538, "rewards/margins": 0.7143184542655945, "rewards/rejected": -0.6391285061836243, "step": 6350 }, { "epoch": 1.64, "learning_rate": 2.51458353256192e-07, "logits/chosen": -5.231790542602539, "logits/rejected": -4.9688825607299805, "logps/chosen": -623.6688232421875, "logps/rejected": -524.2118530273438, "loss": 0.5146, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.12363441288471222, "rewards/margins": 0.7707942128181458, "rewards/rejected": -0.6471598744392395, "step": 6360 }, { "epoch": 1.64, "learning_rate": 2.5098020464760443e-07, "logits/chosen": -5.008232593536377, "logits/rejected": -5.173331260681152, "logps/chosen": -686.8382568359375, "logps/rejected": -537.06787109375, "loss": 0.4891, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2776939570903778, "rewards/margins": 0.9717704057693481, "rewards/rejected": -0.6940763592720032, "step": 6370 }, { "epoch": 1.65, "learning_rate": 2.505020560390169e-07, "logits/chosen": -5.206994533538818, "logits/rejected": -4.711002349853516, "logps/chosen": -528.8482055664062, "logps/rejected": -467.34539794921875, "loss": 0.5372, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0059933140873909, "rewards/margins": 0.5247630476951599, "rewards/rejected": -0.5187697410583496, "step": 6380 }, { "epoch": 1.65, "learning_rate": 2.5002390743042937e-07, "logits/chosen": -5.128048896789551, "logits/rejected": -4.762636661529541, "logps/chosen": -570.8037719726562, "logps/rejected": -437.07305908203125, "loss": 0.4978, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.03184998780488968, "rewards/margins": 0.7286216020584106, "rewards/rejected": -0.6967716217041016, "step": 6390 }, { "epoch": 1.65, "learning_rate": 2.495457588218418e-07, "logits/chosen": -4.997927665710449, "logits/rejected": -5.038205146789551, "logps/chosen": -596.4259033203125, "logps/rejected": -476.92388916015625, "loss": 0.4627, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.18238988518714905, "rewards/margins": 0.8336280584335327, "rewards/rejected": -0.651238203048706, "step": 6400 }, { "epoch": 1.66, "learning_rate": 2.4906761021325426e-07, "logits/chosen": -5.236283302307129, "logits/rejected": -5.174281120300293, "logps/chosen": -681.5098876953125, "logps/rejected": -484.61083984375, "loss": 0.439, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.22161927819252014, "rewards/margins": 1.0318154096603394, "rewards/rejected": -0.8101962804794312, "step": 6410 }, { "epoch": 1.66, "learning_rate": 2.485894616046667e-07, "logits/chosen": -5.263054847717285, "logits/rejected": -5.4549970626831055, "logps/chosen": -595.5380859375, "logps/rejected": -486.0205993652344, "loss": 0.4297, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.22621488571166992, "rewards/margins": 0.8020917773246765, "rewards/rejected": -0.5758769512176514, "step": 6420 }, { "epoch": 1.66, "learning_rate": 2.481113129960792e-07, "logits/chosen": -5.065393447875977, "logits/rejected": -4.601251125335693, "logps/chosen": -565.5445556640625, "logps/rejected": -447.7747497558594, "loss": 0.448, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1477576196193695, "rewards/margins": 1.0091482400894165, "rewards/rejected": -0.8613905906677246, "step": 6430 }, { "epoch": 1.66, "learning_rate": 2.476331643874916e-07, "logits/chosen": -5.345168113708496, "logits/rejected": -5.003687858581543, "logps/chosen": -563.2815551757812, "logps/rejected": -432.439453125, "loss": 0.516, "rewards/accuracies": 0.75, "rewards/chosen": 0.11725252866744995, "rewards/margins": 0.7590247392654419, "rewards/rejected": -0.6417721509933472, "step": 6440 }, { "epoch": 1.67, "learning_rate": 2.471550157789041e-07, "logits/chosen": -5.319686412811279, "logits/rejected": -4.826287269592285, "logps/chosen": -599.8450927734375, "logps/rejected": -461.84613037109375, "loss": 0.5183, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.07568398863077164, "rewards/margins": 0.7551029920578003, "rewards/rejected": -0.6794189810752869, "step": 6450 }, { "epoch": 1.67, "learning_rate": 2.466768671703165e-07, "logits/chosen": -5.648402214050293, "logits/rejected": -4.9532246589660645, "logps/chosen": -548.5721435546875, "logps/rejected": -410.8759765625, "loss": 0.525, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03501836210489273, "rewards/margins": 0.673780083656311, "rewards/rejected": -0.708798348903656, "step": 6460 }, { "epoch": 1.67, "learning_rate": 2.46198718561729e-07, "logits/chosen": -5.099196910858154, "logits/rejected": -4.805300712585449, "logps/chosen": -641.7225341796875, "logps/rejected": -500.89410400390625, "loss": 0.5039, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.2475341111421585, "rewards/margins": 0.8559266328811646, "rewards/rejected": -0.608392596244812, "step": 6470 }, { "epoch": 1.67, "learning_rate": 2.457205699531414e-07, "logits/chosen": -5.415421485900879, "logits/rejected": -4.865019798278809, "logps/chosen": -611.9879760742188, "logps/rejected": -448.6189880371094, "loss": 0.4989, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.22945913672447205, "rewards/margins": 0.6715555191040039, "rewards/rejected": -0.44209641218185425, "step": 6480 }, { "epoch": 1.68, "learning_rate": 2.452424213445539e-07, "logits/chosen": -5.199051856994629, "logits/rejected": -4.84391975402832, "logps/chosen": -653.8082275390625, "logps/rejected": -464.706787109375, "loss": 0.4435, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2306828498840332, "rewards/margins": 0.99567049741745, "rewards/rejected": -0.764987587928772, "step": 6490 }, { "epoch": 1.68, "learning_rate": 2.4476427273596635e-07, "logits/chosen": -5.388723850250244, "logits/rejected": -4.997467994689941, "logps/chosen": -618.6880493164062, "logps/rejected": -468.45306396484375, "loss": 0.4969, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.19417648017406464, "rewards/margins": 0.8078724145889282, "rewards/rejected": -0.6136959791183472, "step": 6500 }, { "epoch": 1.68, "learning_rate": 2.4428612412737877e-07, "logits/chosen": -5.438475131988525, "logits/rejected": -4.839552402496338, "logps/chosen": -565.8646240234375, "logps/rejected": -433.3873596191406, "loss": 0.4534, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.19921420514583588, "rewards/margins": 0.8071877360343933, "rewards/rejected": -0.607973575592041, "step": 6510 }, { "epoch": 1.68, "learning_rate": 2.4380797551879124e-07, "logits/chosen": -4.853618621826172, "logits/rejected": -4.97342586517334, "logps/chosen": -591.2943115234375, "logps/rejected": -465.2264099121094, "loss": 0.4712, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.24594172835350037, "rewards/margins": 0.9235225915908813, "rewards/rejected": -0.6775807738304138, "step": 6520 }, { "epoch": 1.69, "learning_rate": 2.4332982691020366e-07, "logits/chosen": -4.767547607421875, "logits/rejected": -4.886878967285156, "logps/chosen": -557.1605224609375, "logps/rejected": -484.56072998046875, "loss": 0.533, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.11640181392431259, "rewards/margins": 0.6341034770011902, "rewards/rejected": -0.5177017450332642, "step": 6530 }, { "epoch": 1.69, "learning_rate": 2.4285167830161613e-07, "logits/chosen": -5.221864700317383, "logits/rejected": -4.788050651550293, "logps/chosen": -601.3493041992188, "logps/rejected": -461.08734130859375, "loss": 0.4735, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.20074152946472168, "rewards/margins": 0.8954550623893738, "rewards/rejected": -0.6947135925292969, "step": 6540 }, { "epoch": 1.69, "learning_rate": 2.4237352969302855e-07, "logits/chosen": -5.306252479553223, "logits/rejected": -4.783545017242432, "logps/chosen": -616.226318359375, "logps/rejected": -454.01483154296875, "loss": 0.5005, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.14841921627521515, "rewards/margins": 0.8449894785881042, "rewards/rejected": -0.6965702176094055, "step": 6550 }, { "epoch": 1.69, "learning_rate": 2.41895381084441e-07, "logits/chosen": -5.28151798248291, "logits/rejected": -3.6343307495117188, "logps/chosen": -542.8233642578125, "logps/rejected": -409.71435546875, "loss": 0.4748, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.19003179669380188, "rewards/margins": 0.8299418687820435, "rewards/rejected": -0.6399100422859192, "step": 6560 }, { "epoch": 1.7, "learning_rate": 2.414172324758535e-07, "logits/chosen": -5.390178680419922, "logits/rejected": -5.086108684539795, "logps/chosen": -560.9259033203125, "logps/rejected": -442.2779235839844, "loss": 0.4855, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.23656418919563293, "rewards/margins": 0.9066953659057617, "rewards/rejected": -0.6701311469078064, "step": 6570 }, { "epoch": 1.7, "learning_rate": 2.4093908386726596e-07, "logits/chosen": -5.148705005645752, "logits/rejected": -4.654217720031738, "logps/chosen": -603.0735473632812, "logps/rejected": -502.6656188964844, "loss": 0.4563, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.10407658666372299, "rewards/margins": 0.7867701649665833, "rewards/rejected": -0.682693600654602, "step": 6580 }, { "epoch": 1.7, "learning_rate": 2.404609352586784e-07, "logits/chosen": -5.291640281677246, "logits/rejected": -4.948567867279053, "logps/chosen": -582.6641235351562, "logps/rejected": -482.5367736816406, "loss": 0.4668, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1869414746761322, "rewards/margins": 0.8936322927474976, "rewards/rejected": -0.7066908478736877, "step": 6590 }, { "epoch": 1.7, "learning_rate": 2.3998278665009086e-07, "logits/chosen": -5.203324794769287, "logits/rejected": -4.9245500564575195, "logps/chosen": -675.8508911132812, "logps/rejected": -477.69866943359375, "loss": 0.4784, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.19050365686416626, "rewards/margins": 0.8903636932373047, "rewards/rejected": -0.6998600959777832, "step": 6600 }, { "epoch": 1.71, "learning_rate": 2.395046380415033e-07, "logits/chosen": -5.545949459075928, "logits/rejected": -5.254366874694824, "logps/chosen": -599.8272705078125, "logps/rejected": -513.608642578125, "loss": 0.4966, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.3082560896873474, "rewards/margins": 0.9381462931632996, "rewards/rejected": -0.6298902630805969, "step": 6610 }, { "epoch": 1.71, "learning_rate": 2.3902648943291575e-07, "logits/chosen": -5.113016128540039, "logits/rejected": -5.245876789093018, "logps/chosen": -587.2180786132812, "logps/rejected": -466.4981384277344, "loss": 0.5788, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.2080243080854416, "rewards/margins": 0.6526762247085571, "rewards/rejected": -0.44465193152427673, "step": 6620 }, { "epoch": 1.71, "learning_rate": 2.385483408243282e-07, "logits/chosen": -5.294241905212402, "logits/rejected": -5.205246448516846, "logps/chosen": -505.4332580566406, "logps/rejected": -361.124267578125, "loss": 0.4643, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1441817283630371, "rewards/margins": 0.7761465311050415, "rewards/rejected": -0.6319648623466492, "step": 6630 }, { "epoch": 1.71, "learning_rate": 2.3807019221574066e-07, "logits/chosen": -4.930095195770264, "logits/rejected": -5.222455978393555, "logps/chosen": -525.7227783203125, "logps/rejected": -450.4359436035156, "loss": 0.4136, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.08919308334589005, "rewards/margins": 0.8631596565246582, "rewards/rejected": -0.7739664912223816, "step": 6640 }, { "epoch": 1.72, "learning_rate": 2.375920436071531e-07, "logits/chosen": -5.554598808288574, "logits/rejected": -4.539285182952881, "logps/chosen": -635.398681640625, "logps/rejected": -413.87835693359375, "loss": 0.4156, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.2974664866924286, "rewards/margins": 0.9692127108573914, "rewards/rejected": -0.6717461943626404, "step": 6650 }, { "epoch": 1.72, "learning_rate": 2.3711389499856555e-07, "logits/chosen": -5.163575649261475, "logits/rejected": -4.865039825439453, "logps/chosen": -632.5794067382812, "logps/rejected": -431.5252380371094, "loss": 0.4274, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3368532657623291, "rewards/margins": 0.9175024032592773, "rewards/rejected": -0.580649197101593, "step": 6660 }, { "epoch": 1.72, "learning_rate": 2.36635746389978e-07, "logits/chosen": -5.108762264251709, "logits/rejected": -4.833866119384766, "logps/chosen": -672.9143676757812, "logps/rejected": -536.2535400390625, "loss": 0.4638, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.28247708082199097, "rewards/margins": 0.9210672378540039, "rewards/rejected": -0.6385902166366577, "step": 6670 }, { "epoch": 1.72, "learning_rate": 2.3615759778139044e-07, "logits/chosen": -5.513070583343506, "logits/rejected": -5.68106746673584, "logps/chosen": -479.6800842285156, "logps/rejected": -437.13677978515625, "loss": 0.4346, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.12171383202075958, "rewards/margins": 0.7678426504135132, "rewards/rejected": -0.64612877368927, "step": 6680 }, { "epoch": 1.73, "learning_rate": 2.356794491728029e-07, "logits/chosen": -5.617056846618652, "logits/rejected": -4.9727277755737305, "logps/chosen": -596.6974487304688, "logps/rejected": -422.83880615234375, "loss": 0.4893, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.08837094902992249, "rewards/margins": 0.7667514085769653, "rewards/rejected": -0.6783804893493652, "step": 6690 }, { "epoch": 1.73, "learning_rate": 2.3520130056421536e-07, "logits/chosen": -4.854419708251953, "logits/rejected": -5.026902198791504, "logps/chosen": -560.0449829101562, "logps/rejected": -488.96484375, "loss": 0.4992, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.16998063027858734, "rewards/margins": 0.8507875204086304, "rewards/rejected": -0.6808069348335266, "step": 6700 }, { "epoch": 1.73, "learning_rate": 2.347231519556278e-07, "logits/chosen": -5.294173240661621, "logits/rejected": -5.254807949066162, "logps/chosen": -634.12841796875, "logps/rejected": -504.68060302734375, "loss": 0.4507, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.25019344687461853, "rewards/margins": 0.837687611579895, "rewards/rejected": -0.5874941945075989, "step": 6710 }, { "epoch": 1.74, "learning_rate": 2.3424500334704025e-07, "logits/chosen": -5.115827560424805, "logits/rejected": -4.983232021331787, "logps/chosen": -566.4320068359375, "logps/rejected": -523.3078002929688, "loss": 0.5363, "rewards/accuracies": 0.6875, "rewards/chosen": 0.041759006679058075, "rewards/margins": 0.7115862369537354, "rewards/rejected": -0.6698271632194519, "step": 6720 }, { "epoch": 1.74, "learning_rate": 2.337668547384527e-07, "logits/chosen": -5.057227611541748, "logits/rejected": -4.848478317260742, "logps/chosen": -634.1058959960938, "logps/rejected": -510.5616149902344, "loss": 0.4666, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.21456125378608704, "rewards/margins": 0.9962800741195679, "rewards/rejected": -0.7817188501358032, "step": 6730 }, { "epoch": 1.74, "learning_rate": 2.3328870612986514e-07, "logits/chosen": -5.374121189117432, "logits/rejected": -4.907290458679199, "logps/chosen": -579.9872436523438, "logps/rejected": -375.5261535644531, "loss": 0.45, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.18604478240013123, "rewards/margins": 0.9055026769638062, "rewards/rejected": -0.7194578051567078, "step": 6740 }, { "epoch": 1.74, "learning_rate": 2.328105575212776e-07, "logits/chosen": -5.206822395324707, "logits/rejected": -4.883788585662842, "logps/chosen": -696.1502075195312, "logps/rejected": -533.0372924804688, "loss": 0.4695, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2497372329235077, "rewards/margins": 0.8818610906600952, "rewards/rejected": -0.6321238279342651, "step": 6750 }, { "epoch": 1.75, "learning_rate": 2.3233240891269003e-07, "logits/chosen": -5.293177604675293, "logits/rejected": -5.1824541091918945, "logps/chosen": -542.9893798828125, "logps/rejected": -408.16925048828125, "loss": 0.4543, "rewards/accuracies": 0.8125, "rewards/chosen": -0.004529730882495642, "rewards/margins": 0.8361835479736328, "rewards/rejected": -0.8407133221626282, "step": 6760 }, { "epoch": 1.75, "learning_rate": 2.318542603041025e-07, "logits/chosen": -5.138808727264404, "logits/rejected": -5.379240989685059, "logps/chosen": -475.71856689453125, "logps/rejected": -473.3731994628906, "loss": 0.4737, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08443831652402878, "rewards/margins": 0.7547516822814941, "rewards/rejected": -0.8391900062561035, "step": 6770 }, { "epoch": 1.75, "learning_rate": 2.3137611169551495e-07, "logits/chosen": -5.452600955963135, "logits/rejected": -5.107142448425293, "logps/chosen": -551.1433715820312, "logps/rejected": -450.29205322265625, "loss": 0.4368, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.01827383041381836, "rewards/margins": 0.8234564065933228, "rewards/rejected": -0.8051825761795044, "step": 6780 }, { "epoch": 1.75, "learning_rate": 2.3089796308692742e-07, "logits/chosen": -5.2220869064331055, "logits/rejected": -5.536231994628906, "logps/chosen": -540.0103149414062, "logps/rejected": -431.88232421875, "loss": 0.4383, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0655921921133995, "rewards/margins": 0.8451724052429199, "rewards/rejected": -0.9107645750045776, "step": 6790 }, { "epoch": 1.76, "learning_rate": 2.3041981447833987e-07, "logits/chosen": -5.251306533813477, "logits/rejected": -4.730404853820801, "logps/chosen": -578.265869140625, "logps/rejected": -482.8453674316406, "loss": 0.4706, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1278570294380188, "rewards/margins": 0.8912492990493774, "rewards/rejected": -0.7633922100067139, "step": 6800 }, { "epoch": 1.76, "learning_rate": 2.2994166586975231e-07, "logits/chosen": -5.25925350189209, "logits/rejected": -5.214791297912598, "logps/chosen": -585.8431396484375, "logps/rejected": -509.83709716796875, "loss": 0.4856, "rewards/accuracies": 0.75, "rewards/chosen": 0.002995340619236231, "rewards/margins": 0.7670646905899048, "rewards/rejected": -0.7640693187713623, "step": 6810 }, { "epoch": 1.76, "learning_rate": 2.2946351726116476e-07, "logits/chosen": -5.346457004547119, "logits/rejected": -4.893197536468506, "logps/chosen": -654.0994873046875, "logps/rejected": -435.63226318359375, "loss": 0.4391, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1445787250995636, "rewards/margins": 0.9594966173171997, "rewards/rejected": -0.8149178624153137, "step": 6820 }, { "epoch": 1.76, "learning_rate": 2.289853686525772e-07, "logits/chosen": -5.374524116516113, "logits/rejected": -4.911614894866943, "logps/chosen": -539.2493896484375, "logps/rejected": -394.8899841308594, "loss": 0.4759, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.08093634992837906, "rewards/margins": 0.7061105966567993, "rewards/rejected": -0.6251741647720337, "step": 6830 }, { "epoch": 1.77, "learning_rate": 2.2850722004398968e-07, "logits/chosen": -5.098431587219238, "logits/rejected": -4.920378684997559, "logps/chosen": -574.76611328125, "logps/rejected": -540.5425415039062, "loss": 0.4675, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.026515021920204163, "rewards/margins": 0.7846910357475281, "rewards/rejected": -0.8112061619758606, "step": 6840 }, { "epoch": 1.77, "learning_rate": 2.2802907143540212e-07, "logits/chosen": -5.139833450317383, "logits/rejected": -5.077704906463623, "logps/chosen": -569.126953125, "logps/rejected": -466.74444580078125, "loss": 0.4333, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.13700780272483826, "rewards/margins": 1.0585730075836182, "rewards/rejected": -0.9215652346611023, "step": 6850 }, { "epoch": 1.77, "learning_rate": 2.2755092282681457e-07, "logits/chosen": -5.4935173988342285, "logits/rejected": -5.349701881408691, "logps/chosen": -601.4210205078125, "logps/rejected": -501.93524169921875, "loss": 0.5007, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.21929176151752472, "rewards/margins": 0.8135753870010376, "rewards/rejected": -0.5942835211753845, "step": 6860 }, { "epoch": 1.77, "learning_rate": 2.27072774218227e-07, "logits/chosen": -5.107312202453613, "logits/rejected": -5.270110130310059, "logps/chosen": -599.1405639648438, "logps/rejected": -490.7293395996094, "loss": 0.4598, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1486978828907013, "rewards/margins": 0.8832511901855469, "rewards/rejected": -0.734553337097168, "step": 6870 }, { "epoch": 1.78, "learning_rate": 2.2659462560963946e-07, "logits/chosen": -4.839105606079102, "logits/rejected": -4.600606918334961, "logps/chosen": -700.8055419921875, "logps/rejected": -541.5048217773438, "loss": 0.5263, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.18337486684322357, "rewards/margins": 0.7742009162902832, "rewards/rejected": -0.5908260345458984, "step": 6880 }, { "epoch": 1.78, "learning_rate": 2.261164770010519e-07, "logits/chosen": -5.139800071716309, "logits/rejected": -5.249125003814697, "logps/chosen": -609.7293701171875, "logps/rejected": -521.4674072265625, "loss": 0.4709, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.1670023649930954, "rewards/margins": 0.85869961977005, "rewards/rejected": -0.6916972398757935, "step": 6890 }, { "epoch": 1.78, "learning_rate": 2.2563832839246435e-07, "logits/chosen": -5.332768440246582, "logits/rejected": -5.0288543701171875, "logps/chosen": -587.9389038085938, "logps/rejected": -457.0713806152344, "loss": 0.4912, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.06400305777788162, "rewards/margins": 0.802017092704773, "rewards/rejected": -0.7380140423774719, "step": 6900 }, { "epoch": 1.78, "learning_rate": 2.2516017978387682e-07, "logits/chosen": -5.2278923988342285, "logits/rejected": -4.521875381469727, "logps/chosen": -650.0590209960938, "logps/rejected": -416.13311767578125, "loss": 0.4296, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.17268410325050354, "rewards/margins": 1.0738023519515991, "rewards/rejected": -0.9011181592941284, "step": 6910 }, { "epoch": 1.79, "learning_rate": 2.2468203117528927e-07, "logits/chosen": -5.383666038513184, "logits/rejected": -4.628347873687744, "logps/chosen": -697.8087158203125, "logps/rejected": -442.2432556152344, "loss": 0.4278, "rewards/accuracies": 0.8125, "rewards/chosen": 0.15144984424114227, "rewards/margins": 0.988418459892273, "rewards/rejected": -0.836968719959259, "step": 6920 }, { "epoch": 1.79, "learning_rate": 2.242038825667017e-07, "logits/chosen": -5.264932155609131, "logits/rejected": -4.608864784240723, "logps/chosen": -684.7554931640625, "logps/rejected": -489.71868896484375, "loss": 0.4322, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.22825929522514343, "rewards/margins": 1.1259441375732422, "rewards/rejected": -0.8976848721504211, "step": 6930 }, { "epoch": 1.79, "learning_rate": 2.2372573395811418e-07, "logits/chosen": -5.471644878387451, "logits/rejected": -5.0419111251831055, "logps/chosen": -516.4398193359375, "logps/rejected": -421.69183349609375, "loss": 0.5323, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06627535820007324, "rewards/margins": 0.8441283106803894, "rewards/rejected": -0.9104036092758179, "step": 6940 }, { "epoch": 1.79, "learning_rate": 2.2324758534952663e-07, "logits/chosen": -5.299819469451904, "logits/rejected": -5.046054840087891, "logps/chosen": -611.6752319335938, "logps/rejected": -464.3207092285156, "loss": 0.4245, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1692255437374115, "rewards/margins": 0.9484872817993164, "rewards/rejected": -0.7792617678642273, "step": 6950 }, { "epoch": 1.8, "learning_rate": 2.2276943674093907e-07, "logits/chosen": -5.108996391296387, "logits/rejected": -5.130315780639648, "logps/chosen": -584.8426513671875, "logps/rejected": -509.0596618652344, "loss": 0.4846, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.01804596371948719, "rewards/margins": 0.7934946417808533, "rewards/rejected": -0.7754486799240112, "step": 6960 }, { "epoch": 1.8, "learning_rate": 2.2229128813235152e-07, "logits/chosen": -5.403630256652832, "logits/rejected": -5.014420509338379, "logps/chosen": -604.8436279296875, "logps/rejected": -408.82513427734375, "loss": 0.482, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.14850488305091858, "rewards/margins": 0.823590099811554, "rewards/rejected": -0.675085186958313, "step": 6970 }, { "epoch": 1.8, "learning_rate": 2.21813139523764e-07, "logits/chosen": -5.376317501068115, "logits/rejected": -4.994105339050293, "logps/chosen": -577.3953857421875, "logps/rejected": -484.5353088378906, "loss": 0.4258, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.06336905062198639, "rewards/margins": 0.9787061810493469, "rewards/rejected": -0.9153369665145874, "step": 6980 }, { "epoch": 1.8, "learning_rate": 2.2133499091517644e-07, "logits/chosen": -5.16377592086792, "logits/rejected": -5.0089216232299805, "logps/chosen": -673.1622924804688, "logps/rejected": -454.974853515625, "loss": 0.4588, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1691974699497223, "rewards/margins": 1.0113651752471924, "rewards/rejected": -0.8421677350997925, "step": 6990 }, { "epoch": 1.81, "learning_rate": 2.2085684230658888e-07, "logits/chosen": -5.491048336029053, "logits/rejected": -5.076659679412842, "logps/chosen": -554.4429321289062, "logps/rejected": -453.893798828125, "loss": 0.4608, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.033755138516426086, "rewards/margins": 0.862399697303772, "rewards/rejected": -0.8286444544792175, "step": 7000 }, { "epoch": 1.81, "eval_logits/chosen": -5.34059476852417, "eval_logits/rejected": -5.054922103881836, "eval_logps/chosen": -588.6455078125, "eval_logps/rejected": -450.0340576171875, "eval_loss": 0.5499718189239502, "eval_rewards/accuracies": 0.703000009059906, "eval_rewards/chosen": -0.03759216517210007, "eval_rewards/margins": 0.7117614150047302, "eval_rewards/rejected": -0.7493535876274109, "eval_runtime": 104.2518, "eval_samples_per_second": 19.184, "eval_steps_per_second": 1.199, "step": 7000 }, { "epoch": 1.81, "learning_rate": 2.2037869369800133e-07, "logits/chosen": -5.028128623962402, "logits/rejected": -4.990630149841309, "logps/chosen": -557.70458984375, "logps/rejected": -452.8056640625, "loss": 0.4409, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1278298795223236, "rewards/margins": 1.0109275579452515, "rewards/rejected": -0.8830976486206055, "step": 7010 }, { "epoch": 1.81, "learning_rate": 2.1990054508941377e-07, "logits/chosen": -4.857028484344482, "logits/rejected": -4.449906349182129, "logps/chosen": -673.762451171875, "logps/rejected": -433.81884765625, "loss": 0.4157, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1144794374704361, "rewards/margins": 0.9481874704360962, "rewards/rejected": -0.8337081074714661, "step": 7020 }, { "epoch": 1.82, "learning_rate": 2.1942239648082622e-07, "logits/chosen": -5.26198673248291, "logits/rejected": -5.048341274261475, "logps/chosen": -653.9793090820312, "logps/rejected": -595.7237548828125, "loss": 0.4593, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.3137285113334656, "rewards/margins": 1.033069372177124, "rewards/rejected": -0.7193408608436584, "step": 7030 }, { "epoch": 1.82, "learning_rate": 2.1894424787223866e-07, "logits/chosen": -5.072684288024902, "logits/rejected": -4.779746055603027, "logps/chosen": -550.0865478515625, "logps/rejected": -426.05255126953125, "loss": 0.4782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.020730257034301758, "rewards/margins": 0.9214369058609009, "rewards/rejected": -0.9007066488265991, "step": 7040 }, { "epoch": 1.82, "learning_rate": 2.1846609926365114e-07, "logits/chosen": -4.85120964050293, "logits/rejected": -4.813653945922852, "logps/chosen": -523.7491455078125, "logps/rejected": -510.88555908203125, "loss": 0.5363, "rewards/accuracies": 0.75, "rewards/chosen": 0.000857760023791343, "rewards/margins": 0.765397310256958, "rewards/rejected": -0.7645395994186401, "step": 7050 }, { "epoch": 1.82, "learning_rate": 2.1798795065506358e-07, "logits/chosen": -5.27264928817749, "logits/rejected": -4.68823766708374, "logps/chosen": -550.83203125, "logps/rejected": -396.96685791015625, "loss": 0.461, "rewards/accuracies": 0.75, "rewards/chosen": 0.1095241904258728, "rewards/margins": 0.7476019263267517, "rewards/rejected": -0.6380777955055237, "step": 7060 }, { "epoch": 1.83, "learning_rate": 2.1750980204647603e-07, "logits/chosen": -5.054840564727783, "logits/rejected": -5.141736030578613, "logps/chosen": -594.248779296875, "logps/rejected": -485.877685546875, "loss": 0.5009, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.1810082048177719, "rewards/margins": 0.7005289196968079, "rewards/rejected": -0.881537139415741, "step": 7070 }, { "epoch": 1.83, "learning_rate": 2.1703165343788847e-07, "logits/chosen": -5.0071611404418945, "logits/rejected": -4.642799377441406, "logps/chosen": -577.8411865234375, "logps/rejected": -440.16192626953125, "loss": 0.5311, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19975970685482025, "rewards/margins": 0.9847708940505981, "rewards/rejected": -0.7850111722946167, "step": 7080 }, { "epoch": 1.83, "learning_rate": 2.1655350482930094e-07, "logits/chosen": -5.100114345550537, "logits/rejected": -4.70775032043457, "logps/chosen": -537.167236328125, "logps/rejected": -397.72015380859375, "loss": 0.5022, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.018068009987473488, "rewards/margins": 0.683052122592926, "rewards/rejected": -0.6649841070175171, "step": 7090 }, { "epoch": 1.83, "learning_rate": 2.160753562207134e-07, "logits/chosen": -5.021732330322266, "logits/rejected": -4.918165683746338, "logps/chosen": -586.81494140625, "logps/rejected": -444.57177734375, "loss": 0.4798, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.0524064302444458, "rewards/margins": 0.7386361360549927, "rewards/rejected": -0.6862296462059021, "step": 7100 }, { "epoch": 1.84, "learning_rate": 2.1559720761212583e-07, "logits/chosen": -5.376067161560059, "logits/rejected": -5.160840034484863, "logps/chosen": -615.3870849609375, "logps/rejected": -489.8373107910156, "loss": 0.4707, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0585918202996254, "rewards/margins": 0.7581653594970703, "rewards/rejected": -0.8167570233345032, "step": 7110 }, { "epoch": 1.84, "learning_rate": 2.151190590035383e-07, "logits/chosen": -5.182246685028076, "logits/rejected": -5.0253987312316895, "logps/chosen": -584.4147338867188, "logps/rejected": -439.663818359375, "loss": 0.4768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07703863829374313, "rewards/margins": 0.7995796203613281, "rewards/rejected": -0.7225409746170044, "step": 7120 }, { "epoch": 1.84, "learning_rate": 2.1464091039495075e-07, "logits/chosen": -4.9328460693359375, "logits/rejected": -5.095547199249268, "logps/chosen": -484.158203125, "logps/rejected": -437.71514892578125, "loss": 0.47, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.030128106474876404, "rewards/margins": 0.851874053478241, "rewards/rejected": -0.8217459917068481, "step": 7130 }, { "epoch": 1.84, "learning_rate": 2.141627617863632e-07, "logits/chosen": -5.310992240905762, "logits/rejected": -4.782107353210449, "logps/chosen": -654.7882690429688, "logps/rejected": -471.72955322265625, "loss": 0.4558, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.16006126999855042, "rewards/margins": 0.9477246403694153, "rewards/rejected": -0.787663459777832, "step": 7140 }, { "epoch": 1.85, "learning_rate": 2.1368461317777564e-07, "logits/chosen": -5.068331718444824, "logits/rejected": -4.580300807952881, "logps/chosen": -673.2592163085938, "logps/rejected": -442.55340576171875, "loss": 0.4802, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.2618125081062317, "rewards/margins": 0.9193479418754578, "rewards/rejected": -0.6575354337692261, "step": 7150 }, { "epoch": 1.85, "learning_rate": 2.132064645691881e-07, "logits/chosen": -5.319109916687012, "logits/rejected": -5.332709789276123, "logps/chosen": -635.5621948242188, "logps/rejected": -523.44091796875, "loss": 0.4743, "rewards/accuracies": 0.8125, "rewards/chosen": 0.12405027449131012, "rewards/margins": 0.8707083463668823, "rewards/rejected": -0.746658205986023, "step": 7160 }, { "epoch": 1.85, "learning_rate": 2.1272831596060053e-07, "logits/chosen": -5.2993550300598145, "logits/rejected": -4.800571441650391, "logps/chosen": -591.5736083984375, "logps/rejected": -399.31121826171875, "loss": 0.4634, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.030682694166898727, "rewards/margins": 0.7389487624168396, "rewards/rejected": -0.708266019821167, "step": 7170 }, { "epoch": 1.85, "learning_rate": 2.12250167352013e-07, "logits/chosen": -5.15109395980835, "logits/rejected": -4.827673435211182, "logps/chosen": -594.216796875, "logps/rejected": -457.98516845703125, "loss": 0.4477, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1791529357433319, "rewards/margins": 0.9441717267036438, "rewards/rejected": -0.7650187611579895, "step": 7180 }, { "epoch": 1.86, "learning_rate": 2.1177201874342545e-07, "logits/chosen": -5.153967380523682, "logits/rejected": -5.078028202056885, "logps/chosen": -604.881103515625, "logps/rejected": -434.54693603515625, "loss": 0.4696, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.15045422315597534, "rewards/margins": 0.9470342397689819, "rewards/rejected": -0.7965801358222961, "step": 7190 }, { "epoch": 1.86, "learning_rate": 2.112938701348379e-07, "logits/chosen": -5.469221591949463, "logits/rejected": -5.076963424682617, "logps/chosen": -623.1976318359375, "logps/rejected": -487.63427734375, "loss": 0.4921, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.07546250522136688, "rewards/margins": 0.7950947284698486, "rewards/rejected": -0.7196321487426758, "step": 7200 }, { "epoch": 1.86, "learning_rate": 2.1081572152625034e-07, "logits/chosen": -5.117960453033447, "logits/rejected": -4.908760070800781, "logps/chosen": -592.5912475585938, "logps/rejected": -491.81182861328125, "loss": 0.4691, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.011232802644371986, "rewards/margins": 0.7139037847518921, "rewards/rejected": -0.7026709914207458, "step": 7210 }, { "epoch": 1.86, "learning_rate": 2.1033757291766279e-07, "logits/chosen": -5.477794647216797, "logits/rejected": -5.327876091003418, "logps/chosen": -559.2304077148438, "logps/rejected": -445.64141845703125, "loss": 0.485, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.0026226043701171875, "rewards/margins": 0.7451986074447632, "rewards/rejected": -0.7425758838653564, "step": 7220 }, { "epoch": 1.87, "learning_rate": 2.0985942430907526e-07, "logits/chosen": -5.345255374908447, "logits/rejected": -5.4000444412231445, "logps/chosen": -617.7759399414062, "logps/rejected": -534.4215698242188, "loss": 0.4687, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.10407984256744385, "rewards/margins": 0.8936818242073059, "rewards/rejected": -0.7896019220352173, "step": 7230 }, { "epoch": 1.87, "learning_rate": 2.093812757004877e-07, "logits/chosen": -5.149765968322754, "logits/rejected": -4.657567977905273, "logps/chosen": -621.5892333984375, "logps/rejected": -402.2050476074219, "loss": 0.4639, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.08164798468351364, "rewards/margins": 0.8893488645553589, "rewards/rejected": -0.8077009320259094, "step": 7240 }, { "epoch": 1.87, "learning_rate": 2.0890312709190018e-07, "logits/chosen": -5.437896251678467, "logits/rejected": -5.069968223571777, "logps/chosen": -618.131591796875, "logps/rejected": -433.1944885253906, "loss": 0.453, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.19558210670948029, "rewards/margins": 1.1002891063690186, "rewards/rejected": -0.904707133769989, "step": 7250 }, { "epoch": 1.87, "learning_rate": 2.0842497848331262e-07, "logits/chosen": -5.0665411949157715, "logits/rejected": -4.926205635070801, "logps/chosen": -589.693359375, "logps/rejected": -466.3052673339844, "loss": 0.5004, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.1114911213517189, "rewards/margins": 0.8689071536064148, "rewards/rejected": -0.7574161291122437, "step": 7260 }, { "epoch": 1.88, "learning_rate": 2.0794682987472507e-07, "logits/chosen": -5.242837905883789, "logits/rejected": -4.094232082366943, "logps/chosen": -606.5155639648438, "logps/rejected": -413.037109375, "loss": 0.4084, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.15639005601406097, "rewards/margins": 1.1192923784255981, "rewards/rejected": -0.9629022479057312, "step": 7270 }, { "epoch": 1.88, "learning_rate": 2.074686812661375e-07, "logits/chosen": -4.7948198318481445, "logits/rejected": -4.836452484130859, "logps/chosen": -640.7286376953125, "logps/rejected": -532.4818115234375, "loss": 0.469, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.18369440734386444, "rewards/margins": 1.0803520679473877, "rewards/rejected": -0.8966576457023621, "step": 7280 }, { "epoch": 1.88, "learning_rate": 2.0699053265754996e-07, "logits/chosen": -5.260308742523193, "logits/rejected": -4.813953876495361, "logps/chosen": -589.7776489257812, "logps/rejected": -447.84368896484375, "loss": 0.4196, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.008264536038041115, "rewards/margins": 0.9151209592819214, "rewards/rejected": -0.9068564176559448, "step": 7290 }, { "epoch": 1.88, "learning_rate": 2.065123840489624e-07, "logits/chosen": -5.263618469238281, "logits/rejected": -5.115094184875488, "logps/chosen": -651.4525146484375, "logps/rejected": -427.03271484375, "loss": 0.4651, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10109229385852814, "rewards/margins": 0.8812934160232544, "rewards/rejected": -0.9823856353759766, "step": 7300 }, { "epoch": 1.89, "learning_rate": 2.0603423544037485e-07, "logits/chosen": -5.47549295425415, "logits/rejected": -4.719350337982178, "logps/chosen": -614.9442138671875, "logps/rejected": -545.3407592773438, "loss": 0.4826, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.16467507183551788, "rewards/margins": 0.8984909057617188, "rewards/rejected": -0.7338157892227173, "step": 7310 }, { "epoch": 1.89, "learning_rate": 2.0555608683178732e-07, "logits/chosen": -5.301560401916504, "logits/rejected": -5.050819396972656, "logps/chosen": -560.2103271484375, "logps/rejected": -435.50067138671875, "loss": 0.4717, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.14835253357887268, "rewards/margins": 0.6449403166770935, "rewards/rejected": -0.7932928800582886, "step": 7320 }, { "epoch": 1.89, "learning_rate": 2.0507793822319976e-07, "logits/chosen": -5.046361923217773, "logits/rejected": -4.977335453033447, "logps/chosen": -568.0091552734375, "logps/rejected": -484.661865234375, "loss": 0.4961, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.053889013826847076, "rewards/margins": 0.658076286315918, "rewards/rejected": -0.7119652032852173, "step": 7330 }, { "epoch": 1.9, "learning_rate": 2.045997896146122e-07, "logits/chosen": -5.582226753234863, "logits/rejected": -4.891000270843506, "logps/chosen": -699.2998046875, "logps/rejected": -497.0565490722656, "loss": 0.4048, "rewards/accuracies": 0.8125, "rewards/chosen": 0.08822374045848846, "rewards/margins": 0.9959708452224731, "rewards/rejected": -0.9077471494674683, "step": 7340 }, { "epoch": 1.9, "learning_rate": 2.0412164100602466e-07, "logits/chosen": -5.369495391845703, "logits/rejected": -5.269856929779053, "logps/chosen": -691.9635620117188, "logps/rejected": -468.8521423339844, "loss": 0.4562, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.10614285618066788, "rewards/margins": 0.9587852358818054, "rewards/rejected": -0.8526424169540405, "step": 7350 }, { "epoch": 1.9, "learning_rate": 2.036434923974371e-07, "logits/chosen": -5.563731670379639, "logits/rejected": -5.174225807189941, "logps/chosen": -597.6758422851562, "logps/rejected": -463.6148986816406, "loss": 0.4753, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.17414303123950958, "rewards/margins": 1.0170536041259766, "rewards/rejected": -0.8429105877876282, "step": 7360 }, { "epoch": 1.9, "learning_rate": 2.0316534378884955e-07, "logits/chosen": -5.141673564910889, "logits/rejected": -5.205550193786621, "logps/chosen": -570.4118041992188, "logps/rejected": -420.4744567871094, "loss": 0.4492, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.005055886693298817, "rewards/margins": 0.9130409359931946, "rewards/rejected": -0.9079850316047668, "step": 7370 }, { "epoch": 1.91, "learning_rate": 2.0268719518026202e-07, "logits/chosen": -4.997907638549805, "logits/rejected": -5.371258735656738, "logps/chosen": -653.9464111328125, "logps/rejected": -563.2281494140625, "loss": 0.4584, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1967121660709381, "rewards/margins": 1.0735669136047363, "rewards/rejected": -0.8768547773361206, "step": 7380 }, { "epoch": 1.91, "learning_rate": 2.022090465716745e-07, "logits/chosen": -5.148959636688232, "logits/rejected": -4.36342716217041, "logps/chosen": -612.3659057617188, "logps/rejected": -568.2076416015625, "loss": 0.4762, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.16235734522342682, "rewards/margins": 0.8376115560531616, "rewards/rejected": -0.675254225730896, "step": 7390 }, { "epoch": 1.91, "learning_rate": 2.0173089796308694e-07, "logits/chosen": -4.916360855102539, "logits/rejected": -5.108966827392578, "logps/chosen": -608.4568481445312, "logps/rejected": -460.01739501953125, "loss": 0.3757, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.13873198628425598, "rewards/margins": 1.1332553625106812, "rewards/rejected": -0.9945232272148132, "step": 7400 }, { "epoch": 1.91, "learning_rate": 2.0125274935449938e-07, "logits/chosen": -5.304028511047363, "logits/rejected": -4.630060195922852, "logps/chosen": -613.0452880859375, "logps/rejected": -449.7854919433594, "loss": 0.4442, "rewards/accuracies": 0.875, "rewards/chosen": 0.10255654156208038, "rewards/margins": 0.9704843759536743, "rewards/rejected": -0.8679278492927551, "step": 7410 }, { "epoch": 1.92, "learning_rate": 2.0077460074591183e-07, "logits/chosen": -5.362182140350342, "logits/rejected": -5.262276649475098, "logps/chosen": -587.7139282226562, "logps/rejected": -428.37091064453125, "loss": 0.4708, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.13019302487373352, "rewards/margins": 0.8134794235229492, "rewards/rejected": -0.6832863688468933, "step": 7420 }, { "epoch": 1.92, "learning_rate": 2.0029645213732427e-07, "logits/chosen": -5.470076560974121, "logits/rejected": -4.929447650909424, "logps/chosen": -559.8795166015625, "logps/rejected": -414.584716796875, "loss": 0.4818, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.09233951568603516, "rewards/margins": 0.8154813051223755, "rewards/rejected": -0.7231415510177612, "step": 7430 }, { "epoch": 1.92, "learning_rate": 1.9981830352873672e-07, "logits/chosen": -5.014301300048828, "logits/rejected": -4.959453582763672, "logps/chosen": -743.4678955078125, "logps/rejected": -507.08233642578125, "loss": 0.489, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.13469497859477997, "rewards/margins": 1.009397268295288, "rewards/rejected": -0.8747022747993469, "step": 7440 }, { "epoch": 1.92, "learning_rate": 1.9934015492014916e-07, "logits/chosen": -5.312097072601318, "logits/rejected": -4.883395671844482, "logps/chosen": -645.2822875976562, "logps/rejected": -508.09796142578125, "loss": 0.469, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.10549630224704742, "rewards/margins": 0.8188384175300598, "rewards/rejected": -0.7133420705795288, "step": 7450 }, { "epoch": 1.93, "learning_rate": 1.9886200631156163e-07, "logits/chosen": -4.880892276763916, "logits/rejected": -4.81312894821167, "logps/chosen": -582.4495849609375, "logps/rejected": -483.58935546875, "loss": 0.4536, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.162168949842453, "rewards/margins": 0.6887463331222534, "rewards/rejected": -0.526577353477478, "step": 7460 }, { "epoch": 1.93, "learning_rate": 1.9838385770297408e-07, "logits/chosen": -5.796114921569824, "logits/rejected": -5.485968589782715, "logps/chosen": -546.2130126953125, "logps/rejected": -460.0263671875, "loss": 0.5423, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08136261254549026, "rewards/margins": 0.6598677635192871, "rewards/rejected": -0.7412303686141968, "step": 7470 }, { "epoch": 1.93, "learning_rate": 1.9790570909438652e-07, "logits/chosen": -5.025102138519287, "logits/rejected": -4.507571697235107, "logps/chosen": -617.2833862304688, "logps/rejected": -500.47412109375, "loss": 0.4423, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.20344337821006775, "rewards/margins": 0.9838153719902039, "rewards/rejected": -0.7803719639778137, "step": 7480 }, { "epoch": 1.93, "learning_rate": 1.9742756048579897e-07, "logits/chosen": -4.993091106414795, "logits/rejected": -4.762662887573242, "logps/chosen": -599.6630859375, "logps/rejected": -477.6331481933594, "loss": 0.49, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.057207606732845306, "rewards/margins": 0.8926295042037964, "rewards/rejected": -0.8354218602180481, "step": 7490 }, { "epoch": 1.94, "learning_rate": 1.9694941187721142e-07, "logits/chosen": -5.024720668792725, "logits/rejected": -5.113988876342773, "logps/chosen": -608.9468383789062, "logps/rejected": -528.6306762695312, "loss": 0.4323, "rewards/accuracies": 0.75, "rewards/chosen": 0.18434059619903564, "rewards/margins": 0.8232332468032837, "rewards/rejected": -0.6388925909996033, "step": 7500 }, { "epoch": 1.94, "learning_rate": 1.9647126326862386e-07, "logits/chosen": -5.060567855834961, "logits/rejected": -4.785902976989746, "logps/chosen": -556.506103515625, "logps/rejected": -415.358154296875, "loss": 0.434, "rewards/accuracies": 0.8125, "rewards/chosen": 0.18992505967617035, "rewards/margins": 0.9889433979988098, "rewards/rejected": -0.7990182638168335, "step": 7510 }, { "epoch": 1.94, "learning_rate": 1.959931146600363e-07, "logits/chosen": -4.649436950683594, "logits/rejected": -4.775508403778076, "logps/chosen": -595.4949340820312, "logps/rejected": -473.9935607910156, "loss": 0.3918, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.38916072249412537, "rewards/margins": 1.1668272018432617, "rewards/rejected": -0.7776663899421692, "step": 7520 }, { "epoch": 1.94, "learning_rate": 1.955149660514488e-07, "logits/chosen": -4.912306785583496, "logits/rejected": -4.64109468460083, "logps/chosen": -576.0599365234375, "logps/rejected": -491.81884765625, "loss": 0.4669, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.10686711221933365, "rewards/margins": 0.9544180035591125, "rewards/rejected": -0.8475509881973267, "step": 7530 }, { "epoch": 1.95, "learning_rate": 1.9503681744286125e-07, "logits/chosen": -5.289119243621826, "logits/rejected": -5.082324028015137, "logps/chosen": -588.4970703125, "logps/rejected": -453.1360778808594, "loss": 0.4645, "rewards/accuracies": 0.75, "rewards/chosen": 0.19126999378204346, "rewards/margins": 0.8247021436691284, "rewards/rejected": -0.6334322094917297, "step": 7540 }, { "epoch": 1.95, "learning_rate": 1.945586688342737e-07, "logits/chosen": -5.492282390594482, "logits/rejected": -4.894688129425049, "logps/chosen": -575.3875732421875, "logps/rejected": -424.06329345703125, "loss": 0.4748, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.15780594944953918, "rewards/margins": 0.8154786825180054, "rewards/rejected": -0.6576727628707886, "step": 7550 }, { "epoch": 1.95, "learning_rate": 1.9408052022568614e-07, "logits/chosen": -5.332259178161621, "logits/rejected": -4.899247646331787, "logps/chosen": -616.7271728515625, "logps/rejected": -509.8811950683594, "loss": 0.4528, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.3009008765220642, "rewards/margins": 0.9429820775985718, "rewards/rejected": -0.6420812010765076, "step": 7560 }, { "epoch": 1.95, "learning_rate": 1.9360237161709859e-07, "logits/chosen": -5.198887825012207, "logits/rejected": -5.021070957183838, "logps/chosen": -571.5408935546875, "logps/rejected": -479.42755126953125, "loss": 0.4983, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1684959977865219, "rewards/margins": 0.8230608701705933, "rewards/rejected": -0.6545648574829102, "step": 7570 }, { "epoch": 1.96, "learning_rate": 1.9312422300851103e-07, "logits/chosen": -5.297555446624756, "logits/rejected": -5.186488628387451, "logps/chosen": -567.9306030273438, "logps/rejected": -499.56353759765625, "loss": 0.5103, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.08171135187149048, "rewards/margins": 0.8353952169418335, "rewards/rejected": -0.7536839246749878, "step": 7580 }, { "epoch": 1.96, "learning_rate": 1.9264607439992348e-07, "logits/chosen": -5.225955009460449, "logits/rejected": -4.79823637008667, "logps/chosen": -579.01904296875, "logps/rejected": -421.85650634765625, "loss": 0.5132, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.09432654827833176, "rewards/margins": 0.7014795541763306, "rewards/rejected": -0.6071529984474182, "step": 7590 }, { "epoch": 1.96, "learning_rate": 1.9216792579133595e-07, "logits/chosen": -5.359795570373535, "logits/rejected": -5.398813724517822, "logps/chosen": -593.2606201171875, "logps/rejected": -486.38787841796875, "loss": 0.4964, "rewards/accuracies": 0.8125, "rewards/chosen": 0.10490729659795761, "rewards/margins": 0.9919099807739258, "rewards/rejected": -0.8870025873184204, "step": 7600 }, { "epoch": 1.96, "learning_rate": 1.916897771827484e-07, "logits/chosen": -5.080293655395508, "logits/rejected": -5.111090183258057, "logps/chosen": -578.6470947265625, "logps/rejected": -483.47906494140625, "loss": 0.5089, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.11409546434879303, "rewards/margins": 0.8013245463371277, "rewards/rejected": -0.6872292160987854, "step": 7610 }, { "epoch": 1.97, "learning_rate": 1.9121162857416084e-07, "logits/chosen": -5.233499050140381, "logits/rejected": -4.8739752769470215, "logps/chosen": -682.1953735351562, "logps/rejected": -488.12860107421875, "loss": 0.4633, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.30940836668014526, "rewards/margins": 0.9833210110664368, "rewards/rejected": -0.6739126443862915, "step": 7620 }, { "epoch": 1.97, "learning_rate": 1.9073347996557328e-07, "logits/chosen": -5.415169715881348, "logits/rejected": -5.305741786956787, "logps/chosen": -544.7804565429688, "logps/rejected": -434.00714111328125, "loss": 0.4379, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2114163339138031, "rewards/margins": 0.860611617565155, "rewards/rejected": -0.6491953134536743, "step": 7630 }, { "epoch": 1.97, "learning_rate": 1.9025533135698573e-07, "logits/chosen": -5.200839996337891, "logits/rejected": -5.567751407623291, "logps/chosen": -490.81097412109375, "logps/rejected": -422.99444580078125, "loss": 0.5085, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.1039358600974083, "rewards/margins": 0.8731986284255981, "rewards/rejected": -0.7692627906799316, "step": 7640 }, { "epoch": 1.98, "learning_rate": 1.8977718274839818e-07, "logits/chosen": -5.188553810119629, "logits/rejected": -5.225887298583984, "logps/chosen": -499.153076171875, "logps/rejected": -397.7505798339844, "loss": 0.5152, "rewards/accuracies": 0.75, "rewards/chosen": 0.08936653286218643, "rewards/margins": 0.7421793937683105, "rewards/rejected": -0.6528127789497375, "step": 7650 }, { "epoch": 1.98, "learning_rate": 1.8929903413981062e-07, "logits/chosen": -5.003813743591309, "logits/rejected": -4.673523426055908, "logps/chosen": -653.228271484375, "logps/rejected": -420.73065185546875, "loss": 0.4951, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.22011475265026093, "rewards/margins": 1.0047276020050049, "rewards/rejected": -0.784612774848938, "step": 7660 }, { "epoch": 1.98, "learning_rate": 1.8882088553122312e-07, "logits/chosen": -5.179425239562988, "logits/rejected": -4.225193023681641, "logps/chosen": -621.59326171875, "logps/rejected": -412.08953857421875, "loss": 0.3833, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.21339578926563263, "rewards/margins": 1.0513508319854736, "rewards/rejected": -0.8379548788070679, "step": 7670 }, { "epoch": 1.98, "learning_rate": 1.8834273692263556e-07, "logits/chosen": -5.229235649108887, "logits/rejected": -5.146288871765137, "logps/chosen": -586.733154296875, "logps/rejected": -521.9217529296875, "loss": 0.4795, "rewards/accuracies": 0.75, "rewards/chosen": 0.17969836294651031, "rewards/margins": 0.7576990127563477, "rewards/rejected": -0.5780006051063538, "step": 7680 }, { "epoch": 1.99, "learning_rate": 1.87864588314048e-07, "logits/chosen": -5.341981410980225, "logits/rejected": -5.055700778961182, "logps/chosen": -614.1552734375, "logps/rejected": -551.3032836914062, "loss": 0.4869, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.17653414607048035, "rewards/margins": 0.8233851194381714, "rewards/rejected": -0.6468511819839478, "step": 7690 }, { "epoch": 1.99, "learning_rate": 1.8738643970546045e-07, "logits/chosen": -5.639716148376465, "logits/rejected": -5.43728494644165, "logps/chosen": -501.36846923828125, "logps/rejected": -420.3121643066406, "loss": 0.4842, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.0667429119348526, "rewards/margins": 0.7757546305656433, "rewards/rejected": -0.7090117931365967, "step": 7700 }, { "epoch": 1.99, "learning_rate": 1.869082910968729e-07, "logits/chosen": -5.079355239868164, "logits/rejected": -4.6222429275512695, "logps/chosen": -656.0880126953125, "logps/rejected": -497.13800048828125, "loss": 0.4575, "rewards/accuracies": 0.8125, "rewards/chosen": 0.11486373096704483, "rewards/margins": 0.9413955807685852, "rewards/rejected": -0.8265317678451538, "step": 7710 }, { "epoch": 1.99, "learning_rate": 1.8643014248828535e-07, "logits/chosen": -5.4234185218811035, "logits/rejected": -4.812007427215576, "logps/chosen": -638.269775390625, "logps/rejected": -468.7798767089844, "loss": 0.4155, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.17249687016010284, "rewards/margins": 1.1058835983276367, "rewards/rejected": -0.933386504650116, "step": 7720 }, { "epoch": 2.0, "learning_rate": 1.859519938796978e-07, "logits/chosen": -5.148641109466553, "logits/rejected": -5.508623123168945, "logps/chosen": -489.86865234375, "logps/rejected": -412.28607177734375, "loss": 0.4738, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.008669626899063587, "rewards/margins": 0.7085140943527222, "rewards/rejected": -0.699844479560852, "step": 7730 }, { "epoch": 2.0, "learning_rate": 1.8547384527111026e-07, "logits/chosen": -5.713903903961182, "logits/rejected": -4.743488311767578, "logps/chosen": -596.1246337890625, "logps/rejected": -466.3055725097656, "loss": 0.4422, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.13221338391304016, "rewards/margins": 0.8360271453857422, "rewards/rejected": -0.7038136720657349, "step": 7740 }, { "epoch": 2.0, "learning_rate": 1.849956966625227e-07, "logits/chosen": -5.3539018630981445, "logits/rejected": -4.894670009613037, "logps/chosen": -605.1731567382812, "logps/rejected": -424.0458984375, "loss": 0.45, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1047578677535057, "rewards/margins": 0.9269531965255737, "rewards/rejected": -0.8221955299377441, "step": 7750 }, { "epoch": 2.0, "learning_rate": 1.8451754805393515e-07, "logits/chosen": -5.0256476402282715, "logits/rejected": -4.921692371368408, "logps/chosen": -538.4234619140625, "logps/rejected": -405.90972900390625, "loss": 0.432, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.21402347087860107, "rewards/margins": 0.8611900210380554, "rewards/rejected": -0.6471666097640991, "step": 7760 }, { "epoch": 2.01, "learning_rate": 1.840393994453476e-07, "logits/chosen": -5.382691860198975, "logits/rejected": -5.287382125854492, "logps/chosen": -643.8206787109375, "logps/rejected": -447.2210998535156, "loss": 0.4193, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.31302639842033386, "rewards/margins": 1.0182029008865356, "rewards/rejected": -0.7051764726638794, "step": 7770 }, { "epoch": 2.01, "learning_rate": 1.8356125083676004e-07, "logits/chosen": -5.431169509887695, "logits/rejected": -4.6812028884887695, "logps/chosen": -597.528564453125, "logps/rejected": -488.8351135253906, "loss": 0.3923, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.3829580247402191, "rewards/margins": 1.1099520921707153, "rewards/rejected": -0.726993978023529, "step": 7780 }, { "epoch": 2.01, "learning_rate": 1.830831022281725e-07, "logits/chosen": -4.99957275390625, "logits/rejected": -5.081376552581787, "logps/chosen": -558.5182495117188, "logps/rejected": -548.7123413085938, "loss": 0.4139, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.10888919979333878, "rewards/margins": 0.962694525718689, "rewards/rejected": -0.8538052439689636, "step": 7790 }, { "epoch": 2.01, "learning_rate": 1.8260495361958494e-07, "logits/chosen": -4.8798627853393555, "logits/rejected": -4.9727606773376465, "logps/chosen": -617.8023681640625, "logps/rejected": -512.6556396484375, "loss": 0.3848, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.13423356413841248, "rewards/margins": 1.1527694463729858, "rewards/rejected": -1.018535852432251, "step": 7800 }, { "epoch": 2.02, "learning_rate": 1.821268050109974e-07, "logits/chosen": -5.4405083656311035, "logits/rejected": -5.100772857666016, "logps/chosen": -615.9570922851562, "logps/rejected": -460.81182861328125, "loss": 0.4208, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.29386988282203674, "rewards/margins": 1.0045809745788574, "rewards/rejected": -0.7107110023498535, "step": 7810 }, { "epoch": 2.02, "learning_rate": 1.8164865640240988e-07, "logits/chosen": -5.454472064971924, "logits/rejected": -4.90125036239624, "logps/chosen": -648.1519775390625, "logps/rejected": -564.240478515625, "loss": 0.3966, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.13806867599487305, "rewards/margins": 1.164413332939148, "rewards/rejected": -1.026344656944275, "step": 7820 }, { "epoch": 2.02, "learning_rate": 1.8117050779382232e-07, "logits/chosen": -5.181923866271973, "logits/rejected": -4.989427089691162, "logps/chosen": -538.7706909179688, "logps/rejected": -440.638916015625, "loss": 0.3691, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2663555443286896, "rewards/margins": 1.1259498596191406, "rewards/rejected": -0.8595943450927734, "step": 7830 }, { "epoch": 2.02, "learning_rate": 1.8069235918523477e-07, "logits/chosen": -5.149709224700928, "logits/rejected": -4.896397590637207, "logps/chosen": -568.2212524414062, "logps/rejected": -552.949462890625, "loss": 0.3917, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1580764800310135, "rewards/margins": 1.1133893728256226, "rewards/rejected": -0.9553130269050598, "step": 7840 }, { "epoch": 2.03, "learning_rate": 1.8021421057664721e-07, "logits/chosen": -5.2497477531433105, "logits/rejected": -4.9836530685424805, "logps/chosen": -525.4978637695312, "logps/rejected": -486.14764404296875, "loss": 0.4191, "rewards/accuracies": 0.8125, "rewards/chosen": 0.19245919585227966, "rewards/margins": 0.9812302589416504, "rewards/rejected": -0.7887710332870483, "step": 7850 }, { "epoch": 2.03, "learning_rate": 1.7973606196805966e-07, "logits/chosen": -5.775905609130859, "logits/rejected": -4.355559825897217, "logps/chosen": -588.6670532226562, "logps/rejected": -360.2308044433594, "loss": 0.4031, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2269849330186844, "rewards/margins": 1.0431114435195923, "rewards/rejected": -0.8161264657974243, "step": 7860 }, { "epoch": 2.03, "learning_rate": 1.7925791335947213e-07, "logits/chosen": -5.028735160827637, "logits/rejected": -4.612760543823242, "logps/chosen": -624.9261474609375, "logps/rejected": -460.96929931640625, "loss": 0.3927, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.32105642557144165, "rewards/margins": 1.162856936454773, "rewards/rejected": -0.8418005704879761, "step": 7870 }, { "epoch": 2.03, "learning_rate": 1.7877976475088458e-07, "logits/chosen": -5.460697174072266, "logits/rejected": -4.839834213256836, "logps/chosen": -514.6184692382812, "logps/rejected": -429.8304748535156, "loss": 0.4323, "rewards/accuracies": 0.8125, "rewards/chosen": 0.07212267816066742, "rewards/margins": 0.9679670333862305, "rewards/rejected": -0.8958442807197571, "step": 7880 }, { "epoch": 2.04, "learning_rate": 1.7830161614229702e-07, "logits/chosen": -5.131017684936523, "logits/rejected": -4.812282562255859, "logps/chosen": -541.8582763671875, "logps/rejected": -437.62542724609375, "loss": 0.4099, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.13868260383605957, "rewards/margins": 1.045739769935608, "rewards/rejected": -0.907056987285614, "step": 7890 }, { "epoch": 2.04, "learning_rate": 1.7782346753370947e-07, "logits/chosen": -4.91749906539917, "logits/rejected": -4.759985446929932, "logps/chosen": -633.8049926757812, "logps/rejected": -500.849609375, "loss": 0.401, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.20097458362579346, "rewards/margins": 1.1193686723709106, "rewards/rejected": -0.9183940887451172, "step": 7900 }, { "epoch": 2.04, "learning_rate": 1.7734531892512191e-07, "logits/chosen": -5.184528350830078, "logits/rejected": -4.6595611572265625, "logps/chosen": -635.494384765625, "logps/rejected": -485.52056884765625, "loss": 0.4204, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.19295108318328857, "rewards/margins": 1.0092103481292725, "rewards/rejected": -0.8162592649459839, "step": 7910 }, { "epoch": 2.04, "learning_rate": 1.7686717031653436e-07, "logits/chosen": -5.319760322570801, "logits/rejected": -4.679954528808594, "logps/chosen": -586.76611328125, "logps/rejected": -420.97113037109375, "loss": 0.3988, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2110450267791748, "rewards/margins": 1.1287661790847778, "rewards/rejected": -0.9177210927009583, "step": 7920 }, { "epoch": 2.05, "learning_rate": 1.763890217079468e-07, "logits/chosen": -4.967594623565674, "logits/rejected": -4.576747417449951, "logps/chosen": -564.4395751953125, "logps/rejected": -445.49676513671875, "loss": 0.4029, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.25796133279800415, "rewards/margins": 0.989995002746582, "rewards/rejected": -0.7320337891578674, "step": 7930 }, { "epoch": 2.05, "learning_rate": 1.7591087309935928e-07, "logits/chosen": -4.919544696807861, "logits/rejected": -4.740033149719238, "logps/chosen": -575.7665405273438, "logps/rejected": -483.2290954589844, "loss": 0.4435, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.1340060532093048, "rewards/margins": 0.9589295387268066, "rewards/rejected": -0.8249235153198242, "step": 7940 }, { "epoch": 2.05, "learning_rate": 1.7543272449077172e-07, "logits/chosen": -5.245118141174316, "logits/rejected": -4.511632442474365, "logps/chosen": -552.7853393554688, "logps/rejected": -382.9903564453125, "loss": 0.3966, "rewards/accuracies": 0.8125, "rewards/chosen": 0.18725669384002686, "rewards/margins": 1.0256520509719849, "rewards/rejected": -0.8383952975273132, "step": 7950 }, { "epoch": 2.06, "learning_rate": 1.7495457588218417e-07, "logits/chosen": -5.20995569229126, "logits/rejected": -4.994682788848877, "logps/chosen": -629.2788696289062, "logps/rejected": -488.37841796875, "loss": 0.3953, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1885319948196411, "rewards/margins": 1.1752607822418213, "rewards/rejected": -0.9867287874221802, "step": 7960 }, { "epoch": 2.06, "learning_rate": 1.7447642727359664e-07, "logits/chosen": -5.187922477722168, "logits/rejected": -5.37373685836792, "logps/chosen": -639.4783325195312, "logps/rejected": -489.51104736328125, "loss": 0.3943, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3077123165130615, "rewards/margins": 1.1060400009155273, "rewards/rejected": -0.7983275651931763, "step": 7970 }, { "epoch": 2.06, "learning_rate": 1.7399827866500908e-07, "logits/chosen": -4.774896144866943, "logits/rejected": -4.786694049835205, "logps/chosen": -584.5519409179688, "logps/rejected": -458.012451171875, "loss": 0.433, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.07896748185157776, "rewards/margins": 1.0414762496948242, "rewards/rejected": -0.9625086784362793, "step": 7980 }, { "epoch": 2.06, "learning_rate": 1.7352013005642153e-07, "logits/chosen": -5.049200057983398, "logits/rejected": -5.324783802032471, "logps/chosen": -527.4605102539062, "logps/rejected": -386.48211669921875, "loss": 0.3866, "rewards/accuracies": 0.8125, "rewards/chosen": 0.20277097821235657, "rewards/margins": 1.128659963607788, "rewards/rejected": -0.9258890151977539, "step": 7990 }, { "epoch": 2.07, "learning_rate": 1.7304198144783397e-07, "logits/chosen": -4.91616153717041, "logits/rejected": -5.216489315032959, "logps/chosen": -603.232666015625, "logps/rejected": -480.7772521972656, "loss": 0.426, "rewards/accuracies": 0.875, "rewards/chosen": 0.15771400928497314, "rewards/margins": 1.1333376169204712, "rewards/rejected": -0.9756234884262085, "step": 8000 }, { "epoch": 2.07, "eval_logits/chosen": -5.262606620788574, "eval_logits/rejected": -4.975000858306885, "eval_logps/chosen": -588.3751220703125, "eval_logps/rejected": -449.5616760253906, "eval_loss": 0.5471857786178589, "eval_rewards/accuracies": 0.7099999785423279, "eval_rewards/chosen": -0.010556370951235294, "eval_rewards/margins": 0.6915596127510071, "eval_rewards/rejected": -0.7021159529685974, "eval_runtime": 103.942, "eval_samples_per_second": 19.242, "eval_steps_per_second": 1.203, "step": 8000 }, { "epoch": 2.07, "learning_rate": 1.7256383283924645e-07, "logits/chosen": -5.12730073928833, "logits/rejected": -4.962862968444824, "logps/chosen": -547.7819213867188, "logps/rejected": -417.5228576660156, "loss": 0.4645, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.10693345218896866, "rewards/margins": 0.8832060694694519, "rewards/rejected": -0.776272714138031, "step": 8010 }, { "epoch": 2.07, "learning_rate": 1.720856842306589e-07, "logits/chosen": -5.380902290344238, "logits/rejected": -4.684375762939453, "logps/chosen": -591.8121337890625, "logps/rejected": -470.96868896484375, "loss": 0.3923, "rewards/accuracies": 0.8125, "rewards/chosen": 0.15529599785804749, "rewards/margins": 1.1763697862625122, "rewards/rejected": -1.0210736989974976, "step": 8020 }, { "epoch": 2.07, "learning_rate": 1.7160753562207134e-07, "logits/chosen": -5.269471168518066, "logits/rejected": -4.958675861358643, "logps/chosen": -594.1776123046875, "logps/rejected": -424.61566162109375, "loss": 0.3676, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2871091067790985, "rewards/margins": 1.1147130727767944, "rewards/rejected": -0.8276039361953735, "step": 8030 }, { "epoch": 2.08, "learning_rate": 1.7112938701348378e-07, "logits/chosen": -5.132725238800049, "logits/rejected": -5.178119659423828, "logps/chosen": -613.3796997070312, "logps/rejected": -392.7496337890625, "loss": 0.3756, "rewards/accuracies": 0.875, "rewards/chosen": 0.2507365345954895, "rewards/margins": 1.2219046354293823, "rewards/rejected": -0.971168041229248, "step": 8040 }, { "epoch": 2.08, "learning_rate": 1.7065123840489623e-07, "logits/chosen": -5.211643218994141, "logits/rejected": -4.954792499542236, "logps/chosen": -633.693603515625, "logps/rejected": -477.187744140625, "loss": 0.3929, "rewards/accuracies": 0.875, "rewards/chosen": 0.28198346495628357, "rewards/margins": 1.26167893409729, "rewards/rejected": -0.9796956181526184, "step": 8050 }, { "epoch": 2.08, "learning_rate": 1.7017308979630867e-07, "logits/chosen": -5.2028584480285645, "logits/rejected": -5.071413993835449, "logps/chosen": -679.797119140625, "logps/rejected": -483.1123962402344, "loss": 0.411, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2664170563220978, "rewards/margins": 1.0208543539047241, "rewards/rejected": -0.7544373273849487, "step": 8060 }, { "epoch": 2.08, "learning_rate": 1.6969494118772112e-07, "logits/chosen": -5.182798385620117, "logits/rejected": -5.4608154296875, "logps/chosen": -537.1019287109375, "logps/rejected": -540.8271484375, "loss": 0.4157, "rewards/accuracies": 0.8125, "rewards/chosen": 0.029635760933160782, "rewards/margins": 1.0147740840911865, "rewards/rejected": -0.9851382970809937, "step": 8070 }, { "epoch": 2.09, "learning_rate": 1.692167925791336e-07, "logits/chosen": -5.244307994842529, "logits/rejected": -4.779998302459717, "logps/chosen": -573.4909057617188, "logps/rejected": -369.702880859375, "loss": 0.399, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.15672807395458221, "rewards/margins": 1.0914655923843384, "rewards/rejected": -0.934737503528595, "step": 8080 }, { "epoch": 2.09, "learning_rate": 1.6873864397054604e-07, "logits/chosen": -5.1279425621032715, "logits/rejected": -4.475416660308838, "logps/chosen": -577.4273681640625, "logps/rejected": -466.87579345703125, "loss": 0.4255, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1378626525402069, "rewards/margins": 0.9378665089607239, "rewards/rejected": -0.8000038862228394, "step": 8090 }, { "epoch": 2.09, "learning_rate": 1.6826049536195848e-07, "logits/chosen": -5.297357559204102, "logits/rejected": -4.9026994705200195, "logps/chosen": -636.1399536132812, "logps/rejected": -443.3895568847656, "loss": 0.4142, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2203119993209839, "rewards/margins": 1.0146148204803467, "rewards/rejected": -0.7943028807640076, "step": 8100 }, { "epoch": 2.09, "learning_rate": 1.6778234675337095e-07, "logits/chosen": -5.111029624938965, "logits/rejected": -4.597440242767334, "logps/chosen": -646.9424438476562, "logps/rejected": -483.5672302246094, "loss": 0.3957, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.299672931432724, "rewards/margins": 1.2735130786895752, "rewards/rejected": -0.973840057849884, "step": 8110 }, { "epoch": 2.1, "learning_rate": 1.673041981447834e-07, "logits/chosen": -5.243075370788574, "logits/rejected": -5.258086204528809, "logps/chosen": -491.25677490234375, "logps/rejected": -388.8957214355469, "loss": 0.4377, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.09861594438552856, "rewards/margins": 1.0654370784759521, "rewards/rejected": -0.9668210744857788, "step": 8120 }, { "epoch": 2.1, "learning_rate": 1.6682604953619584e-07, "logits/chosen": -5.080204963684082, "logits/rejected": -5.283219337463379, "logps/chosen": -526.7443237304688, "logps/rejected": -450.92669677734375, "loss": 0.424, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.04044118523597717, "rewards/margins": 0.9165732264518738, "rewards/rejected": -0.8761320114135742, "step": 8130 }, { "epoch": 2.1, "learning_rate": 1.663479009276083e-07, "logits/chosen": -5.412210941314697, "logits/rejected": -4.748445510864258, "logps/chosen": -620.7987060546875, "logps/rejected": -517.1263427734375, "loss": 0.38, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.2432643175125122, "rewards/margins": 1.233642816543579, "rewards/rejected": -0.9903783798217773, "step": 8140 }, { "epoch": 2.1, "learning_rate": 1.6586975231902076e-07, "logits/chosen": -5.609719276428223, "logits/rejected": -4.906424522399902, "logps/chosen": -665.1422729492188, "logps/rejected": -482.28369140625, "loss": 0.3787, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.37276241183280945, "rewards/margins": 1.276485562324524, "rewards/rejected": -0.9037232398986816, "step": 8150 }, { "epoch": 2.11, "learning_rate": 1.653916037104332e-07, "logits/chosen": -5.305022716522217, "logits/rejected": -4.772315502166748, "logps/chosen": -676.3944091796875, "logps/rejected": -419.16107177734375, "loss": 0.4172, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.2703319191932678, "rewards/margins": 1.1896977424621582, "rewards/rejected": -0.9193658828735352, "step": 8160 }, { "epoch": 2.11, "learning_rate": 1.6491345510184565e-07, "logits/chosen": -4.978839874267578, "logits/rejected": -4.465869426727295, "logps/chosen": -634.2669067382812, "logps/rejected": -478.91436767578125, "loss": 0.3883, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2819287180900574, "rewards/margins": 1.178794264793396, "rewards/rejected": -0.8968656659126282, "step": 8170 }, { "epoch": 2.11, "learning_rate": 1.644353064932581e-07, "logits/chosen": -4.962603569030762, "logits/rejected": -4.7876763343811035, "logps/chosen": -726.8413696289062, "logps/rejected": -579.091796875, "loss": 0.3729, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.3299553394317627, "rewards/margins": 1.3375122547149658, "rewards/rejected": -1.0075570344924927, "step": 8180 }, { "epoch": 2.11, "learning_rate": 1.6395715788467054e-07, "logits/chosen": -5.084351539611816, "logits/rejected": -5.388245582580566, "logps/chosen": -644.0919189453125, "logps/rejected": -615.7410888671875, "loss": 0.4375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.12136878073215485, "rewards/margins": 0.8792859315872192, "rewards/rejected": -0.7579171061515808, "step": 8190 }, { "epoch": 2.12, "learning_rate": 1.63479009276083e-07, "logits/chosen": -5.470258712768555, "logits/rejected": -5.210948944091797, "logps/chosen": -492.4591369628906, "logps/rejected": -373.815185546875, "loss": 0.4435, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.11447542905807495, "rewards/margins": 0.9876270294189453, "rewards/rejected": -0.8731516003608704, "step": 8200 }, { "epoch": 2.12, "learning_rate": 1.6300086066749543e-07, "logits/chosen": -5.497845649719238, "logits/rejected": -5.325311660766602, "logps/chosen": -625.2362060546875, "logps/rejected": -468.85406494140625, "loss": 0.3898, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3159496486186981, "rewards/margins": 1.2339494228363037, "rewards/rejected": -0.9179998636245728, "step": 8210 }, { "epoch": 2.12, "learning_rate": 1.625227120589079e-07, "logits/chosen": -5.309291839599609, "logits/rejected": -5.182384490966797, "logps/chosen": -655.6583251953125, "logps/rejected": -431.2212829589844, "loss": 0.4081, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.20747092366218567, "rewards/margins": 1.061989188194275, "rewards/rejected": -0.8545182943344116, "step": 8220 }, { "epoch": 2.12, "learning_rate": 1.6204456345032035e-07, "logits/chosen": -5.5399017333984375, "logits/rejected": -4.818986892700195, "logps/chosen": -600.7839965820312, "logps/rejected": -438.95770263671875, "loss": 0.3786, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.26221323013305664, "rewards/margins": 1.2287256717681885, "rewards/rejected": -0.9665123224258423, "step": 8230 }, { "epoch": 2.13, "learning_rate": 1.615664148417328e-07, "logits/chosen": -5.445650577545166, "logits/rejected": -4.961193561553955, "logps/chosen": -695.8235473632812, "logps/rejected": -547.7935791015625, "loss": 0.426, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3487267792224884, "rewards/margins": 1.1441400051116943, "rewards/rejected": -0.7954132556915283, "step": 8240 }, { "epoch": 2.13, "learning_rate": 1.6108826623314524e-07, "logits/chosen": -5.114203453063965, "logits/rejected": -5.360022068023682, "logps/chosen": -638.7425537109375, "logps/rejected": -595.7391357421875, "loss": 0.3717, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.32794010639190674, "rewards/margins": 1.1220533847808838, "rewards/rejected": -0.794113278388977, "step": 8250 }, { "epoch": 2.13, "learning_rate": 1.606101176245577e-07, "logits/chosen": -5.607443332672119, "logits/rejected": -5.451035499572754, "logps/chosen": -535.799072265625, "logps/rejected": -481.39263916015625, "loss": 0.4509, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1883089691400528, "rewards/margins": 1.0100950002670288, "rewards/rejected": -0.8217860460281372, "step": 8260 }, { "epoch": 2.14, "learning_rate": 1.6013196901597016e-07, "logits/chosen": -5.299283981323242, "logits/rejected": -5.120297431945801, "logps/chosen": -571.9395751953125, "logps/rejected": -480.9169921875, "loss": 0.3891, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.15850834548473358, "rewards/margins": 1.1168177127838135, "rewards/rejected": -0.9583093523979187, "step": 8270 }, { "epoch": 2.14, "learning_rate": 1.596538204073826e-07, "logits/chosen": -5.164376258850098, "logits/rejected": -4.979989051818848, "logps/chosen": -643.7547607421875, "logps/rejected": -485.268798828125, "loss": 0.398, "rewards/accuracies": 0.875, "rewards/chosen": 0.3094736635684967, "rewards/margins": 1.1463011503219604, "rewards/rejected": -0.8368274569511414, "step": 8280 }, { "epoch": 2.14, "learning_rate": 1.5917567179879508e-07, "logits/chosen": -5.192044258117676, "logits/rejected": -5.331370830535889, "logps/chosen": -577.6287841796875, "logps/rejected": -459.251708984375, "loss": 0.4129, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2355198860168457, "rewards/margins": 1.075758695602417, "rewards/rejected": -0.8402387499809265, "step": 8290 }, { "epoch": 2.14, "learning_rate": 1.5869752319020752e-07, "logits/chosen": -5.375481605529785, "logits/rejected": -5.017037391662598, "logps/chosen": -574.2267456054688, "logps/rejected": -392.20733642578125, "loss": 0.4427, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.1801949292421341, "rewards/margins": 0.883888840675354, "rewards/rejected": -0.7036939263343811, "step": 8300 }, { "epoch": 2.15, "learning_rate": 1.5821937458161997e-07, "logits/chosen": -5.40267276763916, "logits/rejected": -5.034054756164551, "logps/chosen": -637.0773315429688, "logps/rejected": -454.6651916503906, "loss": 0.3913, "rewards/accuracies": 0.8125, "rewards/chosen": 0.22038130462169647, "rewards/margins": 1.150342583656311, "rewards/rejected": -0.9299613833427429, "step": 8310 }, { "epoch": 2.15, "learning_rate": 1.577412259730324e-07, "logits/chosen": -5.188730239868164, "logits/rejected": -4.617671012878418, "logps/chosen": -600.5393676757812, "logps/rejected": -434.15869140625, "loss": 0.3911, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.33983051776885986, "rewards/margins": 1.1515066623687744, "rewards/rejected": -0.8116761445999146, "step": 8320 }, { "epoch": 2.15, "learning_rate": 1.5726307736444486e-07, "logits/chosen": -4.9417009353637695, "logits/rejected": -5.020532131195068, "logps/chosen": -680.23681640625, "logps/rejected": -540.175537109375, "loss": 0.4331, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.30343449115753174, "rewards/margins": 1.089016318321228, "rewards/rejected": -0.7855818867683411, "step": 8330 }, { "epoch": 2.15, "learning_rate": 1.567849287558573e-07, "logits/chosen": -5.112343788146973, "logits/rejected": -5.287036895751953, "logps/chosen": -535.6332397460938, "logps/rejected": -427.95306396484375, "loss": 0.4507, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05829819291830063, "rewards/margins": 0.9456847906112671, "rewards/rejected": -1.0039829015731812, "step": 8340 }, { "epoch": 2.16, "learning_rate": 1.5630678014726975e-07, "logits/chosen": -5.178046703338623, "logits/rejected": -5.05265474319458, "logps/chosen": -586.1431274414062, "logps/rejected": -528.2030029296875, "loss": 0.3757, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.30888527631759644, "rewards/margins": 1.1961561441421509, "rewards/rejected": -0.8872706294059753, "step": 8350 }, { "epoch": 2.16, "learning_rate": 1.5582863153868222e-07, "logits/chosen": -5.244011878967285, "logits/rejected": -4.512989521026611, "logps/chosen": -513.8408203125, "logps/rejected": -418.798095703125, "loss": 0.4433, "rewards/accuracies": 0.8125, "rewards/chosen": 0.11688132584095001, "rewards/margins": 0.884832501411438, "rewards/rejected": -0.7679511904716492, "step": 8360 }, { "epoch": 2.16, "learning_rate": 1.5535048293009467e-07, "logits/chosen": -5.328274250030518, "logits/rejected": -4.926009178161621, "logps/chosen": -588.2261962890625, "logps/rejected": -468.12017822265625, "loss": 0.4091, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.21067626774311066, "rewards/margins": 1.2039439678192139, "rewards/rejected": -0.9932676553726196, "step": 8370 }, { "epoch": 2.16, "learning_rate": 1.548723343215071e-07, "logits/chosen": -5.379843711853027, "logits/rejected": -4.890900611877441, "logps/chosen": -553.0413818359375, "logps/rejected": -352.1067810058594, "loss": 0.3872, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.16198644042015076, "rewards/margins": 1.2105299234390259, "rewards/rejected": -1.0485433340072632, "step": 8380 }, { "epoch": 2.17, "learning_rate": 1.5439418571291956e-07, "logits/chosen": -4.949242115020752, "logits/rejected": -4.92618989944458, "logps/chosen": -627.3192138671875, "logps/rejected": -544.1038818359375, "loss": 0.4524, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.023895036429166794, "rewards/margins": 0.9048730731010437, "rewards/rejected": -0.880977988243103, "step": 8390 }, { "epoch": 2.17, "learning_rate": 1.53916037104332e-07, "logits/chosen": -5.116093635559082, "logits/rejected": -4.964105606079102, "logps/chosen": -550.8726196289062, "logps/rejected": -393.24603271484375, "loss": 0.4301, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.1429412066936493, "rewards/margins": 1.0656877756118774, "rewards/rejected": -0.9227465391159058, "step": 8400 }, { "epoch": 2.17, "learning_rate": 1.5343788849574447e-07, "logits/chosen": -5.443047523498535, "logits/rejected": -4.816893577575684, "logps/chosen": -578.63720703125, "logps/rejected": -380.6755065917969, "loss": 0.4037, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.20660197734832764, "rewards/margins": 1.1650125980377197, "rewards/rejected": -0.9584105610847473, "step": 8410 }, { "epoch": 2.17, "learning_rate": 1.5295973988715692e-07, "logits/chosen": -5.0951972007751465, "logits/rejected": -4.993190765380859, "logps/chosen": -595.4962158203125, "logps/rejected": -477.5155334472656, "loss": 0.3812, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2011035680770874, "rewards/margins": 1.1112459897994995, "rewards/rejected": -0.9101424217224121, "step": 8420 }, { "epoch": 2.18, "learning_rate": 1.524815912785694e-07, "logits/chosen": -5.0991973876953125, "logits/rejected": -4.96377420425415, "logps/chosen": -520.2467041015625, "logps/rejected": -452.40283203125, "loss": 0.4346, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.07229737192392349, "rewards/margins": 0.8878879547119141, "rewards/rejected": -0.8155905604362488, "step": 8430 }, { "epoch": 2.18, "learning_rate": 1.5200344266998184e-07, "logits/chosen": -5.067339897155762, "logits/rejected": -5.042455196380615, "logps/chosen": -597.399658203125, "logps/rejected": -412.2734375, "loss": 0.3573, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.3214265704154968, "rewards/margins": 1.2026283740997314, "rewards/rejected": -0.8812018632888794, "step": 8440 }, { "epoch": 2.18, "learning_rate": 1.5152529406139428e-07, "logits/chosen": -5.390539169311523, "logits/rejected": -4.652660369873047, "logps/chosen": -617.2260131835938, "logps/rejected": -531.3043823242188, "loss": 0.385, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1459963470697403, "rewards/margins": 1.173037052154541, "rewards/rejected": -1.027040719985962, "step": 8450 }, { "epoch": 2.18, "learning_rate": 1.5104714545280673e-07, "logits/chosen": -5.295803070068359, "logits/rejected": -4.878375053405762, "logps/chosen": -646.1485595703125, "logps/rejected": -456.03350830078125, "loss": 0.4024, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.22115692496299744, "rewards/margins": 1.2065430879592896, "rewards/rejected": -0.985386073589325, "step": 8460 }, { "epoch": 2.19, "learning_rate": 1.5056899684421917e-07, "logits/chosen": -5.24402379989624, "logits/rejected": -5.117186546325684, "logps/chosen": -605.8353271484375, "logps/rejected": -483.9873046875, "loss": 0.3926, "rewards/accuracies": 0.875, "rewards/chosen": 0.27236291766166687, "rewards/margins": 1.006831407546997, "rewards/rejected": -0.7344684600830078, "step": 8470 }, { "epoch": 2.19, "learning_rate": 1.5009084823563162e-07, "logits/chosen": -5.014266490936279, "logits/rejected": -5.203970432281494, "logps/chosen": -601.0038452148438, "logps/rejected": -444.72406005859375, "loss": 0.4217, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.08958698809146881, "rewards/margins": 1.017202377319336, "rewards/rejected": -0.9276154637336731, "step": 8480 }, { "epoch": 2.19, "learning_rate": 1.496126996270441e-07, "logits/chosen": -5.022156715393066, "logits/rejected": -5.050393104553223, "logps/chosen": -506.4818420410156, "logps/rejected": -449.8377380371094, "loss": 0.4154, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.03139140456914902, "rewards/margins": 0.9901823997497559, "rewards/rejected": -0.9587909579277039, "step": 8490 }, { "epoch": 2.19, "learning_rate": 1.4913455101845653e-07, "logits/chosen": -5.256711483001709, "logits/rejected": -5.1396074295043945, "logps/chosen": -579.0619506835938, "logps/rejected": -448.27947998046875, "loss": 0.4297, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.14726193249225616, "rewards/margins": 1.1811213493347168, "rewards/rejected": -1.033859372138977, "step": 8500 }, { "epoch": 2.2, "learning_rate": 1.4865640240986898e-07, "logits/chosen": -5.236362457275391, "logits/rejected": -4.910983085632324, "logps/chosen": -624.5769653320312, "logps/rejected": -460.2875061035156, "loss": 0.4159, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.12299472093582153, "rewards/margins": 1.1222976446151733, "rewards/rejected": -0.9993030428886414, "step": 8510 }, { "epoch": 2.2, "learning_rate": 1.4817825380128143e-07, "logits/chosen": -4.9931254386901855, "logits/rejected": -5.069777965545654, "logps/chosen": -559.9403076171875, "logps/rejected": -471.64404296875, "loss": 0.4107, "rewards/accuracies": 0.8125, "rewards/chosen": 0.12139090150594711, "rewards/margins": 1.1654703617095947, "rewards/rejected": -1.0440794229507446, "step": 8520 }, { "epoch": 2.2, "learning_rate": 1.4770010519269387e-07, "logits/chosen": -5.479605674743652, "logits/rejected": -5.09905481338501, "logps/chosen": -522.7894287109375, "logps/rejected": -384.4767761230469, "loss": 0.4208, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.11427979171276093, "rewards/margins": 0.9889079332351685, "rewards/rejected": -0.8746281862258911, "step": 8530 }, { "epoch": 2.21, "learning_rate": 1.4722195658410632e-07, "logits/chosen": -5.008366107940674, "logits/rejected": -4.79036808013916, "logps/chosen": -599.2615966796875, "logps/rejected": -413.77154541015625, "loss": 0.4203, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.20859523117542267, "rewards/margins": 1.0430549383163452, "rewards/rejected": -0.834459662437439, "step": 8540 }, { "epoch": 2.21, "learning_rate": 1.4674380797551876e-07, "logits/chosen": -5.483518600463867, "logits/rejected": -5.5576324462890625, "logps/chosen": -510.0367736816406, "logps/rejected": -482.890869140625, "loss": 0.3926, "rewards/accuracies": 0.8125, "rewards/chosen": 0.03713089972734451, "rewards/margins": 0.9116647839546204, "rewards/rejected": -0.8745338320732117, "step": 8550 }, { "epoch": 2.21, "learning_rate": 1.4626565936693126e-07, "logits/chosen": -5.124260425567627, "logits/rejected": -5.109321117401123, "logps/chosen": -588.9959716796875, "logps/rejected": -459.58306884765625, "loss": 0.3768, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.06866136938333511, "rewards/margins": 1.1823476552963257, "rewards/rejected": -1.1136863231658936, "step": 8560 }, { "epoch": 2.21, "learning_rate": 1.457875107583437e-07, "logits/chosen": -4.915524005889893, "logits/rejected": -5.232248306274414, "logps/chosen": -694.8782958984375, "logps/rejected": -506.3724670410156, "loss": 0.4158, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.18040207028388977, "rewards/margins": 1.3046828508377075, "rewards/rejected": -1.1242808103561401, "step": 8570 }, { "epoch": 2.22, "learning_rate": 1.4530936214975615e-07, "logits/chosen": -5.070877552032471, "logits/rejected": -5.049712181091309, "logps/chosen": -555.0972290039062, "logps/rejected": -531.4332275390625, "loss": 0.4229, "rewards/accuracies": 0.8125, "rewards/chosen": 0.06575857102870941, "rewards/margins": 0.8561042547225952, "rewards/rejected": -0.7903456687927246, "step": 8580 }, { "epoch": 2.22, "learning_rate": 1.448312135411686e-07, "logits/chosen": -5.25252628326416, "logits/rejected": -4.480896949768066, "logps/chosen": -732.493408203125, "logps/rejected": -476.01849365234375, "loss": 0.3831, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.41332364082336426, "rewards/margins": 1.254883050918579, "rewards/rejected": -0.8415594100952148, "step": 8590 }, { "epoch": 2.22, "learning_rate": 1.4435306493258104e-07, "logits/chosen": -5.14136266708374, "logits/rejected": -4.989603519439697, "logps/chosen": -497.1466369628906, "logps/rejected": -385.6968078613281, "loss": 0.4559, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.15759995579719543, "rewards/margins": 1.0398390293121338, "rewards/rejected": -0.882239043712616, "step": 8600 }, { "epoch": 2.22, "learning_rate": 1.4387491632399349e-07, "logits/chosen": -5.211543083190918, "logits/rejected": -4.5062713623046875, "logps/chosen": -585.9052124023438, "logps/rejected": -461.22589111328125, "loss": 0.3929, "rewards/accuracies": 0.875, "rewards/chosen": 0.15375399589538574, "rewards/margins": 1.2072999477386475, "rewards/rejected": -1.0535459518432617, "step": 8610 }, { "epoch": 2.23, "learning_rate": 1.4339676771540593e-07, "logits/chosen": -5.253554344177246, "logits/rejected": -4.713998317718506, "logps/chosen": -655.806396484375, "logps/rejected": -500.8138732910156, "loss": 0.3823, "rewards/accuracies": 0.875, "rewards/chosen": 0.44568008184432983, "rewards/margins": 1.2653167247772217, "rewards/rejected": -0.8196367025375366, "step": 8620 }, { "epoch": 2.23, "learning_rate": 1.429186191068184e-07, "logits/chosen": -5.502247333526611, "logits/rejected": -5.1702070236206055, "logps/chosen": -610.3519897460938, "logps/rejected": -484.87713623046875, "loss": 0.4293, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2292877733707428, "rewards/margins": 1.119429588317871, "rewards/rejected": -0.8901419639587402, "step": 8630 }, { "epoch": 2.23, "learning_rate": 1.4244047049823085e-07, "logits/chosen": -5.07344913482666, "logits/rejected": -4.9181132316589355, "logps/chosen": -695.7974853515625, "logps/rejected": -495.666015625, "loss": 0.3872, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.22705774009227753, "rewards/margins": 1.2420011758804321, "rewards/rejected": -1.0149433612823486, "step": 8640 }, { "epoch": 2.23, "learning_rate": 1.419623218896433e-07, "logits/chosen": -5.245809078216553, "logits/rejected": -5.317660808563232, "logps/chosen": -618.6363525390625, "logps/rejected": -546.1066284179688, "loss": 0.4234, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.0619942769408226, "rewards/margins": 0.9512344598770142, "rewards/rejected": -0.8892401456832886, "step": 8650 }, { "epoch": 2.24, "learning_rate": 1.4148417328105574e-07, "logits/chosen": -5.057327747344971, "logits/rejected": -5.018956661224365, "logps/chosen": -585.0314331054688, "logps/rejected": -445.57427978515625, "loss": 0.379, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.313793808221817, "rewards/margins": 1.3592708110809326, "rewards/rejected": -1.0454767942428589, "step": 8660 }, { "epoch": 2.24, "learning_rate": 1.4100602467246819e-07, "logits/chosen": -5.208488941192627, "logits/rejected": -5.145411491394043, "logps/chosen": -562.2486572265625, "logps/rejected": -430.40411376953125, "loss": 0.3979, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.16709716618061066, "rewards/margins": 1.0568501949310303, "rewards/rejected": -0.8897531628608704, "step": 8670 }, { "epoch": 2.24, "learning_rate": 1.4052787606388063e-07, "logits/chosen": -5.100646018981934, "logits/rejected": -5.2377166748046875, "logps/chosen": -481.1898498535156, "logps/rejected": -457.78057861328125, "loss": 0.3903, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.21274928748607635, "rewards/margins": 1.0785999298095703, "rewards/rejected": -0.8658507466316223, "step": 8680 }, { "epoch": 2.24, "learning_rate": 1.4004972745529308e-07, "logits/chosen": -4.926587104797363, "logits/rejected": -4.657877445220947, "logps/chosen": -703.3741455078125, "logps/rejected": -524.2244873046875, "loss": 0.4324, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.34772372245788574, "rewards/margins": 1.287778615951538, "rewards/rejected": -0.9400548934936523, "step": 8690 }, { "epoch": 2.25, "learning_rate": 1.3957157884670557e-07, "logits/chosen": -5.2414984703063965, "logits/rejected": -5.0198774337768555, "logps/chosen": -601.4893188476562, "logps/rejected": -439.61297607421875, "loss": 0.4158, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.39130082726478577, "rewards/margins": 1.0289254188537598, "rewards/rejected": -0.6376248598098755, "step": 8700 }, { "epoch": 2.25, "learning_rate": 1.3909343023811802e-07, "logits/chosen": -5.486611366271973, "logits/rejected": -4.943757057189941, "logps/chosen": -541.8342895507812, "logps/rejected": -409.05670166015625, "loss": 0.4634, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.11002092063426971, "rewards/margins": 0.9764989614486694, "rewards/rejected": -0.8664781451225281, "step": 8710 }, { "epoch": 2.25, "learning_rate": 1.3861528162953046e-07, "logits/chosen": -5.14481258392334, "logits/rejected": -5.052521228790283, "logps/chosen": -606.603515625, "logps/rejected": -448.3111267089844, "loss": 0.4329, "rewards/accuracies": 0.8125, "rewards/chosen": 0.32283180952072144, "rewards/margins": 1.196566104888916, "rewards/rejected": -0.8737342953681946, "step": 8720 }, { "epoch": 2.25, "learning_rate": 1.381371330209429e-07, "logits/chosen": -4.914074897766113, "logits/rejected": -4.6625189781188965, "logps/chosen": -526.2372436523438, "logps/rejected": -461.84429931640625, "loss": 0.4096, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.21054068207740784, "rewards/margins": 1.0089232921600342, "rewards/rejected": -0.7983825206756592, "step": 8730 }, { "epoch": 2.26, "learning_rate": 1.3765898441235536e-07, "logits/chosen": -5.3910651206970215, "logits/rejected": -4.553156852722168, "logps/chosen": -526.1373901367188, "logps/rejected": -398.94390869140625, "loss": 0.4384, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.0990876629948616, "rewards/margins": 0.9111483693122864, "rewards/rejected": -0.8120607137680054, "step": 8740 }, { "epoch": 2.26, "learning_rate": 1.371808358037678e-07, "logits/chosen": -4.984807014465332, "logits/rejected": -4.800888538360596, "logps/chosen": -650.8736572265625, "logps/rejected": -505.5897521972656, "loss": 0.3795, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.371494859457016, "rewards/margins": 1.1762138605117798, "rewards/rejected": -0.8047188520431519, "step": 8750 }, { "epoch": 2.26, "learning_rate": 1.3670268719518025e-07, "logits/chosen": -5.0141119956970215, "logits/rejected": -4.757113933563232, "logps/chosen": -566.364013671875, "logps/rejected": -426.0364685058594, "loss": 0.4056, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.29574403166770935, "rewards/margins": 1.0355995893478394, "rewards/rejected": -0.7398555874824524, "step": 8760 }, { "epoch": 2.26, "learning_rate": 1.3622453858659272e-07, "logits/chosen": -5.00956392288208, "logits/rejected": -4.885385036468506, "logps/chosen": -595.9583740234375, "logps/rejected": -481.47894287109375, "loss": 0.4001, "rewards/accuracies": 0.875, "rewards/chosen": 0.3912639915943146, "rewards/margins": 1.2049304246902466, "rewards/rejected": -0.8136664628982544, "step": 8770 }, { "epoch": 2.27, "learning_rate": 1.3574638997800516e-07, "logits/chosen": -5.123583793640137, "logits/rejected": -5.276081562042236, "logps/chosen": -523.3998413085938, "logps/rejected": -482.66082763671875, "loss": 0.4113, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19291847944259644, "rewards/margins": 0.9098933339118958, "rewards/rejected": -0.7169749140739441, "step": 8780 }, { "epoch": 2.27, "learning_rate": 1.352682413694176e-07, "logits/chosen": -5.139613151550293, "logits/rejected": -4.75595235824585, "logps/chosen": -559.1314697265625, "logps/rejected": -429.2255859375, "loss": 0.3995, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.11918888986110687, "rewards/margins": 1.093587875366211, "rewards/rejected": -0.9743989706039429, "step": 8790 }, { "epoch": 2.27, "learning_rate": 1.3479009276083005e-07, "logits/chosen": -4.906497001647949, "logits/rejected": -4.860668182373047, "logps/chosen": -602.7081298828125, "logps/rejected": -437.9563903808594, "loss": 0.4174, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1488390862941742, "rewards/margins": 1.0576043128967285, "rewards/rejected": -0.9087653160095215, "step": 8800 }, { "epoch": 2.27, "learning_rate": 1.343119441522425e-07, "logits/chosen": -5.4389262199401855, "logits/rejected": -4.721698760986328, "logps/chosen": -491.42059326171875, "logps/rejected": -420.76495361328125, "loss": 0.4354, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.09367617219686508, "rewards/margins": 0.8417415618896484, "rewards/rejected": -0.7480654716491699, "step": 8810 }, { "epoch": 2.28, "learning_rate": 1.3383379554365495e-07, "logits/chosen": -4.979583740234375, "logits/rejected": -4.927232265472412, "logps/chosen": -590.5, "logps/rejected": -488.33612060546875, "loss": 0.4142, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3089243769645691, "rewards/margins": 1.123367428779602, "rewards/rejected": -0.8144429922103882, "step": 8820 }, { "epoch": 2.28, "learning_rate": 1.333556469350674e-07, "logits/chosen": -5.336668014526367, "logits/rejected": -5.0404863357543945, "logps/chosen": -667.9921875, "logps/rejected": -463.6204528808594, "loss": 0.4093, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.30740755796432495, "rewards/margins": 1.1877397298812866, "rewards/rejected": -0.8803321719169617, "step": 8830 }, { "epoch": 2.28, "learning_rate": 1.3287749832647986e-07, "logits/chosen": -5.291746616363525, "logits/rejected": -5.173768997192383, "logps/chosen": -608.45654296875, "logps/rejected": -446.27374267578125, "loss": 0.423, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.04764443635940552, "rewards/margins": 0.9390350580215454, "rewards/rejected": -0.8913904428482056, "step": 8840 }, { "epoch": 2.29, "learning_rate": 1.3239934971789233e-07, "logits/chosen": -4.931333065032959, "logits/rejected": -4.5769805908203125, "logps/chosen": -629.9171142578125, "logps/rejected": -407.9053649902344, "loss": 0.3459, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3546852469444275, "rewards/margins": 1.262172818183899, "rewards/rejected": -0.9074875712394714, "step": 8850 }, { "epoch": 2.29, "learning_rate": 1.3192120110930478e-07, "logits/chosen": -4.932380199432373, "logits/rejected": -4.769390106201172, "logps/chosen": -642.8351440429688, "logps/rejected": -476.3658142089844, "loss": 0.3697, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.292226642370224, "rewards/margins": 1.1583656072616577, "rewards/rejected": -0.8661389350891113, "step": 8860 }, { "epoch": 2.29, "learning_rate": 1.3144305250071722e-07, "logits/chosen": -5.338796138763428, "logits/rejected": -5.293082237243652, "logps/chosen": -548.365966796875, "logps/rejected": -468.99810791015625, "loss": 0.3684, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.4010956287384033, "rewards/margins": 1.1560646295547485, "rewards/rejected": -0.7549688816070557, "step": 8870 }, { "epoch": 2.29, "learning_rate": 1.3096490389212967e-07, "logits/chosen": -5.71329402923584, "logits/rejected": -4.696215629577637, "logps/chosen": -766.0521850585938, "logps/rejected": -507.58306884765625, "loss": 0.3606, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.46683377027511597, "rewards/margins": 1.2758691310882568, "rewards/rejected": -0.8090354204177856, "step": 8880 }, { "epoch": 2.3, "learning_rate": 1.3048675528354212e-07, "logits/chosen": -4.959466457366943, "logits/rejected": -4.5368218421936035, "logps/chosen": -650.7796630859375, "logps/rejected": -477.26629638671875, "loss": 0.3543, "rewards/accuracies": 0.875, "rewards/chosen": 0.2167484313249588, "rewards/margins": 1.2726080417633057, "rewards/rejected": -1.0558595657348633, "step": 8890 }, { "epoch": 2.3, "learning_rate": 1.3000860667495456e-07, "logits/chosen": -5.175396919250488, "logits/rejected": -5.134932518005371, "logps/chosen": -560.56396484375, "logps/rejected": -438.78668212890625, "loss": 0.4281, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.08481582999229431, "rewards/margins": 1.0262219905853271, "rewards/rejected": -0.9414064288139343, "step": 8900 }, { "epoch": 2.3, "learning_rate": 1.2953045806636703e-07, "logits/chosen": -4.958725452423096, "logits/rejected": -4.727789402008057, "logps/chosen": -502.96624755859375, "logps/rejected": -373.6336975097656, "loss": 0.4412, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.007611866109073162, "rewards/margins": 0.9462712407112122, "rewards/rejected": -0.9538830518722534, "step": 8910 }, { "epoch": 2.3, "learning_rate": 1.2905230945777948e-07, "logits/chosen": -5.260983467102051, "logits/rejected": -5.140371799468994, "logps/chosen": -594.4066772460938, "logps/rejected": -516.9420166015625, "loss": 0.4085, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.28145527839660645, "rewards/margins": 1.0362153053283691, "rewards/rejected": -0.7547600865364075, "step": 8920 }, { "epoch": 2.31, "learning_rate": 1.2857416084919192e-07, "logits/chosen": -5.393424034118652, "logits/rejected": -5.033969879150391, "logps/chosen": -538.9322509765625, "logps/rejected": -415.38836669921875, "loss": 0.3598, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2709987461566925, "rewards/margins": 1.1924853324890137, "rewards/rejected": -0.9214865565299988, "step": 8930 }, { "epoch": 2.31, "learning_rate": 1.2809601224060437e-07, "logits/chosen": -5.327182292938232, "logits/rejected": -5.091122627258301, "logps/chosen": -587.4850463867188, "logps/rejected": -404.6625671386719, "loss": 0.4127, "rewards/accuracies": 0.8125, "rewards/chosen": 0.14921171963214874, "rewards/margins": 1.0567060708999634, "rewards/rejected": -0.9074943661689758, "step": 8940 }, { "epoch": 2.31, "learning_rate": 1.2761786363201681e-07, "logits/chosen": -5.241934776306152, "logits/rejected": -4.6756415367126465, "logps/chosen": -613.3863525390625, "logps/rejected": -490.61724853515625, "loss": 0.3772, "rewards/accuracies": 0.8125, "rewards/chosen": 0.12274515628814697, "rewards/margins": 0.9661418795585632, "rewards/rejected": -0.843396782875061, "step": 8950 }, { "epoch": 2.31, "learning_rate": 1.2713971502342926e-07, "logits/chosen": -5.182160377502441, "logits/rejected": -4.7301859855651855, "logps/chosen": -732.2741088867188, "logps/rejected": -556.2435913085938, "loss": 0.4039, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3593631386756897, "rewards/margins": 1.141364336013794, "rewards/rejected": -0.7820011973381042, "step": 8960 }, { "epoch": 2.32, "learning_rate": 1.266615664148417e-07, "logits/chosen": -5.603774070739746, "logits/rejected": -5.369876861572266, "logps/chosen": -658.1661987304688, "logps/rejected": -528.1641845703125, "loss": 0.3999, "rewards/accuracies": 0.875, "rewards/chosen": 0.19019028544425964, "rewards/margins": 1.0093767642974854, "rewards/rejected": -0.8191865086555481, "step": 8970 }, { "epoch": 2.32, "learning_rate": 1.2618341780625418e-07, "logits/chosen": -5.253783226013184, "logits/rejected": -5.640148639678955, "logps/chosen": -472.32000732421875, "logps/rejected": -442.6932067871094, "loss": 0.4396, "rewards/accuracies": 0.75, "rewards/chosen": 0.09147828817367554, "rewards/margins": 0.8869901895523071, "rewards/rejected": -0.7955118417739868, "step": 8980 }, { "epoch": 2.32, "learning_rate": 1.2570526919766665e-07, "logits/chosen": -4.9518938064575195, "logits/rejected": -5.1416239738464355, "logps/chosen": -581.7947998046875, "logps/rejected": -439.458251953125, "loss": 0.4343, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.22108681499958038, "rewards/margins": 0.879797101020813, "rewards/rejected": -0.658710241317749, "step": 8990 }, { "epoch": 2.32, "learning_rate": 1.252271205890791e-07, "logits/chosen": -5.178545951843262, "logits/rejected": -4.871384620666504, "logps/chosen": -613.4766235351562, "logps/rejected": -474.4812927246094, "loss": 0.3875, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.24828031659126282, "rewards/margins": 1.1226164102554321, "rewards/rejected": -0.8743361234664917, "step": 9000 }, { "epoch": 2.32, "eval_logits/chosen": -5.2796125411987305, "eval_logits/rejected": -4.993527889251709, "eval_logps/chosen": -588.281005859375, "eval_logps/rejected": -449.7113342285156, "eval_loss": 0.5463607311248779, "eval_rewards/accuracies": 0.7139999866485596, "eval_rewards/chosen": -0.0011471300385892391, "eval_rewards/margins": 0.7159355282783508, "eval_rewards/rejected": -0.717082679271698, "eval_runtime": 105.0983, "eval_samples_per_second": 19.03, "eval_steps_per_second": 1.189, "step": 9000 }, { "epoch": 2.33, "learning_rate": 1.2474897198049154e-07, "logits/chosen": -5.232527732849121, "logits/rejected": -5.203549861907959, "logps/chosen": -600.9226684570312, "logps/rejected": -428.3251953125, "loss": 0.413, "rewards/accuracies": 0.875, "rewards/chosen": 0.055987078696489334, "rewards/margins": 1.127004623413086, "rewards/rejected": -1.0710175037384033, "step": 9010 }, { "epoch": 2.33, "learning_rate": 1.2427082337190398e-07, "logits/chosen": -4.864667892456055, "logits/rejected": -4.9846110343933105, "logps/chosen": -573.8277587890625, "logps/rejected": -454.307373046875, "loss": 0.441, "rewards/accuracies": 0.75, "rewards/chosen": -0.012289764359593391, "rewards/margins": 0.786608099937439, "rewards/rejected": -0.7988978624343872, "step": 9020 }, { "epoch": 2.33, "learning_rate": 1.2379267476331643e-07, "logits/chosen": -5.22079610824585, "logits/rejected": -5.036009788513184, "logps/chosen": -538.3673095703125, "logps/rejected": -484.510498046875, "loss": 0.4598, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.09232844412326813, "rewards/margins": 0.8442330360412598, "rewards/rejected": -0.7519046068191528, "step": 9030 }, { "epoch": 2.33, "learning_rate": 1.2331452615472888e-07, "logits/chosen": -5.0710649490356445, "logits/rejected": -4.951976299285889, "logps/chosen": -691.0711059570312, "logps/rejected": -552.1945190429688, "loss": 0.3922, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3993602991104126, "rewards/margins": 1.1959184408187866, "rewards/rejected": -0.796558141708374, "step": 9040 }, { "epoch": 2.34, "learning_rate": 1.2283637754614132e-07, "logits/chosen": -5.484630107879639, "logits/rejected": -5.332329750061035, "logps/chosen": -564.9556884765625, "logps/rejected": -439.154052734375, "loss": 0.416, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.07704527676105499, "rewards/margins": 1.064769983291626, "rewards/rejected": -0.9877246618270874, "step": 9050 }, { "epoch": 2.34, "learning_rate": 1.223582289375538e-07, "logits/chosen": -4.822518825531006, "logits/rejected": -5.022304534912109, "logps/chosen": -718.7652587890625, "logps/rejected": -524.3084106445312, "loss": 0.4314, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.33718428015708923, "rewards/margins": 1.1826421022415161, "rewards/rejected": -0.8454577326774597, "step": 9060 }, { "epoch": 2.34, "learning_rate": 1.2188008032896624e-07, "logits/chosen": -5.416438102722168, "logits/rejected": -5.02370548248291, "logps/chosen": -596.5806884765625, "logps/rejected": -447.2923278808594, "loss": 0.3953, "rewards/accuracies": 0.8125, "rewards/chosen": 0.17813752591609955, "rewards/margins": 1.1195310354232788, "rewards/rejected": -0.9413934946060181, "step": 9070 }, { "epoch": 2.34, "learning_rate": 1.2140193172037868e-07, "logits/chosen": -5.461902141571045, "logits/rejected": -4.867642879486084, "logps/chosen": -591.1005249023438, "logps/rejected": -522.9752197265625, "loss": 0.407, "rewards/accuracies": 0.875, "rewards/chosen": 0.13191409409046173, "rewards/margins": 1.0549063682556152, "rewards/rejected": -0.9229922294616699, "step": 9080 }, { "epoch": 2.35, "learning_rate": 1.2092378311179113e-07, "logits/chosen": -5.10472297668457, "logits/rejected": -5.013697624206543, "logps/chosen": -558.2575073242188, "logps/rejected": -392.1866760253906, "loss": 0.3747, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.24854043126106262, "rewards/margins": 1.2942866086959839, "rewards/rejected": -1.0457462072372437, "step": 9090 }, { "epoch": 2.35, "learning_rate": 1.204456345032036e-07, "logits/chosen": -5.394787788391113, "logits/rejected": -4.911860942840576, "logps/chosen": -639.1219482421875, "logps/rejected": -476.77447509765625, "loss": 0.4191, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.36193379759788513, "rewards/margins": 1.0199388265609741, "rewards/rejected": -0.6580051779747009, "step": 9100 }, { "epoch": 2.35, "learning_rate": 1.1996748589461605e-07, "logits/chosen": -5.574795722961426, "logits/rejected": -5.087183475494385, "logps/chosen": -560.880126953125, "logps/rejected": -425.2019958496094, "loss": 0.4201, "rewards/accuracies": 0.8125, "rewards/chosen": 0.157833069562912, "rewards/margins": 0.9612352252006531, "rewards/rejected": -0.8034020662307739, "step": 9110 }, { "epoch": 2.35, "learning_rate": 1.194893372860285e-07, "logits/chosen": -5.334373474121094, "logits/rejected": -4.500037670135498, "logps/chosen": -632.1149291992188, "logps/rejected": -441.5838928222656, "loss": 0.3975, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.31106969714164734, "rewards/margins": 1.1279239654541016, "rewards/rejected": -0.8168543577194214, "step": 9120 }, { "epoch": 2.36, "learning_rate": 1.1901118867744095e-07, "logits/chosen": -5.169785022735596, "logits/rejected": -4.9117255210876465, "logps/chosen": -633.2828979492188, "logps/rejected": -448.40191650390625, "loss": 0.3824, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2647024989128113, "rewards/margins": 1.1345622539520264, "rewards/rejected": -0.8698596954345703, "step": 9130 }, { "epoch": 2.36, "learning_rate": 1.185330400688534e-07, "logits/chosen": -4.755401611328125, "logits/rejected": -4.878584861755371, "logps/chosen": -592.0015258789062, "logps/rejected": -471.6319274902344, "loss": 0.4448, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.1320294588804245, "rewards/margins": 0.8978122472763062, "rewards/rejected": -0.7657827734947205, "step": 9140 }, { "epoch": 2.36, "learning_rate": 1.1805489146026584e-07, "logits/chosen": -5.254443168640137, "logits/rejected": -5.030881404876709, "logps/chosen": -615.5501708984375, "logps/rejected": -446.8966369628906, "loss": 0.4059, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2605306804180145, "rewards/margins": 1.2347052097320557, "rewards/rejected": -0.9741746187210083, "step": 9150 }, { "epoch": 2.37, "learning_rate": 1.1757674285167829e-07, "logits/chosen": -5.544168472290039, "logits/rejected": -5.057156562805176, "logps/chosen": -650.9607543945312, "logps/rejected": -490.3645935058594, "loss": 0.3886, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.4137301445007324, "rewards/margins": 1.2300504446029663, "rewards/rejected": -0.8163203001022339, "step": 9160 }, { "epoch": 2.37, "learning_rate": 1.1709859424309074e-07, "logits/chosen": -5.380502223968506, "logits/rejected": -5.531805515289307, "logps/chosen": -615.1707763671875, "logps/rejected": -502.7727966308594, "loss": 0.379, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.25635772943496704, "rewards/margins": 1.176969051361084, "rewards/rejected": -0.9206113815307617, "step": 9170 }, { "epoch": 2.37, "learning_rate": 1.166204456345032e-07, "logits/chosen": -5.2935709953308105, "logits/rejected": -5.204921722412109, "logps/chosen": -567.6307373046875, "logps/rejected": -429.57745361328125, "loss": 0.4093, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3017306923866272, "rewards/margins": 1.101093053817749, "rewards/rejected": -0.799362301826477, "step": 9180 }, { "epoch": 2.37, "learning_rate": 1.1614229702591565e-07, "logits/chosen": -5.181605339050293, "logits/rejected": -5.086179256439209, "logps/chosen": -615.4990234375, "logps/rejected": -442.78192138671875, "loss": 0.4226, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.28323793411254883, "rewards/margins": 1.146881103515625, "rewards/rejected": -0.8636430501937866, "step": 9190 }, { "epoch": 2.38, "learning_rate": 1.1566414841732811e-07, "logits/chosen": -5.581186771392822, "logits/rejected": -5.307178020477295, "logps/chosen": -577.7361450195312, "logps/rejected": -451.347412109375, "loss": 0.4175, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.22439420223236084, "rewards/margins": 0.9434807896614075, "rewards/rejected": -0.7190865278244019, "step": 9200 }, { "epoch": 2.38, "learning_rate": 1.1518599980874055e-07, "logits/chosen": -5.569378852844238, "logits/rejected": -4.989590644836426, "logps/chosen": -640.82568359375, "logps/rejected": -425.42071533203125, "loss": 0.4123, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.27006450295448303, "rewards/margins": 1.081485629081726, "rewards/rejected": -0.8114210963249207, "step": 9210 }, { "epoch": 2.38, "learning_rate": 1.14707851200153e-07, "logits/chosen": -5.29638671875, "logits/rejected": -5.286472320556641, "logps/chosen": -542.1975708007812, "logps/rejected": -435.4867248535156, "loss": 0.4577, "rewards/accuracies": 0.75, "rewards/chosen": 0.03344978019595146, "rewards/margins": 0.7191168665885925, "rewards/rejected": -0.6856670379638672, "step": 9220 }, { "epoch": 2.38, "learning_rate": 1.1422970259156544e-07, "logits/chosen": -5.197695732116699, "logits/rejected": -5.451485633850098, "logps/chosen": -523.5208740234375, "logps/rejected": -431.89044189453125, "loss": 0.398, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2067144364118576, "rewards/margins": 1.111469030380249, "rewards/rejected": -0.9047545194625854, "step": 9230 }, { "epoch": 2.39, "learning_rate": 1.137515539829779e-07, "logits/chosen": -5.15535831451416, "logits/rejected": -4.985384941101074, "logps/chosen": -576.5391845703125, "logps/rejected": -538.34912109375, "loss": 0.407, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.20319774746894836, "rewards/margins": 1.038339614868164, "rewards/rejected": -0.8351419568061829, "step": 9240 }, { "epoch": 2.39, "learning_rate": 1.1327340537439036e-07, "logits/chosen": -5.611729145050049, "logits/rejected": -5.153866291046143, "logps/chosen": -538.2857666015625, "logps/rejected": -466.53143310546875, "loss": 0.3912, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.23438772559165955, "rewards/margins": 1.028347373008728, "rewards/rejected": -0.7939596772193909, "step": 9250 }, { "epoch": 2.39, "learning_rate": 1.127952567658028e-07, "logits/chosen": -5.1808648109436035, "logits/rejected": -5.2541069984436035, "logps/chosen": -482.39398193359375, "logps/rejected": -455.90618896484375, "loss": 0.4284, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06726590543985367, "rewards/margins": 1.0078978538513184, "rewards/rejected": -0.9406320452690125, "step": 9260 }, { "epoch": 2.39, "learning_rate": 1.1231710815721526e-07, "logits/chosen": -5.577635765075684, "logits/rejected": -5.466068744659424, "logps/chosen": -550.7892456054688, "logps/rejected": -445.75579833984375, "loss": 0.3912, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.18886613845825195, "rewards/margins": 1.0335638523101807, "rewards/rejected": -0.8446976542472839, "step": 9270 }, { "epoch": 2.4, "learning_rate": 1.1183895954862771e-07, "logits/chosen": -5.537787437438965, "logits/rejected": -5.206395149230957, "logps/chosen": -536.3954467773438, "logps/rejected": -439.52734375, "loss": 0.4285, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.17503497004508972, "rewards/margins": 0.9858128428459167, "rewards/rejected": -0.8107778429985046, "step": 9280 }, { "epoch": 2.4, "learning_rate": 1.1136081094004016e-07, "logits/chosen": -5.481219291687012, "logits/rejected": -5.1448469161987305, "logps/chosen": -626.1700439453125, "logps/rejected": -456.57598876953125, "loss": 0.3542, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.19735896587371826, "rewards/margins": 1.2003471851348877, "rewards/rejected": -1.002988338470459, "step": 9290 }, { "epoch": 2.4, "learning_rate": 1.1088266233145261e-07, "logits/chosen": -5.126064777374268, "logits/rejected": -4.5587663650512695, "logps/chosen": -633.751953125, "logps/rejected": -512.8193359375, "loss": 0.399, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.33576175570487976, "rewards/margins": 1.270309567451477, "rewards/rejected": -0.9345479011535645, "step": 9300 }, { "epoch": 2.4, "learning_rate": 1.1040451372286506e-07, "logits/chosen": -4.800873756408691, "logits/rejected": -5.069218158721924, "logps/chosen": -530.9320068359375, "logps/rejected": -542.6378784179688, "loss": 0.3852, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.19378536939620972, "rewards/margins": 1.1233909130096436, "rewards/rejected": -0.9296056628227234, "step": 9310 }, { "epoch": 2.41, "learning_rate": 1.099263651142775e-07, "logits/chosen": -4.914328575134277, "logits/rejected": -4.524318218231201, "logps/chosen": -594.5137939453125, "logps/rejected": -463.04327392578125, "loss": 0.4082, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.18592919409275055, "rewards/margins": 1.110671877861023, "rewards/rejected": -0.9247426986694336, "step": 9320 }, { "epoch": 2.41, "learning_rate": 1.0944821650568996e-07, "logits/chosen": -5.278843402862549, "logits/rejected": -5.216768741607666, "logps/chosen": -546.6336669921875, "logps/rejected": -448.9330139160156, "loss": 0.4361, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.20759305357933044, "rewards/margins": 0.8864353895187378, "rewards/rejected": -0.678842306137085, "step": 9330 }, { "epoch": 2.41, "learning_rate": 1.0897006789710242e-07, "logits/chosen": -5.166994571685791, "logits/rejected": -4.893740653991699, "logps/chosen": -539.4088134765625, "logps/rejected": -494.1925354003906, "loss": 0.4275, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.28593021631240845, "rewards/margins": 1.025370717048645, "rewards/rejected": -0.7394405603408813, "step": 9340 }, { "epoch": 2.41, "learning_rate": 1.0849191928851487e-07, "logits/chosen": -5.02545166015625, "logits/rejected": -5.271948337554932, "logps/chosen": -463.8212890625, "logps/rejected": -397.4366760253906, "loss": 0.3858, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1447581946849823, "rewards/margins": 1.0440213680267334, "rewards/rejected": -0.8992632627487183, "step": 9350 }, { "epoch": 2.42, "learning_rate": 1.0801377067992731e-07, "logits/chosen": -5.407784461975098, "logits/rejected": -5.526049613952637, "logps/chosen": -543.912353515625, "logps/rejected": -532.0668334960938, "loss": 0.4317, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.006288623902946711, "rewards/margins": 1.0043671131134033, "rewards/rejected": -0.998078465461731, "step": 9360 }, { "epoch": 2.42, "learning_rate": 1.0753562207133977e-07, "logits/chosen": -5.562361240386963, "logits/rejected": -5.096370697021484, "logps/chosen": -617.0789184570312, "logps/rejected": -471.2618713378906, "loss": 0.4073, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.015236323699355125, "rewards/margins": 1.0904223918914795, "rewards/rejected": -1.075186014175415, "step": 9370 }, { "epoch": 2.42, "learning_rate": 1.0705747346275222e-07, "logits/chosen": -5.349741458892822, "logits/rejected": -5.164155006408691, "logps/chosen": -533.4205932617188, "logps/rejected": -447.8409729003906, "loss": 0.4467, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.19854943454265594, "rewards/margins": 1.0045076608657837, "rewards/rejected": -0.8059582710266113, "step": 9380 }, { "epoch": 2.42, "learning_rate": 1.0657932485416466e-07, "logits/chosen": -5.013423919677734, "logits/rejected": -4.890993595123291, "logps/chosen": -590.2261352539062, "logps/rejected": -421.6311950683594, "loss": 0.39, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.11332786083221436, "rewards/margins": 1.1091948747634888, "rewards/rejected": -0.9958670735359192, "step": 9390 }, { "epoch": 2.43, "learning_rate": 1.0610117624557712e-07, "logits/chosen": -5.296818256378174, "logits/rejected": -5.348574638366699, "logps/chosen": -496.8872985839844, "logps/rejected": -400.486083984375, "loss": 0.4236, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08599910885095596, "rewards/margins": 0.9718599319458008, "rewards/rejected": -1.057859182357788, "step": 9400 }, { "epoch": 2.43, "learning_rate": 1.0562302763698958e-07, "logits/chosen": -5.2132391929626465, "logits/rejected": -5.15817928314209, "logps/chosen": -623.1036987304688, "logps/rejected": -475.31451416015625, "loss": 0.4322, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1324896216392517, "rewards/margins": 1.1194932460784912, "rewards/rejected": -0.98700350522995, "step": 9410 }, { "epoch": 2.43, "learning_rate": 1.0514487902840202e-07, "logits/chosen": -5.241889953613281, "logits/rejected": -4.858502388000488, "logps/chosen": -615.0098876953125, "logps/rejected": -462.01715087890625, "loss": 0.3872, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.46009406447410583, "rewards/margins": 1.3380892276763916, "rewards/rejected": -0.8779951333999634, "step": 9420 }, { "epoch": 2.43, "learning_rate": 1.0466673041981447e-07, "logits/chosen": -5.133173942565918, "logits/rejected": -5.214886665344238, "logps/chosen": -538.3379516601562, "logps/rejected": -435.9345703125, "loss": 0.4637, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.172339528799057, "rewards/margins": 1.0032904148101807, "rewards/rejected": -0.8309507369995117, "step": 9430 }, { "epoch": 2.44, "learning_rate": 1.0418858181122693e-07, "logits/chosen": -4.938714504241943, "logits/rejected": -4.734953880310059, "logps/chosen": -567.0638427734375, "logps/rejected": -512.55322265625, "loss": 0.4008, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.20345647633075714, "rewards/margins": 1.0681368112564087, "rewards/rejected": -0.864680290222168, "step": 9440 }, { "epoch": 2.44, "learning_rate": 1.0371043320263937e-07, "logits/chosen": -5.214462757110596, "logits/rejected": -4.907632350921631, "logps/chosen": -592.830810546875, "logps/rejected": -514.3264770507812, "loss": 0.4354, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.08971532434225082, "rewards/margins": 0.9116581082344055, "rewards/rejected": -0.8219428062438965, "step": 9450 }, { "epoch": 2.44, "learning_rate": 1.0323228459405182e-07, "logits/chosen": -5.17854642868042, "logits/rejected": -4.944499969482422, "logps/chosen": -577.8115844726562, "logps/rejected": -434.40478515625, "loss": 0.4333, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.2049691379070282, "rewards/margins": 1.0749871730804443, "rewards/rejected": -0.8700180053710938, "step": 9460 }, { "epoch": 2.45, "learning_rate": 1.0275413598546428e-07, "logits/chosen": -4.911605358123779, "logits/rejected": -4.925177574157715, "logps/chosen": -599.5512084960938, "logps/rejected": -450.93621826171875, "loss": 0.41, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.21702222526073456, "rewards/margins": 1.150356411933899, "rewards/rejected": -0.9333343505859375, "step": 9470 }, { "epoch": 2.45, "learning_rate": 1.0227598737687674e-07, "logits/chosen": -5.037371635437012, "logits/rejected": -5.117720127105713, "logps/chosen": -601.3138427734375, "logps/rejected": -472.99853515625, "loss": 0.3748, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2165440022945404, "rewards/margins": 1.0495119094848633, "rewards/rejected": -0.8329678773880005, "step": 9480 }, { "epoch": 2.45, "learning_rate": 1.0179783876828918e-07, "logits/chosen": -5.044945240020752, "logits/rejected": -5.146431922912598, "logps/chosen": -643.4053955078125, "logps/rejected": -516.2630004882812, "loss": 0.4076, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2751425802707672, "rewards/margins": 0.9747351408004761, "rewards/rejected": -0.6995925307273865, "step": 9490 }, { "epoch": 2.45, "learning_rate": 1.0131969015970163e-07, "logits/chosen": -5.175450325012207, "logits/rejected": -4.947918891906738, "logps/chosen": -672.46240234375, "logps/rejected": -565.2347412109375, "loss": 0.3853, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.2718862295150757, "rewards/margins": 1.103073000907898, "rewards/rejected": -0.8311868906021118, "step": 9500 }, { "epoch": 2.46, "learning_rate": 1.0084154155111409e-07, "logits/chosen": -4.9580583572387695, "logits/rejected": -4.754397392272949, "logps/chosen": -631.4376220703125, "logps/rejected": -509.25213623046875, "loss": 0.4123, "rewards/accuracies": 0.8125, "rewards/chosen": 0.20145881175994873, "rewards/margins": 1.0759531259536743, "rewards/rejected": -0.8744942545890808, "step": 9510 }, { "epoch": 2.46, "learning_rate": 1.0036339294252653e-07, "logits/chosen": -5.170888423919678, "logits/rejected": -4.883397102355957, "logps/chosen": -579.1624145507812, "logps/rejected": -397.5838928222656, "loss": 0.403, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.2127404659986496, "rewards/margins": 1.012398362159729, "rewards/rejected": -0.7996578812599182, "step": 9520 }, { "epoch": 2.46, "learning_rate": 9.988524433393898e-08, "logits/chosen": -4.840704441070557, "logits/rejected": -4.776615619659424, "logps/chosen": -495.5926208496094, "logps/rejected": -471.90557861328125, "loss": 0.406, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.13271553814411163, "rewards/margins": 1.0079237222671509, "rewards/rejected": -0.8752081990242004, "step": 9530 }, { "epoch": 2.46, "learning_rate": 9.940709572535142e-08, "logits/chosen": -5.245855331420898, "logits/rejected": -4.89205265045166, "logps/chosen": -618.5016479492188, "logps/rejected": -481.7393493652344, "loss": 0.403, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.26402729749679565, "rewards/margins": 1.1769886016845703, "rewards/rejected": -0.9129613041877747, "step": 9540 }, { "epoch": 2.47, "learning_rate": 9.89289471167639e-08, "logits/chosen": -5.497570037841797, "logits/rejected": -5.007233142852783, "logps/chosen": -656.050537109375, "logps/rejected": -432.415283203125, "loss": 0.428, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1520691215991974, "rewards/margins": 1.0191755294799805, "rewards/rejected": -0.8671064376831055, "step": 9550 }, { "epoch": 2.47, "learning_rate": 9.845079850817634e-08, "logits/chosen": -5.370931148529053, "logits/rejected": -4.906863689422607, "logps/chosen": -576.743408203125, "logps/rejected": -382.6946105957031, "loss": 0.4929, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03935273736715317, "rewards/margins": 0.7814738750457764, "rewards/rejected": -0.742121160030365, "step": 9560 }, { "epoch": 2.47, "learning_rate": 9.797264989958878e-08, "logits/chosen": -4.855576515197754, "logits/rejected": -4.833737850189209, "logps/chosen": -540.277099609375, "logps/rejected": -475.0962829589844, "loss": 0.3854, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.1520138680934906, "rewards/margins": 1.0771015882492065, "rewards/rejected": -0.9250876307487488, "step": 9570 }, { "epoch": 2.47, "learning_rate": 9.749450129100124e-08, "logits/chosen": -5.099086284637451, "logits/rejected": -4.602459907531738, "logps/chosen": -630.2706298828125, "logps/rejected": -480.13165283203125, "loss": 0.4255, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.2643203139305115, "rewards/margins": 0.9930081367492676, "rewards/rejected": -0.7286878824234009, "step": 9580 }, { "epoch": 2.48, "learning_rate": 9.701635268241369e-08, "logits/chosen": -5.1408491134643555, "logits/rejected": -4.615050315856934, "logps/chosen": -626.7959594726562, "logps/rejected": -494.91094970703125, "loss": 0.3926, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.13219256699085236, "rewards/margins": 0.9721064567565918, "rewards/rejected": -0.8399137258529663, "step": 9590 }, { "epoch": 2.48, "learning_rate": 9.653820407382613e-08, "logits/chosen": -5.365232467651367, "logits/rejected": -4.723877906799316, "logps/chosen": -639.6741333007812, "logps/rejected": -493.96893310546875, "loss": 0.3889, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.36111947894096375, "rewards/margins": 1.0166513919830322, "rewards/rejected": -0.6555320024490356, "step": 9600 }, { "epoch": 2.48, "learning_rate": 9.606005546523859e-08, "logits/chosen": -5.230041027069092, "logits/rejected": -4.957730293273926, "logps/chosen": -608.0250244140625, "logps/rejected": -504.1060485839844, "loss": 0.4113, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2601136565208435, "rewards/margins": 1.0693730115890503, "rewards/rejected": -0.8092594146728516, "step": 9610 }, { "epoch": 2.48, "learning_rate": 9.558190685665105e-08, "logits/chosen": -5.119355201721191, "logits/rejected": -5.07079553604126, "logps/chosen": -533.9777221679688, "logps/rejected": -493.66717529296875, "loss": 0.4025, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.22037819027900696, "rewards/margins": 1.1517854928970337, "rewards/rejected": -0.9314072728157043, "step": 9620 }, { "epoch": 2.49, "learning_rate": 9.51037582480635e-08, "logits/chosen": -4.976778030395508, "logits/rejected": -5.018677711486816, "logps/chosen": -516.3341064453125, "logps/rejected": -440.13189697265625, "loss": 0.4438, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.06625747680664062, "rewards/margins": 0.8570903539657593, "rewards/rejected": -0.7908328175544739, "step": 9630 }, { "epoch": 2.49, "learning_rate": 9.462560963947594e-08, "logits/chosen": -5.268161773681641, "logits/rejected": -5.254558563232422, "logps/chosen": -548.01220703125, "logps/rejected": -432.8326110839844, "loss": 0.4383, "rewards/accuracies": 0.875, "rewards/chosen": 0.1738913208246231, "rewards/margins": 0.9434558749198914, "rewards/rejected": -0.7695645689964294, "step": 9640 }, { "epoch": 2.49, "learning_rate": 9.41474610308884e-08, "logits/chosen": -5.309719562530518, "logits/rejected": -4.939174175262451, "logps/chosen": -529.4866943359375, "logps/rejected": -400.7486877441406, "loss": 0.4203, "rewards/accuracies": 0.8125, "rewards/chosen": 0.0764772817492485, "rewards/margins": 0.9041735529899597, "rewards/rejected": -0.8276962041854858, "step": 9650 }, { "epoch": 2.49, "learning_rate": 9.366931242230085e-08, "logits/chosen": -5.252267360687256, "logits/rejected": -5.268181324005127, "logps/chosen": -643.4264526367188, "logps/rejected": -520.07861328125, "loss": 0.4114, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1836097687482834, "rewards/margins": 1.0382187366485596, "rewards/rejected": -0.8546088933944702, "step": 9660 }, { "epoch": 2.5, "learning_rate": 9.319116381371329e-08, "logits/chosen": -5.1670050621032715, "logits/rejected": -5.007319450378418, "logps/chosen": -496.46966552734375, "logps/rejected": -466.4947204589844, "loss": 0.4559, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.038730502128601074, "rewards/margins": 0.8120914697647095, "rewards/rejected": -0.7733608484268188, "step": 9670 }, { "epoch": 2.5, "learning_rate": 9.271301520512575e-08, "logits/chosen": -5.409829139709473, "logits/rejected": -5.114466667175293, "logps/chosen": -587.5802001953125, "logps/rejected": -488.4268493652344, "loss": 0.4164, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.24579067528247833, "rewards/margins": 0.8948987722396851, "rewards/rejected": -0.6491080522537231, "step": 9680 }, { "epoch": 2.5, "learning_rate": 9.223486659653821e-08, "logits/chosen": -5.181349754333496, "logits/rejected": -4.894546031951904, "logps/chosen": -546.8074340820312, "logps/rejected": -480.3727111816406, "loss": 0.4269, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.22827155888080597, "rewards/margins": 1.0738029479980469, "rewards/rejected": -0.8455312848091125, "step": 9690 }, { "epoch": 2.5, "learning_rate": 9.175671798795065e-08, "logits/chosen": -5.265007495880127, "logits/rejected": -4.794343948364258, "logps/chosen": -659.1524658203125, "logps/rejected": -554.326416015625, "loss": 0.388, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.26429885625839233, "rewards/margins": 1.1775274276733398, "rewards/rejected": -0.9132285118103027, "step": 9700 }, { "epoch": 2.51, "learning_rate": 9.12785693793631e-08, "logits/chosen": -4.94766902923584, "logits/rejected": -5.063207149505615, "logps/chosen": -543.993896484375, "logps/rejected": -490.1288146972656, "loss": 0.4522, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.21634194254875183, "rewards/margins": 1.0598456859588623, "rewards/rejected": -0.8435036540031433, "step": 9710 }, { "epoch": 2.51, "learning_rate": 9.080042077077556e-08, "logits/chosen": -4.989123344421387, "logits/rejected": -4.757800102233887, "logps/chosen": -600.2832641601562, "logps/rejected": -486.5591735839844, "loss": 0.4175, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.23919133841991425, "rewards/margins": 0.9285603761672974, "rewards/rejected": -0.6893690824508667, "step": 9720 }, { "epoch": 2.51, "learning_rate": 9.0322272162188e-08, "logits/chosen": -4.9826741218566895, "logits/rejected": -5.413107395172119, "logps/chosen": -576.0633544921875, "logps/rejected": -531.5577392578125, "loss": 0.4223, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2565976679325104, "rewards/margins": 1.0967843532562256, "rewards/rejected": -0.840186595916748, "step": 9730 }, { "epoch": 2.51, "learning_rate": 8.984412355360045e-08, "logits/chosen": -5.321845531463623, "logits/rejected": -5.092312812805176, "logps/chosen": -560.239990234375, "logps/rejected": -476.88043212890625, "loss": 0.4143, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2631523907184601, "rewards/margins": 1.0128511190414429, "rewards/rejected": -0.74969881772995, "step": 9740 }, { "epoch": 2.52, "learning_rate": 8.936597494501291e-08, "logits/chosen": -5.103719711303711, "logits/rejected": -5.063908576965332, "logps/chosen": -559.268310546875, "logps/rejected": -492.6524963378906, "loss": 0.3999, "rewards/accuracies": 0.875, "rewards/chosen": 0.2429356575012207, "rewards/margins": 1.0560001134872437, "rewards/rejected": -0.8130645751953125, "step": 9750 }, { "epoch": 2.52, "learning_rate": 8.888782633642535e-08, "logits/chosen": -5.236142158508301, "logits/rejected": -4.825135231018066, "logps/chosen": -594.6126708984375, "logps/rejected": -455.89923095703125, "loss": 0.3888, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.22701787948608398, "rewards/margins": 1.2092812061309814, "rewards/rejected": -0.9822633862495422, "step": 9760 }, { "epoch": 2.52, "learning_rate": 8.840967772783781e-08, "logits/chosen": -5.272485733032227, "logits/rejected": -4.652812957763672, "logps/chosen": -642.854248046875, "logps/rejected": -514.8763427734375, "loss": 0.4268, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.20174089074134827, "rewards/margins": 1.0487675666809082, "rewards/rejected": -0.8470266461372375, "step": 9770 }, { "epoch": 2.53, "learning_rate": 8.793152911925026e-08, "logits/chosen": -4.901000022888184, "logits/rejected": -4.877824783325195, "logps/chosen": -609.9915771484375, "logps/rejected": -464.26611328125, "loss": 0.4192, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.3255142867565155, "rewards/margins": 1.0890953540802002, "rewards/rejected": -0.7635809183120728, "step": 9780 }, { "epoch": 2.53, "learning_rate": 8.745338051066271e-08, "logits/chosen": -5.259279251098633, "logits/rejected": -4.788205146789551, "logps/chosen": -454.16754150390625, "logps/rejected": -476.9732360839844, "loss": 0.4119, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.10745303332805634, "rewards/margins": 0.9351380467414856, "rewards/rejected": -1.0425910949707031, "step": 9790 }, { "epoch": 2.53, "learning_rate": 8.697523190207516e-08, "logits/chosen": -5.339990615844727, "logits/rejected": -4.503426551818848, "logps/chosen": -647.5823974609375, "logps/rejected": -470.74176025390625, "loss": 0.4179, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1846919059753418, "rewards/margins": 1.0087801218032837, "rewards/rejected": -0.8240880966186523, "step": 9800 }, { "epoch": 2.53, "learning_rate": 8.64970832934876e-08, "logits/chosen": -5.27567720413208, "logits/rejected": -5.077097415924072, "logps/chosen": -686.619140625, "logps/rejected": -508.90142822265625, "loss": 0.4334, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.24139609932899475, "rewards/margins": 0.9411234855651855, "rewards/rejected": -0.6997272968292236, "step": 9810 }, { "epoch": 2.54, "learning_rate": 8.601893468490006e-08, "logits/chosen": -5.360865116119385, "logits/rejected": -4.703429222106934, "logps/chosen": -677.4623413085938, "logps/rejected": -485.81201171875, "loss": 0.4055, "rewards/accuracies": 0.8125, "rewards/chosen": 0.22544629871845245, "rewards/margins": 1.1194368600845337, "rewards/rejected": -0.8939906358718872, "step": 9820 }, { "epoch": 2.54, "learning_rate": 8.554078607631251e-08, "logits/chosen": -5.213848114013672, "logits/rejected": -4.964876651763916, "logps/chosen": -581.568359375, "logps/rejected": -474.3987731933594, "loss": 0.4328, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.21556663513183594, "rewards/margins": 0.998814582824707, "rewards/rejected": -0.7832479476928711, "step": 9830 }, { "epoch": 2.54, "learning_rate": 8.506263746772497e-08, "logits/chosen": -5.06487512588501, "logits/rejected": -4.771307945251465, "logps/chosen": -550.454345703125, "logps/rejected": -401.881591796875, "loss": 0.4185, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.21907778084278107, "rewards/margins": 1.0759190320968628, "rewards/rejected": -0.8568412661552429, "step": 9840 }, { "epoch": 2.54, "learning_rate": 8.458448885913741e-08, "logits/chosen": -4.870003700256348, "logits/rejected": -4.830417633056641, "logps/chosen": -589.4920654296875, "logps/rejected": -545.3414306640625, "loss": 0.4465, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2854982912540436, "rewards/margins": 1.0601844787597656, "rewards/rejected": -0.7746862173080444, "step": 9850 }, { "epoch": 2.55, "learning_rate": 8.410634025054987e-08, "logits/chosen": -5.113330364227295, "logits/rejected": -4.948873043060303, "logps/chosen": -607.0372924804688, "logps/rejected": -389.5906982421875, "loss": 0.3996, "rewards/accuracies": 0.8125, "rewards/chosen": 0.22636358439922333, "rewards/margins": 1.1103427410125732, "rewards/rejected": -0.8839792013168335, "step": 9860 }, { "epoch": 2.55, "learning_rate": 8.362819164196232e-08, "logits/chosen": -5.22025203704834, "logits/rejected": -5.161636829376221, "logps/chosen": -694.7374267578125, "logps/rejected": -445.330810546875, "loss": 0.4122, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.336225301027298, "rewards/margins": 1.1317239999771118, "rewards/rejected": -0.7954985499382019, "step": 9870 }, { "epoch": 2.55, "learning_rate": 8.315004303337476e-08, "logits/chosen": -5.151349067687988, "logits/rejected": -4.971185207366943, "logps/chosen": -570.3270263671875, "logps/rejected": -467.54437255859375, "loss": 0.4297, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.08529824763536453, "rewards/margins": 0.9588583707809448, "rewards/rejected": -0.8735602498054504, "step": 9880 }, { "epoch": 2.55, "learning_rate": 8.267189442478722e-08, "logits/chosen": -5.277881145477295, "logits/rejected": -5.058869361877441, "logps/chosen": -666.9710693359375, "logps/rejected": -526.72900390625, "loss": 0.4032, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.3909066319465637, "rewards/margins": 1.2201108932495117, "rewards/rejected": -0.829204261302948, "step": 9890 }, { "epoch": 2.56, "learning_rate": 8.219374581619967e-08, "logits/chosen": -5.189238548278809, "logits/rejected": -4.669981002807617, "logps/chosen": -602.9674072265625, "logps/rejected": -466.51678466796875, "loss": 0.3876, "rewards/accuracies": 0.875, "rewards/chosen": 0.24410252273082733, "rewards/margins": 1.0820668935775757, "rewards/rejected": -0.8379644155502319, "step": 9900 }, { "epoch": 2.56, "learning_rate": 8.171559720761213e-08, "logits/chosen": -5.65836238861084, "logits/rejected": -4.967256546020508, "logps/chosen": -689.4050903320312, "logps/rejected": -485.69287109375, "loss": 0.362, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2965410053730011, "rewards/margins": 1.3259546756744385, "rewards/rejected": -1.0294137001037598, "step": 9910 }, { "epoch": 2.56, "learning_rate": 8.123744859902458e-08, "logits/chosen": -5.499475002288818, "logits/rejected": -5.135407447814941, "logps/chosen": -626.61962890625, "logps/rejected": -462.79534912109375, "loss": 0.4036, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1736307591199875, "rewards/margins": 1.074575662612915, "rewards/rejected": -0.9009448289871216, "step": 9920 }, { "epoch": 2.56, "learning_rate": 8.075929999043703e-08, "logits/chosen": -5.292958736419678, "logits/rejected": -5.104090690612793, "logps/chosen": -643.8049926757812, "logps/rejected": -468.07830810546875, "loss": 0.3953, "rewards/accuracies": 0.875, "rewards/chosen": 0.2673695385456085, "rewards/margins": 1.3309223651885986, "rewards/rejected": -1.0635526180267334, "step": 9930 }, { "epoch": 2.57, "learning_rate": 8.028115138184947e-08, "logits/chosen": -5.062832832336426, "logits/rejected": -5.052889347076416, "logps/chosen": -639.5963134765625, "logps/rejected": -456.20281982421875, "loss": 0.3713, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.36559009552001953, "rewards/margins": 1.372484803199768, "rewards/rejected": -1.006894826889038, "step": 9940 }, { "epoch": 2.57, "learning_rate": 7.980300277326192e-08, "logits/chosen": -4.8681840896606445, "logits/rejected": -4.999460697174072, "logps/chosen": -526.5410766601562, "logps/rejected": -478.4747619628906, "loss": 0.3986, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.21607431769371033, "rewards/margins": 1.0760928392410278, "rewards/rejected": -0.8600185513496399, "step": 9950 }, { "epoch": 2.57, "learning_rate": 7.932485416467438e-08, "logits/chosen": -5.35211181640625, "logits/rejected": -5.078972816467285, "logps/chosen": -602.5831298828125, "logps/rejected": -484.30743408203125, "loss": 0.4033, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.18471883237361908, "rewards/margins": 1.1239378452301025, "rewards/rejected": -0.9392191171646118, "step": 9960 }, { "epoch": 2.57, "learning_rate": 7.884670555608682e-08, "logits/chosen": -5.130538463592529, "logits/rejected": -4.634621620178223, "logps/chosen": -612.1524658203125, "logps/rejected": -494.8617248535156, "loss": 0.4445, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.23994842171669006, "rewards/margins": 1.0639581680297852, "rewards/rejected": -0.8240097761154175, "step": 9970 }, { "epoch": 2.58, "learning_rate": 7.836855694749927e-08, "logits/chosen": -5.514558792114258, "logits/rejected": -5.144597053527832, "logps/chosen": -588.83251953125, "logps/rejected": -412.82672119140625, "loss": 0.3733, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.18743649125099182, "rewards/margins": 1.1740742921829224, "rewards/rejected": -0.9866378903388977, "step": 9980 }, { "epoch": 2.58, "learning_rate": 7.789040833891174e-08, "logits/chosen": -5.036164283752441, "logits/rejected": -4.882107257843018, "logps/chosen": -673.9295654296875, "logps/rejected": -519.7779541015625, "loss": 0.3716, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.3717886209487915, "rewards/margins": 1.3253209590911865, "rewards/rejected": -0.9535323977470398, "step": 9990 }, { "epoch": 2.58, "learning_rate": 7.741225973032419e-08, "logits/chosen": -4.990980625152588, "logits/rejected": -4.993449687957764, "logps/chosen": -552.8897705078125, "logps/rejected": -509.7940979003906, "loss": 0.397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.12034394592046738, "rewards/margins": 1.007899522781372, "rewards/rejected": -0.8875554800033569, "step": 10000 }, { "epoch": 2.58, "eval_logits/chosen": -5.2617902755737305, "eval_logits/rejected": -4.973721981048584, "eval_logps/chosen": -588.6602172851562, "eval_logps/rejected": -450.1063537597656, "eval_loss": 0.5461593270301819, "eval_rewards/accuracies": 0.718999981880188, "eval_rewards/chosen": -0.03906048461794853, "eval_rewards/margins": 0.7175220251083374, "eval_rewards/rejected": -0.756582498550415, "eval_runtime": 103.868, "eval_samples_per_second": 19.255, "eval_steps_per_second": 1.203, "step": 10000 }, { "epoch": 2.58, "learning_rate": 7.693411112173663e-08, "logits/chosen": -5.258916854858398, "logits/rejected": -4.887443542480469, "logps/chosen": -585.2681884765625, "logps/rejected": -431.5528869628906, "loss": 0.4028, "rewards/accuracies": 0.75, "rewards/chosen": 0.10066582262516022, "rewards/margins": 1.1201262474060059, "rewards/rejected": -1.0194604396820068, "step": 10010 }, { "epoch": 2.59, "learning_rate": 7.645596251314908e-08, "logits/chosen": -5.196690082550049, "logits/rejected": -4.7715301513671875, "logps/chosen": -590.9891357421875, "logps/rejected": -451.350341796875, "loss": 0.3976, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.25268784165382385, "rewards/margins": 1.2554985284805298, "rewards/rejected": -1.0028107166290283, "step": 10020 }, { "epoch": 2.59, "learning_rate": 7.597781390456154e-08, "logits/chosen": -4.614828586578369, "logits/rejected": -4.961326599121094, "logps/chosen": -577.7391357421875, "logps/rejected": -493.870849609375, "loss": 0.4164, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.184700608253479, "rewards/margins": 1.0514756441116333, "rewards/rejected": -0.8667751550674438, "step": 10030 }, { "epoch": 2.59, "learning_rate": 7.549966529597398e-08, "logits/chosen": -5.522664546966553, "logits/rejected": -5.1724467277526855, "logps/chosen": -537.3428955078125, "logps/rejected": -427.755859375, "loss": 0.3754, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.07819285988807678, "rewards/margins": 1.1857638359069824, "rewards/rejected": -1.1075708866119385, "step": 10040 }, { "epoch": 2.59, "learning_rate": 7.502151668738643e-08, "logits/chosen": -5.299912452697754, "logits/rejected": -5.206515312194824, "logps/chosen": -566.0, "logps/rejected": -404.2640380859375, "loss": 0.3902, "rewards/accuracies": 0.875, "rewards/chosen": 0.11028033494949341, "rewards/margins": 0.96464604139328, "rewards/rejected": -0.8543658256530762, "step": 10050 }, { "epoch": 2.6, "learning_rate": 7.45433680787989e-08, "logits/chosen": -5.18181037902832, "logits/rejected": -5.024550437927246, "logps/chosen": -582.25927734375, "logps/rejected": -474.6307678222656, "loss": 0.4628, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.12884239852428436, "rewards/margins": 0.9871414303779602, "rewards/rejected": -0.8582989573478699, "step": 10060 }, { "epoch": 2.6, "learning_rate": 7.406521947021134e-08, "logits/chosen": -5.13668155670166, "logits/rejected": -5.313270568847656, "logps/chosen": -463.503173828125, "logps/rejected": -395.1259765625, "loss": 0.456, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06854995340108871, "rewards/margins": 0.8877840042114258, "rewards/rejected": -0.9563339352607727, "step": 10070 }, { "epoch": 2.6, "learning_rate": 7.358707086162379e-08, "logits/chosen": -5.05751371383667, "logits/rejected": -5.0952253341674805, "logps/chosen": -590.20654296875, "logps/rejected": -420.0675354003906, "loss": 0.3779, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.14583192765712738, "rewards/margins": 1.1483267545700073, "rewards/rejected": -1.0024948120117188, "step": 10080 }, { "epoch": 2.61, "learning_rate": 7.310892225303623e-08, "logits/chosen": -4.851193904876709, "logits/rejected": -5.1329569816589355, "logps/chosen": -517.195556640625, "logps/rejected": -436.81256103515625, "loss": 0.4045, "rewards/accuracies": 0.8125, "rewards/chosen": 0.011698281392455101, "rewards/margins": 0.9464808702468872, "rewards/rejected": -0.9347826242446899, "step": 10090 }, { "epoch": 2.61, "learning_rate": 7.26307736444487e-08, "logits/chosen": -5.149366855621338, "logits/rejected": -4.9068098068237305, "logps/chosen": -568.7178955078125, "logps/rejected": -501.647216796875, "loss": 0.4127, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.14851641654968262, "rewards/margins": 0.9669898748397827, "rewards/rejected": -0.8184734582901001, "step": 10100 }, { "epoch": 2.61, "learning_rate": 7.215262503586114e-08, "logits/chosen": -5.047942161560059, "logits/rejected": -4.450873374938965, "logps/chosen": -574.2588500976562, "logps/rejected": -411.87445068359375, "loss": 0.4375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.16956233978271484, "rewards/margins": 0.9762819409370422, "rewards/rejected": -0.8067196011543274, "step": 10110 }, { "epoch": 2.61, "learning_rate": 7.167447642727358e-08, "logits/chosen": -5.321221351623535, "logits/rejected": -4.466827392578125, "logps/chosen": -584.6290893554688, "logps/rejected": -475.2749938964844, "loss": 0.4116, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.14684633910655975, "rewards/margins": 1.185976266860962, "rewards/rejected": -1.0391299724578857, "step": 10120 }, { "epoch": 2.62, "learning_rate": 7.119632781868606e-08, "logits/chosen": -4.97410249710083, "logits/rejected": -4.721528053283691, "logps/chosen": -634.0584106445312, "logps/rejected": -424.6041564941406, "loss": 0.4254, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.3174940347671509, "rewards/margins": 1.1738578081130981, "rewards/rejected": -0.8563637733459473, "step": 10130 }, { "epoch": 2.62, "learning_rate": 7.07181792100985e-08, "logits/chosen": -5.285845756530762, "logits/rejected": -4.317139625549316, "logps/chosen": -669.4439086914062, "logps/rejected": -427.67626953125, "loss": 0.4225, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.21405205130577087, "rewards/margins": 1.148776650428772, "rewards/rejected": -0.9347246289253235, "step": 10140 }, { "epoch": 2.62, "learning_rate": 7.024003060151095e-08, "logits/chosen": -5.52186393737793, "logits/rejected": -4.9930572509765625, "logps/chosen": -541.8760986328125, "logps/rejected": -471.94415283203125, "loss": 0.4259, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.14658014476299286, "rewards/margins": 1.0038237571716309, "rewards/rejected": -0.857243537902832, "step": 10150 }, { "epoch": 2.62, "learning_rate": 6.976188199292339e-08, "logits/chosen": -5.335560321807861, "logits/rejected": -5.299051284790039, "logps/chosen": -594.3919067382812, "logps/rejected": -446.48297119140625, "loss": 0.4045, "rewards/accuracies": 0.8125, "rewards/chosen": 0.06599708646535873, "rewards/margins": 0.9845902323722839, "rewards/rejected": -0.9185932874679565, "step": 10160 }, { "epoch": 2.63, "learning_rate": 6.928373338433585e-08, "logits/chosen": -4.979187965393066, "logits/rejected": -5.215720176696777, "logps/chosen": -545.281982421875, "logps/rejected": -470.568359375, "loss": 0.4381, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.1550372689962387, "rewards/margins": 1.1480567455291748, "rewards/rejected": -0.9930194616317749, "step": 10170 }, { "epoch": 2.63, "learning_rate": 6.88055847757483e-08, "logits/chosen": -5.192938804626465, "logits/rejected": -5.068904876708984, "logps/chosen": -587.7664794921875, "logps/rejected": -439.4042053222656, "loss": 0.3998, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.19166696071624756, "rewards/margins": 1.0339982509613037, "rewards/rejected": -0.8423314094543457, "step": 10180 }, { "epoch": 2.63, "learning_rate": 6.832743616716074e-08, "logits/chosen": -5.008269786834717, "logits/rejected": -4.806353569030762, "logps/chosen": -602.433837890625, "logps/rejected": -425.72088623046875, "loss": 0.4023, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.07541229575872421, "rewards/margins": 1.0304886102676392, "rewards/rejected": -0.9550763964653015, "step": 10190 }, { "epoch": 2.63, "learning_rate": 6.78492875585732e-08, "logits/chosen": -5.330568313598633, "logits/rejected": -4.680765628814697, "logps/chosen": -633.2193603515625, "logps/rejected": -452.66851806640625, "loss": 0.3912, "rewards/accuracies": 0.875, "rewards/chosen": 0.2120560109615326, "rewards/margins": 1.1905310153961182, "rewards/rejected": -0.9784750938415527, "step": 10200 }, { "epoch": 2.64, "learning_rate": 6.737113894998566e-08, "logits/chosen": -5.2222185134887695, "logits/rejected": -4.833889007568359, "logps/chosen": -556.43212890625, "logps/rejected": -414.4996032714844, "loss": 0.4534, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.01689082756638527, "rewards/margins": 0.927936851978302, "rewards/rejected": -0.9110462069511414, "step": 10210 }, { "epoch": 2.64, "learning_rate": 6.68929903413981e-08, "logits/chosen": -4.719674110412598, "logits/rejected": -4.543835639953613, "logps/chosen": -628.451904296875, "logps/rejected": -539.8873291015625, "loss": 0.3908, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.30797868967056274, "rewards/margins": 1.2429304122924805, "rewards/rejected": -0.9349517822265625, "step": 10220 }, { "epoch": 2.64, "learning_rate": 6.641484173281055e-08, "logits/chosen": -5.092551231384277, "logits/rejected": -4.877713680267334, "logps/chosen": -519.641845703125, "logps/rejected": -496.81268310546875, "loss": 0.4527, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.05771777778863907, "rewards/margins": 0.8620578646659851, "rewards/rejected": -0.919775664806366, "step": 10230 }, { "epoch": 2.64, "learning_rate": 6.593669312422301e-08, "logits/chosen": -5.5809125900268555, "logits/rejected": -5.096505641937256, "logps/chosen": -586.9886474609375, "logps/rejected": -515.4144287109375, "loss": 0.434, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.06356178224086761, "rewards/margins": 0.9160860180854797, "rewards/rejected": -0.8525241613388062, "step": 10240 }, { "epoch": 2.65, "learning_rate": 6.545854451563545e-08, "logits/chosen": -5.220267295837402, "logits/rejected": -4.801431179046631, "logps/chosen": -557.3544921875, "logps/rejected": -442.37298583984375, "loss": 0.3754, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1393493413925171, "rewards/margins": 1.109182357788086, "rewards/rejected": -0.9698331952095032, "step": 10250 }, { "epoch": 2.65, "learning_rate": 6.49803959070479e-08, "logits/chosen": -5.192594528198242, "logits/rejected": -5.151909351348877, "logps/chosen": -516.73876953125, "logps/rejected": -439.0381774902344, "loss": 0.3744, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2026437222957611, "rewards/margins": 1.15705406665802, "rewards/rejected": -0.9544102549552917, "step": 10260 }, { "epoch": 2.65, "learning_rate": 6.450224729846036e-08, "logits/chosen": -5.252741813659668, "logits/rejected": -4.437867164611816, "logps/chosen": -624.604736328125, "logps/rejected": -465.870849609375, "loss": 0.405, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19555148482322693, "rewards/margins": 1.0643898248672485, "rewards/rejected": -0.8688383102416992, "step": 10270 }, { "epoch": 2.65, "learning_rate": 6.402409868987282e-08, "logits/chosen": -5.244747161865234, "logits/rejected": -4.893701553344727, "logps/chosen": -669.1451416015625, "logps/rejected": -484.07318115234375, "loss": 0.3755, "rewards/accuracies": 0.875, "rewards/chosen": 0.1654139906167984, "rewards/margins": 1.2341837882995605, "rewards/rejected": -1.0687698125839233, "step": 10280 }, { "epoch": 2.66, "learning_rate": 6.354595008128526e-08, "logits/chosen": -5.374107360839844, "logits/rejected": -4.999588966369629, "logps/chosen": -569.3839721679688, "logps/rejected": -454.7257385253906, "loss": 0.4047, "rewards/accuracies": 0.875, "rewards/chosen": 0.11159074306488037, "rewards/margins": 0.9383357167243958, "rewards/rejected": -0.8267450332641602, "step": 10290 }, { "epoch": 2.66, "learning_rate": 6.306780147269772e-08, "logits/chosen": -5.10075044631958, "logits/rejected": -5.069006443023682, "logps/chosen": -568.9295654296875, "logps/rejected": -478.25030517578125, "loss": 0.4516, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.30575743317604065, "rewards/margins": 1.0938596725463867, "rewards/rejected": -0.7881022691726685, "step": 10300 }, { "epoch": 2.66, "learning_rate": 6.258965286411017e-08, "logits/chosen": -5.301299095153809, "logits/rejected": -5.126402854919434, "logps/chosen": -592.0169677734375, "logps/rejected": -484.45404052734375, "loss": 0.3763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.28560104966163635, "rewards/margins": 1.069044828414917, "rewards/rejected": -0.783443808555603, "step": 10310 }, { "epoch": 2.66, "learning_rate": 6.211150425552261e-08, "logits/chosen": -5.358194828033447, "logits/rejected": -4.614445209503174, "logps/chosen": -638.6898803710938, "logps/rejected": -483.4712829589844, "loss": 0.3824, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.06751944869756699, "rewards/margins": 1.136157751083374, "rewards/rejected": -1.0686382055282593, "step": 10320 }, { "epoch": 2.67, "learning_rate": 6.163335564693507e-08, "logits/chosen": -5.5661115646362305, "logits/rejected": -4.696878433227539, "logps/chosen": -619.2021484375, "logps/rejected": -390.28436279296875, "loss": 0.4021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.2490989714860916, "rewards/margins": 1.112476110458374, "rewards/rejected": -0.8633772134780884, "step": 10330 }, { "epoch": 2.67, "learning_rate": 6.115520703834751e-08, "logits/chosen": -5.504397392272949, "logits/rejected": -4.728663444519043, "logps/chosen": -624.9332275390625, "logps/rejected": -401.07135009765625, "loss": 0.3852, "rewards/accuracies": 0.875, "rewards/chosen": 0.15293744206428528, "rewards/margins": 1.1888784170150757, "rewards/rejected": -1.0359410047531128, "step": 10340 }, { "epoch": 2.67, "learning_rate": 6.067705842975997e-08, "logits/chosen": -5.406341552734375, "logits/rejected": -5.2821364402771, "logps/chosen": -623.741943359375, "logps/rejected": -551.5464477539062, "loss": 0.4281, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08996991813182831, "rewards/margins": 0.8949453234672546, "rewards/rejected": -0.8049753904342651, "step": 10350 }, { "epoch": 2.67, "learning_rate": 6.019890982117242e-08, "logits/chosen": -4.811121463775635, "logits/rejected": -4.812069892883301, "logps/chosen": -638.1264038085938, "logps/rejected": -510.97479248046875, "loss": 0.4216, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.16103044152259827, "rewards/margins": 1.2257672548294067, "rewards/rejected": -1.0647368431091309, "step": 10360 }, { "epoch": 2.68, "learning_rate": 5.972076121258486e-08, "logits/chosen": -5.334190368652344, "logits/rejected": -5.031996726989746, "logps/chosen": -559.3236083984375, "logps/rejected": -491.35137939453125, "loss": 0.4021, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.05140472576022148, "rewards/margins": 0.9881698489189148, "rewards/rejected": -0.9367650747299194, "step": 10370 }, { "epoch": 2.68, "learning_rate": 5.924261260399732e-08, "logits/chosen": -5.369019985198975, "logits/rejected": -5.103786468505859, "logps/chosen": -615.0999755859375, "logps/rejected": -496.0865173339844, "loss": 0.3692, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.27969834208488464, "rewards/margins": 1.09476637840271, "rewards/rejected": -0.8150679469108582, "step": 10380 }, { "epoch": 2.68, "learning_rate": 5.876446399540977e-08, "logits/chosen": -4.845582485198975, "logits/rejected": -4.998870849609375, "logps/chosen": -654.9290771484375, "logps/rejected": -470.646728515625, "loss": 0.3972, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.41148486733436584, "rewards/margins": 1.3627593517303467, "rewards/rejected": -0.9512745141983032, "step": 10390 }, { "epoch": 2.69, "learning_rate": 5.828631538682222e-08, "logits/chosen": -5.777537822723389, "logits/rejected": -4.993617057800293, "logps/chosen": -539.076171875, "logps/rejected": -429.24810791015625, "loss": 0.421, "rewards/accuracies": 0.8125, "rewards/chosen": 0.10452292114496231, "rewards/margins": 1.0473525524139404, "rewards/rejected": -0.9428294897079468, "step": 10400 }, { "epoch": 2.69, "learning_rate": 5.780816677823467e-08, "logits/chosen": -5.4554009437561035, "logits/rejected": -4.904742240905762, "logps/chosen": -505.2308654785156, "logps/rejected": -395.083740234375, "loss": 0.4102, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07544006407260895, "rewards/margins": 0.9021860361099243, "rewards/rejected": -0.9776261448860168, "step": 10410 }, { "epoch": 2.69, "learning_rate": 5.7330018169647124e-08, "logits/chosen": -5.185637474060059, "logits/rejected": -4.8047003746032715, "logps/chosen": -514.4906005859375, "logps/rejected": -468.2937927246094, "loss": 0.3898, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.19679608941078186, "rewards/margins": 1.0662695169448853, "rewards/rejected": -0.8694734573364258, "step": 10420 }, { "epoch": 2.69, "learning_rate": 5.6851869561059576e-08, "logits/chosen": -5.629096508026123, "logits/rejected": -5.167855739593506, "logps/chosen": -581.6770629882812, "logps/rejected": -485.30609130859375, "loss": 0.3998, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.11335190385580063, "rewards/margins": 1.0991451740264893, "rewards/rejected": -0.9857932329177856, "step": 10430 }, { "epoch": 2.7, "learning_rate": 5.637372095247202e-08, "logits/chosen": -5.328433513641357, "logits/rejected": -5.203598976135254, "logps/chosen": -563.7362060546875, "logps/rejected": -440.15252685546875, "loss": 0.4004, "rewards/accuracies": 0.875, "rewards/chosen": 0.010166558437049389, "rewards/margins": 0.9039319753646851, "rewards/rejected": -0.8937654495239258, "step": 10440 }, { "epoch": 2.7, "learning_rate": 5.589557234388448e-08, "logits/chosen": -5.099793910980225, "logits/rejected": -5.017919063568115, "logps/chosen": -557.96923828125, "logps/rejected": -403.8143310546875, "loss": 0.4139, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.1541690081357956, "rewards/margins": 1.1039698123931885, "rewards/rejected": -0.9498008489608765, "step": 10450 }, { "epoch": 2.7, "learning_rate": 5.5417423735296925e-08, "logits/chosen": -5.707282543182373, "logits/rejected": -4.6976637840271, "logps/chosen": -646.089599609375, "logps/rejected": -448.223876953125, "loss": 0.4065, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.099668487906456, "rewards/margins": 0.9914876818656921, "rewards/rejected": -0.8918192982673645, "step": 10460 }, { "epoch": 2.7, "learning_rate": 5.493927512670938e-08, "logits/chosen": -5.2354583740234375, "logits/rejected": -5.318177700042725, "logps/chosen": -639.8482666015625, "logps/rejected": -521.4459228515625, "loss": 0.3768, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.10263649374246597, "rewards/margins": 1.1889212131500244, "rewards/rejected": -1.0862845182418823, "step": 10470 }, { "epoch": 2.71, "learning_rate": 5.4461126518121836e-08, "logits/chosen": -5.029707908630371, "logits/rejected": -5.210406303405762, "logps/chosen": -553.2432861328125, "logps/rejected": -404.58929443359375, "loss": 0.3672, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.07032093405723572, "rewards/margins": 1.1120290756225586, "rewards/rejected": -1.0417081117630005, "step": 10480 }, { "epoch": 2.71, "learning_rate": 5.398297790953428e-08, "logits/chosen": -5.058577537536621, "logits/rejected": -5.084117889404297, "logps/chosen": -621.150390625, "logps/rejected": -467.9248046875, "loss": 0.4292, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.13086922466754913, "rewards/margins": 0.983147144317627, "rewards/rejected": -0.852277934551239, "step": 10490 }, { "epoch": 2.71, "learning_rate": 5.350482930094673e-08, "logits/chosen": -5.027474403381348, "logits/rejected": -4.37065315246582, "logps/chosen": -686.8825073242188, "logps/rejected": -499.9881286621094, "loss": 0.3727, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.35918015241622925, "rewards/margins": 1.1717029809951782, "rewards/rejected": -0.8125227093696594, "step": 10500 }, { "epoch": 2.71, "learning_rate": 5.302668069235918e-08, "logits/chosen": -5.138524055480957, "logits/rejected": -4.913440704345703, "logps/chosen": -615.0032958984375, "logps/rejected": -474.43841552734375, "loss": 0.3801, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.1318053901195526, "rewards/margins": 1.0527489185333252, "rewards/rejected": -0.9209436178207397, "step": 10510 }, { "epoch": 2.72, "learning_rate": 5.254853208377164e-08, "logits/chosen": -5.285904884338379, "logits/rejected": -4.8262505531311035, "logps/chosen": -508.054443359375, "logps/rejected": -395.2572937011719, "loss": 0.442, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.05695826932787895, "rewards/margins": 0.9043815732002258, "rewards/rejected": -0.8474231958389282, "step": 10520 }, { "epoch": 2.72, "learning_rate": 5.207038347518408e-08, "logits/chosen": -5.377849102020264, "logits/rejected": -5.239131450653076, "logps/chosen": -647.3140869140625, "logps/rejected": -515.1484985351562, "loss": 0.4056, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.12186398357152939, "rewards/margins": 1.060632348060608, "rewards/rejected": -0.9387683868408203, "step": 10530 }, { "epoch": 2.72, "learning_rate": 5.1592234866596535e-08, "logits/chosen": -5.058574199676514, "logits/rejected": -4.472035884857178, "logps/chosen": -615.1215209960938, "logps/rejected": -427.4737243652344, "loss": 0.414, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.05604837089776993, "rewards/margins": 0.926599383354187, "rewards/rejected": -0.8705511093139648, "step": 10540 }, { "epoch": 2.72, "learning_rate": 5.111408625800899e-08, "logits/chosen": -5.268280982971191, "logits/rejected": -4.632715225219727, "logps/chosen": -596.66455078125, "logps/rejected": -466.43267822265625, "loss": 0.4413, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.10216410458087921, "rewards/margins": 1.0182863473892212, "rewards/rejected": -0.9161221385002136, "step": 10550 }, { "epoch": 2.73, "learning_rate": 5.063593764942144e-08, "logits/chosen": -5.038386821746826, "logits/rejected": -4.742863655090332, "logps/chosen": -626.3829345703125, "logps/rejected": -481.62603759765625, "loss": 0.4015, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07264451682567596, "rewards/margins": 1.0129032135009766, "rewards/rejected": -0.9402586817741394, "step": 10560 }, { "epoch": 2.73, "learning_rate": 5.015778904083389e-08, "logits/chosen": -5.296903133392334, "logits/rejected": -5.184691905975342, "logps/chosen": -564.7054443359375, "logps/rejected": -461.12628173828125, "loss": 0.3968, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.29682374000549316, "rewards/margins": 1.1766250133514404, "rewards/rejected": -0.8798012733459473, "step": 10570 }, { "epoch": 2.73, "learning_rate": 4.9679640432246336e-08, "logits/chosen": -4.6953043937683105, "logits/rejected": -4.188130855560303, "logps/chosen": -537.1995849609375, "logps/rejected": -458.36553955078125, "loss": 0.4244, "rewards/accuracies": 0.875, "rewards/chosen": 0.08148957043886185, "rewards/margins": 1.111132264137268, "rewards/rejected": -1.0296428203582764, "step": 10580 }, { "epoch": 2.73, "learning_rate": 4.9201491823658794e-08, "logits/chosen": -5.389612674713135, "logits/rejected": -4.897646903991699, "logps/chosen": -604.6990966796875, "logps/rejected": -410.3059997558594, "loss": 0.3851, "rewards/accuracies": 0.875, "rewards/chosen": 0.13737204670906067, "rewards/margins": 1.2379915714263916, "rewards/rejected": -1.1006195545196533, "step": 10590 }, { "epoch": 2.74, "learning_rate": 4.872334321507124e-08, "logits/chosen": -5.396420955657959, "logits/rejected": -4.8571882247924805, "logps/chosen": -615.3311767578125, "logps/rejected": -518.5477294921875, "loss": 0.3832, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.20482313632965088, "rewards/margins": 1.2090235948562622, "rewards/rejected": -1.0042004585266113, "step": 10600 }, { "epoch": 2.74, "learning_rate": 4.824519460648369e-08, "logits/chosen": -5.005032539367676, "logits/rejected": -5.459707260131836, "logps/chosen": -540.0255126953125, "logps/rejected": -515.580322265625, "loss": 0.4095, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.23440416157245636, "rewards/margins": 0.989944338798523, "rewards/rejected": -0.7555402517318726, "step": 10610 }, { "epoch": 2.74, "learning_rate": 4.7767045997896144e-08, "logits/chosen": -5.131434440612793, "logits/rejected": -5.1242218017578125, "logps/chosen": -522.86474609375, "logps/rejected": -458.14404296875, "loss": 0.4195, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.05565230920910835, "rewards/margins": 0.9062932729721069, "rewards/rejected": -0.8506410717964172, "step": 10620 }, { "epoch": 2.74, "learning_rate": 4.7288897389308596e-08, "logits/chosen": -5.044508457183838, "logits/rejected": -4.776641368865967, "logps/chosen": -460.0609436035156, "logps/rejected": -362.6016845703125, "loss": 0.3958, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.03089933469891548, "rewards/margins": 0.929731547832489, "rewards/rejected": -0.9606307744979858, "step": 10630 }, { "epoch": 2.75, "learning_rate": 4.681074878072105e-08, "logits/chosen": -5.188359260559082, "logits/rejected": -5.008777141571045, "logps/chosen": -659.11181640625, "logps/rejected": -575.6497802734375, "loss": 0.4002, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.21622923016548157, "rewards/margins": 1.1650102138519287, "rewards/rejected": -0.9487810134887695, "step": 10640 }, { "epoch": 2.75, "learning_rate": 4.633260017213349e-08, "logits/chosen": -4.8494157791137695, "logits/rejected": -4.4689531326293945, "logps/chosen": -604.5797119140625, "logps/rejected": -429.34124755859375, "loss": 0.4027, "rewards/accuracies": 0.8125, "rewards/chosen": 0.22863852977752686, "rewards/margins": 1.1503980159759521, "rewards/rejected": -0.9217596054077148, "step": 10650 }, { "epoch": 2.75, "learning_rate": 4.5854451563545945e-08, "logits/chosen": -5.238471508026123, "logits/rejected": -5.15804386138916, "logps/chosen": -663.4188232421875, "logps/rejected": -493.63653564453125, "loss": 0.3518, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.2919968068599701, "rewards/margins": 1.4369533061981201, "rewards/rejected": -1.144956350326538, "step": 10660 }, { "epoch": 2.75, "learning_rate": 4.5376302954958404e-08, "logits/chosen": -5.260534286499023, "logits/rejected": -5.246522426605225, "logps/chosen": -581.69482421875, "logps/rejected": -457.3666076660156, "loss": 0.44, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2374936044216156, "rewards/margins": 1.0943622589111328, "rewards/rejected": -0.8568687438964844, "step": 10670 }, { "epoch": 2.76, "learning_rate": 4.489815434637085e-08, "logits/chosen": -5.15450382232666, "logits/rejected": -4.846923828125, "logps/chosen": -640.2000732421875, "logps/rejected": -452.527587890625, "loss": 0.3903, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.25244617462158203, "rewards/margins": 1.0716512203216553, "rewards/rejected": -0.819205105304718, "step": 10680 }, { "epoch": 2.76, "learning_rate": 4.44200057377833e-08, "logits/chosen": -4.913086414337158, "logits/rejected": -5.056898593902588, "logps/chosen": -677.2471313476562, "logps/rejected": -526.3246459960938, "loss": 0.409, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2830817997455597, "rewards/margins": 1.1547108888626099, "rewards/rejected": -0.8716291189193726, "step": 10690 }, { "epoch": 2.76, "learning_rate": 4.394185712919575e-08, "logits/chosen": -4.993624687194824, "logits/rejected": -5.19844913482666, "logps/chosen": -596.6737060546875, "logps/rejected": -422.99755859375, "loss": 0.3689, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.23273415863513947, "rewards/margins": 1.2237880229949951, "rewards/rejected": -0.9910538792610168, "step": 10700 }, { "epoch": 2.77, "learning_rate": 4.3463708520608205e-08, "logits/chosen": -5.653388977050781, "logits/rejected": -5.461355686187744, "logps/chosen": -524.1907958984375, "logps/rejected": -415.66290283203125, "loss": 0.4671, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.07645565271377563, "rewards/margins": 0.7900048494338989, "rewards/rejected": -0.7135492563247681, "step": 10710 }, { "epoch": 2.77, "learning_rate": 4.298555991202065e-08, "logits/chosen": -5.245291709899902, "logits/rejected": -5.462549209594727, "logps/chosen": -597.80908203125, "logps/rejected": -537.8518676757812, "loss": 0.3646, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.3628588318824768, "rewards/margins": 1.329831600189209, "rewards/rejected": -0.9669727087020874, "step": 10720 }, { "epoch": 2.77, "learning_rate": 4.25074113034331e-08, "logits/chosen": -5.309878826141357, "logits/rejected": -4.7491254806518555, "logps/chosen": -514.7105102539062, "logps/rejected": -506.60400390625, "loss": 0.4398, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.03218911588191986, "rewards/margins": 0.8696039915084839, "rewards/rejected": -0.8374149203300476, "step": 10730 }, { "epoch": 2.77, "learning_rate": 4.202926269484556e-08, "logits/chosen": -4.935606956481934, "logits/rejected": -5.080079078674316, "logps/chosen": -501.88153076171875, "logps/rejected": -458.59002685546875, "loss": 0.4231, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11492898315191269, "rewards/margins": 0.9588969349861145, "rewards/rejected": -1.0738258361816406, "step": 10740 }, { "epoch": 2.78, "learning_rate": 4.1551114086258006e-08, "logits/chosen": -5.171691417694092, "logits/rejected": -4.949386119842529, "logps/chosen": -645.7387084960938, "logps/rejected": -519.1723022460938, "loss": 0.418, "rewards/accuracies": 0.875, "rewards/chosen": 0.3383481502532959, "rewards/margins": 1.2345738410949707, "rewards/rejected": -0.89622563123703, "step": 10750 }, { "epoch": 2.78, "learning_rate": 4.107296547767046e-08, "logits/chosen": -5.237408638000488, "logits/rejected": -4.463073253631592, "logps/chosen": -604.7012939453125, "logps/rejected": -437.8807067871094, "loss": 0.4079, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19912220537662506, "rewards/margins": 0.9736934900283813, "rewards/rejected": -0.7745712995529175, "step": 10760 }, { "epoch": 2.78, "learning_rate": 4.0594816869082904e-08, "logits/chosen": -5.1899309158325195, "logits/rejected": -4.876815319061279, "logps/chosen": -588.1791381835938, "logps/rejected": -437.59808349609375, "loss": 0.3518, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.3035685420036316, "rewards/margins": 1.2819626331329346, "rewards/rejected": -0.9783941507339478, "step": 10770 }, { "epoch": 2.78, "learning_rate": 4.011666826049536e-08, "logits/chosen": -4.651065349578857, "logits/rejected": -4.834026336669922, "logps/chosen": -613.0702514648438, "logps/rejected": -508.9181213378906, "loss": 0.3755, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.26986876130104065, "rewards/margins": 1.2463657855987549, "rewards/rejected": -0.9764968156814575, "step": 10780 }, { "epoch": 2.79, "learning_rate": 3.963851965190781e-08, "logits/chosen": -5.145914077758789, "logits/rejected": -4.979100227355957, "logps/chosen": -580.7586669921875, "logps/rejected": -488.2984313964844, "loss": 0.3989, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.1552223265171051, "rewards/margins": 1.10028076171875, "rewards/rejected": -0.9450585246086121, "step": 10790 }, { "epoch": 2.79, "learning_rate": 3.916037104332026e-08, "logits/chosen": -4.899274826049805, "logits/rejected": -4.811643123626709, "logps/chosen": -603.8887329101562, "logps/rejected": -473.2591857910156, "loss": 0.3931, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2601724863052368, "rewards/margins": 1.1973979473114014, "rewards/rejected": -0.9372254610061646, "step": 10800 }, { "epoch": 2.79, "learning_rate": 3.868222243473272e-08, "logits/chosen": -4.998772621154785, "logits/rejected": -4.617021083831787, "logps/chosen": -604.7291870117188, "logps/rejected": -404.94482421875, "loss": 0.4358, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.011016097851097584, "rewards/margins": 0.9618034362792969, "rewards/rejected": -0.9728196263313293, "step": 10810 }, { "epoch": 2.79, "learning_rate": 3.8204073826145164e-08, "logits/chosen": -5.062497138977051, "logits/rejected": -4.805143356323242, "logps/chosen": -581.732666015625, "logps/rejected": -466.7610778808594, "loss": 0.3939, "rewards/accuracies": 0.8125, "rewards/chosen": 0.022178644314408302, "rewards/margins": 0.9689008593559265, "rewards/rejected": -0.946722149848938, "step": 10820 }, { "epoch": 2.8, "learning_rate": 3.7725925217557616e-08, "logits/chosen": -4.942138671875, "logits/rejected": -4.8341803550720215, "logps/chosen": -657.3033447265625, "logps/rejected": -524.6795043945312, "loss": 0.4109, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.14862839877605438, "rewards/margins": 0.8575156927108765, "rewards/rejected": -0.7088873386383057, "step": 10830 }, { "epoch": 2.8, "learning_rate": 3.724777660897006e-08, "logits/chosen": -5.543076992034912, "logits/rejected": -4.890496253967285, "logps/chosen": -599.9197387695312, "logps/rejected": -557.0701293945312, "loss": 0.4145, "rewards/accuracies": 0.8125, "rewards/chosen": 0.050505928695201874, "rewards/margins": 0.8944368362426758, "rewards/rejected": -0.8439309000968933, "step": 10840 }, { "epoch": 2.8, "learning_rate": 3.676962800038252e-08, "logits/chosen": -4.789456367492676, "logits/rejected": -4.726601600646973, "logps/chosen": -606.8335571289062, "logps/rejected": -459.9032287597656, "loss": 0.4256, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.017344746738672256, "rewards/margins": 1.0640472173690796, "rewards/rejected": -1.04670250415802, "step": 10850 }, { "epoch": 2.8, "learning_rate": 3.629147939179497e-08, "logits/chosen": -4.849493503570557, "logits/rejected": -4.743731498718262, "logps/chosen": -605.0320434570312, "logps/rejected": -536.1588134765625, "loss": 0.3593, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.39723560214042664, "rewards/margins": 1.0983682870864868, "rewards/rejected": -0.7011326551437378, "step": 10860 }, { "epoch": 2.81, "learning_rate": 3.581333078320742e-08, "logits/chosen": -5.077559471130371, "logits/rejected": -4.8739824295043945, "logps/chosen": -579.3734741210938, "logps/rejected": -535.8782348632812, "loss": 0.4162, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.251595139503479, "rewards/margins": 1.0471177101135254, "rewards/rejected": -0.7955224514007568, "step": 10870 }, { "epoch": 2.81, "learning_rate": 3.533518217461987e-08, "logits/chosen": -5.446054458618164, "logits/rejected": -4.624466896057129, "logps/chosen": -569.0961303710938, "logps/rejected": -434.794677734375, "loss": 0.3802, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.01497502438724041, "rewards/margins": 0.9362460374832153, "rewards/rejected": -0.921271026134491, "step": 10880 }, { "epoch": 2.81, "learning_rate": 3.485703356603232e-08, "logits/chosen": -4.879721641540527, "logits/rejected": -5.231494426727295, "logps/chosen": -626.5659790039062, "logps/rejected": -517.7338256835938, "loss": 0.4139, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.23776862025260925, "rewards/margins": 1.1936100721359253, "rewards/rejected": -0.9558414220809937, "step": 10890 }, { "epoch": 2.81, "learning_rate": 3.437888495744477e-08, "logits/chosen": -5.0824198722839355, "logits/rejected": -4.448968887329102, "logps/chosen": -640.1406860351562, "logps/rejected": -497.05572509765625, "loss": 0.4042, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.167084738612175, "rewards/margins": 1.0173536539077759, "rewards/rejected": -0.8502688407897949, "step": 10900 }, { "epoch": 2.82, "learning_rate": 3.390073634885722e-08, "logits/chosen": -5.0261993408203125, "logits/rejected": -4.657477378845215, "logps/chosen": -599.4151611328125, "logps/rejected": -498.02801513671875, "loss": 0.4566, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.15629445016384125, "rewards/margins": 0.9980450868606567, "rewards/rejected": -0.8417506217956543, "step": 10910 }, { "epoch": 2.82, "learning_rate": 3.342258774026968e-08, "logits/chosen": -5.083239555358887, "logits/rejected": -5.0695271492004395, "logps/chosen": -568.6922607421875, "logps/rejected": -494.43817138671875, "loss": 0.408, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.14541958272457123, "rewards/margins": 1.1823451519012451, "rewards/rejected": -1.0369254350662231, "step": 10920 }, { "epoch": 2.82, "learning_rate": 3.294443913168213e-08, "logits/chosen": -5.421158313751221, "logits/rejected": -4.893280982971191, "logps/chosen": -618.0091552734375, "logps/rejected": -457.04132080078125, "loss": 0.402, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.2274591028690338, "rewards/margins": 1.2484320402145386, "rewards/rejected": -1.0209730863571167, "step": 10930 }, { "epoch": 2.82, "learning_rate": 3.2466290523094574e-08, "logits/chosen": -5.408086776733398, "logits/rejected": -5.141633033752441, "logps/chosen": -617.16259765625, "logps/rejected": -442.84344482421875, "loss": 0.4474, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.07840131223201752, "rewards/margins": 0.9347522854804993, "rewards/rejected": -0.8563508987426758, "step": 10940 }, { "epoch": 2.83, "learning_rate": 3.1988141914507026e-08, "logits/chosen": -4.953086853027344, "logits/rejected": -5.043787956237793, "logps/chosen": -649.3943481445312, "logps/rejected": -534.9197387695312, "loss": 0.3553, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.2805200517177582, "rewards/margins": 1.2963310480117798, "rewards/rejected": -1.0158109664916992, "step": 10950 }, { "epoch": 2.83, "learning_rate": 3.150999330591948e-08, "logits/chosen": -5.0587615966796875, "logits/rejected": -4.837898254394531, "logps/chosen": -616.4351196289062, "logps/rejected": -515.75341796875, "loss": 0.3863, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.1392417699098587, "rewards/margins": 1.1594905853271484, "rewards/rejected": -1.0202487707138062, "step": 10960 }, { "epoch": 2.83, "learning_rate": 3.103184469733193e-08, "logits/chosen": -5.006904125213623, "logits/rejected": -4.758618354797363, "logps/chosen": -540.1115112304688, "logps/rejected": -447.772705078125, "loss": 0.3689, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.15211224555969238, "rewards/margins": 1.1957544088363647, "rewards/rejected": -1.0436421632766724, "step": 10970 }, { "epoch": 2.84, "learning_rate": 3.055369608874438e-08, "logits/chosen": -5.383715629577637, "logits/rejected": -5.0456085205078125, "logps/chosen": -556.093505859375, "logps/rejected": -427.29180908203125, "loss": 0.4283, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.54372239112854, "rewards/margins": 1.508208990097046, "rewards/rejected": -0.9644867181777954, "step": 10980 }, { "epoch": 2.84, "learning_rate": 3.007554748015683e-08, "logits/chosen": -5.0954909324646, "logits/rejected": -5.068613052368164, "logps/chosen": -644.834716796875, "logps/rejected": -478.4432678222656, "loss": 0.3978, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.25002703070640564, "rewards/margins": 1.2999857664108276, "rewards/rejected": -1.0499587059020996, "step": 10990 }, { "epoch": 2.84, "learning_rate": 2.959739887156928e-08, "logits/chosen": -5.343197822570801, "logits/rejected": -5.068399429321289, "logps/chosen": -563.7489013671875, "logps/rejected": -442.8545837402344, "loss": 0.4486, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.16238142549991608, "rewards/margins": 1.005318522453308, "rewards/rejected": -0.8429371118545532, "step": 11000 }, { "epoch": 2.84, "eval_logits/chosen": -5.244109153747559, "eval_logits/rejected": -4.956947326660156, "eval_logps/chosen": -588.7628784179688, "eval_logps/rejected": -450.2074279785156, "eval_loss": 0.5458506345748901, "eval_rewards/accuracies": 0.7110000252723694, "eval_rewards/chosen": -0.04933188483119011, "eval_rewards/margins": 0.7173619866371155, "eval_rewards/rejected": -0.7666938304901123, "eval_runtime": 103.8404, "eval_samples_per_second": 19.26, "eval_steps_per_second": 1.204, "step": 11000 }, { "epoch": 2.84, "learning_rate": 2.9119250262981735e-08, "logits/chosen": -5.136931419372559, "logits/rejected": -4.643008708953857, "logps/chosen": -554.3670654296875, "logps/rejected": -486.82012939453125, "loss": 0.3927, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.10032905638217926, "rewards/margins": 1.2257516384124756, "rewards/rejected": -1.125422716140747, "step": 11010 }, { "epoch": 2.85, "learning_rate": 2.8641101654394187e-08, "logits/chosen": -5.154452800750732, "logits/rejected": -4.73182487487793, "logps/chosen": -522.4246826171875, "logps/rejected": -414.4034729003906, "loss": 0.4045, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.09225860983133316, "rewards/margins": 1.0196186304092407, "rewards/rejected": -0.9273600578308105, "step": 11020 }, { "epoch": 2.85, "learning_rate": 2.8162953045806636e-08, "logits/chosen": -5.236639976501465, "logits/rejected": -5.06455135345459, "logps/chosen": -667.8474731445312, "logps/rejected": -524.4158935546875, "loss": 0.3667, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.22485068440437317, "rewards/margins": 1.2035481929779053, "rewards/rejected": -0.9786975979804993, "step": 11030 }, { "epoch": 2.85, "learning_rate": 2.7684804437219088e-08, "logits/chosen": -5.221505165100098, "logits/rejected": -4.124737739562988, "logps/chosen": -543.410400390625, "logps/rejected": -423.26849365234375, "loss": 0.4393, "rewards/accuracies": 0.75, "rewards/chosen": 0.024420540779829025, "rewards/margins": 0.9866177439689636, "rewards/rejected": -0.9621971845626831, "step": 11040 }, { "epoch": 2.85, "learning_rate": 2.7206655828631536e-08, "logits/chosen": -5.276465892791748, "logits/rejected": -4.62787389755249, "logps/chosen": -715.0985107421875, "logps/rejected": -519.7482299804688, "loss": 0.3692, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.3222195506095886, "rewards/margins": 1.3012373447418213, "rewards/rejected": -0.9790178537368774, "step": 11050 }, { "epoch": 2.86, "learning_rate": 2.6728507220043988e-08, "logits/chosen": -4.955366611480713, "logits/rejected": -5.002579689025879, "logps/chosen": -571.1654052734375, "logps/rejected": -499.5303649902344, "loss": 0.3872, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.19790926575660706, "rewards/margins": 1.0662347078323364, "rewards/rejected": -0.868325412273407, "step": 11060 }, { "epoch": 2.86, "learning_rate": 2.6250358611456437e-08, "logits/chosen": -5.129946708679199, "logits/rejected": -4.940310478210449, "logps/chosen": -584.2059326171875, "logps/rejected": -498.72393798828125, "loss": 0.3854, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.22770479321479797, "rewards/margins": 1.1077311038970947, "rewards/rejected": -0.8800262212753296, "step": 11070 }, { "epoch": 2.86, "learning_rate": 2.5772210002868892e-08, "logits/chosen": -4.588015556335449, "logits/rejected": -4.595081329345703, "logps/chosen": -577.1110229492188, "logps/rejected": -472.5956115722656, "loss": 0.4316, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2859738767147064, "rewards/margins": 1.1880446672439575, "rewards/rejected": -0.9020708799362183, "step": 11080 }, { "epoch": 2.86, "learning_rate": 2.529406139428134e-08, "logits/chosen": -5.255947113037109, "logits/rejected": -4.920037269592285, "logps/chosen": -618.0155029296875, "logps/rejected": -519.5169677734375, "loss": 0.4611, "rewards/accuracies": 0.8125, "rewards/chosen": 0.14254876971244812, "rewards/margins": 0.8952218294143677, "rewards/rejected": -0.7526730298995972, "step": 11090 }, { "epoch": 2.87, "learning_rate": 2.4815912785693793e-08, "logits/chosen": -4.89483642578125, "logits/rejected": -4.422279357910156, "logps/chosen": -622.4539794921875, "logps/rejected": -442.0025329589844, "loss": 0.4105, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.2152954787015915, "rewards/margins": 1.154651403427124, "rewards/rejected": -0.9393560290336609, "step": 11100 }, { "epoch": 2.87, "learning_rate": 2.433776417710624e-08, "logits/chosen": -5.299358367919922, "logits/rejected": -4.944146156311035, "logps/chosen": -626.6629638671875, "logps/rejected": -464.47869873046875, "loss": 0.4377, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.24762144684791565, "rewards/margins": 1.271044135093689, "rewards/rejected": -1.0234227180480957, "step": 11110 }, { "epoch": 2.87, "learning_rate": 2.3859615568518694e-08, "logits/chosen": -5.197515487670898, "logits/rejected": -5.026610851287842, "logps/chosen": -672.92919921875, "logps/rejected": -483.5021057128906, "loss": 0.4075, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.3204244077205658, "rewards/margins": 1.2096140384674072, "rewards/rejected": -0.8891897201538086, "step": 11120 }, { "epoch": 2.87, "learning_rate": 2.3381466959931146e-08, "logits/chosen": -5.401242733001709, "logits/rejected": -4.566195487976074, "logps/chosen": -719.1121826171875, "logps/rejected": -493.3193359375, "loss": 0.4123, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.2991449534893036, "rewards/margins": 1.2127931118011475, "rewards/rejected": -0.913648247718811, "step": 11130 }, { "epoch": 2.88, "learning_rate": 2.2903318351343597e-08, "logits/chosen": -5.326563358306885, "logits/rejected": -4.679368019104004, "logps/chosen": -624.999755859375, "logps/rejected": -531.452880859375, "loss": 0.3834, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.31413981318473816, "rewards/margins": 1.1637380123138428, "rewards/rejected": -0.8495980501174927, "step": 11140 }, { "epoch": 2.88, "learning_rate": 2.242516974275605e-08, "logits/chosen": -4.883437156677246, "logits/rejected": -5.025925636291504, "logps/chosen": -496.538818359375, "logps/rejected": -401.8677673339844, "loss": 0.4134, "rewards/accuracies": 0.8125, "rewards/chosen": 0.16495931148529053, "rewards/margins": 1.0185165405273438, "rewards/rejected": -0.8535572290420532, "step": 11150 }, { "epoch": 2.88, "learning_rate": 2.1947021134168498e-08, "logits/chosen": -5.06108283996582, "logits/rejected": -4.526723861694336, "logps/chosen": -566.9498291015625, "logps/rejected": -444.62286376953125, "loss": 0.4013, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.14621193706989288, "rewards/margins": 0.9810665845870972, "rewards/rejected": -0.8348547220230103, "step": 11160 }, { "epoch": 2.88, "learning_rate": 2.146887252558095e-08, "logits/chosen": -5.214183330535889, "logits/rejected": -4.73587703704834, "logps/chosen": -640.5264282226562, "logps/rejected": -481.3013610839844, "loss": 0.3898, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.3025330603122711, "rewards/margins": 1.1933023929595947, "rewards/rejected": -0.8907693028450012, "step": 11170 }, { "epoch": 2.89, "learning_rate": 2.09907239169934e-08, "logits/chosen": -4.903995037078857, "logits/rejected": -5.242600440979004, "logps/chosen": -632.2268676757812, "logps/rejected": -489.19610595703125, "loss": 0.3962, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.27793973684310913, "rewards/margins": 1.0693607330322266, "rewards/rejected": -0.7914210557937622, "step": 11180 }, { "epoch": 2.89, "learning_rate": 2.051257530840585e-08, "logits/chosen": -5.333563804626465, "logits/rejected": -5.291523456573486, "logps/chosen": -547.6246948242188, "logps/rejected": -447.10345458984375, "loss": 0.4356, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.08446265757083893, "rewards/margins": 0.9883478283882141, "rewards/rejected": -0.9038850665092468, "step": 11190 }, { "epoch": 2.89, "learning_rate": 2.0034426699818303e-08, "logits/chosen": -5.213520050048828, "logits/rejected": -5.180721282958984, "logps/chosen": -564.168701171875, "logps/rejected": -443.5384216308594, "loss": 0.3886, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.13456830382347107, "rewards/margins": 1.0850478410720825, "rewards/rejected": -0.9504795074462891, "step": 11200 }, { "epoch": 2.89, "learning_rate": 1.9556278091230755e-08, "logits/chosen": -5.575275897979736, "logits/rejected": -5.0331711769104, "logps/chosen": -561.3367919921875, "logps/rejected": -449.62109375, "loss": 0.4463, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.01956062577664852, "rewards/margins": 0.8645012974739075, "rewards/rejected": -0.8449406623840332, "step": 11210 }, { "epoch": 2.9, "learning_rate": 1.9078129482643203e-08, "logits/chosen": -4.956969261169434, "logits/rejected": -4.817302703857422, "logps/chosen": -617.4757080078125, "logps/rejected": -496.909423828125, "loss": 0.3727, "rewards/accuracies": 0.8125, "rewards/chosen": 0.28931763768196106, "rewards/margins": 1.2083638906478882, "rewards/rejected": -0.9190464019775391, "step": 11220 }, { "epoch": 2.9, "learning_rate": 1.8599980874055655e-08, "logits/chosen": -5.030013084411621, "logits/rejected": -4.896213054656982, "logps/chosen": -572.0458374023438, "logps/rejected": -488.8038635253906, "loss": 0.3871, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.1581289917230606, "rewards/margins": 1.1221048831939697, "rewards/rejected": -0.9639759063720703, "step": 11230 }, { "epoch": 2.9, "learning_rate": 1.8121832265468107e-08, "logits/chosen": -5.5985589027404785, "logits/rejected": -4.830717086791992, "logps/chosen": -637.0513916015625, "logps/rejected": -475.4072265625, "loss": 0.3836, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.18177826702594757, "rewards/margins": 1.2653608322143555, "rewards/rejected": -1.0835825204849243, "step": 11240 }, { "epoch": 2.9, "learning_rate": 1.7643683656880556e-08, "logits/chosen": -5.324755668640137, "logits/rejected": -4.789327621459961, "logps/chosen": -662.132568359375, "logps/rejected": -455.95526123046875, "loss": 0.4122, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.050634223967790604, "rewards/margins": 1.046976089477539, "rewards/rejected": -0.9963418841362, "step": 11250 }, { "epoch": 2.91, "learning_rate": 1.716553504829301e-08, "logits/chosen": -5.082071304321289, "logits/rejected": -5.104189872741699, "logps/chosen": -606.2183837890625, "logps/rejected": -522.6820678710938, "loss": 0.4096, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1666480004787445, "rewards/margins": 1.2264320850372314, "rewards/rejected": -1.059783935546875, "step": 11260 }, { "epoch": 2.91, "learning_rate": 1.668738643970546e-08, "logits/chosen": -4.711189270019531, "logits/rejected": -4.924211502075195, "logps/chosen": -601.4144287109375, "logps/rejected": -461.46075439453125, "loss": 0.3694, "rewards/accuracies": 0.8125, "rewards/chosen": 0.08000713586807251, "rewards/margins": 1.271324634552002, "rewards/rejected": -1.1913176774978638, "step": 11270 }, { "epoch": 2.91, "learning_rate": 1.6209237831117912e-08, "logits/chosen": -5.3008341789245605, "logits/rejected": -4.911168098449707, "logps/chosen": -624.7938232421875, "logps/rejected": -481.44659423828125, "loss": 0.3929, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.13657629489898682, "rewards/margins": 0.9566575288772583, "rewards/rejected": -0.8200812339782715, "step": 11280 }, { "epoch": 2.92, "learning_rate": 1.573108922253036e-08, "logits/chosen": -5.148921012878418, "logits/rejected": -4.999274253845215, "logps/chosen": -543.7133178710938, "logps/rejected": -441.2300720214844, "loss": 0.3863, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.1094493493437767, "rewards/margins": 0.9870722889900208, "rewards/rejected": -0.8776227831840515, "step": 11290 }, { "epoch": 2.92, "learning_rate": 1.5252940613942813e-08, "logits/chosen": -5.350049018859863, "logits/rejected": -5.173722267150879, "logps/chosen": -592.4625854492188, "logps/rejected": -472.517333984375, "loss": 0.3556, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2755183279514313, "rewards/margins": 1.2032588720321655, "rewards/rejected": -0.9277405738830566, "step": 11300 }, { "epoch": 2.92, "learning_rate": 1.4774792005355265e-08, "logits/chosen": -5.169276237487793, "logits/rejected": -4.513784885406494, "logps/chosen": -645.7938232421875, "logps/rejected": -462.9095764160156, "loss": 0.437, "rewards/accuracies": 0.8125, "rewards/chosen": 0.06281527131795883, "rewards/margins": 1.0009191036224365, "rewards/rejected": -0.9381038546562195, "step": 11310 }, { "epoch": 2.92, "learning_rate": 1.4296643396767715e-08, "logits/chosen": -5.3375115394592285, "logits/rejected": -4.727958679199219, "logps/chosen": -569.2283325195312, "logps/rejected": -406.9765319824219, "loss": 0.4235, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1055120974779129, "rewards/margins": 1.0146260261535645, "rewards/rejected": -0.9091137647628784, "step": 11320 }, { "epoch": 2.93, "learning_rate": 1.3818494788180165e-08, "logits/chosen": -4.964106559753418, "logits/rejected": -5.05617618560791, "logps/chosen": -567.2974853515625, "logps/rejected": -459.1414489746094, "loss": 0.4399, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.01430426724255085, "rewards/margins": 0.8834322094917297, "rewards/rejected": -0.8977364301681519, "step": 11330 }, { "epoch": 2.93, "learning_rate": 1.3340346179592617e-08, "logits/chosen": -5.229657173156738, "logits/rejected": -4.761871814727783, "logps/chosen": -548.1785888671875, "logps/rejected": -410.94879150390625, "loss": 0.4633, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.0006675541517324746, "rewards/margins": 0.7918963432312012, "rewards/rejected": -0.7912287712097168, "step": 11340 }, { "epoch": 2.93, "learning_rate": 1.2862197571005068e-08, "logits/chosen": -4.907290935516357, "logits/rejected": -4.702147960662842, "logps/chosen": -548.246826171875, "logps/rejected": -476.45721435546875, "loss": 0.374, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.1656375229358673, "rewards/margins": 1.2119715213775635, "rewards/rejected": -1.0463340282440186, "step": 11350 }, { "epoch": 2.93, "learning_rate": 1.2384048962417518e-08, "logits/chosen": -5.070906639099121, "logits/rejected": -4.715464115142822, "logps/chosen": -617.2854614257812, "logps/rejected": -529.847900390625, "loss": 0.4051, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.2547385096549988, "rewards/margins": 1.145498514175415, "rewards/rejected": -0.8907599449157715, "step": 11360 }, { "epoch": 2.94, "learning_rate": 1.190590035382997e-08, "logits/chosen": -5.162139892578125, "logits/rejected": -4.548586845397949, "logps/chosen": -579.1178588867188, "logps/rejected": -483.55010986328125, "loss": 0.4254, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.12099353969097137, "rewards/margins": 0.8619059324264526, "rewards/rejected": -0.7409123778343201, "step": 11370 }, { "epoch": 2.94, "learning_rate": 1.142775174524242e-08, "logits/chosen": -5.335607528686523, "logits/rejected": -4.42781400680542, "logps/chosen": -581.2821044921875, "logps/rejected": -471.3570251464844, "loss": 0.4348, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1334008425474167, "rewards/margins": 0.9548916816711426, "rewards/rejected": -0.8214908838272095, "step": 11380 }, { "epoch": 2.94, "learning_rate": 1.094960313665487e-08, "logits/chosen": -5.036791801452637, "logits/rejected": -4.69455099105835, "logps/chosen": -592.8031005859375, "logps/rejected": -465.68743896484375, "loss": 0.4054, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3002071976661682, "rewards/margins": 1.0062183141708374, "rewards/rejected": -0.7060111165046692, "step": 11390 }, { "epoch": 2.94, "learning_rate": 1.0471454528067324e-08, "logits/chosen": -5.2326836585998535, "logits/rejected": -5.402526378631592, "logps/chosen": -614.5970458984375, "logps/rejected": -434.4915466308594, "loss": 0.3824, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.19606807827949524, "rewards/margins": 1.2152729034423828, "rewards/rejected": -1.01920485496521, "step": 11400 }, { "epoch": 2.95, "learning_rate": 9.993305919479775e-09, "logits/chosen": -4.570802211761475, "logits/rejected": -4.6292829513549805, "logps/chosen": -598.6087646484375, "logps/rejected": -477.5489807128906, "loss": 0.3886, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.12893423438072205, "rewards/margins": 1.1758556365966797, "rewards/rejected": -1.0469214916229248, "step": 11410 }, { "epoch": 2.95, "learning_rate": 9.515157310892225e-09, "logits/chosen": -5.247891426086426, "logits/rejected": -4.595234394073486, "logps/chosen": -657.8994750976562, "logps/rejected": -489.642333984375, "loss": 0.4316, "rewards/accuracies": 0.8125, "rewards/chosen": 0.168633371591568, "rewards/margins": 1.0197985172271729, "rewards/rejected": -0.8511651754379272, "step": 11420 }, { "epoch": 2.95, "learning_rate": 9.037008702304677e-09, "logits/chosen": -4.719280242919922, "logits/rejected": -4.744603157043457, "logps/chosen": -515.602783203125, "logps/rejected": -493.06524658203125, "loss": 0.4072, "rewards/accuracies": 0.875, "rewards/chosen": 0.008562874980270863, "rewards/margins": 1.0708144903182983, "rewards/rejected": -1.0622518062591553, "step": 11430 }, { "epoch": 2.95, "learning_rate": 8.558860093717127e-09, "logits/chosen": -5.311524868011475, "logits/rejected": -4.540936470031738, "logps/chosen": -531.6647338867188, "logps/rejected": -458.78643798828125, "loss": 0.4506, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0632825642824173, "rewards/margins": 0.9224278330802917, "rewards/rejected": -0.9857103228569031, "step": 11440 }, { "epoch": 2.96, "learning_rate": 8.080711485129578e-09, "logits/chosen": -5.365767955780029, "logits/rejected": -4.714945316314697, "logps/chosen": -534.965087890625, "logps/rejected": -509.87225341796875, "loss": 0.4089, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.08514610677957535, "rewards/margins": 1.128566026687622, "rewards/rejected": -1.0434198379516602, "step": 11450 }, { "epoch": 2.96, "learning_rate": 7.60256287654203e-09, "logits/chosen": -5.2114057540893555, "logits/rejected": -4.999919414520264, "logps/chosen": -644.8795166015625, "logps/rejected": -455.7203674316406, "loss": 0.4477, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.05770410969853401, "rewards/margins": 0.9279583692550659, "rewards/rejected": -0.8702543377876282, "step": 11460 }, { "epoch": 2.96, "learning_rate": 7.12441426795448e-09, "logits/chosen": -5.1854448318481445, "logits/rejected": -4.566479206085205, "logps/chosen": -549.0654907226562, "logps/rejected": -432.8983459472656, "loss": 0.3872, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.16914112865924835, "rewards/margins": 1.1147611141204834, "rewards/rejected": -0.9456197619438171, "step": 11470 }, { "epoch": 2.96, "learning_rate": 6.646265659366931e-09, "logits/chosen": -5.139792442321777, "logits/rejected": -4.437985420227051, "logps/chosen": -475.9539489746094, "logps/rejected": -390.60382080078125, "loss": 0.4233, "rewards/accuracies": 0.8125, "rewards/chosen": 0.06741130352020264, "rewards/margins": 0.9288379549980164, "rewards/rejected": -0.8614265322685242, "step": 11480 }, { "epoch": 2.97, "learning_rate": 6.168117050779382e-09, "logits/chosen": -5.081090450286865, "logits/rejected": -4.591541290283203, "logps/chosen": -581.8884887695312, "logps/rejected": -477.81536865234375, "loss": 0.4537, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.01888551190495491, "rewards/margins": 0.972995936870575, "rewards/rejected": -0.954110324382782, "step": 11490 }, { "epoch": 2.97, "learning_rate": 5.6899684421918334e-09, "logits/chosen": -4.864903450012207, "logits/rejected": -4.825181007385254, "logps/chosen": -590.5306396484375, "logps/rejected": -508.35675048828125, "loss": 0.4155, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.05778289958834648, "rewards/margins": 0.9284170866012573, "rewards/rejected": -0.8706341981887817, "step": 11500 }, { "epoch": 2.97, "learning_rate": 5.211819833604284e-09, "logits/chosen": -5.208063125610352, "logits/rejected": -4.997429847717285, "logps/chosen": -702.4976806640625, "logps/rejected": -539.1067504882812, "loss": 0.367, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.3502900004386902, "rewards/margins": 1.2139803171157837, "rewards/rejected": -0.863690197467804, "step": 11510 }, { "epoch": 2.97, "learning_rate": 4.733671225016735e-09, "logits/chosen": -4.988503456115723, "logits/rejected": -5.004144191741943, "logps/chosen": -517.9584350585938, "logps/rejected": -426.789306640625, "loss": 0.4216, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.14973224699497223, "rewards/margins": 0.9260549545288086, "rewards/rejected": -0.7763225436210632, "step": 11520 }, { "epoch": 2.98, "learning_rate": 4.255522616429185e-09, "logits/chosen": -5.296090126037598, "logits/rejected": -5.140782833099365, "logps/chosen": -621.7508544921875, "logps/rejected": -465.28814697265625, "loss": 0.4134, "rewards/accuracies": 0.875, "rewards/chosen": 0.18310534954071045, "rewards/margins": 1.1279979944229126, "rewards/rejected": -0.9448927640914917, "step": 11530 }, { "epoch": 2.98, "learning_rate": 3.777374007841637e-09, "logits/chosen": -5.11737060546875, "logits/rejected": -4.68073844909668, "logps/chosen": -578.8333740234375, "logps/rejected": -495.9488830566406, "loss": 0.4062, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 0.13919498026371002, "rewards/margins": 1.061436414718628, "rewards/rejected": -0.9222413897514343, "step": 11540 }, { "epoch": 2.98, "learning_rate": 3.2992253992540884e-09, "logits/chosen": -5.362083911895752, "logits/rejected": -5.487084865570068, "logps/chosen": -620.727294921875, "logps/rejected": -460.12176513671875, "loss": 0.378, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.016081079840660095, "rewards/margins": 1.1477344036102295, "rewards/rejected": -1.131653070449829, "step": 11550 }, { "epoch": 2.98, "learning_rate": 2.821076790666539e-09, "logits/chosen": -5.245089530944824, "logits/rejected": -4.6937336921691895, "logps/chosen": -589.3862915039062, "logps/rejected": -461.2257385253906, "loss": 0.3999, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.33588045835494995, "rewards/margins": 1.1613339185714722, "rewards/rejected": -0.8254534602165222, "step": 11560 }, { "epoch": 2.99, "learning_rate": 2.34292818207899e-09, "logits/chosen": -5.043553352355957, "logits/rejected": -4.979231834411621, "logps/chosen": -558.26904296875, "logps/rejected": -578.2135620117188, "loss": 0.4322, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.1724320352077484, "rewards/margins": 1.0562989711761475, "rewards/rejected": -0.8838667869567871, "step": 11570 }, { "epoch": 2.99, "learning_rate": 1.864779573491441e-09, "logits/chosen": -5.370102882385254, "logits/rejected": -4.807511329650879, "logps/chosen": -672.3551025390625, "logps/rejected": -507.9878845214844, "loss": 0.3906, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.19947457313537598, "rewards/margins": 1.0999289751052856, "rewards/rejected": -0.9004544019699097, "step": 11580 }, { "epoch": 2.99, "learning_rate": 1.386630964903892e-09, "logits/chosen": -5.004294395446777, "logits/rejected": -5.237103462219238, "logps/chosen": -570.5283813476562, "logps/rejected": -508.21026611328125, "loss": 0.4031, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2111063301563263, "rewards/margins": 1.1202895641326904, "rewards/rejected": -0.9091832041740417, "step": 11590 }, { "epoch": 3.0, "learning_rate": 9.084823563163431e-10, "logits/chosen": -5.558377265930176, "logits/rejected": -5.324938774108887, "logps/chosen": -439.72186279296875, "logps/rejected": -417.1603088378906, "loss": 0.4423, "rewards/accuracies": 0.8125, "rewards/chosen": 0.06920477747917175, "rewards/margins": 0.8391389846801758, "rewards/rejected": -0.7699342966079712, "step": 11600 }, { "epoch": 3.0, "learning_rate": 4.3033374772879407e-10, "logits/chosen": -5.0607452392578125, "logits/rejected": -4.838926792144775, "logps/chosen": -553.263427734375, "logps/rejected": -461.45489501953125, "loss": 0.4131, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.13693445920944214, "rewards/margins": 1.0933754444122314, "rewards/rejected": -0.9564409255981445, "step": 11610 }, { "epoch": 3.0, "step": 11619, "total_flos": 0.0, "train_loss": 0.49318724932753905, "train_runtime": 14700.0435, "train_samples_per_second": 12.646, "train_steps_per_second": 0.79 } ], "logging_steps": 10, "max_steps": 11619, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }