AmberYifan committed · Commit 8a8abe4 · verified · 1 Parent(s): cae9902

Training in progress, epoch 2, checkpoint

last-checkpoint/global_step1668/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d986a1e7a744bde47dc9e19fa2159cf390f994fdb8524597f1c9f0e21f96df5
+ size 17969116286
last-checkpoint/global_step1668/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe691be817b1ffe6a0a903a49023e3beda246fd0e1c55f12a6823b1e17f8f62d
+ size 17969116286
last-checkpoint/global_step1668/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2687f619fb339b920df44d65a1cc1d47ea0d98f82e0d5ce051e577e27e65d31e
+ size 17969116286
last-checkpoint/global_step1668/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ac8562260fa5dfdc8e01a790eeca43f7f764dc81c095809afc85db694359ae5
+ size 150693
last-checkpoint/global_step1668/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb6b4c3caea832181f39ba4edfb395c07cdbf869f4c1072ef33a56d7df79ed9b
+ size 150693
last-checkpoint/global_step1668/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0bccdbadac1523d3d15bb0e4f78b020320345a73867e46179395984f98a6e5aa
+ size 150693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step834
+ global_step1668
last-checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3879357e44c96aa19d78c814b3c1f8738ec9ca9e3151c7c425a061206fcac7df
+ oid sha256:5c4083b832c5b3ac41afa856bc59f661857a48c246443cc7b36e6f745363e3c8
  size 4938985352
last-checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ad466c3fa93791946d9d8337de57eaaa724508edc7d0a31873e00e191af148f4
+ oid sha256:991b18df0ae50a421c4cb04ca9f64491a3078f1ed787d3e4a74c7304a16c58f1
  size 4947390880
last-checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:519d71464886272102ea7fcaa88fc8b8b2d1385bc60f5c089d97983efe839ab7
+ oid sha256:4195e7ebcf2b01a8749662c00b461a1d520705ab175f099f6e0797acbfbd5bc4
  size 3590488816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b580656286e8a6f334aced7bdb46499a54f3bb95644a0167405da037afbd894d
+ oid sha256:9698021f2d84167912e7be6ba48d3d2b8d6b20894f23319f36df078c03b33a64
  size 14768
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a763d1d109f11374f3725ac97283433a5c2264a51fd11d55a5af0441e79bbe2c
+ oid sha256:90a140d1d010220b1679bf6e519f8d3d518cb57331e0e7fb30008dc00e427811
  size 14768
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5685be549346816d411abdb06552706ef94ec9c1b6cb3302d99d90f37622b797
+ oid sha256:19aef773503e08b43c9bd940d36e298220b8d39900e7bd698b6996ac3625e59e
  size 14768
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:55febb44a9d245e649a9b6071529c6f084be4339b0c0578ab70892487be29366
+ oid sha256:dd0242fe78905f8cb32fe932e8bcb70076d2384705e561f1118d71e5d750d8b4
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.0,
+ "epoch": 2.0,
  "eval_steps": 500,
- "global_step": 834,
+ "global_step": 1668,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1283,6 +1283,1267 @@
  "eval_samples_per_second": 16.921,
  "eval_steps_per_second": 0.717,
  "step": 834
1286
+ },
1287
+ {
1288
+ "epoch": 1.0071942446043165,
1289
+ "grad_norm": 17.925966726812263,
1290
+ "learning_rate": 3.691692581075078e-07,
1291
+ "logits/chosen": -1.484375,
1292
+ "logits/rejected": -1.5078125,
1293
+ "logps/chosen": -198.0,
1294
+ "logps/rejected": -228.0,
1295
+ "loss": 0.3292,
1296
+ "rewards/accuracies": 0.949999988079071,
1297
+ "rewards/chosen": -1.28125,
1298
+ "rewards/margins": 1.6328125,
1299
+ "rewards/rejected": -2.921875,
1300
+ "step": 840
1301
+ },
1302
+ {
1303
+ "epoch": 1.0191846522781776,
1304
+ "grad_norm": 15.13245137359342,
1305
+ "learning_rate": 3.6694802310084405e-07,
1306
+ "logits/chosen": -1.4921875,
1307
+ "logits/rejected": -1.5234375,
1308
+ "logps/chosen": -191.0,
1309
+ "logps/rejected": -209.0,
1310
+ "loss": 0.2941,
1311
+ "rewards/accuracies": 0.887499988079071,
1312
+ "rewards/chosen": -1.390625,
1313
+ "rewards/margins": 1.390625,
1314
+ "rewards/rejected": -2.78125,
1315
+ "step": 850
1316
+ },
1317
+ {
1318
+ "epoch": 1.0311750599520384,
1319
+ "grad_norm": 15.85613126792236,
1320
+ "learning_rate": 3.6472678809418033e-07,
1321
+ "logits/chosen": -1.453125,
1322
+ "logits/rejected": -1.453125,
1323
+ "logps/chosen": -197.0,
1324
+ "logps/rejected": -200.0,
1325
+ "loss": 0.3029,
1326
+ "rewards/accuracies": 0.887499988079071,
1327
+ "rewards/chosen": -1.28125,
1328
+ "rewards/margins": 1.65625,
1329
+ "rewards/rejected": -2.9375,
1330
+ "step": 860
1331
+ },
1332
+ {
1333
+ "epoch": 1.0431654676258992,
1334
+ "grad_norm": 17.934043297524138,
1335
+ "learning_rate": 3.625055530875166e-07,
1336
+ "logits/chosen": -1.484375,
1337
+ "logits/rejected": -1.4765625,
1338
+ "logps/chosen": -201.0,
1339
+ "logps/rejected": -217.0,
1340
+ "loss": 0.2741,
1341
+ "rewards/accuracies": 0.9125000238418579,
1342
+ "rewards/chosen": -1.59375,
1343
+ "rewards/margins": 1.828125,
1344
+ "rewards/rejected": -3.421875,
1345
+ "step": 870
1346
+ },
1347
+ {
1348
+ "epoch": 1.0551558752997603,
1349
+ "grad_norm": 12.6431049483951,
1350
+ "learning_rate": 3.6028431808085294e-07,
1351
+ "logits/chosen": -1.5078125,
1352
+ "logits/rejected": -1.46875,
1353
+ "logps/chosen": -203.0,
1354
+ "logps/rejected": -221.0,
1355
+ "loss": 0.2871,
1356
+ "rewards/accuracies": 0.949999988079071,
1357
+ "rewards/chosen": -1.7890625,
1358
+ "rewards/margins": 1.5859375,
1359
+ "rewards/rejected": -3.375,
1360
+ "step": 880
1361
+ },
1362
+ {
1363
+ "epoch": 1.0671462829736211,
1364
+ "grad_norm": 14.781119124887237,
1365
+ "learning_rate": 3.5806308307418926e-07,
1366
+ "logits/chosen": -1.5078125,
1367
+ "logits/rejected": -1.5078125,
1368
+ "logps/chosen": -200.0,
1369
+ "logps/rejected": -222.0,
1370
+ "loss": 0.2536,
1371
+ "rewards/accuracies": 0.949999988079071,
1372
+ "rewards/chosen": -1.90625,
1373
+ "rewards/margins": 1.828125,
1374
+ "rewards/rejected": -3.71875,
1375
+ "step": 890
1376
+ },
1377
+ {
1378
+ "epoch": 1.079136690647482,
1379
+ "grad_norm": 13.675786724224842,
1380
+ "learning_rate": 3.5584184806752554e-07,
1381
+ "logits/chosen": -1.46875,
1382
+ "logits/rejected": -1.453125,
1383
+ "logps/chosen": -191.0,
1384
+ "logps/rejected": -216.0,
1385
+ "loss": 0.2305,
1386
+ "rewards/accuracies": 0.949999988079071,
1387
+ "rewards/chosen": -1.84375,
1388
+ "rewards/margins": 2.03125,
1389
+ "rewards/rejected": -3.875,
1390
+ "step": 900
1391
+ },
1392
+ {
1393
+ "epoch": 1.091127098321343,
1394
+ "grad_norm": 15.133495835048825,
1395
+ "learning_rate": 3.536206130608618e-07,
1396
+ "logits/chosen": -1.4609375,
1397
+ "logits/rejected": -1.484375,
1398
+ "logps/chosen": -191.0,
1399
+ "logps/rejected": -208.0,
1400
+ "loss": 0.2446,
1401
+ "rewards/accuracies": 0.9125000238418579,
1402
+ "rewards/chosen": -1.7734375,
1403
+ "rewards/margins": 2.015625,
1404
+ "rewards/rejected": -3.796875,
1405
+ "step": 910
1406
+ },
1407
+ {
1408
+ "epoch": 1.1031175059952039,
1409
+ "grad_norm": 17.223627578304455,
1410
+ "learning_rate": 3.513993780541981e-07,
1411
+ "logits/chosen": -1.453125,
1412
+ "logits/rejected": -1.4921875,
1413
+ "logps/chosen": -172.0,
1414
+ "logps/rejected": -210.0,
1415
+ "loss": 0.2433,
1416
+ "rewards/accuracies": 0.9750000238418579,
1417
+ "rewards/chosen": -1.796875,
1418
+ "rewards/margins": 1.9765625,
1419
+ "rewards/rejected": -3.765625,
1420
+ "step": 920
1421
+ },
1422
+ {
1423
+ "epoch": 1.1151079136690647,
1424
+ "grad_norm": 12.606788377121838,
1425
+ "learning_rate": 3.491781430475344e-07,
1426
+ "logits/chosen": -1.5390625,
1427
+ "logits/rejected": -1.5546875,
1428
+ "logps/chosen": -192.0,
1429
+ "logps/rejected": -231.0,
1430
+ "loss": 0.2498,
1431
+ "rewards/accuracies": 0.887499988079071,
1432
+ "rewards/chosen": -1.78125,
1433
+ "rewards/margins": 1.796875,
1434
+ "rewards/rejected": -3.578125,
1435
+ "step": 930
1436
+ },
1437
+ {
1438
+ "epoch": 1.1270983213429258,
1439
+ "grad_norm": 20.113412627019386,
1440
+ "learning_rate": 3.469569080408707e-07,
1441
+ "logits/chosen": -1.4453125,
1442
+ "logits/rejected": -1.4375,
1443
+ "logps/chosen": -185.0,
1444
+ "logps/rejected": -204.0,
1445
+ "loss": 0.258,
1446
+ "rewards/accuracies": 0.8999999761581421,
1447
+ "rewards/chosen": -1.5234375,
1448
+ "rewards/margins": 2.125,
1449
+ "rewards/rejected": -3.640625,
1450
+ "step": 940
1451
+ },
1452
+ {
1453
+ "epoch": 1.1390887290167866,
1454
+ "grad_norm": 22.896453855913,
1455
+ "learning_rate": 3.4473567303420703e-07,
1456
+ "logits/chosen": -1.53125,
1457
+ "logits/rejected": -1.515625,
1458
+ "logps/chosen": -200.0,
1459
+ "logps/rejected": -217.0,
1460
+ "loss": 0.2356,
1461
+ "rewards/accuracies": 0.9624999761581421,
1462
+ "rewards/chosen": -2.0,
1463
+ "rewards/margins": 2.09375,
1464
+ "rewards/rejected": -4.09375,
1465
+ "step": 950
1466
+ },
1467
+ {
1468
+ "epoch": 1.1510791366906474,
1469
+ "grad_norm": 17.597413512159072,
1470
+ "learning_rate": 3.425144380275433e-07,
1471
+ "logits/chosen": -1.484375,
1472
+ "logits/rejected": -1.53125,
1473
+ "logps/chosen": -213.0,
1474
+ "logps/rejected": -235.0,
1475
+ "loss": 0.2713,
1476
+ "rewards/accuracies": 0.925000011920929,
1477
+ "rewards/chosen": -2.359375,
1478
+ "rewards/margins": 1.8984375,
1479
+ "rewards/rejected": -4.25,
1480
+ "step": 960
1481
+ },
1482
+ {
1483
+ "epoch": 1.1630695443645085,
1484
+ "grad_norm": 14.323268951448473,
1485
+ "learning_rate": 3.402932030208796e-07,
1486
+ "logits/chosen": -1.515625,
1487
+ "logits/rejected": -1.5,
1488
+ "logps/chosen": -210.0,
1489
+ "logps/rejected": -235.0,
1490
+ "loss": 0.2388,
1491
+ "rewards/accuracies": 0.9750000238418579,
1492
+ "rewards/chosen": -2.40625,
1493
+ "rewards/margins": 2.171875,
1494
+ "rewards/rejected": -4.5625,
1495
+ "step": 970
1496
+ },
1497
+ {
1498
+ "epoch": 1.1750599520383693,
1499
+ "grad_norm": 13.502480458522491,
1500
+ "learning_rate": 3.380719680142159e-07,
1501
+ "logits/chosen": -1.515625,
1502
+ "logits/rejected": -1.546875,
1503
+ "logps/chosen": -192.0,
1504
+ "logps/rejected": -209.0,
1505
+ "loss": 0.2241,
1506
+ "rewards/accuracies": 0.9125000238418579,
1507
+ "rewards/chosen": -1.6953125,
1508
+ "rewards/margins": 2.078125,
1509
+ "rewards/rejected": -3.765625,
1510
+ "step": 980
1511
+ },
1512
+ {
1513
+ "epoch": 1.1870503597122302,
1514
+ "grad_norm": 12.261596643016196,
1515
+ "learning_rate": 3.358507330075522e-07,
1516
+ "logits/chosen": -1.5546875,
1517
+ "logits/rejected": -1.5390625,
1518
+ "logps/chosen": -199.0,
1519
+ "logps/rejected": -219.0,
1520
+ "loss": 0.2554,
1521
+ "rewards/accuracies": 0.925000011920929,
1522
+ "rewards/chosen": -2.140625,
1523
+ "rewards/margins": 1.953125,
1524
+ "rewards/rejected": -4.09375,
1525
+ "step": 990
1526
+ },
1527
+ {
1528
+ "epoch": 1.1990407673860912,
1529
+ "grad_norm": 11.618624189476918,
1530
+ "learning_rate": 3.3362949800088847e-07,
1531
+ "logits/chosen": -1.5078125,
1532
+ "logits/rejected": -1.4921875,
1533
+ "logps/chosen": -204.0,
1534
+ "logps/rejected": -222.0,
1535
+ "loss": 0.2051,
1536
+ "rewards/accuracies": 0.949999988079071,
1537
+ "rewards/chosen": -1.71875,
1538
+ "rewards/margins": 2.171875,
1539
+ "rewards/rejected": -3.890625,
1540
+ "step": 1000
1541
+ },
1542
+ {
1543
+ "epoch": 1.211031175059952,
1544
+ "grad_norm": 22.814769403029445,
1545
+ "learning_rate": 3.3140826299422474e-07,
1546
+ "logits/chosen": -1.546875,
1547
+ "logits/rejected": -1.53125,
1548
+ "logps/chosen": -201.0,
1549
+ "logps/rejected": -229.0,
1550
+ "loss": 0.2417,
1551
+ "rewards/accuracies": 0.9375,
1552
+ "rewards/chosen": -2.015625,
1553
+ "rewards/margins": 2.25,
1554
+ "rewards/rejected": -4.28125,
1555
+ "step": 1010
1556
+ },
1557
+ {
1558
+ "epoch": 1.223021582733813,
1559
+ "grad_norm": 23.105005233123116,
1560
+ "learning_rate": 3.291870279875611e-07,
1561
+ "logits/chosen": -1.546875,
1562
+ "logits/rejected": -1.578125,
1563
+ "logps/chosen": -209.0,
1564
+ "logps/rejected": -235.0,
1565
+ "loss": 0.2123,
1566
+ "rewards/accuracies": 0.949999988079071,
1567
+ "rewards/chosen": -2.4375,
1568
+ "rewards/margins": 2.21875,
1569
+ "rewards/rejected": -4.65625,
1570
+ "step": 1020
1571
+ },
1572
+ {
1573
+ "epoch": 1.235011990407674,
1574
+ "grad_norm": 16.59163456560542,
1575
+ "learning_rate": 3.269657929808974e-07,
1576
+ "logits/chosen": -1.546875,
1577
+ "logits/rejected": -1.515625,
1578
+ "logps/chosen": -206.0,
1579
+ "logps/rejected": -229.0,
1580
+ "loss": 0.2316,
1581
+ "rewards/accuracies": 0.925000011920929,
1582
+ "rewards/chosen": -2.3125,
1583
+ "rewards/margins": 1.953125,
1584
+ "rewards/rejected": -4.28125,
1585
+ "step": 1030
1586
+ },
1587
+ {
1588
+ "epoch": 1.2470023980815348,
1589
+ "grad_norm": 16.97136930640841,
1590
+ "learning_rate": 3.247445579742337e-07,
1591
+ "logits/chosen": -1.4921875,
1592
+ "logits/rejected": -1.4609375,
1593
+ "logps/chosen": -196.0,
1594
+ "logps/rejected": -228.0,
1595
+ "loss": 0.233,
1596
+ "rewards/accuracies": 0.949999988079071,
1597
+ "rewards/chosen": -2.640625,
1598
+ "rewards/margins": 2.078125,
1599
+ "rewards/rejected": -4.71875,
1600
+ "step": 1040
1601
+ },
1602
+ {
1603
+ "epoch": 1.2589928057553956,
1604
+ "grad_norm": 11.658846274024977,
1605
+ "learning_rate": 3.2252332296756996e-07,
1606
+ "logits/chosen": -1.484375,
1607
+ "logits/rejected": -1.5546875,
1608
+ "logps/chosen": -191.0,
1609
+ "logps/rejected": -217.0,
1610
+ "loss": 0.2061,
1611
+ "rewards/accuracies": 0.925000011920929,
1612
+ "rewards/chosen": -2.078125,
1613
+ "rewards/margins": 2.265625,
1614
+ "rewards/rejected": -4.34375,
1615
+ "step": 1050
1616
+ },
1617
+ {
1618
+ "epoch": 1.2709832134292567,
1619
+ "grad_norm": 17.907683758231656,
1620
+ "learning_rate": 3.2030208796090623e-07,
1621
+ "logits/chosen": -1.5546875,
1622
+ "logits/rejected": -1.515625,
1623
+ "logps/chosen": -199.0,
1624
+ "logps/rejected": -218.0,
1625
+ "loss": 0.2564,
1626
+ "rewards/accuracies": 0.9375,
1627
+ "rewards/chosen": -2.125,
1628
+ "rewards/margins": 2.46875,
1629
+ "rewards/rejected": -4.59375,
1630
+ "step": 1060
1631
+ },
1632
+ {
1633
+ "epoch": 1.2829736211031175,
1634
+ "grad_norm": 13.891330543214426,
1635
+ "learning_rate": 3.180808529542425e-07,
1636
+ "logits/chosen": -1.578125,
1637
+ "logits/rejected": -1.53125,
1638
+ "logps/chosen": -220.0,
1639
+ "logps/rejected": -236.0,
1640
+ "loss": 0.2253,
1641
+ "rewards/accuracies": 0.949999988079071,
1642
+ "rewards/chosen": -2.5625,
1643
+ "rewards/margins": 2.078125,
1644
+ "rewards/rejected": -4.625,
1645
+ "step": 1070
1646
+ },
1647
+ {
1648
+ "epoch": 1.2949640287769784,
1649
+ "grad_norm": 12.255143244628048,
1650
+ "learning_rate": 3.1585961794757884e-07,
1651
+ "logits/chosen": -1.5703125,
1652
+ "logits/rejected": -1.5546875,
1653
+ "logps/chosen": -213.0,
1654
+ "logps/rejected": -220.0,
1655
+ "loss": 0.1986,
1656
+ "rewards/accuracies": 0.949999988079071,
1657
+ "rewards/chosen": -2.15625,
1658
+ "rewards/margins": 2.25,
1659
+ "rewards/rejected": -4.40625,
1660
+ "step": 1080
1661
+ },
1662
+ {
1663
+ "epoch": 1.3069544364508392,
1664
+ "grad_norm": 16.986661579673758,
1665
+ "learning_rate": 3.1363838294091517e-07,
1666
+ "logits/chosen": -1.5546875,
1667
+ "logits/rejected": -1.53125,
1668
+ "logps/chosen": -201.0,
1669
+ "logps/rejected": -215.0,
1670
+ "loss": 0.1726,
1671
+ "rewards/accuracies": 0.9750000238418579,
1672
+ "rewards/chosen": -1.5078125,
1673
+ "rewards/margins": 2.515625,
1674
+ "rewards/rejected": -4.0,
1675
+ "step": 1090
1676
+ },
1677
+ {
1678
+ "epoch": 1.3189448441247003,
1679
+ "grad_norm": 11.362437294228451,
1680
+ "learning_rate": 3.1141714793425145e-07,
1681
+ "logits/chosen": -1.5546875,
1682
+ "logits/rejected": -1.5546875,
1683
+ "logps/chosen": -217.0,
1684
+ "logps/rejected": -240.0,
1685
+ "loss": 0.1936,
1686
+ "rewards/accuracies": 0.9750000238418579,
1687
+ "rewards/chosen": -2.265625,
1688
+ "rewards/margins": 2.34375,
1689
+ "rewards/rejected": -4.625,
1690
+ "step": 1100
1691
+ },
1692
+ {
1693
+ "epoch": 1.330935251798561,
1694
+ "grad_norm": 18.06174034251652,
1695
+ "learning_rate": 3.091959129275877e-07,
1696
+ "logits/chosen": -1.5,
1697
+ "logits/rejected": -1.4921875,
1698
+ "logps/chosen": -184.0,
1699
+ "logps/rejected": -205.0,
1700
+ "loss": 0.2132,
1701
+ "rewards/accuracies": 0.949999988079071,
1702
+ "rewards/chosen": -1.78125,
1703
+ "rewards/margins": 2.40625,
1704
+ "rewards/rejected": -4.1875,
1705
+ "step": 1110
1706
+ },
1707
+ {
1708
+ "epoch": 1.3429256594724222,
1709
+ "grad_norm": 21.343977198752015,
1710
+ "learning_rate": 3.06974677920924e-07,
1711
+ "logits/chosen": -1.5625,
1712
+ "logits/rejected": -1.5078125,
1713
+ "logps/chosen": -221.0,
1714
+ "logps/rejected": -240.0,
1715
+ "loss": 0.2055,
1716
+ "rewards/accuracies": 0.9125000238418579,
1717
+ "rewards/chosen": -2.90625,
1718
+ "rewards/margins": 2.171875,
1719
+ "rewards/rejected": -5.09375,
1720
+ "step": 1120
1721
+ },
1722
+ {
1723
+ "epoch": 1.354916067146283,
1724
+ "grad_norm": 19.364261897999658,
1725
+ "learning_rate": 3.0475344291426033e-07,
1726
+ "logits/chosen": -1.546875,
1727
+ "logits/rejected": -1.515625,
1728
+ "logps/chosen": -196.0,
1729
+ "logps/rejected": -230.0,
1730
+ "loss": 0.1925,
1731
+ "rewards/accuracies": 0.9750000238418579,
1732
+ "rewards/chosen": -2.140625,
1733
+ "rewards/margins": 2.90625,
1734
+ "rewards/rejected": -5.03125,
1735
+ "step": 1130
1736
+ },
1737
+ {
1738
+ "epoch": 1.3669064748201438,
1739
+ "grad_norm": 12.49182324344103,
1740
+ "learning_rate": 3.025322079075966e-07,
1741
+ "logits/chosen": -1.484375,
1742
+ "logits/rejected": -1.484375,
1743
+ "logps/chosen": -197.0,
1744
+ "logps/rejected": -239.0,
1745
+ "loss": 0.2206,
1746
+ "rewards/accuracies": 0.9375,
1747
+ "rewards/chosen": -2.453125,
1748
+ "rewards/margins": 2.65625,
1749
+ "rewards/rejected": -5.09375,
1750
+ "step": 1140
1751
+ },
1752
+ {
1753
+ "epoch": 1.3788968824940047,
1754
+ "grad_norm": 12.604128787864076,
1755
+ "learning_rate": 3.003109729009329e-07,
1756
+ "logits/chosen": -1.5078125,
1757
+ "logits/rejected": -1.5234375,
1758
+ "logps/chosen": -197.0,
1759
+ "logps/rejected": -235.0,
1760
+ "loss": 0.2102,
1761
+ "rewards/accuracies": 0.9375,
1762
+ "rewards/chosen": -2.671875,
1763
+ "rewards/margins": 2.1875,
1764
+ "rewards/rejected": -4.875,
1765
+ "step": 1150
1766
+ },
1767
+ {
1768
+ "epoch": 1.3908872901678657,
1769
+ "grad_norm": 16.0572765821895,
1770
+ "learning_rate": 2.980897378942692e-07,
1771
+ "logits/chosen": -1.5546875,
1772
+ "logits/rejected": -1.546875,
1773
+ "logps/chosen": -207.0,
1774
+ "logps/rejected": -226.0,
1775
+ "loss": 0.1899,
1776
+ "rewards/accuracies": 0.925000011920929,
1777
+ "rewards/chosen": -2.6875,
1778
+ "rewards/margins": 2.171875,
1779
+ "rewards/rejected": -4.84375,
1780
+ "step": 1160
1781
+ },
1782
+ {
1783
+ "epoch": 1.4028776978417266,
1784
+ "grad_norm": 14.62570261847443,
1785
+ "learning_rate": 2.958685028876055e-07,
1786
+ "logits/chosen": -1.5703125,
1787
+ "logits/rejected": -1.546875,
1788
+ "logps/chosen": -204.0,
1789
+ "logps/rejected": -223.0,
1790
+ "loss": 0.2256,
1791
+ "rewards/accuracies": 0.9125000238418579,
1792
+ "rewards/chosen": -2.1875,
1793
+ "rewards/margins": 2.28125,
1794
+ "rewards/rejected": -4.46875,
1795
+ "step": 1170
1796
+ },
1797
+ {
1798
+ "epoch": 1.4148681055155876,
1799
+ "grad_norm": 16.69089195431221,
1800
+ "learning_rate": 2.936472678809418e-07,
1801
+ "logits/chosen": -1.5703125,
1802
+ "logits/rejected": -1.5703125,
1803
+ "logps/chosen": -215.0,
1804
+ "logps/rejected": -234.0,
1805
+ "loss": 0.2164,
1806
+ "rewards/accuracies": 0.925000011920929,
1807
+ "rewards/chosen": -2.625,
1808
+ "rewards/margins": 2.265625,
1809
+ "rewards/rejected": -4.90625,
1810
+ "step": 1180
1811
+ },
1812
+ {
1813
+ "epoch": 1.4268585131894485,
1814
+ "grad_norm": 14.208012618540776,
1815
+ "learning_rate": 2.914260328742781e-07,
1816
+ "logits/chosen": -1.5625,
1817
+ "logits/rejected": -1.546875,
1818
+ "logps/chosen": -206.0,
1819
+ "logps/rejected": -226.0,
1820
+ "loss": 0.1728,
1821
+ "rewards/accuracies": 0.9750000238418579,
1822
+ "rewards/chosen": -2.90625,
1823
+ "rewards/margins": 2.21875,
1824
+ "rewards/rejected": -5.125,
1825
+ "step": 1190
1826
+ },
1827
+ {
1828
+ "epoch": 1.4388489208633093,
1829
+ "grad_norm": 19.814051144030756,
1830
+ "learning_rate": 2.8920479786761437e-07,
1831
+ "logits/chosen": -1.59375,
1832
+ "logits/rejected": -1.59375,
1833
+ "logps/chosen": -206.0,
1834
+ "logps/rejected": -232.0,
1835
+ "loss": 0.2096,
1836
+ "rewards/accuracies": 0.9624999761581421,
1837
+ "rewards/chosen": -2.484375,
1838
+ "rewards/margins": 2.40625,
1839
+ "rewards/rejected": -4.875,
1840
+ "step": 1200
1841
+ },
1842
+ {
1843
+ "epoch": 1.4508393285371701,
1844
+ "grad_norm": 16.967462896165113,
1845
+ "learning_rate": 2.8698356286095065e-07,
1846
+ "logits/chosen": -1.59375,
1847
+ "logits/rejected": -1.5703125,
1848
+ "logps/chosen": -220.0,
1849
+ "logps/rejected": -239.0,
1850
+ "loss": 0.1481,
1851
+ "rewards/accuracies": 0.9750000238418579,
1852
+ "rewards/chosen": -2.5625,
1853
+ "rewards/margins": 2.796875,
1854
+ "rewards/rejected": -5.34375,
1855
+ "step": 1210
1856
+ },
1857
+ {
1858
+ "epoch": 1.4628297362110312,
1859
+ "grad_norm": 15.206385611452236,
1860
+ "learning_rate": 2.847623278542869e-07,
1861
+ "logits/chosen": -1.609375,
1862
+ "logits/rejected": -1.609375,
1863
+ "logps/chosen": -208.0,
1864
+ "logps/rejected": -229.0,
1865
+ "loss": 0.1846,
1866
+ "rewards/accuracies": 0.949999988079071,
1867
+ "rewards/chosen": -2.5625,
1868
+ "rewards/margins": 2.375,
1869
+ "rewards/rejected": -4.9375,
1870
+ "step": 1220
1871
+ },
1872
+ {
1873
+ "epoch": 1.474820143884892,
1874
+ "grad_norm": 17.869462519073895,
1875
+ "learning_rate": 2.825410928476233e-07,
1876
+ "logits/chosen": -1.515625,
1877
+ "logits/rejected": -1.53125,
1878
+ "logps/chosen": -187.0,
1879
+ "logps/rejected": -229.0,
1880
+ "loss": 0.1821,
1881
+ "rewards/accuracies": 0.9375,
1882
+ "rewards/chosen": -2.703125,
1883
+ "rewards/margins": 2.578125,
1884
+ "rewards/rejected": -5.28125,
1885
+ "step": 1230
1886
+ },
1887
+ {
1888
+ "epoch": 1.486810551558753,
1889
+ "grad_norm": 11.533712996176597,
1890
+ "learning_rate": 2.803198578409596e-07,
1891
+ "logits/chosen": -1.609375,
1892
+ "logits/rejected": -1.5625,
1893
+ "logps/chosen": -204.0,
1894
+ "logps/rejected": -228.0,
1895
+ "loss": 0.2133,
1896
+ "rewards/accuracies": 0.925000011920929,
1897
+ "rewards/chosen": -2.703125,
1898
+ "rewards/margins": 2.515625,
1899
+ "rewards/rejected": -5.21875,
1900
+ "step": 1240
1901
+ },
1902
+ {
1903
+ "epoch": 1.498800959232614,
1904
+ "grad_norm": 11.144943270392623,
1905
+ "learning_rate": 2.7809862283429586e-07,
1906
+ "logits/chosen": -1.5859375,
1907
+ "logits/rejected": -1.5703125,
1908
+ "logps/chosen": -193.0,
1909
+ "logps/rejected": -239.0,
1910
+ "loss": 0.1494,
1911
+ "rewards/accuracies": 0.987500011920929,
1912
+ "rewards/chosen": -2.3125,
1913
+ "rewards/margins": 3.078125,
1914
+ "rewards/rejected": -5.375,
1915
+ "step": 1250
1916
+ },
1917
+ {
1918
+ "epoch": 1.5107913669064748,
1919
+ "grad_norm": 15.93725977415015,
1920
+ "learning_rate": 2.7587738782763214e-07,
1921
+ "logits/chosen": -1.5625,
1922
+ "logits/rejected": -1.59375,
1923
+ "logps/chosen": -196.0,
1924
+ "logps/rejected": -236.0,
1925
+ "loss": 0.204,
1926
+ "rewards/accuracies": 0.8999999761581421,
1927
+ "rewards/chosen": -3.53125,
1928
+ "rewards/margins": 2.546875,
1929
+ "rewards/rejected": -6.0625,
1930
+ "step": 1260
1931
+ },
1932
+ {
1933
+ "epoch": 1.5227817745803356,
1934
+ "grad_norm": 23.59190890231536,
1935
+ "learning_rate": 2.736561528209684e-07,
1936
+ "logits/chosen": -1.640625,
1937
+ "logits/rejected": -1.640625,
1938
+ "logps/chosen": -210.0,
1939
+ "logps/rejected": -244.0,
1940
+ "loss": 0.1853,
1941
+ "rewards/accuracies": 0.9624999761581421,
1942
+ "rewards/chosen": -3.09375,
1943
+ "rewards/margins": 2.625,
1944
+ "rewards/rejected": -5.71875,
1945
+ "step": 1270
1946
+ },
1947
+ {
1948
+ "epoch": 1.5347721822541966,
1949
+ "grad_norm": 22.364117581141223,
1950
+ "learning_rate": 2.7143491781430474e-07,
1951
+ "logits/chosen": -1.5078125,
1952
+ "logits/rejected": -1.5390625,
1953
+ "logps/chosen": -189.0,
1954
+ "logps/rejected": -229.0,
1955
+ "loss": 0.1952,
1956
+ "rewards/accuracies": 0.9375,
1957
+ "rewards/chosen": -2.78125,
1958
+ "rewards/margins": 2.296875,
1959
+ "rewards/rejected": -5.09375,
1960
+ "step": 1280
1961
+ },
1962
+ {
1963
+ "epoch": 1.5467625899280577,
1964
+ "grad_norm": 11.715030229741336,
1965
+ "learning_rate": 2.692136828076411e-07,
1966
+ "logits/chosen": -1.578125,
1967
+ "logits/rejected": -1.5546875,
1968
+ "logps/chosen": -210.0,
1969
+ "logps/rejected": -234.0,
1970
+ "loss": 0.1717,
1971
+ "rewards/accuracies": 0.987500011920929,
1972
+ "rewards/chosen": -2.96875,
1973
+ "rewards/margins": 2.609375,
1974
+ "rewards/rejected": -5.5625,
1975
+ "step": 1290
1976
+ },
1977
+ {
1978
+ "epoch": 1.5587529976019185,
1979
+ "grad_norm": 16.74676425409118,
1980
+ "learning_rate": 2.6699244780097735e-07,
1981
+ "logits/chosen": -1.609375,
1982
+ "logits/rejected": -1.59375,
1983
+ "logps/chosen": -206.0,
1984
+ "logps/rejected": -231.0,
1985
+ "loss": 0.1883,
1986
+ "rewards/accuracies": 0.9624999761581421,
1987
+ "rewards/chosen": -2.875,
1988
+ "rewards/margins": 2.515625,
1989
+ "rewards/rejected": -5.375,
1990
+ "step": 1300
1991
+ },
1992
+ {
1993
+ "epoch": 1.5707434052757794,
1994
+ "grad_norm": 16.31256275621072,
1995
+ "learning_rate": 2.6477121279431363e-07,
1996
+ "logits/chosen": -1.59375,
1997
+ "logits/rejected": -1.5859375,
1998
+ "logps/chosen": -194.0,
1999
+ "logps/rejected": -226.0,
2000
+ "loss": 0.2258,
2001
+ "rewards/accuracies": 0.9375,
2002
+ "rewards/chosen": -2.5,
2003
+ "rewards/margins": 2.4375,
2004
+ "rewards/rejected": -4.9375,
2005
+ "step": 1310
2006
+ },
2007
+ {
2008
+ "epoch": 1.5827338129496402,
2009
+ "grad_norm": 15.174270462866287,
2010
+ "learning_rate": 2.625499777876499e-07,
2011
+ "logits/chosen": -1.59375,
2012
+ "logits/rejected": -1.5703125,
2013
+ "logps/chosen": -204.0,
2014
+ "logps/rejected": -229.0,
2015
+ "loss": 0.1577,
2016
+ "rewards/accuracies": 0.925000011920929,
2017
+ "rewards/chosen": -3.375,
2018
+ "rewards/margins": 2.421875,
2019
+ "rewards/rejected": -5.78125,
2020
+ "step": 1320
2021
+ },
2022
+ {
2023
+ "epoch": 1.594724220623501,
2024
+ "grad_norm": 20.949271053556263,
2025
+ "learning_rate": 2.6032874278098623e-07,
2026
+ "logits/chosen": -1.625,
2027
+ "logits/rejected": -1.53125,
2028
+ "logps/chosen": -218.0,
2029
+ "logps/rejected": -234.0,
2030
+ "loss": 0.1542,
2031
+ "rewards/accuracies": 0.987500011920929,
2032
+ "rewards/chosen": -2.46875,
2033
+ "rewards/margins": 3.078125,
2034
+ "rewards/rejected": -5.5625,
2035
+ "step": 1330
2036
+ },
2037
+ {
2038
+ "epoch": 1.6067146282973621,
2039
+ "grad_norm": 21.582166006491406,
2040
+ "learning_rate": 2.581075077743225e-07,
2041
+ "logits/chosen": -1.5625,
2042
+ "logits/rejected": -1.484375,
2043
+ "logps/chosen": -215.0,
2044
+ "logps/rejected": -231.0,
2045
+ "loss": 0.1642,
2046
+ "rewards/accuracies": 0.9750000238418579,
2047
+ "rewards/chosen": -3.0625,
2048
+ "rewards/margins": 2.703125,
2049
+ "rewards/rejected": -5.75,
2050
+ "step": 1340
2051
+ },
2052
+ {
2053
+ "epoch": 1.6187050359712232,
2054
+ "grad_norm": 12.490052192808845,
2055
+ "learning_rate": 2.558862727676588e-07,
2056
+ "logits/chosen": -1.625,
2057
+ "logits/rejected": -1.546875,
2058
+ "logps/chosen": -211.0,
2059
+ "logps/rejected": -227.0,
2060
+ "loss": 0.1549,
2061
+ "rewards/accuracies": 1.0,
2062
+ "rewards/chosen": -2.71875,
2063
+ "rewards/margins": 2.921875,
2064
+ "rewards/rejected": -5.625,
2065
+ "step": 1350
2066
+ },
2067
+ {
2068
+ "epoch": 1.630695443645084,
2069
+ "grad_norm": 14.27659020234575,
2070
+ "learning_rate": 2.536650377609951e-07,
2071
+ "logits/chosen": -1.6484375,
2072
+ "logits/rejected": -1.625,
2073
+ "logps/chosen": -208.0,
2074
+ "logps/rejected": -230.0,
2075
+ "loss": 0.1123,
2076
+ "rewards/accuracies": 0.9750000238418579,
2077
+ "rewards/chosen": -2.515625,
2078
+ "rewards/margins": 3.0625,
2079
+ "rewards/rejected": -5.5625,
2080
+ "step": 1360
2081
+ },
2082
+ {
2083
+ "epoch": 1.6426858513189448,
2084
+ "grad_norm": 14.433767508436462,
2085
+ "learning_rate": 2.514438027543314e-07,
2086
+ "logits/chosen": -1.6015625,
2087
+ "logits/rejected": -1.5625,
2088
+ "logps/chosen": -206.0,
2089
+ "logps/rejected": -235.0,
2090
+ "loss": 0.1312,
2091
+ "rewards/accuracies": 0.987500011920929,
2092
+ "rewards/chosen": -2.359375,
2093
+ "rewards/margins": 3.359375,
2094
+ "rewards/rejected": -5.71875,
2095
+ "step": 1370
2096
+ },
2097
+ {
2098
+ "epoch": 1.6546762589928057,
2099
+ "grad_norm": 13.424589730956683,
2100
+ "learning_rate": 2.492225677476677e-07,
2101
+ "logits/chosen": -1.609375,
2102
+ "logits/rejected": -1.6171875,
2103
+ "logps/chosen": -198.0,
2104
+ "logps/rejected": -242.0,
2105
+ "loss": 0.1744,
2106
+ "rewards/accuracies": 0.925000011920929,
2107
+ "rewards/chosen": -2.609375,
2108
+ "rewards/margins": 2.71875,
2109
+ "rewards/rejected": -5.34375,
2110
+ "step": 1380
2111
+ },
2112
+ {
2113
+ "epoch": 1.6666666666666665,
2114
+ "grad_norm": 23.17900601362576,
2115
+ "learning_rate": 2.47001332741004e-07,
2116
+ "logits/chosen": -1.625,
2117
+ "logits/rejected": -1.5703125,
2118
+ "logps/chosen": -203.0,
2119
+ "logps/rejected": -217.0,
2120
+ "loss": 0.1801,
2121
+ "rewards/accuracies": 0.949999988079071,
2122
+ "rewards/chosen": -2.109375,
2123
+ "rewards/margins": 2.8125,
2124
+ "rewards/rejected": -4.90625,
2125
+ "step": 1390
2126
+ },
2127
+ {
2128
+ "epoch": 1.6786570743405276,
2129
+ "grad_norm": 15.035036612326222,
2130
+ "learning_rate": 2.447800977343403e-07,
2131
+ "logits/chosen": -1.5859375,
2132
+ "logits/rejected": -1.5625,
2133
+ "logps/chosen": -195.0,
2134
+ "logps/rejected": -226.0,
2135
+ "loss": 0.1659,
2136
+ "rewards/accuracies": 0.9125000238418579,
2137
+ "rewards/chosen": -2.78125,
2138
+ "rewards/margins": 2.796875,
2139
+ "rewards/rejected": -5.5625,
2140
+ "step": 1400
2141
+ },
2142
+ {
2143
+ "epoch": 1.6906474820143886,
2144
+ "grad_norm": 23.256101786444912,
2145
+ "learning_rate": 2.425588627276766e-07,
2146
+ "logits/chosen": -1.59375,
2147
+ "logits/rejected": -1.5859375,
2148
+ "logps/chosen": -199.0,
2149
+ "logps/rejected": -238.0,
2150
+ "loss": 0.15,
2151
+ "rewards/accuracies": 0.9750000238418579,
2152
+ "rewards/chosen": -2.734375,
2153
+ "rewards/margins": 3.140625,
2154
+ "rewards/rejected": -5.875,
2155
+ "step": 1410
2156
+ },
2157
+ {
2158
+ "epoch": 1.7026378896882495,
2159
+ "grad_norm": 16.44114210272047,
2160
+ "learning_rate": 2.403376277210129e-07,
2161
+ "logits/chosen": -1.5859375,
2162
+ "logits/rejected": -1.578125,
2163
+ "logps/chosen": -207.0,
2164
+ "logps/rejected": -242.0,
2165
+ "loss": 0.1413,
2166
+ "rewards/accuracies": 0.925000011920929,
2167
+ "rewards/chosen": -3.265625,
2168
+ "rewards/margins": 2.890625,
2169
+ "rewards/rejected": -6.125,
2170
+ "step": 1420
2171
+ },
2172
+ {
2173
+ "epoch": 1.7146282973621103,
2174
+ "grad_norm": 19.104299358854444,
2175
+ "learning_rate": 2.3811639271434916e-07,
2176
+ "logits/chosen": -1.65625,
2177
+ "logits/rejected": -1.5390625,
2178
+ "logps/chosen": -216.0,
2179
+ "logps/rejected": -240.0,
2180
+ "loss": 0.1638,
2181
+ "rewards/accuracies": 0.9375,
2182
+ "rewards/chosen": -3.265625,
2183
+ "rewards/margins": 2.875,
2184
+ "rewards/rejected": -6.15625,
2185
+ "step": 1430
2186
+ },
2187
+ {
2188
+ "epoch": 1.7266187050359711,
2189
+ "grad_norm": 8.57838400028329,
2190
+ "learning_rate": 2.3589515770768546e-07,
2191
+ "logits/chosen": -1.59375,
2192
+ "logits/rejected": -1.5546875,
2193
+ "logps/chosen": -221.0,
2194
+ "logps/rejected": -236.0,
2195
+ "loss": 0.1491,
2196
+ "rewards/accuracies": 0.9375,
2197
+ "rewards/chosen": -3.09375,
2198
+ "rewards/margins": 2.875,
2199
+ "rewards/rejected": -5.96875,
2200
+ "step": 1440
2201
+ },
2202
+ {
2203
+ "epoch": 1.738609112709832,
2204
+ "grad_norm": 12.545882511992975,
2205
+ "learning_rate": 2.3367392270102177e-07,
2206
+ "logits/chosen": -1.6328125,
2207
+ "logits/rejected": -1.6171875,
2208
+ "logps/chosen": -200.0,
2209
+ "logps/rejected": -252.0,
2210
+ "loss": 0.1236,
2211
+ "rewards/accuracies": 0.9750000238418579,
2212
+ "rewards/chosen": -2.328125,
2213
+ "rewards/margins": 3.359375,
2214
+ "rewards/rejected": -5.6875,
2215
+ "step": 1450
2216
+ },
2217
+ {
2218
+ "epoch": 1.750599520383693,
2219
+ "grad_norm": 16.11932503670354,
2220
+ "learning_rate": 2.3145268769435804e-07,
2221
+ "logits/chosen": -1.6328125,
2222
+ "logits/rejected": -1.578125,
2223
+ "logps/chosen": -214.0,
2224
+ "logps/rejected": -242.0,
2225
+ "loss": 0.1705,
2226
+ "rewards/accuracies": 0.949999988079071,
2227
+ "rewards/chosen": -3.078125,
2228
+ "rewards/margins": 3.125,
2229
+ "rewards/rejected": -6.21875,
2230
+ "step": 1460
2231
+ },
2232
+ {
2233
+ "epoch": 1.762589928057554,
2234
+ "grad_norm": 19.767212861451107,
2235
+ "learning_rate": 2.2923145268769435e-07,
2236
+ "logits/chosen": -1.65625,
2237
+ "logits/rejected": -1.6171875,
2238
+ "logps/chosen": -197.0,
2239
+ "logps/rejected": -232.0,
2240
+ "loss": 0.1243,
2241
+ "rewards/accuracies": 0.9750000238418579,
2242
+ "rewards/chosen": -2.96875,
2243
+ "rewards/margins": 3.59375,
2244
+ "rewards/rejected": -6.5625,
2245
+ "step": 1470
2246
+ },
2247
+ {
2248
+ "epoch": 1.774580335731415,
2249
+ "grad_norm": 17.770676013426908,
2250
+ "learning_rate": 2.2701021768103065e-07,
2251
+ "logits/chosen": -1.609375,
2252
+ "logits/rejected": -1.5859375,
2253
+ "logps/chosen": -212.0,
2254
+ "logps/rejected": -244.0,
2255
+ "loss": 0.162,
2256
+ "rewards/accuracies": 0.9125000238418579,
2257
+ "rewards/chosen": -3.5,
2258
+ "rewards/margins": 3.0625,
2259
+ "rewards/rejected": -6.5625,
2260
+ "step": 1480
2261
+ },
2262
+ {
2263
+ "epoch": 1.7865707434052758,
2264
+ "grad_norm": 9.354271003661482,
2265
+ "learning_rate": 2.2478898267436695e-07,
2266
+ "logits/chosen": -1.6328125,
2267
+ "logits/rejected": -1.6328125,
2268
+ "logps/chosen": -207.0,
2269
+ "logps/rejected": -245.0,
2270
+ "loss": 0.1463,
2271
+ "rewards/accuracies": 0.949999988079071,
2272
+ "rewards/chosen": -3.421875,
2273
+ "rewards/margins": 3.109375,
2274
+ "rewards/rejected": -6.53125,
2275
+ "step": 1490
2276
+ },
2277
+ {
2278
+ "epoch": 1.7985611510791366,
2279
+ "grad_norm": 16.23301041511246,
2280
+ "learning_rate": 2.2256774766770323e-07,
2281
+ "logits/chosen": -1.5859375,
2282
+ "logits/rejected": -1.53125,
2283
+ "logps/chosen": -205.0,
2284
+ "logps/rejected": -218.0,
2285
+ "loss": 0.147,
2286
+ "rewards/accuracies": 0.949999988079071,
2287
+ "rewards/chosen": -3.046875,
2288
+ "rewards/margins": 3.0625,
2289
+ "rewards/rejected": -6.125,
2290
+ "step": 1500
2291
+ },
2292
+ {
2293
+ "epoch": 1.8105515587529974,
2294
+ "grad_norm": 10.88674570833714,
2295
+ "learning_rate": 2.2034651266103953e-07,
2296
+ "logits/chosen": -1.59375,
2297
+ "logits/rejected": -1.5546875,
2298
+ "logps/chosen": -186.0,
2299
+ "logps/rejected": -227.0,
2300
+ "loss": 0.1292,
2301
+ "rewards/accuracies": 0.9624999761581421,
2302
+ "rewards/chosen": -2.734375,
2303
+ "rewards/margins": 3.109375,
2304
+ "rewards/rejected": -5.84375,
2305
+ "step": 1510
2306
+ },
2307
+ {
2308
+ "epoch": 1.8225419664268585,
2309
+ "grad_norm": 11.649861361880111,
2310
+ "learning_rate": 2.1812527765437583e-07,
2311
+ "logits/chosen": -1.6328125,
2312
+ "logits/rejected": -1.578125,
2313
+ "logps/chosen": -211.0,
2314
+ "logps/rejected": -235.0,
2315
+ "loss": 0.1467,
2316
+ "rewards/accuracies": 0.9624999761581421,
2317
+ "rewards/chosen": -2.5,
2318
+ "rewards/margins": 3.046875,
2319
+ "rewards/rejected": -5.5625,
2320
+ "step": 1520
2321
+ },
2322
+ {
2323
+ "epoch": 1.8345323741007196,
2324
+ "grad_norm": 17.611539910897193,
2325
+ "learning_rate": 2.159040426477121e-07,
2326
+ "logits/chosen": -1.640625,
2327
+ "logits/rejected": -1.625,
2328
+ "logps/chosen": -216.0,
2329
+ "logps/rejected": -258.0,
2330
+ "loss": 0.1337,
2331
+ "rewards/accuracies": 0.949999988079071,
2332
+ "rewards/chosen": -3.546875,
2333
+ "rewards/margins": 3.0625,
2334
+ "rewards/rejected": -6.59375,
2335
+ "step": 1530
2336
+ },
2337
+ {
2338
+ "epoch": 1.8465227817745804,
2339
+ "grad_norm": 20.32024320505495,
2340
+ "learning_rate": 2.1368280764104841e-07,
2341
+ "logits/chosen": -1.640625,
2342
+ "logits/rejected": -1.6015625,
2343
+ "logps/chosen": -210.0,
2344
+ "logps/rejected": -262.0,
2345
+ "loss": 0.1444,
2346
+ "rewards/accuracies": 0.987500011920929,
2347
+ "rewards/chosen": -3.59375,
2348
+ "rewards/margins": 3.421875,
2349
+ "rewards/rejected": -7.0,
2350
+ "step": 1540
2351
+ },
2352
+ {
2353
+ "epoch": 1.8585131894484412,
2354
+ "grad_norm": 10.61654419093803,
2355
+ "learning_rate": 2.1146157263438472e-07,
2356
+ "logits/chosen": -1.59375,
2357
+ "logits/rejected": -1.609375,
2358
+ "logps/chosen": -208.0,
2359
+ "logps/rejected": -241.0,
2360
+ "loss": 0.1117,
2361
+ "rewards/accuracies": 0.9750000238418579,
2362
+ "rewards/chosen": -3.140625,
2363
+ "rewards/margins": 3.390625,
2364
+ "rewards/rejected": -6.53125,
2365
+ "step": 1550
2366
+ },
2367
+ {
2368
+ "epoch": 1.870503597122302,
2369
+ "grad_norm": 15.023137073497749,
2370
+ "learning_rate": 2.09240337627721e-07,
2371
+ "logits/chosen": -1.6640625,
2372
+ "logits/rejected": -1.59375,
2373
+ "logps/chosen": -216.0,
2374
+ "logps/rejected": -225.0,
2375
+ "loss": 0.1284,
2376
+ "rewards/accuracies": 0.9375,
2377
+ "rewards/chosen": -3.40625,
2378
+ "rewards/margins": 2.96875,
2379
+ "rewards/rejected": -6.375,
2380
+ "step": 1560
2381
+ },
2382
+ {
2383
+ "epoch": 1.882494004796163,
2384
+ "grad_norm": 23.48973810623808,
2385
+ "learning_rate": 2.070191026210573e-07,
2386
+ "logits/chosen": -1.5703125,
2387
+ "logits/rejected": -1.6015625,
2388
+ "logps/chosen": -214.0,
2389
+ "logps/rejected": -239.0,
2390
+ "loss": 0.1468,
2391
+ "rewards/accuracies": 0.9750000238418579,
2392
+ "rewards/chosen": -2.953125,
2393
+ "rewards/margins": 2.96875,
2394
+ "rewards/rejected": -5.9375,
2395
+ "step": 1570
2396
+ },
2397
+ {
2398
+ "epoch": 1.894484412470024,
2399
+ "grad_norm": 9.16497112579326,
2400
+ "learning_rate": 2.047978676143936e-07,
2401
+ "logits/chosen": -1.625,
2402
+ "logits/rejected": -1.578125,
2403
+ "logps/chosen": -220.0,
2404
+ "logps/rejected": -242.0,
2405
+ "loss": 0.1041,
2406
+ "rewards/accuracies": 0.987500011920929,
2407
+ "rewards/chosen": -2.84375,
2408
+ "rewards/margins": 3.453125,
2409
+ "rewards/rejected": -6.28125,
2410
+ "step": 1580
2411
+ },
2412
+ {
2413
+ "epoch": 1.906474820143885,
2414
+ "grad_norm": 11.983080775973534,
2415
+ "learning_rate": 2.025766326077299e-07,
2416
+ "logits/chosen": -1.640625,
2417
+ "logits/rejected": -1.609375,
2418
+ "logps/chosen": -207.0,
2419
+ "logps/rejected": -248.0,
2420
+ "loss": 0.1184,
2421
+ "rewards/accuracies": 0.987500011920929,
2422
+ "rewards/chosen": -2.984375,
2423
+ "rewards/margins": 3.28125,
2424
+ "rewards/rejected": -6.25,
2425
+ "step": 1590
2426
+ },
2427
+ {
2428
+ "epoch": 1.9184652278177459,
2429
+ "grad_norm": 20.978396501783582,
2430
+ "learning_rate": 2.0035539760106618e-07,
2431
+ "logits/chosen": -1.625,
2432
+ "logits/rejected": -1.6328125,
2433
+ "logps/chosen": -212.0,
2434
+ "logps/rejected": -256.0,
2435
+ "loss": 0.1042,
2436
+ "rewards/accuracies": 0.987500011920929,
2437
+ "rewards/chosen": -2.859375,
2438
+ "rewards/margins": 3.53125,
2439
+ "rewards/rejected": -6.40625,
2440
+ "step": 1600
2441
+ },
2442
+ {
2443
+ "epoch": 1.9304556354916067,
2444
+ "grad_norm": 9.307796901849636,
2445
+ "learning_rate": 1.9813416259440246e-07,
2446
+ "logits/chosen": -1.625,
2447
+ "logits/rejected": -1.6015625,
2448
+ "logps/chosen": -209.0,
2449
+ "logps/rejected": -233.0,
2450
+ "loss": 0.1347,
2451
+ "rewards/accuracies": 0.987500011920929,
2452
+ "rewards/chosen": -3.171875,
2453
+ "rewards/margins": 2.984375,
2454
+ "rewards/rejected": -6.15625,
2455
+ "step": 1610
2456
+ },
2457
+ {
2458
+ "epoch": 1.9424460431654675,
2459
+ "grad_norm": 22.24759097187433,
2460
+ "learning_rate": 1.9591292758773879e-07,
2461
+ "logits/chosen": -1.625,
2462
+ "logits/rejected": -1.6015625,
2463
+ "logps/chosen": -196.0,
2464
+ "logps/rejected": -251.0,
2465
+ "loss": 0.1543,
2466
+ "rewards/accuracies": 0.949999988079071,
2467
+ "rewards/chosen": -2.765625,
2468
+ "rewards/margins": 3.640625,
2469
+ "rewards/rejected": -6.40625,
2470
+ "step": 1620
2471
+ },
2472
+ {
2473
+ "epoch": 1.9544364508393284,
2474
+ "grad_norm": 22.879947315900267,
2475
+ "learning_rate": 1.9369169258107506e-07,
2476
+ "logits/chosen": -1.6328125,
2477
+ "logits/rejected": -1.5703125,
2478
+ "logps/chosen": -214.0,
2479
+ "logps/rejected": -239.0,
2480
+ "loss": 0.1174,
2481
+ "rewards/accuracies": 0.987500011920929,
2482
+ "rewards/chosen": -2.84375,
2483
+ "rewards/margins": 3.359375,
2484
+ "rewards/rejected": -6.21875,
2485
+ "step": 1630
2486
+ },
2487
+ {
2488
+ "epoch": 1.9664268585131894,
2489
+ "grad_norm": 10.630808616318225,
2490
+ "learning_rate": 1.9147045757441137e-07,
2491
+ "logits/chosen": -1.6328125,
2492
+ "logits/rejected": -1.609375,
2493
+ "logps/chosen": -205.0,
2494
+ "logps/rejected": -249.0,
2495
+ "loss": 0.113,
2496
+ "rewards/accuracies": 0.9750000238418579,
2497
+ "rewards/chosen": -3.6875,
2498
+ "rewards/margins": 3.140625,
2499
+ "rewards/rejected": -6.8125,
2500
+ "step": 1640
2501
+ },
2502
+ {
2503
+ "epoch": 1.9784172661870505,
2504
+ "grad_norm": 20.455023304923063,
2505
+ "learning_rate": 1.8924922256774767e-07,
2506
+ "logits/chosen": -1.5625,
2507
+ "logits/rejected": -1.484375,
2508
+ "logps/chosen": -214.0,
2509
+ "logps/rejected": -230.0,
2510
+ "loss": 0.133,
2511
+ "rewards/accuracies": 0.9750000238418579,
2512
+ "rewards/chosen": -3.46875,
2513
+ "rewards/margins": 2.84375,
2514
+ "rewards/rejected": -6.3125,
2515
+ "step": 1650
2516
+ },
2517
+ {
2518
+ "epoch": 1.9904076738609113,
2519
+ "grad_norm": 12.942728073820737,
2520
+ "learning_rate": 1.8702798756108395e-07,
2521
+ "logits/chosen": -1.65625,
2522
+ "logits/rejected": -1.578125,
2523
+ "logps/chosen": -216.0,
2524
+ "logps/rejected": -234.0,
2525
+ "loss": 0.1466,
2526
+ "rewards/accuracies": 0.987500011920929,
2527
+ "rewards/chosen": -2.71875,
2528
+ "rewards/margins": 3.078125,
2529
+ "rewards/rejected": -5.78125,
2530
+ "step": 1660
2531
+ },
2532
+ {
2533
+ "epoch": 2.0,
2534
+ "eval_logits/chosen": -1.6484375,
2535
+ "eval_logits/rejected": -1.6640625,
2536
+ "eval_logps/chosen": -219.0,
2537
+ "eval_logps/rejected": -228.0,
2538
+ "eval_loss": 0.7706417441368103,
2539
+ "eval_rewards/accuracies": 0.6397058963775635,
2540
+ "eval_rewards/chosen": -4.15625,
2541
+ "eval_rewards/margins": 1.03125,
2542
+ "eval_rewards/rejected": -5.1875,
2543
+ "eval_runtime": 23.2393,
2544
+ "eval_samples_per_second": 17.255,
2545
+ "eval_steps_per_second": 0.732,
2546
+ "step": 1668
  }
  ],
  "logging_steps": 10,