{ "epoch": 3.0, "eval_logits/chosen": 0.3730663061141968, "eval_logits/rejected": 0.4475269019603729, "eval_logps/chosen": -338.3392028808594, "eval_logps/rejected": -370.232666015625, "eval_loss": 0.6289177536964417, "eval_rewards/accuracies": 0.7405063509941101, "eval_rewards/chosen": 0.7478683590888977, "eval_rewards/margins": 4.585729122161865, "eval_rewards/rejected": -3.8378612995147705, "eval_runtime": 70.1775, "eval_samples": 2500, "eval_samples_per_second": 35.624, "eval_steps_per_second": 1.126, "total_flos": 0.0, "train_loss": 0.5009220597491634, "train_runtime": 6227.6413, "train_samples": 26990, "train_samples_per_second": 13.002, "train_steps_per_second": 0.102 }