{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.479091167449951, "min": 2.4658448696136475, "max": 2.58561635017395, "count": 76 }, "SoccerTwos.Policy.Entropy.sum": { "value": 49899.1484375, "min": 8394.09375, "max": 59788.078125, "count": 76 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 55.18681318681319, "min": 35.1875, "max": 83.23728813559322, "count": 76 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 20088.0, "min": 2252.0, "max": 20324.0, "count": 76 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1649.8954830393834, "min": 1594.3697009364087, "max": 1650.8506175397752, "count": 76 }, "SoccerTwos.Self-play.ELO.sum": { "value": 300280.9779131678, "min": 51997.66731364747, "max": 331919.8479288847, "count": 76 }, "SoccerTwos.Step.mean": { "value": 6179996.0, "min": 5429978.0, "max": 6179996.0, "count": 76 }, "SoccerTwos.Step.sum": { "value": 6179996.0, "min": 5429978.0, "max": 6179996.0, "count": 76 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.07872677594423294, "min": -0.08215012401342392, "max": 0.08388468623161316, "count": 76 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 14.249547004699707, "min": -12.486818313598633, "max": 14.249547004699707, "count": 76 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.07752089947462082, "min": -0.08446293324232101, "max": 0.08493209630250931, "count": 76 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 14.031283378601074, "min": -12.83836555480957, "max": 14.031283378601074, "count": 76 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 76 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 76 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.18952817383392082, "min": -0.30000479135684627, "max": 0.28260000117800455, "count": 76 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 34.30459946393967, "min": -50.10080015659332, "max": 49.737600207328796, "count": 76 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.18952817383392082, "min": -0.30000479135684627, "max": 0.28260000117800455, "count": 76 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 34.30459946393967, "min": -50.10080015659332, "max": 49.737600207328796, "count": 76 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 76 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 76 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.015399371163221076, "min": 0.011844274809118361, "max": 0.024232410235951345, "count": 36 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.015399371163221076, "min": 0.011844274809118361, "max": 0.024232410235951345, "count": 36 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.06767735456426939, "min": 0.055261904994646706, "max": 0.06967596064011256, "count": 36 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.06767735456426939, "min": 0.055261904994646706, "max": 0.06967596064011256, "count": 36 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.0684792855133613, "min": 0.05549707127114137, "max": 0.0708642177283764, "count": 36 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.0684792855133613, "min": 0.05549707127114137, "max": 0.0708642177283764, "count": 36 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.00025000000000000006, "min": 0.00025000000000000006, "max": 0.00025000000000000006, "count": 36 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.00025000000000000006, "min": 0.00025000000000000006, "max": 0.00025000000000000006, "count": 36 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.25, "min": 0.25, "max": 0.25, "count": 36 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.25, "min": 0.25, "max": 0.25, "count": 36 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.010000000000000002, "min": 0.010000000000000002, "max": 0.010000000000000002, "count": 36 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.010000000000000002, "min": 0.010000000000000002, "max": 0.010000000000000002, "count": 36 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1683484175", "python_version": "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:38:11) \n[Clang 14.0.6 ]", "command_line_arguments": "/Users/andreas.bjarlestam/mambaforge/envs/huggingface-rl-course/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.11.0", "numpy_version": "1.21.2", "end_time_seconds": "1683487657" }, "total": 3481.642826833, "count": 1, "self": 0.266701749999811, "children": { "run_training.setup": { "total": 0.03880720800000015, "count": 1, "self": 0.03880720800000015 }, "TrainerController.start_learning": { "total": 3481.3373178750003, "count": 1, "self": 0.7055969289958739, "children": { "TrainerController._reset_env": { "total": 1.8457452910002883, "count": 5, "self": 1.8457452910002883 }, "TrainerController.advance": { "total": 3478.6461231130047, "count": 52567, "self": 0.5951379080393053, "children": { "env_step": { "total": 2799.831084738981, "count": 52567, "self": 2701.921003805895, "children": { "SubprocessEnvManager._take_step": { "total": 97.46038615304738, "count": 52567, "self": 2.8379347090271096, "children": { "TorchPolicy.evaluate": { "total": 94.62245144402027, "count": 95322, "self": 94.62245144402027 } } }, "workers": { "total": 0.4496947800386013, "count": 52566, "self": 0.0, "children": { "worker_root": { "total": 3478.3395808320347, "count": 52566, "is_parallel": true, "self": 853.5667974280468, "children": { "steps_from_proto": { "total": 0.00852233499997479, "count": 10, "is_parallel": true, "self": 0.0010013339999064286, "children": { "_process_rank_one_or_two_observation": { "total": 0.007521001000068361, "count": 40, "is_parallel": true, "self": 0.007521001000068361 } } }, "UnityEnvironment.step": { "total": 2624.7642610689877, "count": 52566, "is_parallel": true, "self": 7.129942069017943, "children": { "UnityEnvironment._generate_step_input": { "total": 47.107922168025965, "count": 52566, "is_parallel": true, "self": 47.107922168025965 }, "communicator.exchange": { "total": 2473.9557799219765, "count": 52566, "is_parallel": true, "self": 2473.9557799219765 }, "steps_from_proto": { "total": 96.57061690996699, "count": 105132, "is_parallel": true, "self": 10.97493528192777, "children": { "_process_rank_one_or_two_observation": { "total": 85.59568162803922, "count": 420528, "is_parallel": true, "self": 85.59568162803922 } } } } } } } } } } }, "trainer_advance": { "total": 678.2199004659842, "count": 52566, "self": 4.748937736947255, "children": { "process_trajectory": { "total": 134.27234868403667, "count": 52566, "self": 134.07668543403662, "children": { "RLTrainer._checkpoint": { "total": 0.19566325000005236, "count": 1, "self": 0.19566325000005236 } } }, "_update_policy": { "total": 539.1986140450002, "count": 36, "self": 65.58856171399077, "children": { "TorchPOCAOptimizer.update": { "total": 473.61005233100946, "count": 1080, "self": 473.61005233100946 } } } } } } }, "trainer_threads": { "total": 4.5899969336460344e-07, "count": 1, "self": 4.5899969336460344e-07 }, "TrainerController._save_models": { "total": 0.13985208299982332, "count": 1, "self": 0.003031791000012163, "children": { "RLTrainer._checkpoint": { "total": 0.13682029199981116, "count": 1, "self": 0.13682029199981116 } } } } } } }