{ "name": "root", "gauges": { "SoccerTwos.Policy.Entropy.mean": { "value": 2.6859028339385986, "min": 2.498363971710205, "max": 3.235724687576294, "count": 472 }, "SoccerTwos.Policy.Entropy.sum": { "value": 51569.3359375, "min": 5978.3564453125, "max": 133290.4375, "count": 472 }, "SoccerTwos.Environment.EpisodeLength.mean": { "value": 56.29761904761905, "min": 41.939655172413794, "max": 999.0, "count": 472 }, "SoccerTwos.Environment.EpisodeLength.sum": { "value": 18916.0, "min": 11672.0, "max": 28972.0, "count": 472 }, "SoccerTwos.Self-play.ELO.mean": { "value": 1622.0498135052524, "min": 1209.20833714023, "max": 1622.0498135052524, "count": 469 }, "SoccerTwos.Self-play.ELO.sum": { "value": 272504.3686688824, "min": 2419.4037575825882, "max": 362356.75893695117, "count": 469 }, "SoccerTwos.Step.mean": { "value": 5419958.0, "min": 709630.0, "max": 5419958.0, "count": 472 }, "SoccerTwos.Step.sum": { "value": 5419958.0, "min": 709630.0, "max": 5419958.0, "count": 472 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": { "value": 0.16701048612594604, "min": -0.015125000849366188, "max": 0.2638693153858185, "count": 472 }, "SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": { "value": 28.057762145996094, "min": -0.8697319030761719, "max": 47.24671173095703, "count": 472 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.mean": { "value": 0.16469024121761322, "min": -0.01287874300032854, "max": 0.268201619386673, "count": 472 }, "SoccerTwos.Policy.ExtrinsicValueEstimate.sum": { "value": 27.66796112060547, "min": -1.0717203617095947, "max": 46.701171875, "count": 472 }, "SoccerTwos.Environment.CumulativeReward.mean": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 472 }, "SoccerTwos.Environment.CumulativeReward.sum": { "value": 0.0, "min": 0.0, "max": 0.0, "count": 472 }, "SoccerTwos.Policy.ExtrinsicReward.mean": { "value": 0.38452619243235814, "min": -0.5487578919059352, "max": 0.8240291377566508, "count": 472 }, "SoccerTwos.Policy.ExtrinsicReward.sum": { "value": 64.60040032863617, "min": -32.34760010242462, "max": 124.42839980125427, "count": 472 }, "SoccerTwos.Environment.GroupCumulativeReward.mean": { "value": 0.38452619243235814, "min": -0.5487578919059352, "max": 0.8240291377566508, "count": 472 }, "SoccerTwos.Environment.GroupCumulativeReward.sum": { "value": 64.60040032863617, "min": -32.34760010242462, "max": 124.42839980125427, "count": 472 }, "SoccerTwos.IsTraining.mean": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 472 }, "SoccerTwos.IsTraining.sum": { "value": 1.0, "min": 1.0, "max": 1.0, "count": 472 }, "SoccerTwos.Losses.PolicyLoss.mean": { "value": 0.012682984667480923, "min": 0.009875288437554749, "max": 0.023785847671388183, "count": 226 }, "SoccerTwos.Losses.PolicyLoss.sum": { "value": 0.012682984667480923, "min": 0.009875288437554749, "max": 0.023785847671388183, "count": 226 }, "SoccerTwos.Losses.ValueLoss.mean": { "value": 0.05950005787114302, "min": 0.0006123105908045545, "max": 0.06676147195200126, "count": 226 }, "SoccerTwos.Losses.ValueLoss.sum": { "value": 0.05950005787114302, "min": 0.0006123105908045545, "max": 0.06676147195200126, "count": 226 }, "SoccerTwos.Losses.BaselineLoss.mean": { "value": 0.06019010196129481, "min": 0.0006128643661213573, "max": 0.06780888587236404, "count": 226 }, "SoccerTwos.Losses.BaselineLoss.sum": { "value": 0.06019010196129481, "min": 0.0006128643661213573, "max": 0.06780888587236404, "count": 226 }, "SoccerTwos.Policy.LearningRate.mean": { "value": 0.00025000000000000006, "min": 0.00025, "max": 0.00025000000000000006, "count": 226 }, "SoccerTwos.Policy.LearningRate.sum": { "value": 0.00025000000000000006, "min": 0.00025, "max": 0.00025000000000000006, "count": 226 }, "SoccerTwos.Policy.Epsilon.mean": { "value": 0.25, "min": 0.25, "max": 0.25, "count": 226 }, "SoccerTwos.Policy.Epsilon.sum": { "value": 0.25, "min": 0.25, "max": 0.25, "count": 226 }, "SoccerTwos.Policy.Beta.mean": { "value": 0.010000000000000002, "min": 0.01, "max": 0.010000000000000002, "count": 226 }, "SoccerTwos.Policy.Beta.sum": { "value": 0.010000000000000002, "min": 0.01, "max": 0.010000000000000002, "count": 226 } }, "metadata": { "timer_format_version": "0.1.0", "start_time_seconds": "1683409330", "python_version": "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:38:11) \n[Clang 14.0.6 ]", "command_line_arguments": "/Users/andreas.bjarlestam/mambaforge/envs/huggingface-rl-course/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume", "mlagents_version": "0.31.0.dev0", "mlagents_envs_version": "0.31.0.dev0", "communication_protocol_version": "1.5.0", "pytorch_version": "1.11.0", "numpy_version": "1.21.2", "end_time_seconds": "1683430503" }, "total": 21172.304846875002, "count": 1, "self": 0.25797687500016764, "children": { "run_training.setup": { "total": 0.013629958000000109, "count": 1, "self": 0.013629958000000109 }, "TrainerController.start_learning": { "total": 21172.033240042, "count": 1, "self": 3.9736478637751134, "children": { "TrainerController._reset_env": { "total": 2.751025209993427, "count": 25, "self": 2.751025209993427 }, "TrainerController.advance": { "total": 21165.171262176234, "count": 320981, "self": 3.5263494238279236, "children": { "env_step": { "total": 17192.42758640993, "count": 320981, "self": 16551.716678152992, "children": { "SubprocessEnvManager._take_step": { "total": 638.1731869698121, "count": 320981, "self": 15.867049701842689, "children": { "TorchPolicy.evaluate": { "total": 622.3061372679695, "count": 597736, "self": 622.3061372679695 } } }, "workers": { "total": 2.537721287124503, "count": 320980, "self": 0.0, "children": { "worker_root": { "total": 21163.252892757322, "count": 320980, "is_parallel": true, "self": 5130.634733850904, "children": { "steps_from_proto": { "total": 0.04212875099739155, "count": 50, "is_parallel": true, "self": 0.004831761003669888, "children": { "_process_rank_one_or_two_observation": { "total": 0.03729698999372166, "count": 200, "is_parallel": true, "self": 0.03729698999372166 } } }, "UnityEnvironment.step": { "total": 16032.57603015542, "count": 320980, "is_parallel": true, "self": 40.666306921973955, "children": { "UnityEnvironment._generate_step_input": { "total": 288.5752638062028, "count": 320980, "is_parallel": true, "self": 288.5752638062028 }, "communicator.exchange": { "total": 15111.659874243118, "count": 320980, "is_parallel": true, "self": 15111.659874243118 }, "steps_from_proto": { "total": 591.6745851841252, "count": 641960, "is_parallel": true, "self": 65.19237516937369, "children": { "_process_rank_one_or_two_observation": { "total": 526.4822100147516, "count": 2567840, "is_parallel": true, "self": 526.4822100147516 } } } } } } } } } } }, "trainer_advance": { "total": 3969.217326342478, "count": 320980, "self": 28.617055645902383, "children": { "process_trajectory": { "total": 832.5525043215669, "count": 320980, "self": 832.2651062375664, "children": { "RLTrainer._checkpoint": { "total": 0.2873980840004151, "count": 2, "self": 0.2873980840004151 } } }, "_update_policy": { "total": 3108.047766375009, "count": 227, "self": 407.670854059937, "children": { "TorchPOCAOptimizer.update": { "total": 2700.376912315072, "count": 6813, "self": 2700.376912315072 } } } } } } }, "trainer_threads": { "total": 5.000001692678779e-07, "count": 1, "self": 5.000001692678779e-07 }, "TrainerController._save_models": { "total": 0.13730429199858918, "count": 1, "self": 0.0013856250006938353, "children": { "RLTrainer._checkpoint": { "total": 0.13591866699789534, "count": 1, "self": 0.13591866699789534 } } } } } } }