{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.503404140472412,
"min": 2.4523873329162598,
"max": 2.5370821952819824,
"count": 37
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 47905.140625,
"min": 16055.9697265625,
"max": 55159.47265625,
"count": 37
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 64.93333333333334,
"min": 49.91752577319588,
"max": 76.84375,
"count": 37
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19480.0,
"min": 4772.0,
"max": 20184.0,
"count": 37
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1652.4748588887048,
"min": 1642.8951631352754,
"max": 1669.7917318437467,
"count": 37
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 247871.2288333057,
"min": 76077.51671093542,
"max": 321056.13316160464,
"count": 37
},
"SoccerTwos.Step.mean": {
"value": 6549982.0,
"min": 6189948.0,
"max": 6549982.0,
"count": 37
},
"SoccerTwos.Step.sum": {
"value": 6549982.0,
"min": 6189948.0,
"max": 6549982.0,
"count": 37
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.051142871379852295,
"min": -0.06000871956348419,
"max": 0.0756918340921402,
"count": 37
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 7.671430587768555,
"min": -9.386945724487305,
"max": 12.262077331542969,
"count": 37
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.05099420249462128,
"min": -0.05922776460647583,
"max": 0.07353535294532776,
"count": 37
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 7.649130344390869,
"min": -9.559730529785156,
"max": 11.912727355957031,
"count": 37
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 37
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 37
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.07690666834513346,
"min": -0.2579976199638276,
"max": 0.22876027267273158,
"count": 37
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 11.53600025177002,
"min": -43.343600153923035,
"max": 35.7960000038147,
"count": 37
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.07690666834513346,
"min": -0.2579976199638276,
"max": 0.22876027267273158,
"count": 37
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 11.53600025177002,
"min": -43.343600153923035,
"max": 35.7960000038147,
"count": 37
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 37
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 37
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.013742929190630093,
"min": 0.013162332853729216,
"max": 0.021872061599666873,
"count": 17
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.013742929190630093,
"min": 0.013162332853729216,
"max": 0.021872061599666873,
"count": 17
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.060539432987570765,
"min": 0.05456341157356898,
"max": 0.06828681615491708,
"count": 17
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.060539432987570765,
"min": 0.05456341157356898,
"max": 0.06828681615491708,
"count": 17
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.06114285662770271,
"min": 0.05531582894424598,
"max": 0.0692600845048825,
"count": 17
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.06114285662770271,
"min": 0.05531582894424598,
"max": 0.0692600845048825,
"count": 17
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.00025000000000000006,
"min": 0.00025000000000000006,
"max": 0.00025000000000000006,
"count": 17
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.00025000000000000006,
"min": 0.00025000000000000006,
"max": 0.00025000000000000006,
"count": 17
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.25,
"min": 0.25,
"max": 0.25,
"count": 17
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.25,
"min": 0.25,
"max": 0.25,
"count": 17
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.010000000000000002,
"min": 0.010000000000000002,
"max": 0.010000000000000002,
"count": 17
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.010000000000000002,
"min": 0.010000000000000002,
"max": 0.010000000000000002,
"count": 17
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1683487709",
"python_version": "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:38:11) \n[Clang 14.0.6 ]",
"command_line_arguments": "/Users/andreas.bjarlestam/mambaforge/envs/huggingface-rl-course/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1683489353"
},
"total": 1643.169040458,
"count": 1,
"self": 0.11722787500002596,
"children": {
"run_training.setup": {
"total": 0.013663792000000008,
"count": 1,
"self": 0.013663792000000008
},
"TrainerController.start_learning": {
"total": 1643.038148791,
"count": 1,
"self": 0.31101058200579246,
"children": {
"TrainerController._reset_env": {
"total": 2.2248240839999744,
"count": 3,
"self": 2.2248240839999744
},
"TrainerController.advance": {
"total": 1640.320858083994,
"count": 25389,
"self": 0.2661743909673078,
"children": {
"env_step": {
"total": 1330.830156340017,
"count": 25389,
"self": 1284.3852757490256,
"children": {
"SubprocessEnvManager._take_step": {
"total": 46.23812745699054,
"count": 25389,
"self": 1.2487462569900671,
"children": {
"TorchPolicy.evaluate": {
"total": 44.98938120000047,
"count": 46132,
"self": 44.98938120000047
}
}
},
"workers": {
"total": 0.20675313400085837,
"count": 25388,
"self": 0.0,
"children": {
"worker_root": {
"total": 1640.2869981199963,
"count": 25388,
"is_parallel": true,
"self": 393.89563319499894,
"children": {
"steps_from_proto": {
"total": 0.005132874999992598,
"count": 6,
"is_parallel": true,
"self": 0.0006161679999161507,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0045167070000764475,
"count": 24,
"is_parallel": true,
"self": 0.0045167070000764475
}
}
},
"UnityEnvironment.step": {
"total": 1246.3862320499973,
"count": 25388,
"is_parallel": true,
"self": 3.1823133230072926,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 22.304695663003482,
"count": 25388,
"is_parallel": true,
"self": 22.304695663003482
},
"communicator.exchange": {
"total": 1175.0904308869885,
"count": 25388,
"is_parallel": true,
"self": 1175.0904308869885
},
"steps_from_proto": {
"total": 45.808792176997834,
"count": 50776,
"is_parallel": true,
"self": 5.091343413956125,
"children": {
"_process_rank_one_or_two_observation": {
"total": 40.71744876304171,
"count": 203104,
"is_parallel": true,
"self": 40.71744876304171
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 309.2245273530094,
"count": 25388,
"self": 2.159055672025943,
"children": {
"process_trajectory": {
"total": 68.22423439098337,
"count": 25388,
"self": 68.22423439098337
},
"_update_policy": {
"total": 238.8412372900001,
"count": 17,
"self": 29.788104290999627,
"children": {
"TorchPOCAOptimizer.update": {
"total": 209.05313299900047,
"count": 510,
"self": 209.05313299900047
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.1600014810683206e-07,
"count": 1,
"self": 4.1600014810683206e-07
},
"TrainerController._save_models": {
"total": 0.18145562500012602,
"count": 1,
"self": 0.0011100420001639577,
"children": {
"RLTrainer._checkpoint": {
"total": 0.18034558299996206,
"count": 1,
"self": 0.18034558299996206
}
}
}
}
}
}
}