{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.479091167449951,
"min": 2.4658448696136475,
"max": 2.58561635017395,
"count": 76
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 49899.1484375,
"min": 8394.09375,
"max": 59788.078125,
"count": 76
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 55.18681318681319,
"min": 35.1875,
"max": 83.23728813559322,
"count": 76
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 20088.0,
"min": 2252.0,
"max": 20324.0,
"count": 76
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1649.8954830393834,
"min": 1594.3697009364087,
"max": 1650.8506175397752,
"count": 76
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 300280.9779131678,
"min": 51997.66731364747,
"max": 331919.8479288847,
"count": 76
},
"SoccerTwos.Step.mean": {
"value": 6179996.0,
"min": 5429978.0,
"max": 6179996.0,
"count": 76
},
"SoccerTwos.Step.sum": {
"value": 6179996.0,
"min": 5429978.0,
"max": 6179996.0,
"count": 76
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.07872677594423294,
"min": -0.08215012401342392,
"max": 0.08388468623161316,
"count": 76
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 14.249547004699707,
"min": -12.486818313598633,
"max": 14.249547004699707,
"count": 76
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.07752089947462082,
"min": -0.08446293324232101,
"max": 0.08493209630250931,
"count": 76
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 14.031283378601074,
"min": -12.83836555480957,
"max": 14.031283378601074,
"count": 76
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 76
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 76
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.18952817383392082,
"min": -0.30000479135684627,
"max": 0.28260000117800455,
"count": 76
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 34.30459946393967,
"min": -50.10080015659332,
"max": 49.737600207328796,
"count": 76
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.18952817383392082,
"min": -0.30000479135684627,
"max": 0.28260000117800455,
"count": 76
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 34.30459946393967,
"min": -50.10080015659332,
"max": 49.737600207328796,
"count": 76
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 76
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 76
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.015399371163221076,
"min": 0.011844274809118361,
"max": 0.024232410235951345,
"count": 36
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.015399371163221076,
"min": 0.011844274809118361,
"max": 0.024232410235951345,
"count": 36
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.06767735456426939,
"min": 0.055261904994646706,
"max": 0.06967596064011256,
"count": 36
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.06767735456426939,
"min": 0.055261904994646706,
"max": 0.06967596064011256,
"count": 36
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.0684792855133613,
"min": 0.05549707127114137,
"max": 0.0708642177283764,
"count": 36
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.0684792855133613,
"min": 0.05549707127114137,
"max": 0.0708642177283764,
"count": 36
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.00025000000000000006,
"min": 0.00025000000000000006,
"max": 0.00025000000000000006,
"count": 36
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.00025000000000000006,
"min": 0.00025000000000000006,
"max": 0.00025000000000000006,
"count": 36
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.25,
"min": 0.25,
"max": 0.25,
"count": 36
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.25,
"min": 0.25,
"max": 0.25,
"count": 36
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.010000000000000002,
"min": 0.010000000000000002,
"max": 0.010000000000000002,
"count": 36
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.010000000000000002,
"min": 0.010000000000000002,
"max": 0.010000000000000002,
"count": 36
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1683484175",
"python_version": "3.9.16 | packaged by conda-forge | (main, Feb 1 2023, 21:38:11) \n[Clang 14.0.6 ]",
"command_line_arguments": "/Users/andreas.bjarlestam/mambaforge/envs/huggingface-rl-course/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics --resume",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0",
"numpy_version": "1.21.2",
"end_time_seconds": "1683487657"
},
"total": 3481.642826833,
"count": 1,
"self": 0.266701749999811,
"children": {
"run_training.setup": {
"total": 0.03880720800000015,
"count": 1,
"self": 0.03880720800000015
},
"TrainerController.start_learning": {
"total": 3481.3373178750003,
"count": 1,
"self": 0.7055969289958739,
"children": {
"TrainerController._reset_env": {
"total": 1.8457452910002883,
"count": 5,
"self": 1.8457452910002883
},
"TrainerController.advance": {
"total": 3478.6461231130047,
"count": 52567,
"self": 0.5951379080393053,
"children": {
"env_step": {
"total": 2799.831084738981,
"count": 52567,
"self": 2701.921003805895,
"children": {
"SubprocessEnvManager._take_step": {
"total": 97.46038615304738,
"count": 52567,
"self": 2.8379347090271096,
"children": {
"TorchPolicy.evaluate": {
"total": 94.62245144402027,
"count": 95322,
"self": 94.62245144402027
}
}
},
"workers": {
"total": 0.4496947800386013,
"count": 52566,
"self": 0.0,
"children": {
"worker_root": {
"total": 3478.3395808320347,
"count": 52566,
"is_parallel": true,
"self": 853.5667974280468,
"children": {
"steps_from_proto": {
"total": 0.00852233499997479,
"count": 10,
"is_parallel": true,
"self": 0.0010013339999064286,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.007521001000068361,
"count": 40,
"is_parallel": true,
"self": 0.007521001000068361
}
}
},
"UnityEnvironment.step": {
"total": 2624.7642610689877,
"count": 52566,
"is_parallel": true,
"self": 7.129942069017943,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 47.107922168025965,
"count": 52566,
"is_parallel": true,
"self": 47.107922168025965
},
"communicator.exchange": {
"total": 2473.9557799219765,
"count": 52566,
"is_parallel": true,
"self": 2473.9557799219765
},
"steps_from_proto": {
"total": 96.57061690996699,
"count": 105132,
"is_parallel": true,
"self": 10.97493528192777,
"children": {
"_process_rank_one_or_two_observation": {
"total": 85.59568162803922,
"count": 420528,
"is_parallel": true,
"self": 85.59568162803922
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 678.2199004659842,
"count": 52566,
"self": 4.748937736947255,
"children": {
"process_trajectory": {
"total": 134.27234868403667,
"count": 52566,
"self": 134.07668543403662,
"children": {
"RLTrainer._checkpoint": {
"total": 0.19566325000005236,
"count": 1,
"self": 0.19566325000005236
}
}
},
"_update_policy": {
"total": 539.1986140450002,
"count": 36,
"self": 65.58856171399077,
"children": {
"TorchPOCAOptimizer.update": {
"total": 473.61005233100946,
"count": 1080,
"self": 473.61005233100946
}
}
}
}
}
}
},
"trainer_threads": {
"total": 4.5899969336460344e-07,
"count": 1,
"self": 4.5899969336460344e-07
},
"TrainerController._save_models": {
"total": 0.13985208299982332,
"count": 1,
"self": 0.003031791000012163,
"children": {
"RLTrainer._checkpoint": {
"total": 0.13682029199981116,
"count": 1,
"self": 0.13682029199981116
}
}
}
}
}
}
}