Commit df508c1
Parent(s): 341188c
PPO playing Acrobot-v1 from https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c
Files changed:
- README.md +4 -3
- benchmark_publish.py +91 -0
- colab_requirements.txt +3 -1
- enjoy.py +13 -88
- huggingface_publish.py +177 -0
- lambda_labs/lambda_requirements.txt +3 -1
- poetry.lock +47 -1
- publish/markdown_format.py +210 -0
- pyproject.toml +2 -0
- replay.meta.json +1 -1
- runner/config.py +11 -8
- runner/evaluate.py +103 -0
- runner/running_utils.py +4 -4
- shared/callbacks/eval_callback.py +27 -20
- shared/policy/policy.py +3 -1
- shared/stats.py +3 -24
README.md CHANGED
@@ -42,7 +42,7 @@ By default training goes to a rl-algo-impls project while benchmarks go to
 rl-algo-impls-benchmarks. During training and benchmarking runs, videos of the best
 models and the model weights are uploaded to WandB.
 
-Before doing …
+Before doing anything below, you'll need to create a wandb account and run `wandb
 login`.
 
 
@@ -50,7 +50,7 @@ login`.
 ## Usage
 /sgoodfriend/rl-algo-impls: https://github.com/sgoodfriend/rl-algo-impls
 
-Note: While the model state dictionary and hyperaparameters are saved, the
+Note: While the model state dictionary and hyperaparameters are saved, the latest
 implementation could be sufficiently different to not be able to reproduce similar
 results. You might need to checkout the commit the agent was trained on:
 [5598ebc](https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c).
@@ -68,7 +68,8 @@ notebook.
 
 ## Training
 If you want the highest chance to reproduce these results, you'll want to checkout the
-commit the agent was trained on: [5598ebc](https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c).
+commit the agent was trained on: [5598ebc](https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c). While
+training is deterministic, different hardware will give different results.
 
 ```
 python train.py --algo ppo --env Acrobot-v1 --seed 4
benchmark_publish.py ADDED
@@ -0,0 +1,91 @@
+import argparse
+import subprocess
+import wandb
+import wandb.apis.public
+
+from collections import defaultdict
+from multiprocessing.pool import ThreadPool
+from typing import List, NamedTuple
+
+
+class RunGroup(NamedTuple):
+    algo: str
+    env_id: str
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--wandb-project-name",
+        type=str,
+        default="rl-algo-impls-benchmarks",
+        help="WandB project name to load runs from",
+    )
+    parser.add_argument(
+        "--wandb-entity",
+        type=str,
+        default=None,
+        help="WandB team of project. None uses default entity",
+    )
+    parser.add_argument("--wandb-tags", type=str, nargs="+", help="WandB tags")
+    parser.add_argument("--wandb-report-url", type=str, help="Link to WandB report")
+    parser.add_argument(
+        "--envs", type=str, nargs="*", help="Optional filter down to these envs"
+    )
+    parser.add_argument(
+        "--huggingface-user",
+        type=str,
+        default=None,
+        help="Huggingface user or team to upload model cards. Defaults to huggingface-cli login user",
+    )
+    parser.add_argument(
+        "--pool-size",
+        type=int,
+        default=3,
+        help="How many publish jobs can run in parallel",
+    )
+    parser.set_defaults(
+        wandb_tags=["benchmark_5598ebc", "host_192-9-145-26"],
+        wandb_report_url="https://api.wandb.ai/links/sgoodfriend/6p2sjqtn",
+        envs=["CartPole-v1", "Acrobot-v1"],
+    )
+    args = parser.parse_args()
+    print(args)
+
+    api = wandb.Api()
+    all_runs = api.runs(
+        f"{args.wandb_entity or api.default_entity}/{args.wandb_project_name}"
+    )
+
+    required_tags = set(args.wandb_tags)
+    runs: List[wandb.apis.public.Run] = [
+        r
+        for r in all_runs
+        if required_tags.issubset(set(r.config.get("wandb_tags", [])))
+    ]
+
+    runs_paths_by_group = defaultdict(list)
+    for r in runs:
+        algo = r.config["algo"]
+        env = r.config["env"]
+        if args.envs and env not in args.envs:
+            continue
+        run_group = RunGroup(algo, env)
+        runs_paths_by_group[run_group].append("/".join(r.path))
+
+    def run(run_paths: List[str]) -> None:
+        publish_args = ["python", "huggingface_publish.py"]
+        publish_args.append("--wandb-run-paths")
+        publish_args.extend(run_paths)
+        publish_args.append("--wandb-report-url")
+        publish_args.append(args.wandb_report_url)
+        if args.huggingface_user:
+            publish_args.append("--huggingface-user")
+            publish_args.append(args.huggingface_user)
+        subprocess.run(publish_args)
+
+    tp = ThreadPool(args.pool_size)
+    for run_paths in runs_paths_by_group.values():
+        tp.apply_async(run, (run_paths,))
+    tp.close()
+    tp.join()
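Each run group is published by shelling out to huggingface_publish.py, with ThreadPool(args.pool_size) bounding concurrency. A rough sketch of the command a single worker assembles (the run paths below are made-up placeholders, not real runs):

```
# Sketch only: run paths are hypothetical entity/project/run_id triples.
run_paths = [
    "sgoodfriend/rl-algo-impls-benchmarks/aaaaaaaa",
    "sgoodfriend/rl-algo-impls-benchmarks/bbbbbbbb",
]
publish_args = ["python", "huggingface_publish.py", "--wandb-run-paths", *run_paths,
                "--wandb-report-url", "https://api.wandb.ai/links/sgoodfriend/6p2sjqtn"]
# subprocess.run(publish_args) blocks its worker thread until the publish job
# exits, so ThreadPool(3) caps concurrent publish jobs at three.
```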
colab_requirements.txt CHANGED
@@ -4,4 +4,6 @@ gym[box2d] >= 0.21.0, < 0.22
 pyglet == 1.5.27
 wandb >= 0.13.9, < 0.14
 pyvirtualdisplay == 3.0
-pybullet >= 3.2.5, < 3.3
+pybullet >= 3.2.5, < 3.3
+tabulate >= 0.9.0, < 0.10
+huggingface-hub >= 0.12.0, < 0.13
enjoy.py CHANGED
@@ -3,103 +3,28 @@ import os
 
 os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 
-import …
-import …
-
-from dataclasses import dataclass
-from typing import Optional
-
-from runner.env import make_eval_env
-from runner.config import Config, RunArgs
-from runner.running_utils import (
-    base_parser,
-    load_hyperparams,
-    set_seeds,
-    get_device,
-    make_policy,
-)
-from shared.callbacks.eval_callback import evaluate
-
-
-@dataclass
-class EvalArgs(RunArgs):
-    render: bool = True
-    best: bool = True
-    n_envs: int = 1
-    n_episodes: int = 3
-    deterministic: Optional[bool] = None
-    wandb_run_path: Optional[str] = None
+from runner.evaluate import EvalArgs, evaluate_model
+from runner.running_utils import base_parser
 
 
 if __name__ == "__main__":
-    parser = base_parser()
+    parser = base_parser(multiple=False)
     parser.add_argument("--render", default=True, type=bool)
     parser.add_argument("--best", default=True, type=bool)
     parser.add_argument("--n_envs", default=1, type=int)
     parser.add_argument("--n_episodes", default=3, type=int)
-    parser.add_argument("--deterministic", default=None, type=bool)
+    parser.add_argument("--deterministic-eval", default=None, type=bool)
+    parser.add_argument(
+        "--no-print-returns", action="store_true", help="Limit printing"
+    )
+    # wandb-run-path overrides base RunArgs
    parser.add_argument("--wandb-run-path", default=None, type=str)
     parser.set_defaults(
-        …
+        algo=["ppo"],
     )
+    args = parser.parse_args()
+    args.algo = args.algo[0]
+    args.env = args.env[0]
     args = EvalArgs(**vars(parser.parse_args()))
 
-    if args.wandb_run_path:
-        import wandb
-
-        api = wandb.Api()
-        run = api.run(args.wandb_run_path)
-        hyperparams = run.config
-
-        args.algo = hyperparams["algo"]
-        args.env = hyperparams["env"]
-        args.use_deterministic_algorithms = hyperparams.get(
-            "use_deterministic_algorithms", True
-        )
-
-        config = Config(args, hyperparams, os.path.dirname(__file__))
-        model_path = config.model_dir_path(best=args.best, downloaded=True)
-
-        model_archive_name = config.model_dir_name(best=args.best, extension=".zip")
-        run.file(model_archive_name).download()
-        if os.path.isdir(model_path):
-            shutil.rmtree(model_path)
-        shutil.unpack_archive(model_archive_name, model_path)
-        os.remove(model_archive_name)
-    else:
-        hyperparams = load_hyperparams(args.algo, args.env, os.path.dirname(__file__))
-
-        config = Config(args, hyperparams, os.path.dirname(__file__))
-        model_path = config.model_dir_path(best=args.best)
-
-    print(args)
-
-    set_seeds(args.seed, args.use_deterministic_algorithms)
-
-    env = make_eval_env(
-        config,
-        override_n_envs=args.n_envs,
-        render=args.render,
-        normalize_load_path=model_path,
-        **config.env_hyperparams,
-    )
-    device = get_device(config.device, env)
-    policy = make_policy(
-        args.algo,
-        env,
-        device,
-        load_path=model_path,
-        **config.policy_hyperparams,
-    ).eval()
-
-    if args.deterministic is None:
-        deterministic = config.eval_params.get("deterministic", True)
-    else:
-        deterministic = args.deterministic
-    evaluate(
-        env,
-        policy,
-        args.n_episodes,
-        render=args.render,
-        deterministic=deterministic,
-    )
+    evaluate_model(args, os.path.dirname(__file__))
huggingface_publish.py ADDED
@@ -0,0 +1,177 @@
+import os
+
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+
+import argparse
+import requests
+import shutil
+import subprocess
+import tempfile
+import wandb
+import wandb.apis.public
+
+from typing import List, Optional
+
+from huggingface_hub.hf_api import HfApi, upload_folder
+from huggingface_hub.repocard import metadata_save
+from publish.markdown_format import EvalTableData, model_card_text
+from runner.evaluate import EvalArgs, evaluate_model
+from runner.env import make_eval_env
+from shared.callbacks.eval_callback import evaluate
+from wrappers.vec_episode_recorder import VecEpisodeRecorder
+
+
+def publish(
+    wandb_run_paths: List[str],
+    wandb_report_url: str,
+    huggingface_user: Optional[str] = None,
+    huggingface_token: Optional[str] = None,
+) -> None:
+    api = wandb.Api()
+    runs = [api.run(rp) for rp in wandb_run_paths]
+    algo = runs[0].config["algo"]
+    env = runs[0].config["env"]
+    evaluations = [
+        evaluate_model(
+            EvalArgs(
+                algo,
+                env,
+                seed=r.config.get("seed", None),
+                render=False,
+                best=True,
+                n_envs=None,
+                n_episodes=10,
+                no_print_returns=True,
+                wandb_run_path="/".join(r.path),
+            ),
+            os.path.dirname(__file__),
+        )
+        for r in runs
+    ]
+    run_metadata = requests.get(runs[0].file("wandb-metadata.json").url).json()
+    table_data = list(EvalTableData(r, e) for r, e in zip(runs, evaluations))
+    best_eval = sorted(
+        table_data, key=lambda d: d.evaluation.stats.score, reverse=True
+    )[0]
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        _, (policy, stats, config) = best_eval
+
+        repo_name = config.model_name(include_seed=False)
+        repo_dir_path = os.path.join(tmpdirname, repo_name)
+        # Locally clone this repo to a temp directory
+        subprocess.run(["git", "clone", ".", repo_dir_path])
+        shutil.rmtree(os.path.join(repo_dir_path, ".git"))
+        model_path = config.model_dir_path(best=True, downloaded=True)
+        shutil.copytree(
+            model_path,
+            os.path.join(
+                repo_dir_path, "saved_models", config.model_dir_name(best=True)
+            ),
+        )
+
+        github_url = "https://github.com/sgoodfriend/rl-algo-impls"
+        commit_hash = run_metadata.get("git", {}).get("commit", None)
+        card_text = model_card_text(
+            algo,
+            env,
+            github_url,
+            commit_hash,
+            wandb_report_url,
+            table_data,
+            best_eval,
+        )
+        readme_filepath = os.path.join(repo_dir_path, "README.md")
+        os.remove(readme_filepath)
+        with open(readme_filepath, "w") as f:
+            f.write(card_text)
+
+        metadata = {
+            "library_name": "rl-algo-impls",
+            "tags": [
+                env,
+                algo,
+                "deep-reinforcement-learning",
+                "reinforcement-learning",
+            ],
+            "model-index": [
+                {
+                    "name": algo,
+                    "results": [
+                        {
+                            "metrics": [
+                                {
+                                    "type": "mean_reward",
+                                    "value": str(stats.score),
+                                    "name": "mean_reward",
+                                }
+                            ],
+                            "task": {
+                                "type": "reinforcement-learning",
+                                "name": "reinforcement-learning",
+                            },
+                            "dataset": {
+                                "name": env,
+                                "type": env,
+                            },
+                        }
+                    ],
+                }
+            ],
+        }
+        metadata_save(readme_filepath, metadata)
+
+        video_env = VecEpisodeRecorder(
+            make_eval_env(
+                config,
+                override_n_envs=1,
+                normalize_load_path=model_path,
+                **config.env_hyperparams,
+            ),
+            os.path.join(repo_dir_path, "replay"),
+            max_video_length=3600,
+        )
+        evaluate(
+            video_env,
+            policy,
+            1,
+            deterministic=config.eval_params.get("deterministic", True),
+        )
+
+        api = HfApi()
+        huggingface_user = huggingface_user or api.whoami()["name"]
+        huggingface_repo = f"{huggingface_user}/{repo_name}"
+        api.create_repo(
+            token=huggingface_token,
+            repo_id=huggingface_repo,
+            private=True,
+            exist_ok=True,
+        )
+        repo_url = upload_folder(
+            repo_id=huggingface_repo,
+            folder_path=repo_dir_path,
+            path_in_repo="",
+            commit_message=f"{algo.upper()} playing {env} from {github_url}/tree/{commit_hash}",
+            token=huggingface_token,
+        )
+        print(f"Pushed model to the hub: {repo_url}")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--wandb-run-paths",
+        type=str,
+        nargs="+",
+        help="Run paths of the form entity/project/run_id",
+    )
+    parser.add_argument("--wandb-report-url", type=str, help="Link to WandB report")
+    parser.add_argument(
+        "--huggingface-user",
+        type=str,
+        help="Huggingface user or team to upload model cards",
+        default=None,
+    )
+    args = parser.parse_args()
+    print(args)
+    publish(**vars(args))
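The script can also be driven programmatically. A minimal sketch of calling publish() directly (the run path is a hypothetical placeholder): it reevaluates each run's best model, picks the top scorer, and uploads the generated card plus weights and replay video.

```
# Sketch: the run path below is a placeholder of the form entity/project/run_id.
from huggingface_publish import publish

publish(
    wandb_run_paths=["sgoodfriend/rl-algo-impls-benchmarks/aaaaaaaa"],
    wandb_report_url="https://api.wandb.ai/links/sgoodfriend/6p2sjqtn",
    huggingface_user=None,  # None falls back to the huggingface-cli login user
)
```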
lambda_labs/lambda_requirements.txt CHANGED
@@ -6,4 +6,6 @@ gym[box2d] >= 0.21.0, < 0.22
 pyglet == 1.5.27
 wandb >= 0.13.9, < 0.14
 pyvirtualdisplay == 3.0
-pybullet >= 3.2.5, < 3.3
+pybullet >= 3.2.5, < 3.3
+tabulate >= 0.9.0, < 0.10
+huggingface-hub >= 0.12.0, < 0.13
poetry.lock CHANGED
@@ -1217,6 +1217,37 @@ chardet = ["chardet (>=2.2)"]
 genshi = ["genshi"]
 lxml = ["lxml"]
 
+[[package]]
+name = "huggingface-hub"
+version = "0.12.0"
+description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
+category = "main"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "huggingface_hub-0.12.0-py3-none-any.whl", hash = "sha256:93809eabbfb2058a808bddf8b2a70f645de3f9df73ce87ddf5163d4c74b71c0c"},
+    {file = "huggingface_hub-0.12.0.tar.gz", hash = "sha256:da82c9ec8f9d8f976ffd3fd8249d20bb35c2dd3145a9f7ca1106f0ebefd9afa0"},
+]
+
+[package.dependencies]
+filelock = "*"
+packaging = ">=20.9"
+pyyaml = ">=5.1"
+requests = "*"
+tqdm = ">=4.42.1"
+typing-extensions = ">=3.7.4.3"
+
+[package.extras]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (==22.3)", "flake8 (>=3.8.3)", "flake8-bugbear", "isort (>=5.5.4)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+cli = ["InquirerPy (==0.3.4)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (==22.3)", "flake8 (>=3.8.3)", "flake8-bugbear", "isort (>=5.5.4)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
+quality = ["black (==22.3)", "flake8 (>=3.8.3)", "flake8-bugbear", "isort (>=5.5.4)", "mypy (==0.982)"]
+tensorflow = ["graphviz", "pydot", "tensorflow"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "isort (>=5.5.4)", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"]
+torch = ["torch"]
+typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
+
 [[package]]
 name = "idna"
 version = "3.4"
@@ -3687,6 +3718,21 @@ pure-eval = "*"
 [package.extras]
 tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
 
+[[package]]
+name = "tabulate"
+version = "0.9.0"
+description = "Pretty-print tabular data"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
+    {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
+]
+
+[package.extras]
+widechars = ["wcwidth"]
+
 [[package]]
 name = "tensorboard"
 version = "2.11.0"
@@ -4152,4 +4198,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.10"
-content-hash = "…
+content-hash = "89d4861857be881d3c6cb591d17fb98396b8c117b24a8d4ce4b6593ac8048670"
publish/markdown_format.py ADDED
@@ -0,0 +1,210 @@
+import os
+import pandas as pd
+import wandb.apis.public
+import yaml
+
+from collections import defaultdict
+from dataclasses import dataclass, asdict
+from typing import Any, Dict, Iterable, List, NamedTuple, Optional, TypeVar
+from urllib.parse import urlparse
+
+from runner.evaluate import Evaluation
+
+EvaluationRowSelf = TypeVar("EvaluationRowSelf", bound="EvaluationRow")
+
+
+@dataclass
+class EvaluationRow:
+    algo: str
+    env: str
+    seed: Optional[int]
+    reward_mean: float
+    reward_std: float
+    eval_episodes: int
+    best: str
+    wandb_url: str
+
+    @staticmethod
+    def data_frame(rows: List[EvaluationRowSelf]) -> pd.DataFrame:
+        results = defaultdict(list)
+        for r in rows:
+            for k, v in asdict(r).items():
+                results[k].append(v)
+        return pd.DataFrame(results)
+
+
+class EvalTableData(NamedTuple):
+    run: wandb.apis.public.Run
+    evaluation: Evaluation
+
+
+def evaluation_table(table_data: Iterable[EvalTableData]) -> str:
+    best_stats = sorted(
+        [d.evaluation.stats for d in table_data], key=lambda r: r.score, reverse=True
+    )[0]
+    table_data = sorted(table_data, key=lambda d: d.evaluation.config.seed() or 0)
+    rows = [
+        EvaluationRow(
+            config.algo,
+            config.env_id,
+            config.seed(),
+            stats.score.mean,
+            stats.score.std,
+            len(stats),
+            "*" if stats == best_stats else "",
+            f"[wandb]({r.url})",
+        )
+        for (r, (_, stats, config)) in table_data
+    ]
+    df = EvaluationRow.data_frame(rows)
+    return df.to_markdown(index=False)
+
+
+def github_project_link(github_url: str) -> str:
+    return f"[{urlparse(github_url).path}]({github_url})"
+
+
+def header_section(algo: str, env: str, github_url: str, wandb_report_url: str) -> str:
+    algo_caps = algo.upper()
+    lines = [
+        f"# **{algo_caps}** Agent playing **{env}**",
+        f"This is a trained model of a **{algo_caps}** agent playing **{env}** using "
+        f"the {github_project_link(github_url)} repo.",
+        f"All models trained at this commit can be found at {wandb_report_url}.",
+    ]
+    return "\n\n".join(lines)
+
+
+def github_tree_link(github_url: str, commit_hash: Optional[str]) -> str:
+    if not commit_hash:
+        return github_project_link(github_url)
+    return f"[{commit_hash[:7]}]({github_url}/tree/{commit_hash})"
+
+
+def results_section(
+    table_data: List[EvalTableData], algo: str, github_url: str, commit_hash: str
+) -> str:
+    # type: ignore
+    lines = [
+        "## Training Results",
+        f"This model was trained from {len(table_data)} trainings of **{algo.upper()}** "
+        + "agents using different initial seeds. "
+        + f"These agents were trained by checking out "
+        + f"{github_tree_link(github_url, commit_hash)}. "
+        + "The best and last models were kept from each training. "
+        + "This submission has loaded the best models from each training, reevaluates "
+        + "them, and selects the best model from these latest evaluations (mean - std).",
+    ]
+    lines.append(evaluation_table(table_data))
+    return "\n\n".join(lines)
+
+
+def prerequisites_section() -> str:
+    return """
+### Prerequisites: Weights & Biases (WandB)
+Training and benchmarking assumes you have a Weights & Biases project to upload runs to.
+By default training goes to a rl-algo-impls project while benchmarks go to
+rl-algo-impls-benchmarks. During training and benchmarking runs, videos of the best
+models and the model weights are uploaded to WandB.
+
+Before doing anything below, you'll need to create a wandb account and run `wandb
+login`.
+"""
+
+
+def usage_section(github_url: str, run_path: str, commit_hash: str) -> str:
+    return f"""
+## Usage
+{urlparse(github_url).path}: {github_url}
+
+Note: While the model state dictionary and hyperaparameters are saved, the latest
+implementation could be sufficiently different to not be able to reproduce similar
+results. You might need to checkout the commit the agent was trained on:
+{github_tree_link(github_url, commit_hash)}.
+```
+# Downloads the model, sets hyperparameters, and runs agent for 3 episodes
+python enjoy.py --wandb-run-path={run_path}
+```
+
+Setup hasn't been completely worked out yet, so you might be best served by using Google
+Colab starting from the
+[colab_enjoy.ipynb](https://github.com/sgoodfriend/rl-algo-impls/blob/main/colab_enjoy.ipynb)
+notebook.
+"""
+
+
+def training_setion(
+    github_url: str, commit_hash: str, algo: str, env: str, seed: Optional[int]
+) -> str:
+    return f"""
+## Training
+If you want the highest chance to reproduce these results, you'll want to checkout the
+commit the agent was trained on: {github_tree_link(github_url, commit_hash)}. While
+training is deterministic, different hardware will give different results.
+
+```
+python train.py --algo {algo} --env {env} {'--seed ' + str(seed) if seed is not None else ''}
+```
+
+Setup hasn't been completely worked out yet, so you might be best served by using Google
+Colab starting from the
+[colab_train.ipynb](https://github.com/sgoodfriend/rl-algo-impls/blob/main/colab_train.ipynb)
+notebook.
+"""
+
+
+def benchmarking_section(report_url: str) -> str:
+    return f"""
+## Benchmarking (with Lambda Labs instance)
+This and other models from {report_url} were generated by running a script on a Lambda
+Labs instance. In a Lambda Labs instance terminal:
+```
+git clone git@github.com:sgoodfriend/rl-algo-impls.git
+cd rl-algo-impls
+bash ./lambda_labs/setup.sh
+wandb login
+bash ./lambda_labs/benchmark.sh
+```
+
+### Alternative: Google Colab Pro+
+As an alternative,
+[colab_benchmark.ipynb](https://github.com/sgoodfriend/rl-algo-impls/tree/main/benchmarks#:~:text=colab_benchmark.ipynb),
+can be used. However, this requires a Google Colab Pro+ subscription and running across
+4 separate instances because otherwise running all jobs will exceed the 24-hour limit.
+"""
+
+
+def hyperparams_section(run_config: Dict[str, Any]) -> str:
+    return f"""
+## Hyperparameters
+This isn't exactly the format of hyperparams in {os.path.join("hyperparams",
+run_config["algo"] + ".yml")}, but instead the Wandb Run Config. However, it's very
+close and has some additional data:
+```
+{yaml.dump(run_config)}
+```
+"""
+
+
+def model_card_text(
+    algo: str,
+    env: str,
+    github_url: str,
+    commit_hash: str,
+    wandb_report_url: str,
+    table_data: List[EvalTableData],
+    best_eval: EvalTableData,
+) -> str:
+    run, (_, _, config) = best_eval
+    run_path = "/".join(run.path)
+    return "\n\n".join(
+        [
+            header_section(algo, env, github_url, wandb_report_url),
+            results_section(table_data, algo, github_url, commit_hash),
+            prerequisites_section(),
+            usage_section(github_url, run_path, commit_hash),
+            training_setion(github_url, commit_hash, algo, env, config.seed()),
+            benchmarking_section(wandb_report_url),
+            hyperparams_section(run.config),
+        ]
+    )
pyproject.toml CHANGED
@@ -21,6 +21,8 @@ wandb = "^0.13.9"
 conda-lock = "^1.3.0"
 torch-tb-profiler = "^0.4.1"
 jupyter = "^1.0.0"
+tabulate = "^0.9.0"
+huggingface-hub = "^0.12.0"
 
 [build-system]
 requires = ["poetry-core"]
replay.meta.json CHANGED
@@ -1 +1 @@
-{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/…
+{"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers\\nbuilt with clang version 14.0.6\\nconfiguration: --prefix=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl --cc=arm64-apple-darwin20.0.0-clang --cxx=arm64-apple-darwin20.0.0-clang++ --nm=arm64-apple-darwin20.0.0-nm --ar=arm64-apple-darwin20.0.0-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-cross-compile --arch=arm64 --target-os=darwin --cross-prefix=arm64-apple-darwin20.0.0- --host-cc=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/x86_64-apple-darwin13.4.0-clang --enable-neon --enable-gnutls --enable-libmp3lame --enable-libvpx --enable-pthreads --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-libsvtav1 --enable-libxml2 --enable-pic --enable-shared --disable-static --enable-version3 --enable-zlib --pkg-config=/Users/runner/miniforge3/conda-bld/ffmpeg_1671040513231/_build_env/bin/pkg-config\\nlibavutil 57. 28.100 / 57. 28.100\\nlibavcodec 59. 37.100 / 59. 37.100\\nlibavformat 59. 27.100 / 59. 27.100\\nlibavdevice 59. 7.100 / 59. 7.100\\nlibavfilter 8. 44.100 / 8. 44.100\\nlibswscale 6. 7.100 / 6. 7.100\\nlibswresample 4. 7.100 / 4. 7.100\\nlibpostproc 56. 6.100 / 56. 6.100\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "500x500", "-pix_fmt", "rgb24", "-framerate", "30", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "30", "/var/folders/9g/my5557_91xddp6lx00nkzly80000gn/T/tmpz2flad47/ppo-Acrobot-v1/replay.mp4"]}, "episode": {"r": -73.0, "l": 74, "t": 1.297341}}
runner/config.py CHANGED
@@ -59,14 +59,17 @@ class Config:
     def eval_params(self) -> Dict[str, Any]:
         return self.hyperparams.get("eval_params", {})
 
+    @property
+    def algo(self) -> str:
+        return self.args.algo
+
     @property
     def env_id(self) -> str:
         return self.args.env
 
-    @property
-    def model_name(self) -> str:
-        …
-        if self.args.seed is not None:
+    def model_name(self, include_seed: bool = True) -> str:
+        parts = [self.algo, self.env_id]
+        if include_seed and self.args.seed is not None:
             parts.append(f"S{self.args.seed}")
         make_kwargs = self.env_hyperparams.get("make_kwargs", {})
         if make_kwargs:
@@ -81,7 +84,7 @@ class Config:
 
     @property
     def run_name(self) -> str:
-        parts = [self.model_name, self.run_id]
+        parts = [self.model_name(), self.run_id]
         return "-".join(parts)
 
     @property
@@ -97,7 +100,7 @@ class Config:
         best: bool = False,
         extension: str = "",
     ) -> str:
-        return self.model_name + ("-best" if best else "") + extension
+        return self.model_name() + ("-best" if best else "") + extension
 
     def model_dir_path(self, best: bool = False, downloaded: bool = False) -> str:
         return os.path.join(
@@ -123,8 +126,8 @@ class Config:
 
     @property
     def video_prefix(self) -> str:
-        return os.path.join(self.videos_dir, self.model_name)
+        return os.path.join(self.videos_dir, self.model_name())
 
     @property
     def best_videos_dir(self) -> str:
-        return os.path.join(self.videos_dir, f"{self.model_name}-best")
+        return os.path.join(self.videos_dir, f"{self.model_name()}-best")
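Turning model_name from a property into a method lets huggingface_publish.py drop the seed suffix (include_seed=False) so one Hub repo name covers every seed of an algo/env pair. A behavior sketch, assuming the parts list is "-".joined downstream as run_name does (the join itself sits outside the hunks shown here):

```
# Sketch of expected naming; the "-" join is an assumption from run_name.
config.model_name()                    # e.g. "ppo-Acrobot-v1-S4" when seed=4
config.model_name(include_seed=False)  # e.g. "ppo-Acrobot-v1", used as repo name
```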
runner/evaluate.py ADDED
@@ -0,0 +1,103 @@
+import os
+import shutil
+
+from dataclasses import dataclass
+from typing import NamedTuple, Optional
+
+from runner.env import make_eval_env
+from runner.config import Config, RunArgs
+from runner.running_utils import (
+    load_hyperparams,
+    set_seeds,
+    get_device,
+    make_policy,
+)
+from shared.callbacks.eval_callback import evaluate
+from shared.policy.policy import Policy
+from shared.stats import EpisodesStats
+
+
+@dataclass
+class EvalArgs(RunArgs):
+    render: bool = True
+    best: bool = True
+    n_envs: Optional[int] = 1
+    n_episodes: int = 3
+    deterministic_eval: Optional[bool] = None
+    no_print_returns: bool = False
+    wandb_run_path: Optional[str] = None
+
+
+class Evaluation(NamedTuple):
+    policy: Policy
+    stats: EpisodesStats
+    config: Config
+
+
+def evaluate_model(args: EvalArgs, root_dir: str) -> Evaluation:
+    if args.wandb_run_path:
+        import wandb
+
+        api = wandb.Api()
+        run = api.run(args.wandb_run_path)
+        hyperparams = run.config
+
+        args.algo = hyperparams["algo"]
+        args.env = hyperparams["env"]
+        args.seed = hyperparams.get("seed", None)
+        args.use_deterministic_algorithms = hyperparams.get(
+            "use_deterministic_algorithms", True
+        )
+
+        config = Config(args, hyperparams, root_dir)
+        model_path = config.model_dir_path(best=args.best, downloaded=True)
+
+        model_archive_name = config.model_dir_name(best=args.best, extension=".zip")
+        run.file(model_archive_name).download()
+        if os.path.isdir(model_path):
+            shutil.rmtree(model_path)
+        shutil.unpack_archive(model_archive_name, model_path)
+        os.remove(model_archive_name)
+    else:
+        hyperparams = load_hyperparams(args.algo, args.env, root_dir)
+
+        config = Config(args, hyperparams, root_dir)
+        model_path = config.model_dir_path(best=args.best)
+
+    print(args)
+
+    set_seeds(args.seed, args.use_deterministic_algorithms)
+
+    env = make_eval_env(
+        config,
+        override_n_envs=args.n_envs,
+        render=args.render,
+        normalize_load_path=model_path,
+        **config.env_hyperparams,
+    )
+    device = get_device(config.device, env)
+    policy = make_policy(
+        args.algo,
+        env,
+        device,
+        load_path=model_path,
+        **config.policy_hyperparams,
+    ).eval()
+
+    deterministic = (
+        args.deterministic_eval
+        if args.deterministic_eval is not None
+        else config.eval_params.get("deterministic", True)
+    )
+    return Evaluation(
+        policy,
+        evaluate(
+            env,
+            policy,
+            args.n_episodes,
+            render=args.render,
+            deterministic=deterministic,
+            print_returns=not args.no_print_returns,
+        ),
+        config,
+    )
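Since Evaluation is a NamedTuple, callers can unpack it positionally, which is how huggingface_publish.py pulls out the policy, stats, and config of the best run. A minimal usage sketch with a hypothetical run path:

```
# Sketch: values are placeholders; EvalArgs inherits algo/env/seed from RunArgs.
args = EvalArgs(
    algo="ppo",
    env="Acrobot-v1",
    wandb_run_path="sgoodfriend/rl-algo-impls-benchmarks/aaaaaaaa",
)
policy, stats, config = evaluate_model(args, root_dir=".")
print(stats.score)  # aggregate score over the n_episodes evaluated
```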
runner/running_utils.py CHANGED
@@ -40,28 +40,28 @@ POLICIES: Dict[str, Type[Policy]] = {
 HYPERPARAMS_PATH = "hyperparams"
 
 
-def base_parser() -> argparse.ArgumentParser:
+def base_parser(multiple: bool = True) -> argparse.ArgumentParser:
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--algo",
         default="dqn",
         type=str,
         choices=list(ALGOS.keys()),
-        nargs="+",
+        nargs="+" if multiple else 1,
         help="Abbreviation(s) of algorithm(s)",
     )
     parser.add_argument(
         "--env",
         default="CartPole-v1",
         type=str,
-        nargs="+",
+        nargs="+" if multiple else 1,
         help="Name of environment(s) in gym",
     )
     parser.add_argument(
         "--seed",
         default=1,
         type=int,
-        nargs="*",
+        nargs="*" if multiple else "?",
         help="Seeds to run experiment. Unset will do one run with no set seed",
     )
     parser.add_argument(
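Note that with multiple=False, --algo and --env get nargs=1, so argparse still returns one-element lists; that is why enjoy.py unwraps args.algo[0] and args.env[0] after parsing. A standalone sketch of the argparse behavior (not the repo's parser):

```
import argparse

# Demonstrates nargs=1 vs nargs="?" as used when multiple=False.
p = argparse.ArgumentParser()
p.add_argument("--algo", nargs=1)              # always parses to a 1-element list
p.add_argument("--seed", nargs="?", type=int)  # parses to a scalar (or None)
print(p.parse_args(["--algo", "ppo", "--seed", "4"]))
# -> Namespace(algo=['ppo'], seed=4), hence the args.algo[0] unwrapping
```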
shared/callbacks/eval_callback.py CHANGED
@@ -22,7 +22,10 @@ class EvaluateAccumulator(EpisodeAccumulator):
         self.print_returns = print_returns
 
     def on_done(self, ep_idx: int, episode: Episode) -> None:
-        if …
+        if (
+            len(self.completed_episodes_by_env_idx[ep_idx])
+            >= self.goal_episodes_per_env
+        ):
             return
         self.completed_episodes_by_env_idx[ep_idx].append(episode)
         if self.print_returns:
@@ -36,11 +39,14 @@ class EvaluateAccumulator(EpisodeAccumulator):
         return sum(len(ce) for ce in self.completed_episodes_by_env_idx)
 
     @property
-    def episodes(self) -> …
-        return list(itertools.chain(*self.completed_episodes_by_env_idx))
+    def episodes(self) -> List[Episode]:
+        return list(itertools.chain(*self.completed_episodes_by_env_idx))
 
     def is_done(self) -> bool:
-        return all(…
+        return all(
+            len(ce) == self.goal_episodes_per_env
+            for ce in self.completed_episodes_by_env_idx
+        )
 
 
 def evaluate(
@@ -108,7 +114,7 @@ class EvalCallback(Callback):
     def on_step(self, timesteps_elapsed: int = 1) -> bool:
         super().on_step(timesteps_elapsed)
         if self.timesteps_elapsed // self.step_freq >= len(self.stats):
-            …
+            sync_vec_normalize(self.policy.vec_normalize, self.env)
             self.evaluate()
         return True
 
@@ -134,10 +140,12 @@
             assert self.best_model_path
             self.policy.save(self.best_model_path)
             print("Saved best model")
-            self.best.write_to_tensorboard(…
+            self.best.write_to_tensorboard(
+                self.tb_writer, "best_eval", self.timesteps_elapsed
+            )
         if strictly_better and self.record_best_videos:
             assert self.video_env and self.best_video_dir
-            …
+            sync_vec_normalize(self.policy.vec_normalize, self.video_env)
             self.best_video_base_path = os.path.join(
                 self.best_video_dir, str(self.timesteps_elapsed)
             )
@@ -159,16 +167,15 @@
 
         return eval_stat
 
-…
-        eval_env_wrapper.…
-…
-        eval_env_wrapper = eval_env_wrapper.venv
+
+
+def sync_vec_normalize(
+    origin_vec_normalize: Optional[VecNormalize], destination_env: VecEnv
+) -> None:
+    if origin_vec_normalize is not None:
+        eval_env_wrapper = destination_env
+        while isinstance(eval_env_wrapper, VecEnvWrapper):
+            if isinstance(eval_env_wrapper, VecNormalize):
+                if hasattr(origin_vec_normalize, "obs_rms"):
+                    eval_env_wrapper.obs_rms = deepcopy(origin_vec_normalize.obs_rms)
+                eval_env_wrapper.ret_rms = deepcopy(origin_vec_normalize.ret_rms)
+            eval_env_wrapper = eval_env_wrapper.venv
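sync_vec_normalize copies the training-side VecNormalize running statistics (obs_rms/ret_rms) into whichever VecNormalize sits in the destination env's wrapper chain, so evaluation and best-video rollouts see the same observation and reward scaling the policy trained under. A usage sketch, assuming policy.vec_normalize holds the training VecNormalize as the callback code implies:

```
# Sketch: keep eval normalization in lockstep with training before evaluating.
sync_vec_normalize(policy.vec_normalize, eval_env)
stats = evaluate(eval_env, policy, 10)  # 10 episodes with synced obs/ret scaling
```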
shared/policy/policy.py CHANGED
@@ -54,7 +54,9 @@ class Policy(nn.Module, ABC):
     @abstractmethod
     def load(self, path: str) -> None:
         # VecNormalize load occurs in env.py
-        self.load_state_dict(…
+        self.load_state_dict(
+            torch.load(os.path.join(path, MODEL_FILENAME), map_location=self.device)
+        )
 
     def reset_noise(self) -> None:
         pass
shared/stats.py CHANGED
@@ -94,6 +94,9 @@ class EpisodesStats:
             f"Length: {self.length}"
         )
 
+    def __len__(self) -> int:
+        return len(self.episodes)
+
     def _asdict(self) -> dict:
         return {
             "n_episodes": len(self.episodes),
@@ -147,27 +150,3 @@
 
     def stats(self) -> EpisodesStats:
         return EpisodesStats(self.episodes)
-
-
-class RolloutStats(EpisodeAccumulator):
-    def __init__(self, num_envs: int, print_n_episodes: int, tb_writer: SummaryWriter):
-        super().__init__(num_envs)
-        self.print_n_episodes = print_n_episodes
-        self.epochs: List[EpisodesStats] = []
-        self.tb_writer = tb_writer
-
-    def on_done(self, ep_idx: int, episode: Episode) -> None:
-        if (
-            self.print_n_episodes >= 0
-            and len(self.episodes) % self.print_n_episodes == 0
-        ):
-            sample = self.episodes[-self.print_n_episodes :]
-            epoch = EpisodesStats(sample)
-            self.epochs.append(epoch)
-            total_steps = np.sum([e.length for e in self.episodes])
-            print(
-                f"Episode: {len(self.episodes)} | "
-                f"{epoch} | "
-                f"Total Steps: {total_steps}"
-            )
-            epoch.write_to_tensorboard(self.tb_writer, "train", global_step=total_steps)
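Adding __len__ lets EpisodesStats report its episode count directly; publish/markdown_format.py relies on it via len(stats) for the eval_episodes column. A one-line sketch:

```
stats = EpisodesStats(episodes)      # sketch; episodes is a list of Episode objects
assert len(stats) == len(episodes)   # __len__ delegates to the underlying list
```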