Hyperparameter tuning and training optimisation using RLlib, Tune and Neptune.
This script implements a basic Logger for Tune that sends training data to neptune.ai.
You must have a .neptune-key file containing a valid API key in the same directory as the Python script.
You must replace the project_qualified_name variable at line 43 with your own value.
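A minimal sketch of how the .neptune-key file might be loaded; the helper name read_neptune_key is illustrative and not part of the script itself:

```python
from pathlib import Path


def read_neptune_key(path: str = ".neptune-key") -> str:
    """Read the neptune.ai API token from a local file.

    The file is expected to contain only the token, possibly
    followed by trailing whitespace or a newline.
    """
    key = Path(path).read_text().strip()
    if not key:
        raise ValueError(f"{path} exists but is empty")
    return key
```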
Dependencies:
pip install ray "ray[rllib]" "ray[tune]" neptune-client
Command line usage:
python3.8 main.py [--training-iterations={unsigned int}]
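The --training-iterations flag could be parsed with argparse along these lines; the default of 10 iterations is an assumption for illustration, not a value taken from the script:

```python
import argparse


def parse_args(argv=None):
    """Parse the script's command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--training-iterations",
        type=int,
        default=10,  # assumed default, not taken from the script
        help="number of training iterations per trial",
    )
    return parser.parse_args(argv)
```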
Basic RLlib CartPole-v0 experiment, distributed across four trials with different learning rates ([1., 0.1, 0.01, 0.001]) using tune.grid_search().
TUNE_CONFIG = {
"env": "CartPole-v0",
"lr": tune.grid_search([1., 0.1, 0.01, 0.001]),
"log_level": "ERROR",
}
The optimal learning rate was found to be 0.001.
Below are graphs depicting the evolution of the reward.
We can see that trial 18db8_00000 was evicted at iteration 4 by the ASHAScheduler due to poor results.
In a similar experiment, the results were as follows:
+-----------------------------+------------+-------+-------+--------+------------------+-------+----------+
| Trial name | status | loc | lr | iter | total time (s) | ts | reward |
|-----------------------------+------------+-------+-------+--------+------------------+-------+----------|
| PPO_CartPole-v0_708c7_00000 | TERMINATED | | 1 | 1 | 10.398 | 4000 | 22.3296 |
| PPO_CartPole-v0_708c7_00001 | TERMINATED | | 0.1 | 10 | 62.7582 | 40000 | 14.9813 |
| PPO_CartPole-v0_708c7_00002 | TERMINATED | | 0.01 | 1 | 8.89217 | 4000 | 22.0552 |
| PPO_CartPole-v0_708c7_00003 | TERMINATED | | 0.001 | 10 | 47.4788 | 40000 | 185.32 |
+-----------------------------+------------+-------+-------+--------+------------------+-------+----------+