add RL Notebook (6f246ad3) · Commits · einfuehrungki / lab-students-materials

ReinforcementLearning/cliff.ipynb

0 → 100644

+102 −0

File added.

Preview size limit exceeded, changes collapsed.

0 → 100644

+794 −0

File added.

Preview size limit exceeded, changes collapsed.

0 → 100644

+55 −0

Original line number	Diff line number	Diff line
		%% Cell type:code id: tags:

		``` python
		import gymnasium as gym
		import matplotlib.pyplot as plt
		from IPython.display import clear_output

		# Random-policy demo: take 100 uniformly sampled actions in CliffWalking
		# and redraw the rendered frame after every step.
		env = gym.make("CliffWalking-v0", render_mode="rgb_array")
		env.action_space.seed(42)  # make the sampled action sequence reproducible

		observation, info = env.reset(seed=42)

		for _ in range(100):
		    action = env.action_space.sample()
		    observation, reward, terminated, truncated, info = env.step(action)

		    # Start a new episode whenever the current one ends, whether it
		    # terminated or was truncated. Gymnasium's reset() always returns
		    # (observation, info); the old `return_info` keyword no longer
		    # exists and passing it raises a TypeError.
		    if terminated or truncated:
		        observation, info = env.reset()

		    # Replace the previous frame in the cell output with the current one.
		    clear_output(wait=True)
		    plt.imshow(env.render())
		    plt.show()

		# Release the renderer resources held by the environment.
		env.close()
		```

		%% Cell type:code id: tags:

		``` python
		from cliff import CliffAgent
		from tqdm import tqdm

		# --- Hyperparameters for the training run ---
		learning_rate = 0.01
		n_episodes = 1_000
		start_epsilon = 1.0
		final_epsilon = 0.0
		discount_factor = 0.95
		max_steps_per_epoch = 1_000
		show_env = False

		# Constructor arguments for the agent. `update_algorithm` must be either
		# "q_learning" or "sarsa"; change the value below to switch algorithms.
		agent_kwargs = dict(
		    show_env=show_env,
		    learning_rate=learning_rate,
		    initial_epsilon=start_epsilon,
		    final_epsilon=final_epsilon,
		    discount_factor=discount_factor,
		    max_steps_per_epoch=max_steps_per_epoch,
		    update_algorithm="q_learning",
		)
		agent = CliffAgent(**agent_kwargs)

		# Optional: wrap the environment to collect per-episode statistics.
		# agent.env = gym.wrappers.RecordEpisodeStatistics(agent.env)

		# Run one epoch per episode with a progress bar. The total episode count
		# is passed as the epsilon-decay horizon (presumably epsilon goes from
		# start_epsilon to final_epsilon over the whole run -- confirm in cliff.py).
		progress = tqdm(range(n_episodes))
		for episode in progress:
		    agent.perform_epoch(eps_decay_over_n_epochs=n_episodes)

		agent.vis_results(n_episodes)
		```