Commit 6f246ad3 authored by Jonas Boysen
Browse files

add RL Notebook

parent 0a0536c3
Loading
Loading
Loading
Loading
+102 −0

File added.

Preview size limit exceeded, changes collapsed.

+794 −0

File added.

Preview size limit exceeded, changes collapsed.

+55 −0
Original line number Diff line number Diff line
%% Cell type:code id: tags:

``` python
import gymnasium as gym
import matplotlib.pyplot as plt
from IPython.display import clear_output

# Build the CliffWalking environment with off-screen rendering so that each
# frame can be drawn inline with matplotlib.
env = gym.make("CliffWalking-v0", render_mode="rgb_array")
env.action_space.seed(42)  # seed the action sampler so the random rollout is repeatable

observation, info = env.reset(seed=42)

# Take 100 uniformly sampled actions, redrawing the current frame after each step.
for _ in range(100):
    observation, reward, terminated, truncated, info = env.step(env.action_space.sample())

    # An episode is over on termination OR truncation; start a new one in
    # either case. gymnasium's reset() always returns (observation, info) and
    # takes only `seed`/`options`, so the legacy `return_info` keyword is dropped.
    if terminated or truncated:
        observation, info = env.reset()

    # Replace the previous frame instead of stacking a new figure per step.
    clear_output(wait=True)
    plt.imshow(env.render())
    plt.show()

# Release the environment's rendering resources once the rollout is done.
env.close()
```

%% Cell type:code id: tags:

``` python
from cliff import CliffAgent
from tqdm import tqdm

# Run sizes.
n_episodes = 1_000
max_steps_per_epoch = 1_000

# Learning hyperparameters handed to the agent.
learning_rate = 0.01
discount_factor = 0.95

# Exploration schedule endpoints (presumably epsilon-greedy; confirm in CliffAgent).
start_epsilon = 1.0
final_epsilon = 0.0

# Forwarded to the agent unchanged; presumably toggles rendering while training.
show_env = False

# update_algorithm is either "q_learning" or "sarsa".
agent = CliffAgent(
    update_algorithm="q_learning",
    learning_rate=learning_rate,
    discount_factor=discount_factor,
    initial_epsilon=start_epsilon,
    final_epsilon=final_epsilon,
    max_steps_per_epoch=max_steps_per_epoch,
    show_env=show_env,
)

# One perform_epoch call per episode; the episode count is also passed as
# eps_decay_over_n_epochs.
for episode in tqdm(range(n_episodes)):
    agent.perform_epoch(eps_decay_over_n_epochs=n_episodes)

agent.vis_results(n_episodes)
```