Source code for momba.gym.env
# -*- coding:utf-8 -*-
#
# Copyright (C) 2019-2021, Saarland University
# Copyright (C) 2019-2021, Maximilian Köhl <koehl@cs.uni-saarland.de>
#
# type: ignore
from __future__ import annotations
import typing as t
import gymnasium as gym # type: ignore
import numpy
from gymnasium import spaces # type: ignore
from gymnasium.error import UnsupportedMode # type: ignore
from . import abstract
class Renderer(t.Protocol):
    """Protocol for renderer objects that can be attached to a :class:`MombaEnv`."""

    def render(
        self,
        state: abstract.StateVector,
        mode: str,
    ) -> None:
        """
        Render the given state vector using the requested mode.

        The protocol's own body provides no rendering and always raises
        :class:`NotImplementedError`.
        """
        raise NotImplementedError()
[docs]
class MombaEnv(gym.Env):  # type: ignore
    """
    Gym-style environment backed by an :class:`abstract.Explorer`.

    Every operation is delegated to the wrapped explorer; state vectors
    obtained from it are converted to :mod:`numpy` arrays before being
    handed to the caller.

    Arguments:
        explorer: The :class:`abstract.Explorer` to use.
        renderer: An optional renderer used by :meth:`render`.
    """

    explorer: abstract.Explorer

    action_space: gym.Space
    observation_space: gym.Space

    renderer: t.Optional[Renderer]

    def __init__(
        self, explorer: abstract.Explorer, *, renderer: t.Optional[Renderer] = None
    ) -> None:
        super().__init__()
        self.explorer = explorer
        # One discrete action per action reported by the explorer.
        action_count = self.explorer.num_actions
        self.action_space = spaces.Discrete(action_count)
        # Observations are unbounded vectors with one entry per feature.
        feature_shape = (self.explorer.num_features,)
        self.observation_space = spaces.Box(
            low=-numpy.inf, high=numpy.inf, shape=feature_shape
        )
        self.renderer = renderer

    @property
    def available_actions(self) -> numpy.ndarray:  # type: ignore
        """The explorer's currently available actions as a numpy array."""
        actions = self.explorer.available_actions
        return numpy.array(actions)

    @property
    def available_transitions(self) -> t.Sequence[abstract.Transition]:
        """The transitions the explorer currently reports as available."""
        return self.explorer.available_transitions

    @property
    def is_done(self) -> bool:
        """Whether the explorer reports that it has terminated."""
        return self.explorer.has_terminated

    @property
    def state_vector(self) -> numpy.ndarray:  # type: ignore
        """The explorer's current state vector as a numpy array."""
        vector = self.explorer.state_vector
        return numpy.array(vector)

    def fork(self) -> MombaEnv:
        """
        Forks the environment.

        The returned environment wraps a fork of the explorer and shares
        the renderer of this environment.
        """
        forked_explorer = self.explorer.fork()
        return MombaEnv(forked_explorer, renderer=self.renderer)

    def step(self, action: int) -> t.Tuple[numpy.ndarray, float, bool, t.Any]:  # type: ignore
        """
        Takes a decision in response to the last observation.

        Returns a 4-tuple consisting of the new state vector, the reward
        returned by the explorer, the explorer's termination flag, and an
        empty info dictionary.
        """
        # The explorer has to advance before the new state is read.
        reward = self.explorer.step(action)
        observation = numpy.array(self.explorer.state_vector)
        done = self.explorer.has_terminated
        return observation, reward, done, {}

    def reset(self) -> numpy.ndarray:  # type: ignore
        """
        Resets the explorer and returns the state vector observed
        afterwards as a numpy array.
        """
        self.explorer.reset()
        observation = numpy.array(self.explorer.state_vector)
        return observation

    def render(self, mode: str = "human") -> None:
        """
        Renders the current state using the supplied renderer.

        Raises :class:`UnsupportedMode` if the environment has been
        constructed without a renderer.
        """
        renderer = self.renderer
        if renderer is None:
            raise UnsupportedMode("`MombaGym` does not support rendering")
        renderer.render(self.explorer.state_vector, mode)