# Source code for momba.gym (Sphinx viewcode page artifact)
# -*- coding:utf-8 -*-
#
# Copyright (C) 2019-2021, Saarland University
# Copyright (C) 2019-2021, Maximilian Köhl <koehl@cs.uni-saarland.de>
#
# type: ignore
"""
A formal methods based toolbox for reinforcement learning.
"""
from __future__ import annotations
import typing as t
from .. import model, engine
from . import abstract, env, checker, generic
# [docs]
def create_generic_env(
    network: model.Network,
    controlled_instance: model.Instance,
    property_name: str,
    *,
    # PEP 484: a `None` default requires an explicit Optional annotation.
    parameters: t.Optional[engine.Parameters] = None,
    rewards: generic.Rewards = generic.DEFAULT_REWARD_STRUCTURE,
    actions: generic.Actions = generic.Actions.EDGE_BY_INDEX,
    observations: generic.Observations = generic.Observations.GLOBAL_ONLY,
    renderer: t.Optional[env.Renderer] = None,
) -> env.MombaEnv:
    """
    Constructs a generic training environment from a JANI model based on the
    provided options.

    Arguments:
        network: A JANI automaton network.
        controlled_instance: An instance of an automaton in the provided network. The
            decision-making agent trained on the resulting environment is assumed to act
            by resolving the non-determinism in this automaton.
        property_name: The name of a reach-avoid JANI property (specified as part of the
            JANI model the network originates from) for which the agent should be trained.
        parameters: Allows defining values for parameters of the JANI model.
        rewards: Specifies the reward structure used for training.
        actions: Specifies the action space for the environment.
        observations: Specifies the observation space for the environment.
        renderer: Is an optional renderer for the OpenAI Gym API.

    Returns:
        A :class:`env.MombaEnv` wrapping a generic explorer over a
        discrete-time engine for *network*.
    """
    # Build a discrete-time state-space explorer for the network, then wrap
    # it in a generic explorer that resolves the controlled instance's
    # non-determinism according to the chosen action/observation/reward setup.
    explorer = generic.GenericExplorer.create(
        engine.Explorer.new_discrete_time(network, parameters=parameters),
        controlled_instance,
        property_name,
        rewards=rewards,
        actions=actions,
        observations=observations,
    )
    return env.MombaEnv(explorer, renderer=renderer)
# Names exported by `from momba.gym import *`: the public submodules plus
# the convenience environment constructor defined above.
__all__ = [
    "abstract",
    "env",
    "checker",
    "create_generic_env",
]