-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmdp.py
103 lines (70 loc) · 1.56 KB
/
mdp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
class MDP(object):
    """Thin wrapper around an environment object.

    The wrapped object must provide ``reset()`` and ``step(action)``.
    The state returned by the most recent ``reset()`` is kept in
    ``self._s``.
    """

    def __init__(self, env):
        """Store the environment and reset it to get the initial state.

        Parameters
        ----------
        env : object
            Environment exposing ``reset()`` and ``step(action)``.
        """
        self._env = env
        # @todo: make this flexible
        self._s = env.reset()

    def step(self, a):
        """Forward an action to the wrapped environment.

        Parameters
        ----------
        a : action
            Passed unchanged to ``self._env.step``.

        Returns
        -------
        Whatever ``self._env.step(a)`` returns, unmodified.
        """
        # Use the stored environment; a bare ``env`` is not defined here.
        # NOTE: ``self._s`` is not updated because the shape of the value
        # returned by the environment's ``step`` is not known at this level.
        return self._env.step(a)

    def reset(self):
        """Reset the wrapped environment and remember the new state.

        Returns
        -------
        The state returned by ``self._env.reset()``.
        """
        s0 = self._env.reset()
        self._s = s0
        return s0
class MDPR(MDP):
    """MDP wrapper that replaces the environment reward with ``R(s, a)``.

    Assumes a simulator (an OpenAI-gym-style environment) is available
    whose ``step`` returns a 4-tuple ``(next_state, reward, done, info)``.
    """

    def __init__(self, env, T, R):
        """Store the environment, transition model and reward function.

        Parameters
        ----------
        env : object
            Environment exposing ``reset()`` and ``step(action)``.
        T : object
            Stored as ``self._T``; not used by this class itself
            (presumably a transition model -- TODO confirm with callers).
        R : callable
            Reward function called as ``R(s, a)`` in ``step``.
        """
        # The base initializer stores ``env`` and resets it to set ``self._s``.
        super(MDPR, self).__init__(env)
        self._T = T
        self._R = R

    def step(self, a):
        """Advance the environment and substitute the reward with ``R``.

        Parameters
        ----------
        a : action
            Passed unchanged to ``self._env.step``.

        Returns
        -------
        tuple
            ``(s_next, self._R(s, a), done, info)`` where ``s`` is the
            state the wrapper held before this step.
        """
        # The reward is evaluated on the state the action was taken in.
        s = self._s
        # The environment's own reward is discarded on purpose.
        s_next, _env_reward, done, info = self._env.step(a)
        reward = self._R(s, a)
        # Track the new state so the next call sees the correct ``s``.
        self._s = s_next
        return s_next, reward, done, info

    def reset(self):
        """Reset the wrapped environment and remember the new state.

        Returns
        -------
        The state returned by ``self._env.reset()``.
        """
        s0 = self._env.reset()
        self._s = s0
        return s0