-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhmm.py
41 lines (30 loc) · 929 Bytes
/
hmm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
class State:
    """One state of the MDP: a pair of values ``(x, y)``.

    The script in this file builds one ``State`` for every ``(x, y)`` with
    both values ranging over the allowed car counts, so ``x`` and ``y`` are
    used as the number of cars at the first and second location.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        # Instances are stored in an object array; a readable repr makes
        # printing that array useful while debugging.
        return f"{type(self).__name__}(x={self.x!r}, y={self.y!r})"
# --- Problem constants -------------------------------------------------------
# NOTE(review): meanings below are taken from the names and the surrounding
# comments; none of these four reward/demand constants are used yet.
creditForRenting = 10  # presumably the reward for each car rented out
costToMove = 2  # presumably the cost of moving one car between locations

# The number of cars requested and returned at each location are Poisson
# random variables: P(n) = (l^n / n!) * e^(-l).
# NOTE(review): the values below are presumably the Poisson means `l`.
rent1 = 3
rent2 = 4
return1 = 3
return2 = 2

maxCars = 20  # no more than 20 cars at each location
maxMove = 5  # at most 5 cars (net) may be moved per step
gamma = 0.9  # discount rate

# --- MDP definition ----------------------------------------------------------
# Continuing finite MDP: time steps are days, the state is the number of cars
# at each location at the end of the day, and actions are the net numbers of
# cars moved (max 5).
actions = np.arange(-maxMove, maxMove + 1)

# Every (x, y) combination with 0..maxCars cars at each location, x-major.
states = np.array(
    [State(x, y) for x in range(maxCars + 1) for y in range(maxCars + 1)]
)

# One entry per state, all initialised to zero.  (The original wrapped the
# `sv` allocation in a loop over `rewards` that did nothing useful; a single
# allocation leaves the same result.)
rewards = np.zeros(states.shape)
sv = np.zeros(states.shape)

# One entry per (state, action) pair.
probs = np.zeros(states.shape + (len(actions),))

print(actions)

# TODO: the sweep over states is not implemented yet.  The original sketch was:
# for state in states:
#     oldV = sv
# NOTE(review): `oldV = sv` would alias the array rather than snapshot it;
# use `sv.copy()` if the old values must be kept for comparison.