import os
import random
import time

import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3.common.callbacks import BaseCallback

import matchings


class CourseRecEnv(gym.Env):
    # The CourseRecEnv class is a gym environment that simulates the recommendation of courses to learners. It is used to train the Reinforce model.
    def __init__(self, dataset, threshold=0.8, k=3):
        self.dataset = dataset
        self.nb_skills = len(dataset.skills)
        self.mastery_levels = [
            elem for elem in list(dataset.mastery_levels.values()) if elem > 0
        ]
        self.max_level = max(self.mastery_levels)
        self.nb_courses = len(dataset.courses)
        self.min_skills = min([len(learner) for learner in dataset.learners])
        self.max_skills = max([len(learner) for learner in dataset.learners])
        self.threshold = threshold
        self.k = k
        # The observation space is a vector of length nb_skills that represents the learner's skills
        self.observation_space = gym.spaces.Box(
            low=0, high=self.max_level, shape=(self.nb_skills,), dtype=np.int32
        )
        # The action space is a discrete space of size nb_courses that represents the courses to be recommended
        self.action_space = gym.spaces.Discrete(self.nb_courses)
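
    # Note (sketch, not in the original file): observations are dense vectors of
    # per-skill mastery levels (0 meaning the skill is absent), and actions are
    # course indices, so a policy trained on this environment maps a learner's
    # skill profile to a single recommended course.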

    def _get_obs(self):
        """Method required by the gym environment. It returns the current observation of the environment.

        Returns:
            np.array: the current observation of the environment, that is the learner's skills
        """
        return self._agent_skills

    def _get_info(self):
        """Method required by the gym environment. It returns the current info of the environment.

        Returns:
            dict: the current info of the environment, that is the number of applicable jobs
        """
        learner = self.obs_to_learner()
        return {
            "nb_applicable_jobs": self.dataset.get_nb_applicable_jobs(
                learner, threshold=self.threshold
            )
        }

    def obs_to_learner(self):
        """Converts the observation from a numpy array to a list of skills and levels.

        Returns:
            list: the list of skills and levels of the learner
        """
        learner = []
        for skill, level in enumerate(self._agent_skills):
            if level > 0:
                learner.append((skill, level))
        return learner

    def learner_to_obs(self, learner):
        """Converts the list of skills and levels to a numpy array.

        Args:
            learner (list): list of skills and levels of the learner

        Returns:
            np.array: the observation of the environment, that is the learner's skills
        """
        obs = np.zeros(self.nb_skills, dtype=np.int32)
        for skill, level in learner:
            obs[skill] = level
        return obs
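
    # Illustrative round trip (example values, not from the original file): with
    # nb_skills = 4, learner_to_obs([(0, 2), (3, 1)]) gives array([2, 0, 0, 1]),
    # and obs_to_learner() on that observation recovers [(0, 2), (3, 1)].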

    def get_random_learner(self):
        """Creates a random learner with a random number of skills and levels. This method is used to initialize the environment.

        Returns:
            np.array: the initial observation of the environment, that is the learner's initial skills
        """
        # Choose the number of skills the agent has randomly
        n_skills = random.randint(self.min_skills, self.max_skills)
        initial_skills = np.zeros(self.nb_skills, dtype=np.int32)
        skills = np.random.choice(self.nb_skills, size=n_skills, replace=False)
        levels = np.random.choice(
            self.mastery_levels,
            n_skills,
            replace=True,
        )
        for skill, level in zip(skills, levels):
            initial_skills[skill] = level
        return initial_skills

    def reset(self, seed=None, learner=None):
        """Method required by the gym environment. It resets the environment to its initial state.

        Args:
            seed (int, optional): Random seed. Defaults to None.
            learner (list, optional): Learner to initialize the environment with; if None, the environment is initialized with a random learner. Defaults to None.

        Returns:
            tuple: the initial observation and the info dictionary
        """
        # We need the following line to seed self.np_random
        super().reset(seed=seed)
        if learner is not None:
            self._agent_skills = self.learner_to_obs(learner)
        else:
            self._agent_skills = self.get_random_learner()
        self.nb_recommendations = 0
        observation = self._get_obs()
        info = self._get_info()
        return observation, info

    def step(self, action):
        """Method required by the gym environment. It performs the action in the environment and returns the new observation, the reward, whether the episode is terminated and additional information.

        Args:
            action (int): the course to be recommended

        Returns:
            tuple: the new observation, the reward, whether the episode is terminated, whether it is truncated (always False here), and additional information
        """
        # Update the agent's skills with the course provided_skills
        course = self.dataset.courses[action]
        learner = self.obs_to_learner()
        required_matching = matchings.learner_course_required_matching(learner, course)
        provided_matching = matchings.learner_course_provided_matching(learner, course)
        # The recommendation is rejected if the learner does not sufficiently match the
        # course prerequisites, or already masters everything the course provides;
        # the episode then terminates with a penalty of -1.
        if required_matching < self.threshold or provided_matching >= 1.0:
            observation = self._get_obs()
            reward = -1
            terminated = True
            info = self._get_info()
            return observation, reward, terminated, False, info
        for skill, level in course[1]:  # the course's provided skills
            self._agent_skills[skill] = max(self._agent_skills[skill], level)
        observation = self._get_obs()
        info = self._get_info()
        reward = info["nb_applicable_jobs"]
        self.nb_recommendations += 1
        terminated = self.nb_recommendations == self.k
        return observation, reward, terminated, False, info
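
    # Illustrative episode (example numbers, not from the original file): with k = 3
    # and threshold = 0.8, an episode lasts at most 3 accepted recommendations, each
    # rewarded with the number of jobs the updated learner can then apply to, or ends
    # immediately with reward -1 as soon as a recommended course is rejected.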


class EvaluateCallback(BaseCallback):
    # The EvaluateCallback class is a callback that evaluates the model at regular intervals during the training.
    def __init__(self, eval_env, eval_freq, all_results_filename, verbose=1):
        super(EvaluateCallback, self).__init__(verbose)
        self.eval_env = eval_env
        self.eval_freq = eval_freq
        self.all_results_filename = all_results_filename
        self.mode = "w"  # overwrite the results file on the first evaluation, append afterwards

    def _on_step(self):
        """Method required by the callback. It is called at each step of the training. It evaluates the model every eval_freq steps.

        Returns:
            bool: Always returns True to continue training
        """
        if self.n_calls % self.eval_freq == 0:
            time_start = time.time()
            avg_jobs = 0
            for learner in self.eval_env.dataset.learners:
                self.eval_env.reset(learner=learner)
                done = False
                tmp_avg_jobs = self.eval_env._get_info()["nb_applicable_jobs"]
                while not done:
                    obs = self.eval_env._get_obs()
                    action, _state = self.model.predict(obs, deterministic=True)
                    obs, reward, done, _, info = self.eval_env.step(action)
                    if reward != -1:
                        tmp_avg_jobs = reward
                avg_jobs += tmp_avg_jobs
            time_end = time.time()
            print(
                f"Iteration {self.n_calls}. Average jobs: {avg_jobs / len(self.eval_env.dataset.learners)} Time: {time_end - time_start}"
            )
            with open(
                os.path.join(
                    self.eval_env.dataset.config["results_path"],
                    self.all_results_filename,
                ),
                self.mode,
            ) as f:
                f.write(
                    str(self.n_calls)
                    + " "
                    + str(avg_jobs / len(self.eval_env.dataset.learners))
                    + " "
                    + str(time_end - time_start)
                    + "\n"
                )
            if self.mode == "w":
                self.mode = "a"
        return True  # Return True to continue training
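

# --- Usage sketch (illustrative, not part of the original module) -------------------
# Shows one way CourseRecEnv and EvaluateCallback could be wired together with a
# stable-baselines3 agent. PPO is used here purely as an example; the project's own
# training script may use a different algorithm. The `dataset` argument is assumed to
# be the project's dataset object, i.e. anything exposing the attributes used above
# (skills, mastery_levels, courses, learners, get_nb_applicable_jobs, and
# config["results_path"]); the filename and hyperparameters below are placeholders.
def train_course_recommender(dataset, total_timesteps=100_000):
    from stable_baselines3 import PPO  # imported here to keep the sketch optional

    # Separate environments for training and for the periodic evaluation callback.
    train_env = CourseRecEnv(dataset, threshold=0.8, k=3)
    eval_env = CourseRecEnv(dataset, threshold=0.8, k=3)
    callback = EvaluateCallback(
        eval_env, eval_freq=1_000, all_results_filename="all_results_example.txt"
    )
    model = PPO("MlpPolicy", train_env, verbose=1)
    model.learn(total_timesteps=total_timesteps, callback=callback)
    return model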