-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
90 lines (67 loc) · 2.95 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pandas as pd
from sklearn.linear_model import LinearRegression, RidgeCV
from datetime import *
# Words looked up in each recipe's text: getRecipesDF adds one boolean column
# per entry, telling whether that word occurs in the recipe.
supply_words = ["pan", "rasp", "kom"]
import pandas as pd
def _tokenize_recipe(text):
    """Split ``text`` on whitespace; return lower-cased, alphanumeric-only tokens."""
    return [
        "".join(char for char in token if char.isalnum()).lower()
        for token in text.split()
    ]


def getRecipesDF(path="data/lunch_recipes.csv", words=None):
    """Load the lunch recipes and flag which supply words each recipe mentions.

    Args:
        path: Location of the recipes CSV (anything ``pandas.read_csv``
            accepts). The file must provide the columns ``date``
            (``YYYY-MM-DD``), ``recipe``, ``servings``, ``url`` and ``dish``.
        words: Words to look for in the recipe text. Defaults to the
            module-level ``supply_words``.

    Returns:
        A DataFrame holding the parsed ``date`` column plus one boolean column
        per word, which is True when the word occurs as a whole token in the
        recipe. Matching ignores case and punctuation. The ``servings``,
        ``recipe``, ``url`` and ``dish`` columns are dropped.
    """
    if words is None:
        words = supply_words

    df = pd.read_csv(path)

    # Tokenize every recipe once instead of once per searched word; sets make
    # the membership test below O(1).
    token_sets = [set(_tokenize_recipe(text)) for text in df["recipe"]]
    for word in words:
        df[word] = [word in tokens for tokens in token_sets]

    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    return df.drop(columns=["servings", "recipe", "url", "dish"])
def attendance_sheet_uitlezen(path="../clean_code/data/key_tag_logs.csv"):
    """Read the key-tag log and mark, per person and date, presence over lunch.

    A person counts as present on a date when the log contains, for that
    person on that date, at least one "check in" strictly before 12:00:00 and
    at least one "check out" strictly after 12:00:00.

    Args:
        path: Location of the key-tag log CSV (anything ``pandas.read_csv``
            accepts). The file must provide the columns ``name``, ``event``
            and ``timestamp`` (``YYYY-MM-DD HH:MM:SS``).

    Returns:
        A DataFrame with one row per distinct date in the log: a ``date``
        column followed by one column per name (in order of first appearance)
        holding 1 when that person was present over lunch and 0 otherwise.
        Rows keep the index of the first log entry for their date.
    """
    log = pd.read_csv(path)

    stamps = pd.to_datetime(log["timestamp"], format="%Y-%m-%d %H:%M:%S")
    log["date"] = stamps.dt.normalize()
    time_of_day = stamps - log["date"]
    noon = pd.Timedelta(hours=12)

    # Classify every log row once, rather than re-filtering the whole log for
    # each (name, date) combination.
    checked_in_before_noon = (log["event"] == "check in") & (time_of_day < noon)
    checked_out_after_noon = (log["event"] == "check out") & (time_of_day > noon)

    result = log[["date"]].drop_duplicates().copy()
    for name in log["name"].unique():
        of_person = log["name"] == name
        arrival_dates = set(log.loc[of_person & checked_in_before_noon, "date"])
        departure_dates = set(log.loc[of_person & checked_out_after_noon, "date"])
        lunch_dates = list(arrival_dates & departure_dates)
        result[f"{name}"] = result["date"].isin(lunch_dates).astype(int)
    return result
def train_model(alpha=0.1):
    """Fit a linear model that explains dishwasher runs from recipes and attendance.

    The recipe features and the per-person lunch attendance are outer-joined
    on ``date``, merged with the dishwasher log and missing values are filled
    with 0. A ``LinearRegression`` without intercept and with non-negative
    coefficients is then fitted on every column except ``date``, using
    ``dishwashers`` as the target.

    Args:
        alpha: Not used by the current implementation; kept so existing
            callers that pass it keep working.

    Returns:
        A dict mapping each feature name to its fitted coefficient, rounded
        to three decimals.
    """
    recipes = getRecipesDF()
    attendance = attendance_sheet_uitlezen()

    dishwasher_log = pd.read_csv("data/dishwasher_log.csv")
    dishwasher_log["date"] = dishwasher_log.date.apply(
        lambda raw: datetime.strptime(raw, "%Y-%m-%d")
    )

    combined = recipes.merge(attendance, on="date", how="outer")
    combined = combined.merge(dishwasher_log).fillna(0)

    target = combined["dishwashers"]
    features = combined.drop(columns=["dishwashers", "date"])

    model = LinearRegression(fit_intercept=False, positive=True)
    model.fit(features, target)

    return {
        feature: round(weight, 3)
        for feature, weight in zip(model.feature_names_in_, model.coef_)
    }
if __name__ == "__main__":
    # Script entry point: train the model and show the fitted coefficients.
    coefficients = train_model()
    print(coefficients)