Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update run.py in machine_learning/forecasting #8957

Merged
merged 4 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@
* [Minimum Tickets Cost](dynamic_programming/minimum_tickets_cost.py)
* [Optimal Binary Search Tree](dynamic_programming/optimal_binary_search_tree.py)
* [Palindrome Partitioning](dynamic_programming/palindrome_partitioning.py)
* [Regex Match](dynamic_programming/regex_match.py)
* [Rod Cutting](dynamic_programming/rod_cutting.py)
* [Subset Generation](dynamic_programming/subset_generation.py)
* [Sum Of Subset](dynamic_programming/sum_of_subset.py)
Expand Down
2 changes: 1 addition & 1 deletion machine_learning/forecasting/ex_data.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
total_user,total_events,days
total_users,total_events,days
18231,0.0,1
22621,1.0,2
15675,0.0,3
Expand Down
35 changes: 17 additions & 18 deletions machine_learning/forecasting/run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
this is code for forecasting
but i modified it and used it for safety checker of data
but I modified it and used it for safety checker of data
for example: you have an online shop and for some reason some data are
missing (the amount of data is not what you expected);
then we can use it
Expand Down Expand Up @@ -102,6 +102,10 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
"""
safe = 0
not_safe = 0

if not isinstance(actual_result, float):
raise TypeError("Actual result should be float. Value passed is a list")

for i in list_vote:
if i > actual_result:
safe = not_safe + 1
Expand All @@ -114,16 +118,11 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:


if __name__ == "__main__":
# data_input_df = pd.read_csv("ex_data.csv", header=None)
data_input = [[18231, 0.0, 1], [22621, 1.0, 2], [15675, 0.0, 3], [23583, 1.0, 4]]
data_input_df = pd.DataFrame(
data_input, columns=["total_user", "total_even", "days"]
)

"""
data columns = total users in a day, how many online events were held that day,
which day of the week it is (Sunday-Saturday)
"""
data_input_df = pd.read_csv("ex_data.csv")

# start normalization
normalize_df = Normalizer().fit_transform(data_input_df.values)
Expand All @@ -138,23 +137,23 @@ def data_safety_checker(list_vote: list, actual_result: float) -> bool:
x_test = x[len(x) - 1 :]

# for linear regression & sarimax
trn_date = total_date[: len(total_date) - 1]
trn_user = total_user[: len(total_user) - 1]
trn_match = total_match[: len(total_match) - 1]
train_date = total_date[: len(total_date) - 1]
train_user = total_user[: len(total_user) - 1]
train_match = total_match[: len(total_match) - 1]

tst_date = total_date[len(total_date) - 1 :]
tst_user = total_user[len(total_user) - 1 :]
tst_match = total_match[len(total_match) - 1 :]
test_date = total_date[len(total_date) - 1 :]
test_user = total_user[len(total_user) - 1 :]
test_match = total_match[len(total_match) - 1 :]

# voting system with forecasting
res_vote = [
linear_regression_prediction(
trn_date, trn_user, trn_match, tst_date, tst_match
train_date, train_user, train_match, test_date, test_match
),
sarimax_predictor(trn_user, trn_match, tst_match),
support_vector_regressor(x_train, x_test, trn_user),
sarimax_predictor(train_user, train_match, test_match),
support_vector_regressor(x_train, x_test, train_user),
]

# check the safety of today's data
not_str = "" if data_safety_checker(res_vote, tst_user) else "not "
print("Today's data is {not_str}safe.")
not_str = "" if data_safety_checker(res_vote, test_user[0]) else "not "
print(f"Today's data is {not_str}safe.")