-
Notifications
You must be signed in to change notification settings - Fork 0
/
EXP10_Naive_Bayes.py
113 lines (99 loc) · 4.04 KB
/
EXP10_Naive_Bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
'''
Step-by-Step Algorithm for Naive Bayes Classifier
Input:
Training data with features (Feature1, Feature2)
Target variable (Class)
Test instance for prediction
Steps:
Load and Preprocess Data
Read CSV file into DataFrame
Extract features and target variables
Store features as numpy array
Store target/class labels as numpy array
Calculate Prior Probabilities
Find unique classes in target variable
Count occurrences of each class
Calculate prior probability for each class:
P(class) = count(class) / total_samples
Calculate Likelihood Probabilities
For each class:
Select features where target equals current class
For each feature:
Find unique values and their counts
Calculate likelihood for each value:
P(feature|class) = count(feature_value) / count(class)
Store likelihoods in dictionary
Get Test Instance
Accept user input for test features
Validate input format
Convert input to numpy array
Calculate Posterior Probabilities
For each class:
Calculate log of prior probability
For each feature in test instance:
Get likelihood from stored values
Add log of likelihood to running sum
Calculate posterior = log(prior) + sum(log(likelihoods))
Store posterior probability
Make Prediction
Find class with maximum posterior probability
Return predicted class
'''
import pandas as pd
import numpy as np
# Load the dataset
df = pd.read_csv('Data/Naive_Bayes.csv')
# Preprocess the data
# Extract features (Feature1, Feature2) and target (Class) from the DataFrame
features = df[['Feature1', 'Feature2']].values
target = df['Class'].values
# Calculate prior probabilities for each class
classes, class_counts = np.unique(target, return_counts=True)
priors = class_counts / len(target)
# Calculate likelihoods for each feature per class
likelihoods = {}
for cls in classes:
# Select rows where the class is equal to the current class
class_features = features[target == cls]
likelihoods[cls] = {}
for feature_index in range(features.shape[1]):
# Initialize a dictionary to store the likelihoods for the current feature
feature_likelihoods = {}
# Calculate the frequency of each feature value for the current class
feature_values, feature_counts = np.unique(class_features[:, feature_index], return_counts=True)
for value, count in zip(feature_values, feature_counts):
# Calculate the likelihood of each feature value for the current class
feature_likelihoods[value] = count / len(class_features)
# Store the likelihoods for the current feature in the likelihoods dictionary
likelihoods[cls][feature_index] = feature_likelihoods
# User input for test data
while True:
user_input = input("Enter test data (Feature1, Feature2) or 'exit' to quit: ")
if user_input.lower() == 'exit':
break
user_features = user_input.split(',')
if len(user_features) != 2:
print("Invalid input. Please enter two features separated by a comma.")
continue
# Convert user input to a NumPy array
user_features = np.array(user_features).reshape(1, -1)
posteriors = []
for idx, cls in enumerate(classes):
# Calculate the prior probability for the current class
prior = np.log(priors[idx])
likelihood = 0
for feature_index in range(features.shape[1]):
# Calculate the likelihood of the user input features for the current class
if user_features[0, feature_index] in likelihoods[cls][feature_index]:
likelihood += np.log(likelihoods[cls][feature_index][user_features[0, feature_index]])
else:
likelihood += np.log(1e-6) # Smoothing for unseen feature values
# Calculate the posterior probability for the current class
posterior = prior + likelihood
print("prior of ",cls,": ",prior)
print("Likelihood of ",cls,": " ,likelihood)
print("Posterior of ",cls,": ",posterior)
posteriors.append(posterior)
# Predict the class with the highest posterior probability
prediction = classes[np.argmax(posteriors)]
print(f"Predicted Class: {prediction}")