diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2e5da7d..6a06b88 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc index 2f9a42a..f249e6d 100644 Binary files a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc and b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_outlier_removal/__pycache__/build.cpython-36.pyc b/q01_outlier_removal/__pycache__/build.cpython-36.pyc index 8248a16..e84b2ef 100644 Binary files a/q01_outlier_removal/__pycache__/build.cpython-36.pyc and b/q01_outlier_removal/__pycache__/build.cpython-36.pyc differ diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py index ec278ba..0cd3f90 100644 --- a/q01_outlier_removal/build.py +++ b/q01_outlier_removal/build.py @@ -1,8 +1,16 @@ +# %load q01_outlier_removal/build.py # Default imports import pandas as pd +import numpy as np loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) +def outlier_removal(loan_data): + df= loan_data.quantile(.95) + loan_data = loan_data.drop(loan_data [loan_data['ApplicantIncome'] > df['ApplicantIncome']].index) + loan_data = loan_data.drop(loan_data [loan_data['CoapplicantIncome'] > df['CoapplicantIncome']].index) + loan_data = loan_data.drop(loan_data [loan_data['LoanAmount'] > df['LoanAmount']].index) + return loan_data + -# Write your Solution here: diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc index 5a057ff..1c1c419 100644 Binary files a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc and b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc index 4c0b6c7..7946ac6 100644 Binary files a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc and b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..ffaa7f1 Binary files /dev/null and b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..1781450 Binary files /dev/null and b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/build.py b/q02_data_cleaning_all/build.py index b56e2bc..1fd08ea 100644 --- a/q02_data_cleaning_all/build.py +++ b/q02_data_cleaning_all/build.py @@ -1,3 +1,4 @@ +# %load q02_data_cleaning_all/build.py # Default Imports import sys, os sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname('__file__')))) @@ -9,6 +10,23 @@ loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) loan_data = outlier_removal(loan_data) +loan_data.columns +def data_cleaning (housing_data): + missinglist = list(housing_data.columns[housing_data.isnull().any()]) + cate=[] + num=[] + for k in missinglist : + if (str(housing_data[k].dtypes) == 'object' ): + housing_data[k].fillna(housing_data[k].mode()[0], inplace =True) + cate.append(k) + else: + housing_data[k].fillna(housing_data[k].mean(), inplace =True) + num.append(k) + X=housing_data.drop('Loan_Status',1) + y=housing_data['Loan_Status'] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=9) + return X ,y, X_train, X_test, y_train, y_test +data_cleaning(loan_data) + -# Write your solution here : diff --git a/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..047f6a5 Binary files /dev/null and b/q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc new file mode 100644 index 0000000..d193782 Binary files /dev/null and b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..8067a4c Binary files /dev/null and b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..b1887d3 Binary files /dev/null and b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py index e20ff7b..e0eee15 100644 --- a/q02_data_cleaning_all_2/build.py +++ b/q02_data_cleaning_all_2/build.py @@ -1,3 +1,4 @@ +# %load q02_data_cleaning_all_2/build.py # Default Imports import pandas as pd import numpy as np @@ -8,6 +9,24 @@ loan_data = loan_data.drop('Loan_ID', 1) loan_data = outlier_removal(loan_data) X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data) +def data_cleaning_2(X_train, X_test, y_train, y_test): + col=['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area'] + allcolumn = X_train.columns.values + + df=pd.get_dummies(X_train , columns=col ) + X_train = pd.concat([X_train,df], axis=1) + X_train = X_train.drop(col,axis=1) + X_train = X_train.drop(allcolumn,axis=1) + X_train = X_train.drop('Dependents_3+',axis=1) + + df1=pd.get_dummies(X_test , columns=col ) + X_test = pd.concat([X_test,df1], axis=1) + X_test = X_test.drop(col,axis=1) + X_test = X_test.drop(allcolumn,axis=1) + X_test = X_test.drop('Dependents_3+',axis=1) + + return X_train ,X_test,y_train,y_test + + -# Write your solution here : diff --git a/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..f5d200e Binary files /dev/null and b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc new file mode 100644 index 0000000..6d3cf9d Binary files /dev/null and b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc differ diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..7d7eb09 Binary files /dev/null and b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..ebae60d Binary files /dev/null and b/q03_logistic_regression/__pycache__/build.cpython-36.pyc differ diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py index cdbd506..44fe044 100644 --- a/q03_logistic_regression/build.py +++ b/q03_logistic_regression/build.py @@ -1,3 +1,4 @@ +# %load q03_logistic_regression/build.py # Default Imports import pandas as pd from sklearn.preprocessing import StandardScaler @@ -11,8 +12,23 @@ loan_data = loan_data.drop('Loan_ID', 1) loan_data = outlier_removal(loan_data) X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data) + + X_train, X_test, y_train, y_test = data_cleaning_2(X_train, X_test, y_train, y_test) +def logistic_regression(X_train, X_test, y_train, y_test ): + scaler = StandardScaler() + li= ['ApplicantIncome','CoapplicantIncome','LoanAmount'] + X_train[li] = scaler.fit_transform(X_train[li]) + X_test[li] = scaler.fit_transform(X_test[li]) + lr= LogisticRegression(random_state=9) + lr.fit(X_train,y_train) + y_pred= lr.predict(X_test) + cnf_matrix = confusion_matrix(y_test, y_pred) + return cnf_matrix + + + + -# Write your solution code here: diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..568425e Binary files /dev/null and b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc new file mode 100644 index 0000000..4f05141 Binary files /dev/null and b/q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc differ