Fix issues on import joblib and pd.concat #20

Open
wants to merge 1 commit into base: master
@@ -274,7 +274,7 @@
"source": [
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n",
"\n",
"def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n",
@@ -407,8 +407,8 @@
"# Make sure that the files you create are in the correct format.\n",
"\n",
"# Solution:\n",
"pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
"pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
]
},
{
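The pd.DataFrame wrapping above is needed because pd.concat only accepts pandas Series/DataFrame objects; here is a minimal sketch of the fix with made-up arrays (val_y and val_X below are stand-ins for the notebook's NumPy outputs, and the output path is illustrative).

import numpy as np
import pandas as pd

val_y = np.array([1, 0, 1])             # toy labels
val_X = np.arange(12).reshape(3, 4)     # toy bag-of-words features

# pd.concat([val_y, val_X], axis=1) raises a TypeError (only Series and DataFrame
# objects are valid inputs), so wrap the arrays first, label column leading.
combined = pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1)
combined.to_csv("validation.csv", header=False, index=False)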
@@ -274,7 +274,7 @@
"source": [
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n",
"\n",
"def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n",
@@ -274,7 +274,7 @@
"source": [
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n",
"\n",
"def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n",
@@ -396,11 +396,11 @@
"# Solution:\n",
"# The test data shouldn't contain the ground truth labels as they are what the model is\n",
"# trying to predict. We will end up using them afterward to compare the predictions to.\n",
"# pd.concat([test_y, test_X], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"# pd.concat([pd.DataFrame(test_y), pd.DataFrame(test_X)], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"\n",
"pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
"pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
]
},
{
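A small self-contained sketch, with made-up arrays, of the point in the comment above: test_y is kept out of test.csv and only used afterwards to score the model's predictions (accuracy_score is just one possible metric here).

import numpy as np
from sklearn.metrics import accuracy_score

test_y = np.array([1, 0, 1, 1])          # ground truth withheld from test.csv
predictions = np.array([1, 0, 0, 1])     # hypothetical model output for test_X
print(accuracy_score(test_y, predictions))  # 0.75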
@@ -274,7 +274,7 @@
"source": [
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n",
"\n",
"def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n",
@@ -393,9 +393,9 @@
"# First, save the test data to test.csv in the data_dir directory. Note that we do not save the associated ground truth\n",
"# labels, instead we will use them later to compare with our model output.\n",
"\n",
"pd.concat([test_y, test_X], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
"pd.concat([pd.DataFrame(test_y), pd.DataFrame(test_X)], axis=1).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
]
},
{
@@ -285,7 +285,7 @@
"source": [
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n",
"\n",
"def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n",
@@ -411,8 +411,8 @@
"source": [
"pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"\n",
"pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
"pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
]
},
{
@@ -285,7 +285,7 @@
"source": [
"import numpy as np\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"# joblib is an enhanced version of pickle that is more efficient for storing NumPy arrays\n",
"\n",
"def extract_BoW_features(words_train, words_test, vocabulary_size=5000,\n",
@@ -411,8 +411,8 @@
"source": [
"pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"\n",
"pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
"pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
]
},
{
6 changes: 3 additions & 3 deletions Tutorials/IMDB Sentiment Analysis - XGBoost - Web App.ipynb
@@ -274,7 +274,7 @@
"from sklearn.feature_extraction.text import CountVectorizer\n",
"\n",
"# sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. \n",
"# from sklearn.externals import joblib\n",
"# import joblib\n",
"\n",
"# Import joblib package directly\n",
"import joblib\n",
@@ -402,8 +402,8 @@
"source": [
"pd.DataFrame(test_X).to_csv(os.path.join(data_dir, 'test.csv'), header=False, index=False)\n",
"\n",
"pd.concat([val_y, val_X], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([train_y, train_X], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
"pd.concat([pd.DataFrame(val_y), pd.DataFrame(val_X)], axis=1).to_csv(os.path.join(data_dir, 'validation.csv'), header=False, index=False)\n",
"pd.concat([pd.DataFrame(train_y), pd.DataFrame(train_X)], axis=1).to_csv(os.path.join(data_dir, 'train.csv'), header=False, index=False)"
]
},
{