Skip to content

Commit

Permalink
chore: eliminate sklearn datasets dep (#2337)
Browse files — browse the repository at this point in the history
  • Loading branch information
mhamilton723 authored Jan 9, 2025
1 parent 6d5a90e commit a6d2614
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 11 deletions.
17 changes: 10 additions & 7 deletions docs/Explore Algorithms/Hyperparameter Tuning/HyperOpt.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.datasets import fetch_california_housing\n",
"import time"
"import time\n",
"import requests"
]
},
{
Expand All @@ -158,12 +159,14 @@
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" california = fetch_california_housing()\n",
"except EOFError:\n",
" print(\"Encountered EOFError while downloading, retrying once...\")\n",
" time.sleep(5)\n",
" california = fetch_california_housing()\n",
"with open(\"cal_housing_py3.pkz\", \"wb\") as f:\n",
" f.write(\n",
" requests.get(\n",
" \"https://mmlspark.blob.core.windows.net/datasets/cal_housing_py3.pkz\"\n",
" ).content\n",
" )\n",
"\n",
"california = fetch_california_housing(data_home=\".\", download_if_missing=False)\n",
"\n",
"feature_cols = [\"f\" + str(i) for i in range(california.data.shape[1])]\n",
"header = [\"target\"] + feature_cols\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"import pandas as pd\n",
"from pyspark.ml.feature import VectorAssembler\n",
"from pyspark.ml.regression import LinearRegression\n",
"from sklearn.datasets import fetch_california_housing"
"from sklearn.datasets import fetch_california_housing\n",
"import requests"
]
},
{
Expand All @@ -47,7 +48,14 @@
"metadata": {},
"outputs": [],
"source": [
"california = fetch_california_housing()\n",
"with open(\"cal_housing_py3.pkz\", \"wb\") as f:\n",
" f.write(\n",
" requests.get(\n",
" \"https://mmlspark.blob.core.windows.net/datasets/cal_housing_py3.pkz\"\n",
" ).content\n",
" )\n",
"\n",
"california = fetch_california_housing(data_home=\".\", download_if_missing=False)\n",
"\n",
"feature_cols = [\"f\" + str(i) for i in range(california.data.shape[1])]\n",
"header = [\"target\"] + feature_cols\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,8 @@
"from synapse.ml.vw import VowpalWabbitRegressor, VowpalWabbitFeaturizer\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.datasets import fetch_california_housing"
"from sklearn.datasets import fetch_california_housing\n",
"import requests"
]
},
{
Expand All @@ -393,7 +394,14 @@
"metadata": {},
"outputs": [],
"source": [
"california = fetch_california_housing()\n",
"with open(\"cal_housing_py3.pkz\", \"wb\") as f:\n",
" f.write(\n",
" requests.get(\n",
" \"https://mmlspark.blob.core.windows.net/datasets/cal_housing_py3.pkz\"\n",
" ).content\n",
" )\n",
"\n",
"california = fetch_california_housing(data_home=\".\", download_if_missing=False)\n",
"\n",
"feature_cols = [\"f\" + str(i) for i in range(california.data.shape[1])]\n",
"header = [\"target\"] + feature_cols\n",
Expand Down

0 comments on commit a6d2614

Please sign in to comment.