propagate bugfix 0.1.1 from grouplabeencode to onehot

ulf1 · ulf1 · commit d3837b39f6a9 · 2018-10-25T09:49:03.000+02:00
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,4 +1,8 @@
 
+# 0.1.2 / 2018-10-25
+
+  * propagate bugfix 0.1.1 from grouplabeencode to onehot
+
 # 0.1.1 / 2018-10-24
 
   * allow to disable sparse matrix output
diff --git a/examples/bugfix 0.1.2 from grouplabelencode.ipynb b/examples/bugfix 0.1.2 from grouplabelencode.ipynb
@@ -0,0 +1,196 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append('..')\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from onehot import OneHotDummy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"../data/train.csv\")\n",
+    "#df.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Check it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "OneHotDummy(droprule=None, mapping={0: 0, 1: 1, 2: 2, 3: 3}, nametyp=None,\n",
+       "      nastate=False, prefix='BsmtFullBath', sep='_', sparse=False)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s = 'BsmtFullBath'\n",
+    "obj = OneHotDummy(sparse=False, prefix=s)\n",
+    "obj.fit(df[s])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "count    1460.000000\n",
+       "mean        0.425342\n",
+       "std         0.518911\n",
+       "min         0.000000\n",
+       "25%         0.000000\n",
+       "50%         0.000000\n",
+       "75%         1.000000\n",
+       "max         3.000000\n",
+       "Name: BsmtFullBath, dtype: float64"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#print(df[s].head())\n",
+    "df[s].describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Check 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformer = dict()\n",
+    "\n",
+    "cols = [\n",
+    "    'MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', \n",
+    "        'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', \n",
+    "        'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', \n",
+    "        'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', \n",
+    "        'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', \n",
+    "        'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', \n",
+    "        'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', \n",
+    "        'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', \n",
+    "        'SaleCondition', 'MSSubClass', 'MoSold',\n",
+    "    'OverallQual', 'OverallCond', \n",
+    "    'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', \n",
+    "        'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars']\n",
+    "\n",
+    "for i, s in enumerate(cols):\n",
+    "        obj = OneHotDummy(sparse=False, prefix=s)\n",
+    "        obj.fit(df[s])\n",
+    "        transformer[s] = obj"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Check 3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from grouplabelencode import grouplabelencode\n",
+    "s = 'GarageCars'\n",
+    "mapping = [1,2,3,4]\n",
+    "encoded = grouplabelencode(df[s], mapping)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 2, 0, None, 3], dtype=object)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.unique(encoded)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "looks good now"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
@@ -5,4 +5,4 @@ numpy>=1.14.5
 scipy>=1.1.0
 pandas>=0.23.3
 scikit-learn>=0.19.2
-grouplabelencode>=0.1.0
+grouplabelencode>=0.1.1
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@ def read(fname):
 
 
 setup(name='onehot',
-      version='0.1.1',
+      version='0.1.2',
       description=(
           "One-Hot encoder with sklearn-ish API interface that process "
           "mixed string and numeric labels directly."),
@@ -25,6 +25,6 @@ def read(fname):
           'pandas>=0.23.3',
           'scikit-learn>=0.19.2',
           'numpy>=1.14.5',
-          'grouplabelencode>=0.1.0'],
+          'grouplabelencode>=0.1.1'],
       python_requires='>=3.6',
       zip_safe=False)