Skip to content

Commit d3837b3

Browse files
committed
propagate bugfix 0.1.1 from grouplabelencode to onehot
1 parent 0094c88 commit d3837b3

File tree

4 files changed

+203
-3
lines changed

4 files changed

+203
-3
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11

2+
# 0.1.2 / 2018-10-25
3+
4+
* propagate bugfix 0.1.1 from grouplabelencode to onehot
5+
26
# 0.1.1 / 2018-10-24
37

48
* allow disabling sparse matrix output
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"sys.path.append('..')\n",
11+
"\n",
12+
"import pandas as pd\n",
13+
"import numpy as np\n",
14+
"from onehot import OneHotDummy"
15+
]
16+
},
17+
{
18+
"cell_type": "markdown",
19+
"metadata": {},
20+
"source": [
21+
"## Load Data"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": 2,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"df = pd.read_csv(\"../data/train.csv\")\n",
31+
"#df.describe()"
32+
]
33+
},
34+
{
35+
"cell_type": "markdown",
36+
"metadata": {},
37+
"source": [
38+
"## Check it"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 3,
44+
"metadata": {},
45+
"outputs": [
46+
{
47+
"data": {
48+
"text/plain": [
49+
"OneHotDummy(droprule=None, mapping={0: 0, 1: 1, 2: 2, 3: 3}, nametyp=None,\n",
50+
" nastate=False, prefix='BsmtFullBath', sep='_', sparse=False)"
51+
]
52+
},
53+
"execution_count": 3,
54+
"metadata": {},
55+
"output_type": "execute_result"
56+
}
57+
],
58+
"source": [
59+
"s = 'BsmtFullBath'\n",
60+
"obj = OneHotDummy(sparse=False, prefix=s)\n",
61+
"obj.fit(df[s])"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": 4,
67+
"metadata": {},
68+
"outputs": [
69+
{
70+
"data": {
71+
"text/plain": [
72+
"count 1460.000000\n",
73+
"mean 0.425342\n",
74+
"std 0.518911\n",
75+
"min 0.000000\n",
76+
"25% 0.000000\n",
77+
"50% 0.000000\n",
78+
"75% 1.000000\n",
79+
"max 3.000000\n",
80+
"Name: BsmtFullBath, dtype: float64"
81+
]
82+
},
83+
"execution_count": 4,
84+
"metadata": {},
85+
"output_type": "execute_result"
86+
}
87+
],
88+
"source": [
89+
"#print(df[s].head())\n",
90+
"df[s].describe()"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"metadata": {},
96+
"source": [
97+
"## Check 2"
98+
]
99+
},
100+
{
101+
"cell_type": "code",
102+
"execution_count": 5,
103+
"metadata": {},
104+
"outputs": [],
105+
"source": [
106+
"transformer = dict()\n",
107+
"\n",
108+
"cols = [\n",
109+
" 'MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', \n",
110+
" 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', \n",
111+
" 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', \n",
112+
" 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', \n",
113+
" 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', \n",
114+
" 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', \n",
115+
" 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', \n",
116+
" 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', \n",
117+
" 'SaleCondition', 'MSSubClass', 'MoSold',\n",
118+
" 'OverallQual', 'OverallCond', \n",
119+
" 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', \n",
120+
" 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars']\n",
121+
"\n",
122+
"for i, s in enumerate(cols):\n",
123+
" obj = OneHotDummy(sparse=False, prefix=s)\n",
124+
" obj.fit(df[s])\n",
125+
" transformer[s] = obj"
126+
]
127+
},
128+
{
129+
"cell_type": "markdown",
130+
"metadata": {},
131+
"source": [
132+
"## Check 3"
133+
]
134+
},
135+
{
136+
"cell_type": "code",
137+
"execution_count": 6,
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"from grouplabelencode import grouplabelencode\n",
142+
"s = 'GarageCars'\n",
143+
"mapping = [1,2,3,4]\n",
144+
"encoded = grouplabelencode(df[s], mapping)"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": 11,
150+
"metadata": {},
151+
"outputs": [
152+
{
153+
"data": {
154+
"text/plain": [
155+
"array([1, 2, 0, None, 3], dtype=object)"
156+
]
157+
},
158+
"execution_count": 11,
159+
"metadata": {},
160+
"output_type": "execute_result"
161+
}
162+
],
163+
"source": [
164+
"pd.unique(encoded)"
165+
]
166+
},
167+
{
168+
"cell_type": "markdown",
169+
"metadata": {},
170+
"source": [
171+
"looks good now"
172+
]
173+
}
174+
],
175+
"metadata": {
176+
"kernelspec": {
177+
"display_name": "Python 3",
178+
"language": "python",
179+
"name": "python3"
180+
},
181+
"language_info": {
182+
"codemirror_mode": {
183+
"name": "ipython",
184+
"version": 3
185+
},
186+
"file_extension": ".py",
187+
"mimetype": "text/x-python",
188+
"name": "python",
189+
"nbconvert_exporter": "python",
190+
"pygments_lexer": "ipython3",
191+
"version": "3.6.2"
192+
}
193+
},
194+
"nbformat": 4,
195+
"nbformat_minor": 2
196+
}

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ numpy>=1.14.5
55
scipy>=1.1.0
66
pandas>=0.23.3
77
scikit-learn>=0.19.2
8-
grouplabelencode>=0.1.0
8+
grouplabelencode>=0.1.1

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def read(fname):
77

88

99
setup(name='onehot',
10-
version='0.1.1',
10+
version='0.1.2',
1111
description=(
1212
"One-Hot encoder with sklearn-ish API interface that process "
1313
"mixed string and numeric labels directly."),
@@ -25,6 +25,6 @@ def read(fname):
2525
'pandas>=0.23.3',
2626
'scikit-learn>=0.19.2',
2727
'numpy>=1.14.5',
28-
'grouplabelencode>=0.1.0'],
28+
'grouplabelencode>=0.1.1'],
2929
python_requires='>=3.6',
3030
zip_safe=False)

0 commit comments

Comments
 (0)