Skip to content

Commit

Permalink
fixes #376
Browse files Browse the repository at this point in the history
  • Loading branch information
“oguiza” committed Jan 21, 2022
1 parent f8798d1 commit 636aef6
Show file tree
Hide file tree
Showing 5 changed files with 290 additions and 131 deletions.
254 changes: 167 additions & 87 deletions nbs/016_data.preprocessing.ipynb

Large diffs are not rendered by default.

115 changes: 78 additions & 37 deletions nbs/017_data.transforms.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPSklEQVR4nO3cf6yeZX3H8fdnFPyBCwU5a1jb7JDYaIiJQk4Qx2Ic3RZ+GMsfyDCbdqxL/0GH00Sr/5gl+wOTRcVkYWmoWjYmEtS0UXSSgjH+Qefhx5AfEs4Y2HYtPSpUHXGO+d0f5+p2KC3nOT3Pcx569f1KntzXfV3X/dzfO20+5z7XuZ8nVYUkqS+/Me4CJEnDZ7hLUocMd0nqkOEuSR0y3CWpQyvGXQDA2WefXZOTk+MuQ5JOKPfdd9+Pq2riaGOviHCfnJxkenp63GVI0gklydPHGnNZRpI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOvSK+ISqTm6TW74xsvd+6oYrRvbeg+r9+vTK5J27JHVooDv3JCuBm4E3AwX8OfA48GVgEngKuLqqnk0S4EbgcuB54M+q6v5hFy4dL++kdTIYdFnmRuBbVXVVktOA1wKfAHZV1Q1JtgBbgI8BlwHr2uttwE1tqxPEqMLP4Fs+/htqwWWZJGcA7wC2AVTVr6rqOWADsL1N2w5c2dobgFtqzr3AyiTnDLluSdLLGGTN/VxgFvhCkgeS3JzkdGBVVe1vcw4Aq1p7NbBn3vF7W9+LJNmcZDrJ9Ozs7PFfgSTpJQZZllkBXAB8sKp2J7mRuSWY/1NVlaQWc+Kq2gpsBZiamlrUsePkeq30Ui4DvfIMcue+F9hbVbvb/h3Mhf0zh5db2vZgG98HrJ13/JrWJ0laJgveuVfVgSR7kryxqh4H1gOPttdG4Ia23dEO2Ql8IMltzP0h9dC85RtJWjJ/U1jYoE/LfBC4tT0p8yRwLXN3/bcn2QQ8DVzd5t7J3GOQM8w9CnntUCuWJC1ooHCvqgeBqaMMrT/K3AKuW1pZms+7FEmL5SdUJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDA4V7kqeS/CDJg0mmW99ZSe5K8kTbntn6k+RzSWaSPJTkglFegCTppRZz5/77VfXWqppq+1uAXVW1DtjV9gEuA9a112bgpmEVK0kazFKWZTYA21t7O3DlvP5bas69wMok5yzhPJKkRRo03Av4dpL7kmxufauqan9rHwBWtfZqYM+8Y/e2vhdJsjnJdJLp2dnZ4yhdknQsKwac93tVtS/JbwF3Jfnh/MGqqiS1mBNX1VZgK8DU1NSijpUkvbyB7tyral/bHgS+BlwIPHN4uaVtD7bp+4C18w5f0/okSctkwXBPcnqS3zzcBv4IeBjYCWxs0zYCO1p7J/D+9tTMRcChecs3kqRlMMiyzCrga0kOz/+nqvpWku8DtyfZBDwNXN3m3wlcDswAzwPXDr1qSdLLWjDcq+pJ4C1H6f8JsP4o/QVcN5TqJEnHxU+oSlKHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOjRwuCc5JckDSb7e9s9NsjvJTJIvJzmt9b+q7c+08ckR1S5JOobF3LlfDzw2b/9TwGeq6g3As8Cm1r8JeLb1f6bNkyQto4HCPcka4Arg5rYf4BLgjjZlO3Bla29o+7Tx9W2+JGmZDHrn/lngo8Cv2/7rgeeq6oW2vxdY3dqrgT0AbfxQmy9JWi
YLhnuSdwEHq+q+YZ44yeYk00mmZ2dnh/nWknTSG+TO/WLg3UmeAm5jbjnmRmBlkhVtzhpgX2vvA9YCtPEzgJ8c+aZVtbWqpqpqamJiYkkXIUl6sQXDvao+XlVrqmoSuAa4u6r+BLgHuKpN2wjsaO2dbZ82fndV1VCrliS9rKU85/4x4MNJZphbU9/W+rcBr2/9Hwa2LK1ESdJirVh4yv+rqu8A32ntJ4ELjzLnl8B7hlCbJOk4+QlVSeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SerQguGe5NVJ/iXJvyZ5JMlft/5zk+xOMpPky0lOa/2vavszbXxyxNcgSTrCigHm/BdwSVX9IsmpwPeSfBP4MPCZqrotyd8Dm4Cb2vbZqnpDkmuATwF/PKL6mdzyjVG9NU/dcMXI3luSRmnBO/ea84u2e2p7FXAJcEfr3w5c2dob2j5tfH2SDKtgSdLCBlpzT3JKkgeBg8BdwL8Bz1XVC23KXmB1a68G9gC08UPA64/ynpuTTCeZnp2dXdJFSJJebKBwr6r/qaq3AmuAC4E3LfXEVbW1qqaqampiYmKpbydJmmdRT8tU1XPAPcDbgZVJDq/ZrwH2tfY+YC1AGz8D+MkwipUkDWaQp2Umkqxs7dcAfwg8xlzIX9WmbQR2tPbOtk8bv7uqaog1S5IWMMjTMucA25OcwtwPg9ur6utJHgVuS/I3wAPAtjZ/G/APSWaAnwLXjKBuSVo2J+JTeQuGe1U9BJx/lP4nmVt/P7L/l8B7hlKdJOm4+AlVSeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SerQguGeZG2Se5I8muSRJNe3/rOS3JXkibY9s/UnyeeSzCR5KMkFo74ISdKLDXLn/gLwkao6D7gIuC7JecAWYFdVrQN2tX2Ay4B17bUZuGnoVUuSXtaC4V5V+6vq/tb+OfAYsBrYAGxv07YDV7b2BuCWmnMvsDLJOcMuXJJ0bItac08yCZwP7AZWVdX+NnQAWNXaq4E98w7b2/qOfK/NSaaTTM/Ozi62bknSyxg43JO8DvgK8KGq+tn8saoqoBZz4qraWlVTVTU1MTGxmEMlSQsYKNyTnMpcsN9aVV9t3c8cXm5p24Otfx+wdt7ha1qfJGmZDPK0TIBtwGNV9el5QzuBja29Edgxr//97amZi4BD85ZvJEnLYMUAcy4G3gf8IMmDre8TwA3A7Uk2AU8DV7exO4HLgRngeeDaYRYsSVrYguFeVd8Dcozh9UeZX8B1S6xLkrQEfkJVkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUoQXDPcnnkxxM8vC8vrOS3JXkibY9s/UnyeeSzCR5KMkFoyxeknR0g9y5fxG49Ii+LcCuqloH7Gr7AJcB69prM3DTcMqUJC3GguFeVd8FfnpE9wZge2tvB66c139LzbkXWJnknCHVKkka0PGuua+qqv2tfQBY1dqrgT3z5u1tfS+RZHOS6STTs7Ozx1mGJOlolvwH1aoqoI7juK1VNVVVUxMTE0stQ5I0z/GG+zOHl1va9mDr3wesnTdvTeuTJC2j4w33ncDG1t4I7JjX//721MxFwKF5yzeSpGWyYqEJSb4EvBM4O8le4JPADcDtSTYBTwNXt+l3ApcDM8DzwLUjqFmStIAFw7
2q3nuMofVHmVvAdUstSpK0NH5CVZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6NJJwT3JpkseTzCTZMopzSJKObejhnuQU4O+Ay4DzgPcmOW/Y55EkHdso7twvBGaq6smq+hVwG7BhBOeRJB1Dqmq4b5hcBVxaVX/R9t8HvK2qPnDEvM3A5rb7RuDxoRZybGcDP16mc42D13fi6/0avb7h+Z2qmjjawIplKuAlqmorsHW5z5tkuqqmlvu8y8XrO/H1fo1e3/IYxbLMPmDtvP01rU+StExGEe7fB9YlOTfJacA1wM4RnEeSdAxDX5apqheSfAD4Z+AU4PNV9ciwz7MEy74UtMy8vhNf79fo9S2Dof9BVZI0fn5CVZI6ZLhLUodOmnDv+SsRkqxNck+SR5M8kuT6cdc0KklOSfJAkq+Pu5ZhS7IyyR1JfpjksSRvH3dNw5bkr9r/0YeTfCnJq8dd01Ik+XySg0kentd3VpK7kjzRtmeOo7aTItxPgq9EeAH4SFWdB1wEXNfZ9c13PfDYuIsYkRuBb1XVm4C30Nl1JlkN/CUwVVVvZu6Bi2vGW9WSfRG49Ii+LcCuqloH7Gr7y+6kCHc6/0qEqtpfVfe39s+ZC4XV461q+JKsAa4Abh53LcOW5AzgHcA2gKr6VVU9N9aiRmMF8JokK4DXAv8x5nqWpKq+C/z0iO4NwPbW3g5cuZw1HXayhPtqYM+8/b10GH4ASSaB84HdYy5lFD4LfBT49ZjrGIVzgVngC23Z6eYkp4+7qGGqqn3A3wI/AvYDh6rq2+OtaiRWVdX+1j4ArBpHESdLuJ8UkrwO+Arwoar62bjrGaYk7wIOVtV9465lRFYAFwA3VdX5wH8ypl/nR6WtPW9g7gfZbwOnJ/nT8VY1WjX3rPlYnjc/WcK9+69ESHIqc8F+a1V9ddz1jMDFwLuTPMXcstolSf5xvCUN1V5gb1Ud/o3rDubCvid/APx7Vc1W1X8DXwV+d8w1jcIzSc4BaNuD4yjiZAn3rr8SIUmYW6t9rKo+Pe56RqGqPl5Va6pqkrl/v7urqpu7vqo6AOxJ8sbWtR54dIwljcKPgIuSvLb9n11PZ380bnYCG1t7I7BjHEWM7Vshl9MJ8JUIS3Ux8D7gB0kebH2fqKo7x1eSjsMHgVvbDciTwLVjrmeoqmp3kjuA+5l7wusBXiEf1T9eSb4EvBM4O8le4JPADcDtSTYBTwNXj6U2v35AkvpzsizLSNJJxXCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHfpfchr5WWe14doAAAAASUVORK5CYII=\n",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPTUlEQVR4nO3df6yeZX3H8fdnFPy5UZCzhrV1JbHRkCUIOWF1LMbRbeGHsfyhDLNJR7r0H3Q4TbT6j1myPzBZVEgWlkbUsjGVVE0bZc6mYIx/0HkqDIFqOGNg27X0+IOqI44xv/vjXHWH2nKe9jzPeeh13q/kyX1d1309z/2908Pn3Od67uchVYUkqS+/Nu4CJEnDZ7hLUocMd0nqkOEuSR0y3CWpQ8vGXQDABRdcUGvWrBl3GZJ0Rtm7d+8PqmriRPteEuG+Zs0apqamxl2GJJ1Rkjx1sn0uy0hShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUodeEp9Q1Ytbs+UrI3ndJ2+9diSvK2n8BrpyT7I8yfYk302yL8mbkpyfZFeSx9v2vDY3SW5PMp3k4SSXjfYUJEnHG3RZ5jbgq1X1BuASYB+wBdhdVWuB3a0PcDWwtj02A3cMtWJJ0rzmXZZJci7wZuDPAarqOeC5JBuAt7Rp24CvAx8ENgB31ez/nPWBdtV/YVUdGnr1kpYklyrnN8ia+0XADPDpJJcAe4FbgBVzAvswsKK1VwL75zz/QBt7Qbgn2czslT2vfe1rT7d+jYD/4Zz5Fvvf0J+Zl55Bwn0ZcBnwnqrak+Q2/n8JBoCqqiR1Kgeuqq3AVoDJyclTeu44jeqHGPxBljQ8g6y5HwAOVNWe1t/ObNg/neRCgLY90vYfBFbPef6qNiZJWiTzhntVHQb2J3l9G1oPPAbsBDa2sY3AjtbeCdzY7ppZBxx1vV2SFteg97m/B7g7yTnAE8BNzP5iuCfJJuAp4Po2917gGmAaeLbNlSQtooHCvaoeAiZPsGv9CeYWcPPCytJS4vsY0vD59QOS1CG/fkAaMf8y0TgY7lpyDFstBS7LSFKHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDg0U7kmeTPKdJA8lmWpj5yfZleTxtj2vjSfJ7Ummkzyc5LJRnoAk6VedypX7H1TVG6tqsvW3ALurai2wu/UBrgbWtsdm4I5hFStJGsxClmU2ANtaextw3Zzxu2rWA8DyJBcu4DiSpFM0aLgX8LUke5NsbmMrqupQax8GVrT2SmD/nOceaGMvkGRzkqkkUzMzM6dRuiTpZJYNOO/3q+pgkt8EdiX57tydVVVJ6lQOXFVbga0Ak5OTp/RcSdKLG+jKvaoOtu0R4EvA5cDTx5Zb2vZIm34QWD3n6avamCRpkcwb7kleleTXj7WBPwYeAXYCG9u0jcCO1t4J3NjumlkHHJ2zfCNJWgSDLMusAL6U5Nj8f6qqryb5FnBPkk3AU8D1bf69wDXANPAscNPQq5Ykvah5w72qngAuOcH4D4H1Jxgv4OahVCdJOi1+QlWSOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOjRwuCc5K8mDSb7c+hcl2ZNkOsnnk5zTxl/W+tNt/5oR1S5JOolTuXK/Bdg3p/9R4ONV9Trgx8CmNr4J+HEb/3ibJ0laRAOFe5JVwLXAJ1s/wJXA9jZlG3Bda29ofdr+9W2+JGmRDHrl/gngA8AvWv81wDNV9XzrHwBWtvZKYD9A23+0zX+BJJuTTC
WZmpmZOb3qJUknNG+4J3krcKSq9g7zwFW1taomq2pyYmJimC8tSUvesgHmXAG8Lck1wMuB3wBuA5YnWdauzlcBB9v8g8Bq4ECSZcC5wA+HXrkk6aTmvXKvqg9V1aqqWgPcANxXVX8K3A+8vU3bCOxo7Z2tT9t/X1XVUKuWJL2ohdzn/kHgfUmmmV1Tv7ON3wm8po2/D9iysBIlSadqkGWZX6qqrwNfb+0ngMtPMOfnwDuGUJsk6TT5CVVJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KH5g33JC9P8q9J/i3Jo0n+uo1flGRPkukkn09yTht/WetPt/1rRnwOkqTjLBtgzn8DV1bVz5KcDXwzyT8D7wM+XlWfS/L3wCbgjrb9cVW9LskNwEeBPxlR/ZI0cmu2fGVkr/3krdeO5HXnvXKvWT9r3bPbo4Arge1tfBtwXWtvaH3a/vVJMqyCJUnzG2jNPclZSR4CjgC7gH8Hnqmq59uUA8DK1l4J7Ado+48CrznBa25OMpVkamZmZkEnIUl6oYHCvar+t6reCKwCLgfesNADV9XWqpqsqsmJiYmFvpwkaY5Tulumqp4B7gfeBCxPcmzNfhVwsLUPAqsB2v5zgR8Oo1hJ0mAGuVtmIsny1n4F8EfAPmZD/u1t2kZgR2vvbH3a/vuqqoZYsyRpHoPcLXMhsC3JWcz+Mrinqr6c5DHgc0n+BngQuLPNvxP4hyTTwI+AG0ZQ9y+die9iS9KozRvuVfUwcOkJxp9gdv39+PGfA+8YSnWSpNPiJ1QlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUPzhnuS1UnuT/JYkkeT3NLGz0+yK8njbXteG0+S25NMJ3k4yWWjPglJ0gsNcuX+PPD+qroYWAfcnORiYAuwu6rWArtbH+BqYG17bAbuGHrVkqQXNW+4V9Whqvp2a/8U2AesBDYA29q0bcB1rb0BuKtmPQAsT3LhsAuXJJ3cKa25J1kDXArsAVZU1aG26zCworVXAvvnPO1AGzv+tTYnmUoyNTMzc6p1S5JexMDhnuTVwBeA91bVT+buq6oC6lQOXFVbq2qyqiYnJiZO5amSpHkMFO5JzmY22O+uqi+24aePLbe07ZE2fhBYPefpq9qYJGmRDHK3TIA7gX1V9bE5u3YCG1t7I7BjzviN7a6ZdcDROcs3kqRFsGyAOVcA7wK+k+ShNvZh4FbgniSbgKeA69u+e4FrgGngWeCmYRYsSZrfvOFeVd8EcpLd608wv4CbF1iXJGkB/ISqJHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHVo3nBP8qkkR5I8Mmfs/CS7kjzetue18SS5Pcl0koeTXDbK4iVJJzbIlftngKuOG9sC7K6qtcDu1ge4GljbHpuBO4ZTpiTpVMwb7lX1DeBHxw1vALa19jbgujnjd9WsB4DlSS4cUq2SpAGd7pr7iqo61NqHgRWtvRLYP2fegTb2K5JsTjKVZGpmZuY0y5AknciC31CtqgLqNJ63taomq2pyYmJioWVIkuY43XB/+thyS9seaeMHgdVz5q1qY5KkRXS64b4T2NjaG4Edc8ZvbHfNrAOOzlm+kSQtkmXzTUjyWeAtwAVJDgAfAW4F7kmyCXgKuL5Nvxe4BpgGngVuGkHNkq
R5zBvuVfXOk+xaf4K5Bdy80KIkSQvjJ1QlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUMjCfckVyX5XpLpJFtGcQxJ0skNPdyTnAX8HXA1cDHwziQXD/s4kqSTG8WV++XAdFU9UVXPAZ8DNozgOJKkk0hVDfcFk7cDV1XVX7T+u4Dfrap3HzdvM7C5dV8PfG+ohZzcBcAPFulY4+D5nfl6P0fPb3h+u6omTrRj2SIV8CuqaiuwdbGPm2SqqiYX+7iLxfM78/V+jp7f4hjFssxBYPWc/qo2JklaJKMI928Ba5NclOQc4AZg5wiOI0k6iaEvy1TV80neDfwLcBbwqap6dNjHWYBFXwpaZJ7fma/3c/T8FsHQ31CVJI2fn1CVpA4Z7pLUoSUT7j1/JUKS1UnuT/JYkkeT3DLumkYlyVlJHkzy5XHXMmxJlifZnuS7SfYledO4axq2JH/VfkYfSfLZJC8fd00LkeRTSY4keWTO2PlJdiV5vG3PG0dtSyLcl8BXIjwPvL+qLgbWATd3dn5z3QLsG3cRI3Ib8NWqegNwCZ2dZ5KVwF8Ck1X1O8zecHHDeKtasM8AVx03tgXYXVVrgd2tv+iWRLjT+VciVNWhqvp2a/+U2VBYOd6qhi/JKuBa4JPjrmXYkpwLvBm4E6CqnquqZ8Za1GgsA16RZBnwSuA/x1zPglTVN4AfHTe8AdjW2tuA6xazpmOWSrivBPbP6R+gw/ADSLIGuBTYM+ZSRuETwAeAX4y5jlG4CJgBPt2WnT6Z5FXjLmqYquog8LfA94FDwNGq+tp4qxqJFVV1qLUPAyvGUcRSCfclIcmrgS8A762qn4y7nmFK8lbgSFXtHXctI7IMuAy4o6ouBf6LMf05Pypt7XkDs7/Ifgt4VZI/G29Vo1Wz95qP5X7zpRLu3X8lQpKzmQ32u6vqi+OuZwSuAN6W5Elml9WuTPKP4y1pqA4AB6rq2F9c25kN+578IfAfVTVTVf8DfBH4vTHXNApPJ7kQoG2PjKOIpRLuXX8lQpIwu1a7r6o+Nu56RqGqPlRVq6pqDbP/fvdVVTdXfVV1GNif5PVtaD3w2BhLGoXvA+uSvLL9zK6nszeNm53AxtbeCOwYRxFj+1bIxXQGfCXCQl0BvAv4TpKH2tiHq+re8ZWk0/Ae4O52AfIEcNOY6xmqqtqTZDvwbWbv8HqQl8hH9U9Xks8CbwEuSHIA+AhwK3BPkk3AU8D1Y6nNrx+QpP4slWUZSVpSDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUof8D6U/r8dhfh0kAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
Expand Down Expand Up @@ -1087,17 +1087,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/nacho/opt/anaconda3/envs/py37/lib/python3.7/site-packages/torch/_tensor.py:579: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.\n",
"To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at ../aten/src/ATen/native/BinaryOps.cpp:467.)\n",
" return torch.floor_divide(other, self)\n"
]
}
],
"outputs": [],
"source": [
"test_eq(TSMaskOut()(xb, split_idx=0).shape, xb.shape)\n",
"test_ne(TSMaskOut()(xb, split_idx=0), xb)"
Expand Down Expand Up @@ -1457,7 +1447,6 @@
"outputs": [],
"source": [
"#export\n",
"\n",
"class TSRandomConv(RandTransform):\n",
" \"\"\"Applies a convolution with a random kernel and random weights with required_grad=False\"\"\"\n",
" order = 90\n",
Expand Down Expand Up @@ -1489,6 +1478,75 @@
" test_eq(o.shape, xb.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#export\n",
"from fastai.vision.augment import RandTransform\n",
"class TSAddNan(RandTransform):\n",
" \"Randomly sets selected variables of type `TSTensor` to Nan values\"\n",
" order = 90\n",
" def __init__(self, nan_perc=0.1, sel_vars=None, static=False, **kwargs):\n",
" self.sel_vars = sel_vars if sel_vars is not None else None\n",
" self.nan_perc = nan_perc\n",
" self.static = static\n",
" super().__init__(**kwargs)\n",
"\n",
" def encodes(self, o:TSTensor):\n",
" if self.static:\n",
" nan_vals = torch.rand(*o.shape[:-1])\n",
" else:\n",
" nan_vals = torch.rand(*o.shape)\n",
" if self.sel_vars is not None:\n",
" nan_vals[:, ~torch.isin(torch.arange(o.shape[1]), tensor(self.sel_vars))] = 0\n",
" o[nan_vals > 1 - self.nan_perc] = np.nan\n",
" return o"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[1., nan, 1., 1.],\n",
" [1., 1., 1., 1.],\n",
" [1., nan, 1., 1.]],\n",
"\n",
" [[nan, nan, 1., nan],\n",
" [1., 1., 1., 1.],\n",
" [nan, nan, nan, nan]]])"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t = TSTensor(torch.ones(2,3,4))\n",
"TSAddNan(nan_perc=.5, sel_vars=[0,2])(t, split_idx=0).data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"t = TSTensor(torch.ones(2,3,100))\n",
"test_gt(np.isnan(TSAddNan(nan_perc=.5)(t, split_idx=0)).sum().item(), 0)\n",
"t = TSTensor(torch.ones(2,3,100))\n",
"test_gt(np.isnan(TSAddNan(nan_perc=.5, sel_vars=[0,2])(t, split_idx=0)[:, [0,2]]).sum().item(), 0)\n",
"t = TSTensor(torch.ones(2,3,100))\n",
"test_eq(np.isnan(TSAddNan(nan_perc=.5, sel_vars=[0,2])(t, split_idx=0)[:, 1]).sum().item(), 0)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -1649,29 +1707,11 @@
"outputs": [
{
"data": {
"text/html": [
"<img src onerror=\"\n",
" this.nextElementSibling.focus();\n",
" this.dispatchEvent(new KeyboardEvent('keydown', {key:'s', keyCode: 83, metaKey: true}));\n",
" \" style=\"display:none\"><input style=\"width:0;height:0;border:0\">"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<img src onerror=\"\n",
" this.nextElementSibling.focus();\n",
" this.dispatchEvent(new KeyboardEvent('keydown', {key:'s', keyCode: 83, metaKey: true}));\n",
" \" style=\"display:none\"><input style=\"width:0;height:0;border:0\">"
"application/javascript": [
"IPython.notebook.save_checkpoint();"
],
"text/plain": [
"<IPython.core.display.HTML object>"
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
Expand All @@ -1681,12 +1721,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"017_data.transforms.ipynb saved at 2022-01-21 09:07:58.\n",
"Converted 017_data.transforms.ipynb.\n",
"\n",
"\n",
"Correct conversion! 😃\n",
"Total time elapsed 0.647 s\n",
"Thursday 30/09/21 20:55:29 CEST\n"
"Total time elapsed 0.119 s\n",
"Friday 21/01/22 09:08:03 CET\n"
]
},
{
Expand Down Expand Up @@ -1718,7 +1759,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
Expand Down
2 changes: 2 additions & 0 deletions tsai/_nbdev.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@
"TSRollingMean": "016_data.preprocessing.ipynb",
"TSLogReturn": "016_data.preprocessing.ipynb",
"TSAdd": "016_data.preprocessing.ipynb",
"TSClipByVar": "016_data.preprocessing.ipynb",
"TSShrinkDataFrame": "016_data.preprocessing.ipynb",
"TSOneHotEncoder": "016_data.preprocessing.ipynb",
"TSCategoricalEncoder": "016_data.preprocessing.ipynb",
Expand Down Expand Up @@ -340,6 +341,7 @@
"TSDownUpScale": "017_data.transforms.ipynb",
"TSRandomDownUpScale": "017_data.transforms.ipynb",
"TSRandomConv": "017_data.transforms.ipynb",
"TSAddNan": "017_data.transforms.ipynb",
"all_TS_randaugs": "017_data.transforms.ipynb",
"RandAugment": "017_data.transforms.ipynb",
"TestTfm": "017_data.transforms.ipynb",
Expand Down
24 changes: 20 additions & 4 deletions tsai/data/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

__all__ = ['ToNumpyCategory', 'OneHot', 'TSNan2Value', 'Nan2Value', 'TSStandardize', 'TSNormalize', 'TSClipOutliers',
'TSClip', 'TSRobustScale', 'TSDiff', 'TSLog', 'TSCyclicalPosition', 'TSLinearPosition', 'TSPosition',
'TSMissingness', 'TSPositionGaps', 'TSRollingMean', 'TSLogReturn', 'TSAdd', 'TSShrinkDataFrame',
'TSOneHotEncoder', 'TSCategoricalEncoder', 'TSDateTimeEncoder', 'default_date_attr', 'TSMissingnessEncoder',
'Preprocessor', 'StandardScaler', 'RobustScaler', 'Normalizer', 'BoxCox', 'YeoJohnshon', 'Quantile',
'ReLabeler']
'TSMissingness', 'TSPositionGaps', 'TSRollingMean', 'TSLogReturn', 'TSAdd', 'TSClipByVar',
'TSShrinkDataFrame', 'TSOneHotEncoder', 'TSCategoricalEncoder', 'TSDateTimeEncoder', 'default_date_attr',
'TSMissingnessEncoder', 'Preprocessor', 'StandardScaler', 'RobustScaler', 'Normalizer', 'BoxCox',
'YeoJohnshon', 'Quantile', 'ReLabeler']

# Cell
from ..imports import *
Expand Down Expand Up @@ -546,6 +546,22 @@ def encodes(self, o:TSTensor):
return torch.add(o, self.add)
def __repr__(self): return f'{self.__class__.__name__}(lag={self.lag}, pad={self.pad})'

# Cell
class TSClipByVar(Transform):
    """Clip batch of type `TSTensor` by variable.

    Each listed variable is selected with `o[:, var_idx]` and clamped to its
    own bounds. The input tensor is modified in place and returned.

    Args:
        var_min_max: list of tuples containing variable index, min value (or None) and max value (or None)
    """
    order = 90

    def __init__(self, var_min_max):
        self.var_min_max = var_min_max

    def encodes(self, o:TSTensor):
        for var_idx, min_val, max_val in self.var_min_max:
            # torch.clamp raises when both bounds are None; such an entry
            # requests no clipping at all, so leave that variable untouched.
            if min_val is None and max_val is None:
                continue
            # Written back into `o` itself, so the caller's tensor is updated.
            o[:, var_idx] = torch.clamp(o[:, var_idx], min_val, max_val)
        return o

# Cell
from sklearn.base import BaseEstimator, TransformerMixin
from fastai.data.transforms import CategoryMap
Expand Down
Loading

0 comments on commit 636aef6

Please sign in to comment.