diff --git a/README.md b/README.md index 7fe7793..f55d802 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,17 @@ In this open source solution you will find references to the [neptune.ml](https: # How to start? ## Learn about our solutions 1. Check [Kaggle forum](https://www.kaggle.com/c/home-credit-default-risk/discussion/57175) and participate in the discussions. -1. Check our [Wiki pages :page_facing_up:](https://github.com/neptune-ml/open-solution-home-credit/wiki), where we document our work. Click on the house to get started [:house_with_garden:](https://github.com/neptune-ml/open-solution-home-credit/wiki). +1. Check our [Wiki pages :house_with_garden:](https://github.com/neptune-ml/open-solution-home-credit/wiki), where we document our work. See solutions below: -## Start experimenting with our ready-to-use code +| link to code | name | CV | LB | link to description | +|:---:|:---:|:---:|:---:|:---:| +|[solution 1](https://github.com/neptune-ml/open-solution-home-credit/tree/solution-1)|*chestnut* :chestnut:|?|0.742|[LightGBM and basic features](https://github.com/neptune-ml/open-solution-home-credit/wiki/LightGBM-and-basic-features)| +|[solution 2](https://github.com/neptune-ml/open-solution-home-credit/tree/solution-2)|*seedling* :seedling:|?|0.747|[Sklearn and XGBoost algorithms and groupby features](https://github.com/neptune-ml/open-solution-home-credit/wiki/Sklearn-and-XGBoost-algorithms-and-groupby-features)| +|[solution 3](https://github.com/neptune-ml/open-solution-home-credit/tree/solution-3)|*blossom* :blossom:|0.7840|0.790|[LightGBM on selected features](https://github.com/neptune-ml/open-solution-home-credit/wiki/LightGBM-on-selected-features)| +|[solution 4](https://github.com/neptune-ml/open-solution-home-credit/tree/solution-4)|*tulip* :tulip: |0.7905|0.801|[LightGBM with smarter features](https://github.com/neptune-ml/open-solution-home-credit/wiki/LightGBM-with-smarter-features)| +|solution 5|*sunflower* :sunflower:|||*(in progress...)* :construction_worker_woman:| + +## Start experimenting with ready-to-use code You can jump start your participation in the competition by using our starter pack. Installation instruction below will guide you through the setup. ### Installation *(fast track)* @@ -39,7 +47,7 @@ neptune run --config neptune_random_search.yaml main.py train_evaluate_predict - :snake: ```bash -python main.py train_evaluate_predict --pipeline_name lightGBM +python main.py -- train_evaluate_predict --pipeline_name lightGBM ``` ### Installation *(step by step)* @@ -49,7 +57,7 @@ git clone https://github.com/minerva-ml/open-solution-home-credit.git ``` 2. Install requirements in your Python3 environment ```bash -pip3 install requirements.txt +pip3 install -r requirements.txt ``` 3. Register to the [neptune.ml](https://neptune.ml) _(if you wish to use it)_ 4. Update data directories in the [neptune.yaml](https://github.com/minerva-ml/open-solution-home-credit/blob/master/neptune.yaml) configuration file. @@ -63,7 +71,7 @@ neptune run --config neptune_random_search.yaml main.py train_evaluate_predict - :snake: ```bash -python main.py train_evaluate_predict --pipeline_name lightGBM +python main.py -- train_evaluate_predict --pipeline_name lightGBM ``` 6. collect submit from `experiment_directory` specified in the [neptune.yaml](https://github.com/minerva-ml/open-solution-home-credit/blob/master/neptune.yaml) diff --git a/src/feature_extraction.py b/src/feature_extraction.py index a6c0199..45af14a 100644 --- a/src/feature_extraction.py +++ b/src/feature_extraction.py @@ -255,7 +255,14 @@ def __init__(self, categorical_columns, numerical_columns): 'external_sources_mean', 'external_sources_nanmedian', 'short_employment', - 'young_age'] + 'young_age', + 'cnt_non_child', + 'child_to_non_child_ratio', + 'income_per_non_child', + 'credit_per_person', + 'credit_per_child', + 'credit_per_non_child', + ] def transform(self, X, **kwargs): X['annuity_income_percentage'] = X['AMT_ANNUITY'] / X['AMT_INCOME_TOTAL'] @@ -273,6 +280,12 @@ def transform(self, X, **kwargs): X['phone_to_birth_ratio'] = X['DAYS_LAST_PHONE_CHANGE'] / X['DAYS_BIRTH'] X['phone_to_employ_ratio'] = X['DAYS_LAST_PHONE_CHANGE'] / X['DAYS_EMPLOYED'] X['external_sources_weighted'] = X.EXT_SOURCE_1 * 2 + X.EXT_SOURCE_2 * 3 + X.EXT_SOURCE_3 * 4 + X['cnt_non_child'] = X['CNT_FAM_MEMBERS'] - X['CNT_CHILDREN'] + X['child_to_non_child_ratio'] = X['CNT_CHILDREN'] / X['cnt_non_child'] + X['income_per_non_child'] = X['AMT_INCOME_TOTAL'] / X['cnt_non_child'] + X['credit_per_person'] = X['AMT_CREDIT'] / X['CNT_FAM_MEMBERS'] + X['credit_per_child'] = X['AMT_CREDIT'] / (1 + X['CNT_CHILDREN']) + X['credit_per_non_child'] = X['AMT_CREDIT'] / X['cnt_non_child'] for function_name in ['min', 'max', 'sum', 'mean', 'nanmedian']: X['external_sources_{}'.format(function_name)] = eval('np.{}'.format(function_name))( X[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']], axis=1) diff --git a/src/pipeline_config.py b/src/pipeline_config.py index f969e46..35f4ea5 100644 --- a/src/pipeline_config.py +++ b/src/pipeline_config.py @@ -139,13 +139,14 @@ 'YEARS_BUILD_MEDI', 'YEARS_BUILD_MODE'] +cols_to_agg = ['AMT_CREDIT', 'AMT_ANNUITY', 'AMT_INCOME_TOTAL', 'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3'] +aggs = ['min', 'mean', 'max', 'sum', 'var'] +aggregation_pairs = [(col, agg) for col in cols_to_agg for agg in aggs] + APPLICATION_AGGREGATION_RECIPIES = [ - (['CODE_GENDER', 'NAME_EDUCATION_TYPE'], [('AMT_ANNUITY', 'max'), - ('AMT_CREDIT', 'max'), - ('EXT_SOURCE_1', 'mean'), - ('EXT_SOURCE_2', 'mean'), - ('OWN_CAR_AGE', 'max'), - ('OWN_CAR_AGE', 'sum')]), + (['NAME_EDUCATION_TYPE', 'CODE_GENDER'], aggregation_pairs), + (['NAME_FAMILY_STATUS', 'NAME_EDUCATION_TYPE'], aggregation_pairs), + (['NAME_FAMILY_STATUS', 'CODE_GENDER'], aggregation_pairs), (['CODE_GENDER', 'ORGANIZATION_TYPE'], [('AMT_ANNUITY', 'mean'), ('AMT_INCOME_TOTAL', 'mean'), ('DAYS_REGISTRATION', 'mean'),