From 34b2d337c9626a7109c6903ff3c2713c396b1f70 Mon Sep 17 00:00:00 2001 From: Kuan Tung Date: Fri, 7 Oct 2022 17:56:51 +0200 Subject: [PATCH] Update the tutorial of "Visualise pipelines" (#1913) * Change a file extension to match the previous article Signed-off-by: dinotuku * Add a missing import Signed-off-by: dinotuku * Change both preprocessed datasets to parquet files Signed-off-by: dinotuku * Change data type to ParquetDataSet for parquet files Signed-off-by: dinotuku * Add a note for installing seaborn if it is not installed Signed-off-by: dinotuku Signed-off-by: dinotuku --- docs/source/tutorial/visualise_pipeline.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/source/tutorial/visualise_pipeline.md b/docs/source/tutorial/visualise_pipeline.md index 8d28ce83c1..a742cc90e4 100644 --- a/docs/source/tutorial/visualise_pipeline.md +++ b/docs/source/tutorial/visualise_pipeline.md @@ -53,18 +53,18 @@ shuttles: layer: raw preprocessed_companies: - type: pandas.CSVDataSet - filepath: data/02_intermediate/preprocessed_companies.csv + type: pandas.ParquetDataSet + filepath: data/02_intermediate/preprocessed_companies.pq layer: intermediate preprocessed_shuttles: - type: pandas.CSVDataSet - filepath: data/02_intermediate/preprocessed_shuttles.csv + type: pandas.ParquetDataSet + filepath: data/02_intermediate/preprocessed_shuttles.pq layer: intermediate model_input_table: - type: pandas.CSVDataSet - filepath: data/03_primary/model_input_table.csv + type: pandas.ParquetDataSet + filepath: data/03_primary/model_input_table.pq layer: primary regressor: @@ -180,6 +180,7 @@ The below functions can be added to the `nodes.py` and `pipeline.py` files respe ```python # nodes.py import plotly.express as px +import plotly.graph_objs as go import pandas as pd # the below function uses plotly.express @@ -277,6 +278,8 @@ def create_confusion_matrix(companies: pd.DataFrame): return plt ``` +> You might have to execute `pip install seaborn` if 
the [seaborn library](https://seaborn.pydata.org/) is not installed yet. + ```python # pipeline.py def create_pipeline(**kwargs) -> Pipeline: