From 34b2d337c9626a7109c6903ff3c2713c396b1f70 Mon Sep 17 00:00:00 2001
From: Kuan Tung <kuan.tung@epfl.ch>
Date: Fri, 7 Oct 2022 17:56:51 +0200
Subject: [PATCH] Update the tutorial of "Visualise pipelines" (#1913)

* Change a file extention to match the previous article

Signed-off-by: dinotuku <kuan.tung@epfl.ch>

* Add a missing import

Signed-off-by: dinotuku <kuan.tung@epfl.ch>

* Change both preprocessed datasets to parquet files

Signed-off-by: dinotuku <kuan.tung@epfl.ch>

* Change data type to ParquetDataSet for parquet files

Signed-off-by: dinotuku <kuan.tung@epfl.ch>

* Add a note for installing seaborn if it is not installed

Signed-off-by: dinotuku <kuan.tung@epfl.ch>

Signed-off-by: dinotuku <kuan.tung@epfl.ch>
---
 docs/source/tutorial/visualise_pipeline.md | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/docs/source/tutorial/visualise_pipeline.md b/docs/source/tutorial/visualise_pipeline.md
index 8d28ce83c1..a742cc90e4 100644
--- a/docs/source/tutorial/visualise_pipeline.md
+++ b/docs/source/tutorial/visualise_pipeline.md
@@ -53,18 +53,18 @@ shuttles:
   layer: raw
 
 preprocessed_companies:
-  type: pandas.CSVDataSet
-  filepath: data/02_intermediate/preprocessed_companies.csv
+  type: pandas.ParquetDataSet
+  filepath: data/02_intermediate/preprocessed_companies.pq
   layer: intermediate
 
 preprocessed_shuttles:
-  type: pandas.CSVDataSet
-  filepath: data/02_intermediate/preprocessed_shuttles.csv
+  type: pandas.ParquetDataSet
+  filepath: data/02_intermediate/preprocessed_shuttles.pq
   layer: intermediate
 
 model_input_table:
-  type: pandas.CSVDataSet
-  filepath: data/03_primary/model_input_table.csv
+  type: pandas.ParquetDataSet
+  filepath: data/03_primary/model_input_table.pq
   layer: primary
 
 regressor:
@@ -180,6 +180,7 @@ The below functions can be added to the `nodes.py` and `pipeline.py` files respe
 ```python
 # nodes.py
 import plotly.express as px
+import plotly.graph_objs as go
 import pandas as pd
 
 # the below function uses plotly.express
@@ -277,6 +278,8 @@ def create_confusion_matrix(companies: pd.DataFrame):
     return plt
 ```
 
+> You might have to execute `pip install seaborn` if the [seaborn library](https://seaborn.pydata.org/) is not installed yet.
+
 ```python
 # pipeline.py
 def create_pipeline(**kwargs) -> Pipeline: