diff --git a/.gitignore b/.gitignore index 7627003d..fe61b21f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ old-code/* config_files/* data/* +callflow/unused/* app/node_modules* app/dist diff --git a/app/src/components/callflow.js b/app/src/components/callflow.js index 753a5f80..07cc0126 100644 --- a/app/src/components/callflow.js +++ b/app/src/components/callflow.js @@ -115,7 +115,7 @@ export default { selectedOutlierBand: 4, defaultCallSite: "", modes: ["Ensemble", "Single"], - selectedMode: "Ensemble", + selectedMode: "Single", // Presentation mode variables exhibitModes: ["Presentation", "Default"], selectedExhibitMode: "Default", @@ -164,9 +164,11 @@ export default { mounted() { var socket = io.connect(this.server, { reconnect: false }); + console.log(this.selectedMode) this.$socket.emit("init", { - caseStudy: this.selectedCaseStudy + mode: this.selectedMode }); + EventHandler.$on("lasso_selection", () => { this.$store.resetTargetDataset = true; @@ -201,6 +203,7 @@ export default { this.setTargetDataset(); this.setComponentMap(); + console.log(this.selectedFormat.length, this.selectedMode) if (this.selectedFormat == "SuperGraph") { if (this.selectedMode == "Single") { this.$socket.emit("single_callsite_data", { @@ -265,13 +268,6 @@ export default { }, methods: { - // Feature: Sortby the datasets and show the time. - formatRuntimeWithoutUnits(val) { - let format = d3.format(".2"); - let ret = format(val); - return ret; - }, - // Feature: Sortby the datasets and show the time. sortDatasetsByAttr(datasets, attr) { let ret = datasets.sort((a, b) => { @@ -330,10 +326,10 @@ export default { this.selectedMode = "Single"; } - this.$store.maxExcTime = data["max_excTime"]; - this.$store.minExcTime = data["min_excTime"]; - this.$store.maxIncTime = data["max_incTime"]; - this.$store.minIncTime = data["min_incTime"]; + this.$store.maxExcTime = data["maxExcTime"]; + this.$store.minExcTime = data["minExcTime"]; + this.$store.maxIncTime = data["maxIncTime"]; + this.$store.minIncTime = data["minIncTime"]; this.$store.numOfRanks = data["numOfRanks"]; this.$store.moduleCallsiteMap = data["module_callsite_map"]; @@ -342,8 +338,6 @@ export default { this.$store.selectedMPIBinCount = this.selectedMPIBinCount; this.$store.selectedRunBinCount = this.selectedRunBinCount; - this.selectedIncTime = ((this.selectedFilterPerc * this.$store.maxIncTime[this.selectedTargetDataset] * 0.000001) / 100).toFixed(3); - this.setViewDimensions(); this.$store.auxiliarySortBy = this.auxiliarySortBy; @@ -383,6 +377,7 @@ export default { this.$store.resetTargetDataset = true; } this.$store.selectedMetric = this.selectedMetric; + console.log(this.$store.selectedDatasets) this.datasets = this.sortDatasetsByAttr(this.$store.selectedDatasets, "Inclusive"); let max_dataset = ""; @@ -411,8 +406,9 @@ export default { else { this.$store.selectedTargetDataset = this.selectedTargetDataset; } + this.selectedIncTime = ((this.selectedFilterPerc * this.$store.maxIncTime[this.selectedTargetDataset] * 0.000001) / 100).toFixed(3); - console.log("Minimum among all runtimes: ", this.selectedTargetDataset); + console.log("Maximum among all runtimes: ", this.selectedTargetDataset); }, setComponentMap() { @@ -497,6 +493,7 @@ export default { // Feature: the Supernode hierarchy is automatically selected from the mean metric runtime. sortModulesByMetric(attr) { + console.log(this.$store.modules) let module_list = Object.keys(this.$store.modules["ensemble"]); // Create a map for each dataset mapping the respective mean times. 
@@ -595,7 +592,9 @@ export default { this.setupColors(); this.setOtherData(); this.setTargetDataset(); - this.setSelectedModule(); + if(this.selectedFormat == 'SuperGraph' && this.selectedMode == 'Ensemble'){ + this.setSelectedModule(); + } console.log("Mode : ", this.selectedMode); console.log("Number of runs :", this.$store.numOfRuns); @@ -623,6 +622,7 @@ export default { this.loadComponents(this.currentEnsembleCallGraphComponents); } else if (this.selectedFormat == "CCT") { + console.log(this.currentEnsembleCCTComponents) this.initComponents(this.currentEnsembleCCTComponents); } } diff --git a/app/src/components/ensembleHistogram/ensembleHistogram.js b/app/src/components/ensembleHistogram/ensembleHistogram.js index 9b3bcf5d..203071bb 100644 --- a/app/src/components/ensembleHistogram/ensembleHistogram.js +++ b/app/src/components/ensembleHistogram/ensembleHistogram.js @@ -105,7 +105,6 @@ export default { EventHandler.$emit("ensemble_histogram", { module: this.$store.selectedModule, - name: "main", dataset: this.$store.runNames, }); }, diff --git a/app/src/components/ensembleScatterplot/ensembleScatterplot.js b/app/src/components/ensembleScatterplot/ensembleScatterplot.js index 91367e79..6bce6200 100644 --- a/app/src/components/ensembleScatterplot/ensembleScatterplot.js +++ b/app/src/components/ensembleScatterplot/ensembleScatterplot.js @@ -79,7 +79,6 @@ export default { this.$refs.ToolTip.init(this.svgID); EventHandler.$emit("ensemble_scatterplot", { module: this.$store.selectedModule, - name: "main", dataset: this.$store.selectedDatasets, }); }, diff --git a/callflow/__init__.py b/callflow/__init__.py index f45be28f..579257c8 100644 --- a/callflow/__init__.py +++ b/callflow/__init__.py @@ -5,11 +5,10 @@ from .datastructures.graphframe import GraphFrame +from .datastructures.supergraph import SuperGraph +from .datastructures.ensemblegraph import EnsembleGraph +from .datastructures.cct import CCT from .datastructures.supergraph_ensemble import EnsembleSuperGraph from .datastructures.supergraph_single import SingleSuperGraph -from .datastructures.cct_ensemble import EnsembleCCT -from .datastructures.cct_single import SingleCCT -from .callflow_base import BaseCallFlow -from .callflow_single import SingleCallFlow -from .callflow_ensemble import EnsembleCallFlow +from .callflow import CallFlow diff --git a/callflow/callflow.py b/callflow/callflow.py new file mode 100644 index 00000000..fb1d61ea --- /dev/null +++ b/callflow/callflow.py @@ -0,0 +1,464 @@ +# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +# ------------------------------------------------------------------------------ +# Library imports +import os +import json + +# ------------------------------------------------------------------------------ +# CallFlow imports +import callflow +from callflow import SuperGraph, EnsembleGraph, CCT, EnsembleSuperGraph +from callflow.modules import ( + EnsembleAuxiliary, + ModuleHierarchy, + ParameterProjection, + FunctionList, +) + +LOGGER = callflow.get_logger(__name__) + +# ------------------------------------------------------------------------------ +# CallFlow class +class CallFlow: + def __init__(self, config={}, process=False, ensemble=False): + """ + Entry interface to access CallFlow's functionalities. " + """ + + # Assert if config is provided. + assert config != None + + # Convert config json to props. Never touch self.config ever. 
+ self.props = json.loads(json.dumps(config, default=lambda o: o.__dict__)) + # Assert ensemble if it really contains more than 1 dataset. + assert ensemble == (len(self.props["dataset_names"]) > 1) + + # Based on option, either process into .callflow or read from .callflow. + if process: + self._create_dot_callflow_folder() + if ensemble: + self._process_ensemble() + else: + self._process_single() + else: # Rendering of call graphs. + if ensemble: + self.supergraphs = self._read_ensemble() + # assertion here is 1 less than self.supergraph.keys, because + # self.supergraphs contains the ensemble supergraph as well. + assert ( + len(self.props["dataset_names"]) == len(self.supergraphs.keys()) - 1 + ) + else: + self.supergraphs = self._read_single() + assert len(self.props["dataset_names"]) == 1 + + # Adds basic information to props. + # Props is later returned to the client app on "init" request. + self.add_basic_info_to_props() + + # -------------------------------------------------------------------------- + # Processing methods. + def _create_dot_callflow_folder(self): + """ + Create a .callflow directory and empty files. + """ + LOGGER.debug(f"Saved .callflow directory is: {self.props['save_path']}") + + if not os.path.exists(self.props["save_path"]): + os.makedirs(self.props["save_path"]) + os.makedirs(os.path.join(self.props["save_path"], "ensemble")) + + dataset_folders = [] + for dataset in self.props["datasets"]: + dataset_folders.append(dataset["name"]) + dataset_folders.append("ensemble") + + for dataset in dataset_folders: + dataset_dir = os.path.join(self.props["save_path"], dataset) + LOGGER.debug(dataset_dir) + if not os.path.exists(dataset_dir): + # if self.debug: + LOGGER.debug(f"Creating .callflow directory for dataset : {dataset}") + os.makedirs(dataset_dir) + + files = ["df.csv", "nxg.json", "hatchet_tree.txt", "auxiliary_data.json"] + for f in files: + fname = os.path.join(dataset_dir, f) + if not os.path.exists(fname): + open(fname, "w").close() + + def _remove_dot_callflow_folder(self): + """ + TODO: We might want to delete the .callflow folder when we re-process/re-write. + """ + pass + + def _process_single(self): + """ + Single dataset processing. + """ + dataset_name = self.props["dataset_names"][0] + supergraph = SuperGraph(props=self.props, tag=dataset_name, mode="process") + LOGGER.info("#########################################") + LOGGER.info(f"Run: {dataset_name}") + LOGGER.info("#########################################") + + # Process each graphframe. + supergraph.process_gf() + + # Filter by inclusive or exclusive time. + supergraph.filter_gf(mode="single") + + # Group by module. + supergraph.group_gf(group_by="module") + + # Store the graphframe. + supergraph.write_gf("entire") + + supergraph.single_auxiliary( + dataset=dataset_name, binCount=20, process=True, + ) + + def _process_ensemble(self): + """ + Ensemble processing of datasets. + """ + # Before we process the ensemble, we perform single processing on all datasets. + single_supergraphs = {} + for idx, dataset_name in enumerate(self.props["dataset_names"]): + # Create an instance of dataset. + single_supergraphs[dataset_name] = SuperGraph( + props=self.props, tag=dataset_name, mode="process" + ) + LOGGER.info("#########################################") + LOGGER.info(f"Run: {dataset_name}") + LOGGER.info("#########################################") + + # Process each graphframe. + single_supergraphs[dataset_name].process_gf() + + # Write the entire graphframe into .callflow. 
+ single_supergraphs[dataset_name].write_gf("entire") + + # Single auxiliary processing. + single_supergraphs[dataset_name].single_auxiliary( + dataset=dataset_name, binCount=20, process=True, + ) + + # Create a supergraph class for ensemble case. + ensemble_supergraph = EnsembleGraph( + self.props, "ensemble", mode="process", supergraphs=single_supergraphs + ) + + # Write the graphframe to file. + ensemble_supergraph.write_gf("entire") + + # Filter the ensemble graphframe. + ensemble_supergraph.filter_gf(mode="ensemble") + + # Write the filtered graphframe. + ensemble_supergraph.write_gf("filter") + + # Group by module. + ensemble_supergraph.group_gf(group_by="module") + + # Write the grouped graphframe. + ensemble_supergraph.write_gf("group") + + # Ensemble auxiliary processing. + ensemble_supergraph.ensemble_auxiliary( + # MPIBinCount=self.currentMPIBinCount, + # RunBinCount=self.currentRunBinCount, + datasets=self.props["dataset_names"], + MPIBinCount=20, + RunBinCount=20, + process=True, + write=True, + ) + + def _read_single(self): + """ + Read the single .callflow files required for client. + """ + supergraphs = {} + # Only consider the first dataset from the listing. + dataset_name = self.props["dataset_names"][0] + supergraphs[dataset_name] = SuperGraph( + props=self.props, tag=dataset_name, mode="render" + ) + + return supergraphs + + def _read_ensemble(self): + """ + Read the ensemble .callflow files required for client. + """ + supergraphs = {} + + for idx, dataset_name in enumerate(self.props["dataset_names"]): + supergraphs[dataset_name] = SuperGraph( + self.props, dataset_name, mode="render" + ) + supergraphs[dataset_name].read_gf( + read_parameter=self.props["read_parameter"] + ) + + supergraphs["ensemble"] = EnsembleGraph( + props=self.props, tag="ensemble", mode="render" + ) + supergraphs["ensemble"].read_gf(read_parameter=self.props["read_parameter"]) + supergraphs["ensemble"].read_auxiliary_data() + return supergraphs + + # -------------------------------------------------------------------------- + # Reading and rendering methods. + # All the functions below are Public methods that are accessed by the server. + + def add_basic_info_to_props(self): + """ + Adds basic information (like max, min inclusive and exclusive runtime) to self.props. 
+ """ + self.props["maxIncTime"] = {} + self.props["maxExcTime"] = {} + self.props["minIncTime"] = {} + self.props["minExcTime"] = {} + self.props["numOfRanks"] = {} + maxIncTime = 0 + maxExcTime = 0 + minIncTime = 0 + minExcTime = 0 + maxNumOfRanks = 0 + for idx, tag in enumerate(self.supergraphs): + self.props["maxIncTime"][tag] = ( + self.supergraphs[tag].gf.df["time (inc)"].max() + ) + self.props["maxExcTime"][tag] = self.supergraphs[tag].gf.df["time"].max() + self.props["minIncTime"][tag] = ( + self.supergraphs[tag].gf.df["time (inc)"].min() + ) + self.props["minExcTime"][tag] = self.supergraphs[tag].gf.df["time"].min() + # self.props["numOfRanks"][dataset] = len( + # self.datasets[dataset].gf.df["rank"].unique() + # ) + maxExcTime = max(self.props["maxExcTime"][tag], maxExcTime) + maxIncTime = max(self.props["maxIncTime"][tag], maxIncTime) + minExcTime = min(self.props["minExcTime"][tag], minExcTime) + minIncTime = min(self.props["minIncTime"][tag], minIncTime) + # maxNumOfRanks = max(self.props["numOfRanks"][dataset], maxNumOfRanks) + + self.props["maxIncTime"]["ensemble"] = maxIncTime + self.props["maxExcTime"]["ensemble"] = maxExcTime + self.props["minIncTime"]["ensemble"] = minIncTime + self.props["minExcTime"]["ensemble"] = minExcTime + # self.props["numOfRanks"]["ensemble"] = maxNumOfRanks + + def request_single(self, operation): + """ + TODO: Write individual functiosn to do this. + Handles all the socket requests connected to Single CallFlow. + """ + LOGGER.info(f"[Single Mode] {operation}") + operation_tag = operation["name"] + + if operation_tag == "init": + return self.props + + if "groupBy" in operation: + LOGGER.info("Grouping by: {0}".format(operation["groupBy"])) + else: + operation["groupBy"] = "name" + + dataset = operation["dataset"] + + LOGGER.info("The selected Dataset is {0}".format(dataset)) + + # Compare against the different operations + # TODO: Probably remove. + if operation_tag == "reset": + datasets = [dataset] + self.reProcess = True + self.states = self.pipeline( + datasets, operation["filterBy"], operation["filterPerc"] + ) + self.reProcess = False + self.states = self.pipeline(datasets) + return {} + + elif operation_tag == "auxiliary": + return self.supergraphs[dataset].auxiliary_data + + elif operation_tag == "supergraph": + return self.supergraphs[dataset].gf.nxg + + elif operation_tag == "mini-histogram": + minihistogram = MiniHistogram(state) + return minihistogram.result + + elif operation_tag == "cct": + graph = CCT( + supergraphs=self.supergraphs, + tag=operation["dataset"], + callsite_count=operation["functionsInCCT"], + ) + return graph.g + + elif operation_tag == "function": + functionlist = FunctionList(state, operation["module"], operation["nid"]) + return functionlist.result + + def request_ensemble(self, operation): + """ + TODO: Write individual functiosn to do this. + Handles all the socket requests connected to Single CallFlow. 
+ """ + operation_tag = operation["name"] + datasets = self.props["dataset_names"] + + if operation_tag == "init": + return self.props + + elif operation_tag == "ensemble_cct": + result = CCT( + supergraphs=self.supergraphs, + tag="ensemble", + props=self.props, + callsite_count=operation["functionsInCCT"], + ) + return result.gf.nxg + + elif operation_tag == "supergraph": + if "reveal_callsites" in operation: + reveal_callsites = operation["reveal_callsites"] + else: + reveal_callsites = [] + + if "split_entry_module" in operation: + split_entry_module = operation["split_entry_module"] + else: + split_entry_module = "" + + if "split_callee_module" in operation: + split_callee_module = operation["split_callee_module"] + else: + split_callee_module = "" + + ensemble_super_graph = EnsembleSuperGraph( + supergraphs=self.supergraphs, + tag="ensemble", + path="group_path", + group_by_attr="module", + props=self.props, + construct_graph=True, + add_data=True, + reveal_callsites=reveal_callsites, + split_entry_module=split_entry_module, + split_callee_module=split_callee_module, + ) + return ensemble_super_graph.agg_nxg + + # Not used. + elif operation_tag == "scatterplot": + assert False + if operation["plot"] == "bland-altman": + state1 = self.states[operation["dataset"]] + state2 = self.states[operation["dataset2"]] + col = operation["col"] + catcol = operation["catcol"] + dataset1 = operation["dataset"] + dataset2 = operation["dataset2"] + ret = BlandAltman( + state1, state2, col, catcol, dataset1, dataset2 + ).results + return ret + + # Not used. + elif operation_tag == "similarity": + assert False + if operation["module"] == "all": + dirname = self.config.callflow_dir + name = self.config.runName + similarity_filepath = dirname + "/" + "similarity.json" + with open(similarity_filepath, "r") as similarity_file: + self.similarities = json.load(similarity_file) + else: + self.similarities = {} + for idx, dataset in enumerate(datasets): + self.similarities[dataset] = [] + for idx_2, dataset2 in enumerate(datasets): + union_similarity = Similarity( + self.states[dataset2].g, self.states[dataset].g + ) + self.similarities[dataset].append(union_similarity.result) + return self.similarities + + elif operation_tag == "hierarchy": + mH = ModuleHierarchy(self.supergraphs["ensemble"], operation["module"]) + return mH.result + + elif operation_tag == "projection": + self.similarities = {} + # dirname = self.config.callflow_dir + # name = self.config.runName + # similarity_filepath = dirname + '/' + 'similarity.json' + # with open(similarity_filepath, 'r') as similarity_file: + # self.similarities = json.load(similarity_file) + result = ParameterProjection( + self.supergraphs["ensemble"], + self.similarities, + operation["targetDataset"], + n_cluster=operation["numOfClusters"], + ).result + return result.to_json(orient="columns") + + # Not used. + elif operation_tag == "run-information": + assert False + ret = [] + for idx, state in enumerate(self.states): + self.states[state].projection_data["dataset"] = state + ret.append(self.states[state].projection_data) + return ret + + # TODO: need to handle re-processing case. + # The commented code below was used to enable re-processing. 
+ elif operation_tag == "auxiliary": + # print(f"Reprocessing: {operation['re-process']}") + # aux = EnsembleAuxiliary( + # self.states, + # MPIBinCount=operation["MPIBinCount"], + # RunBinCount=operation["RunBinCount"], + # datasets=operation["datasets"], + # config=self.config, + # process=True, + # write=False, + # ) + # if operation["re-process"] == 1: + # result = aux.run() + # else: + + # Need these two variables to belong to some class. Not sure where. + # Will take care when pre-processing is done. + # self.currentMPIBinCount = operation["MPIBinCount"] + # self.currentRunBinCount = operation["RunBinCount"] + + return self.supergraphs["ensemble"].auxiliary_data + + elif operation_tag == "compare": + compareDataset = operation["compareDataset"] + targetDataset = operation["targetDataset"] + if operation["selectedMetric"] == "Inclusive": + selectedMetric = "time (inc)" + elif operation["selectedMetric"] == "Exclusive": + selectedMetric = "time" + + compare = DiffView( + self.supergraphs["ensemble"], + compareDataset, + targetDataset, + selectedMetric, + ) + return compare.result diff --git a/callflow/callflow_base.py b/callflow/callflow_base.py deleted file mode 100644 index 0b0b7c03..00000000 --- a/callflow/callflow_base.py +++ /dev/null @@ -1,131 +0,0 @@ -import os - -import callflow - -LOGGER = callflow.get_logger(__name__) -from callflow.pipeline import Pipeline - - -class AppState: - def __init__(self, config): - self.config = config - - self.maxIncTime = {} - self.maxExcTime = {} - self.minIncTime = {} - self.minExcTime = {} - self.numOfRanks = {} - - def add_target_df(self): - self.target_df = {} - for dataset in self.config.dataset_names: - self.target_df[dataset] = self.states["ensemble_entire"].new_gf.df.loc[ - self.states["ensemble_entire"].new_gf.df["dataset"] == dataset - ] - - def add_basic_info(self): - maxIncTime = 0 - maxExcTime = 0 - minIncTime = 0 - minExcTime = 0 - maxNumOfRanks = 0 - for idx, dataset in enumerate(self.config.dataset_names): - self.maxIncTime[dataset] = self.target_df[dataset]["time (inc)"].max() - self.maxExcTime[dataset] = self.target_df[dataset]["time"].max() - self.minIncTime[dataset] = self.target_df[dataset]["time (inc)"].min() - self.minExcTime[dataset] = self.target_df[dataset]["time"].min() - self.numOfRanks[dataset] = len(self.target_df[dataset]["rank"].unique()) - max_exclusive_time = max(self.maxExcTime[dataset], maxExcTime) - max_inclusive_time = max(self.maxIncTime[dataset], maxIncTime) - min_exclusive_time = min(self.minExcTime[dataset], minExcTime) - min_inclusive_time = min(self.minIncTime[dataset], minIncTime) - max_numOfRanks = max(self.numOfRanks[dataset], max_numOfRanks) - self.maxIncTime["ensemble"] = maxIncTime - self.maxExcTime["ensemble"] = maxExcTime - self.minIncTime["ensemble"] = minIncTime - self.minExcTime["ensemble"] = minExcTime - self.numOfRanks["ensemble"] = maxNumOfRanks - - -class Config: - def __init__(self): - pass - - -class BaseCallFlow: - def __init__(self, config={}, process=False): - - # Assert if config is provided. - assert config != None - self.config = config - - if process: - self.pipeline = Pipeline(self.config) - self._create_dot_callflow_folder() - self.process_states() - - else: - self.appState = AppState(self.config) - self.read_states() - - # -------------------------------------------------------------------------- - # public API. 
child classes should implement these functions - def process_states(self): - self._process_states() - - def read_states(self): - self._read_states() - - def request(self, operation): - self._request(operation) - - # -------------------------------------------------------------------------- - def displayStats(self, name): - log.warn("==========================") - log.info("Number of datasets : {0}".format(len(self.config[name].paths.keys()))) - log.info("Stats: Dataset ({0}) ".format(name)) - log.warn("==========================") - max_inclusive_time = utils.getMaxIncTime(gf) - max_exclusive_time = utils.getMaxExcTime(gf) - avg_inclusive_time = utils.getAvgIncTime(gf) - avg_exclusive_time = utils.getAvgExcTime(gf) - num_of_nodes = utils.getNumOfNodes(gf) - log.info("[] Rows in dataframe: {0}".format(self.states[name].df.shape[0])) - log.info("Max Inclusive time = {0} ".format(max_inclusive_time)) - log.info("Max Exclusive time = {0} ".format(max_exclusive_time)) - log.info("Avg Inclusive time = {0} ".format(avg_inclusive_time)) - log.info("Avg Exclusive time = {0} ".format(avg_exclusive_time)) - log.info("Number of nodes in CCT = {0}".format(num_of_nodes)) - - # -------------------------------------------------------------------------- - def _create_dot_callflow_folder(self): - """ - Create a .callflow directory and empty files. - """ - LOGGER.debug(f"Saved .callflow directory is: {self.config.save_path}") - - if not os.path.exists(self.config.save_path): - os.makedirs(self.config.save_path) - - for dataset in self.config.datasets: - dataset_dir = os.path.join(self.config.save_path, dataset["name"]) - LOGGER.debug(dataset_dir) - if not os.path.exists(dataset_dir): - #if self.debug: - LOGGER.debug( - f"Creating .callflow directory for dataset : {dataset['name']}" - ) - os.makedirs(dataset_dir) - - files = [ - "entire_df.csv", - "filter_df.csv", - "entire_graph.json", - "filter_graph.json", - ] - for f in files: - fname = os.path.join(dataset_dir, f) - if not os.path.exists(fname): - open(fname, "w").close() - - # -------------------------------------------------------------------------- diff --git a/callflow/callflow_ensemble.py b/callflow/callflow_ensemble.py deleted file mode 100644 index e6def1bd..00000000 --- a/callflow/callflow_ensemble.py +++ /dev/null @@ -1,326 +0,0 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. -############################################################################## - -import time -import json -import pandas as pd - -import callflow - -LOGGER = callflow.get_logger(__name__) -from callflow.pipeline import State, Pipeline - -from callflow.utils import getMaxExcTime, getMinExcTime, getMaxIncTime, getMinIncTime -from callflow.timer import Timer -from callflow import EnsembleCCT, EnsembleSuperGraph, BaseCallFlow -from callflow.modules import ( - RankHistogram, - EnsembleAuxiliary, - Gradients, - ModuleHierarchy, - ParameterProjection, - DiffView, -) -from callflow.algorithms import DeltaConSimilarity - -# Create states for each dataset. -# Note: gf would never change from create_gf. -# # Note: fgf would be changed when filter props are changed by client. 
-# Note: df is always updated. -# Note: graph is always updated. -class EnsembleCallFlow(BaseCallFlow): - def __init__(self, config=None, process=None): - super(SingleCallFlow, self).__init__(config, process) - - # Config contains properties set by the input config file. - self.currentMPIBinCount = 20 - self.currentRunBinCount = 20 - - # TODO: should go in appstate - # self.target_df = {} - # for dataset in self.config.dataset_names: - # self.target_df[dataset] = self.states["ensemble_entire"].new_gf.df.loc[ - # self.states["ensemble_entire"].new_gf.df["dataset"] == dataset - # ] - - # -------------------------------------------------------------------------- - # TODo: look at the difference in signature - def _process_states(self, filterBy="Inclusive", filterPerc="10"): - states = {} - # col_names = ["stage", "time"] - # time_perf_df = pd.DataFrame(columns=col_names) - for idx, dataset_name in enumerate(self.config.dataset_names): - states[dataset_name] = State(dataset_name) - LOGGER.info("#########################################") - LOGGER.info(f"Run: {dataset_name}") - LOGGER.info("#########################################") - - stage1 = time.perf_counter() - states[dataset_name] = self.pipeline.create_gf(dataset_name) - stage2 = time.perf_counter() - LOGGER.info(f"Create GraphFrame: {stage2 - stage1}") - LOGGER.info("-----------------------------------------") - - states[dataset_name] = self.pipeline.process_gf( - states[dataset_name], "entire" - ) - stage3 = time.perf_counter() - - LOGGER.info(f"Preprocess GraphFrame: {stage3 - stage2}") - LOGGER.info("-----------------------------------------") - - states[dataset_name] = self.pipeline.hatchetToNetworkX( - states[dataset_name], "path" - ) - stage4 = time.perf_counter() - LOGGER.info(f"Convert to NetworkX graph: {stage4 - stage3}") - LOGGER.info("-----------------------------------------") - - states[dataset_name] = self.pipeline.group(states[dataset_name], "module") - stage5 = time.perf_counter() - LOGGER.info(f"Convert to NetworkX graph: {stage4 - stage3}") - LOGGER.info("-----------------------------------------") - - self.pipeline.write_dataset_gf( - states[dataset_name], dataset_name, "entire", write_graph=False - ) - stage6 = time.perf_counter() - LOGGER.info(f"Write GraphFrame: {stage6 - stage5}") - LOGGER.info("-----------------------------------------") - self.pipeline.write_hatchet_graph(states, dataset_name) - - for idx, dataset_name in enumerate(self.config.dataset_names): - states[dataset_name] = self.pipeline.read_dataset_gf(dataset_name) - - stage7 = time.perf_counter() - states["ensemble_entire"] = self.pipeline.union(states) - stage8 = time.perf_counter() - - LOGGER.info(f"Union GraphFrame: {stage8 - stage7}") - LOGGER.info("-----------------------------------------") - - self.pipeline.write_ensemble_gf(states, "ensemble_entire") - stage9 = time.perf_counter() - LOGGER.info(f"Writing ensemble graph: {stage9 - stage8}") - LOGGER.info("-----------------------------------------") - - stage10 = time.perf_counter() - states["ensemble_filter"] = self.pipeline.filterNetworkX( - states["ensemble_entire"], self.config.filter_perc - ) - stage11 = time.perf_counter() - - LOGGER.info(f"Filter ensemble graph: {stage11 - stage10}") - LOGGER.info("-----------------------------------------") - - stage12 = time.perf_counter() - self.pipeline.write_ensemble_gf(states, "ensemble_filter") - stage13 = time.perf_counter() - LOGGER.info(f"Writing ensemble graph: {stage13 - stage12}") - 
LOGGER.info("-----------------------------------------") - - stage14 = time.perf_counter() - states["ensemble_group"] = self.pipeline.ensemble_group(states, "module") - stage15 = time.perf_counter() - - LOGGER.info(f"Group ensemble graph: {stage15 - stage14}") - LOGGER.info("-----------------------------------------") - stage16 = time.perf_counter() - self.pipeline.write_ensemble_gf(states, "ensemble_group") - stage17 = time.perf_counter() - - LOGGER.info(f"Write group ensemble graph: {stage17 - stage16}") - LOGGER.info("-----------------------------------------") - - # Need to remove the dependence on reading the dataframe again. - states = {} - states["ensemble_entire"] = self.pipeline.read_ensemble_gf("ensemble_entire") - - stage18 = time.perf_counter() - aux = EnsembleAuxiliary( - states, - MPIBinCount=self.currentMPIBinCount, - RunBinCount=self.currentRunBinCount, - datasets=self.config.dataset_names, - config=self.config, - process=True, - write=True, - ) - aux.run() - stage19 = time.perf_counter() - LOGGER.info(f"Dump Gradient, distribution and variations: {stage19 - stage18}") - LOGGER.info("-----------------------------------------") - - return states - - def _readState(self): - states = {} - states["ensemble_entire"] = self.pipeline.read_ensemble_gf("ensemble_entire") - states["ensemble_filter"] = self.pipeline.read_ensemble_gf("ensemble_filter") - states["ensemble_group"] = self.pipeline.read_ensemble_gf("ensemble_group") - states["all_data"] = self.pipeline.read_all_data() - - return states - - def _request(self, action): - action_name = action["name"] - LOGGER.info(f"Action: {action_name}") - datasets = self.config.dataset_names - - if action_name == "init": - self.addIncExcTime() - return self.config - - elif action_name == "ensemble_cct": - nx = EnsembleCCT( - self.states["ensemble_entire"], action["functionsInCCT"], self.config - ) - return nx.g - - elif action_name == "supergraph": - if "reveal_callsites" in action: - reveal_callsites = action["reveal_callsites"] - else: - reveal_callsites = [] - - if "split_entry_module" in action: - split_entry_module = action["split_entry_module"] - else: - split_entry_module = "" - - if "split_callee_module" in action: - split_callee_module = action["split_callee_module"] - else: - split_callee_module = "" - - self.states["ensemble_group"].g = EnsembleSuperGraph( - self.states, - "group_path", - construct_graph=True, - add_data=True, - reveal_callsites=reveal_callsites, - split_entry_module=split_entry_module, - split_callee_module=split_callee_module, - ).agg_g - return self.states["ensemble_group"].g - - elif action_name == "scatterplot": - if action["plot"] == "bland-altman": - state1 = self.states[action["dataset"]] - state2 = self.states[action["dataset2"]] - col = action["col"] - catcol = action["catcol"] - dataset1 = action["dataset"] - dataset2 = action["dataset2"] - ret = BlandAltman( - state1, state2, col, catcol, dataset1, dataset2 - ).results - return ret - - elif action_name == "Gromov-wasserstein": - ret = {} - return ret - - elif action_name == "similarity": - if action["module"] == "all": - dirname = self.config.callflow_dir - name = self.config.runName - similarity_filepath = dirname + "/" + "similarity.json" - with open(similarity_filepath, "r") as similarity_file: - self.similarities = json.load(similarity_file) - else: - self.similarities = {} - for idx, dataset in enumerate(datasets): - self.similarities[dataset] = [] - for idx_2, dataset2 in enumerate(datasets): - union_similarity = Similarity( - 
self.states[dataset2].g, self.states[dataset].g - ) - self.similarities[dataset].append(union_similarity.result) - return self.similarities - - elif action_name == "hierarchy": - mH = ModuleHierarchy( - self.states["ensemble_entire"], action["module"], config=self.config - ) - return mH.result - - elif action_name == "projection": - self.similarities = {} - # dirname = self.config.callflow_dir - # name = self.config.runName - # similarity_filepath = dirname + '/' + 'similarity.json' - # with open(similarity_filepath, 'r') as similarity_file: - # self.similarities = json.load(similarity_file) - result = ParameterProjection( - self.states["ensemble_entire"], - self.similarities, - action["targetDataset"], - n_cluster=action["numOfClusters"], - ).result - return result.to_json(orient="columns") - - elif action_name == "run-information": - ret = [] - for idx, state in enumerate(self.states): - self.states[state].projection_data["dataset"] = state - ret.append(self.states[state].projection_data) - return ret - - elif action_name == "mini-histogram": - minihistogram = MiniHistogram( - self.states["ensemble"], target_datasets=action["target-datasets"] - ) - return minihistogram.result - - elif action_name == "histogram": - histogram = RankHistogram(self.states["ensemble"], action["module"]) - return histogram.result - - elif action_name == "auxiliary": - print(f"Reprocessing: {action['re-process']}") - aux = EnsembleAuxiliary( - self.states, - MPIBinCount=action["MPIBinCount"], - RunBinCount=action["RunBinCount"], - datasets=action["datasets"], - config=self.config, - process=True, - write=False, - ) - if action["re-process"] == 1: - result = aux.run() - else: - result = self.states["all_data"] - # result = aux.filter_dict(result) - self.currentMPIBinCount = action["MPIBinCount"] - self.currentRunBinCount = action["RunBinCount"] - - return result - - elif action_name == "compare": - compareDataset = action["compareDataset"] - targetDataset = action["targetDataset"] - if action["selectedMetric"] == "Inclusive": - selectedMetric = "time (inc)" - elif action["selectedMetric"] == "Exclusive": - selectedMetric = "time" - - compare = DiffView( - self.states["ensemble_entire"], - compareDataset, - targetDataset, - selectedMetric, - ) - return compare.result - - # -------------------------------------------------------------------------- diff --git a/callflow/callflow_single.py b/callflow/callflow_single.py deleted file mode 100644 index 783f34b8..00000000 --- a/callflow/callflow_single.py +++ /dev/null @@ -1,148 +0,0 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. 
-############################################################################## - -import time -import json - -import callflow - -LOGGER = callflow.get_logger(__name__) - -from callflow.timer import Timer -from callflow.pipeline import State, Pipeline -from callflow.utils import ( - getMaxExcTime, - getMinExcTime, - getMaxIncTime, - getMinIncTime, -) - -from callflow import SingleCCT, SingleSuperGraph, BaseCallFlow - -from callflow.modules import ( - SingleAuxiliary, - RankHistogram, - MiniHistogram, - RuntimeScatterplot, - FunctionList, -) - - -class SingleCallFlow(BaseCallFlow): - - def __init__(self, config=None, process=False): - super(SingleCallFlow, self).__init__(config, process) - - # -------------------------------------------------------------------------- - def _process_states(self): - for dataset_name in self.config.dataset_names: - state = State(dataset_name) - LOGGER.info("#########################################") - LOGGER.info(f"Run: {dataset_name}") - LOGGER.info("#########################################") - - stage1 = time.perf_counter() - state = self.pipeline.create_gf(dataset_name) - stage2 = time.perf_counter() - LOGGER.info(f"Create GraphFrame: {stage2 - stage1}") - LOGGER.info("-----------------------------------------") - - states = self.pipeline.process_gf(state, "entire") - stage3 = time.perf_counter() - LOGGER.info(f"Preprocess GraphFrame: {stage3 - stage2}") - LOGGER.info("-----------------------------------------") - - state = self.pipeline.hatchetToNetworkX(state, "path") - stage4 = time.perf_counter() - LOGGER.info(f"Convert to NetworkX graph: {stage4 - stage3}") - LOGGER.info("-----------------------------------------") - - state = self.pipeline.group(state, "module") - stage5 = time.perf_counter() - LOGGER.info(f"Group GraphFrame: {stage5 - stage4}") - LOGGER.info("-----------------------------------------") - - self.pipeline.write_dataset_gf( - state, dataset_name, "entire", write_graph=False - ) - stage6 = time.perf_counter() - LOGGER.info(f"Write GraphFrame: {stage6 - stage5}") - LOGGER.info("-----------------------------------------") - LOGGER.info(f'Module: {state.new_gf.df["module"].unique()}') - - return state - - def _read_states(self, datasets): - states = {} - for idx, dataset in enumerate(datasets): - states[dataset] = self.pipeline.read_dataset_gf(dataset) - return states - - def _request(self, action): - LOGGER.info("[Single Mode]", action) - action_name = action["name"] - - if action_name == "init": - self.setConfig() - return self.config - - if "groupBy" in action: - LOGGER.info("Grouping by: {0}".format(action["groupBy"])) - else: - action["groupBy"] = "name" - - dataset = action["dataset"] - state = self.states[dataset] - - LOGGER.info("The selected Dataset is {0}".format(dataset)) - - # Compare against the different operations - if action_name == "reset": - datasets = [dataset] - self.reProcess = True - self.states = self.pipeline( - datasets, action["filterBy"], action["filterPerc"] - ) - self.reProcess = False - self.states = self.pipeline(datasets) - return {} - - elif action_name == "auxiliary": - auxiliary = Auxiliary( - self.states[action["dataset"]], - binCount=action["binCount"], - dataset=action["dataset"], - config=self.config, - ) - return auxiliary.result - - elif action_name == "supergraph": - self.states[dataset].g = SuperGraph( - self.states, dataset, "group_path", construct_graph=True, add_data=True - ).g - return self.states[dataset].g - - elif action_name == "mini-histogram": - minihistogram = MiniHistogram(state) - 
return minihistogram.result - - elif action_name == "cct": - graph = singleCCT( - self.states[action["dataset"]], action["functionsInCCT"], self.config - ) - return graph.g - - elif action_name == "function": - functionlist = FunctionList(state, action["module"], action["nid"]) - return functionlist.result - - # -------------------------------------------------------------------------- diff --git a/callflow/datastructures/cct.py b/callflow/datastructures/cct.py index e69de29b..55628009 100644 --- a/callflow/datastructures/cct.py +++ b/callflow/datastructures/cct.py @@ -0,0 +1,336 @@ +# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +# ------------------------------------------------------------------------------ +# Library imports +import math +import pandas as pd +import networkx as nx +from ast import literal_eval as make_tuple + +# ------------------------------------------------------------------------------ +# CallFlow imports +import callflow +from callflow.timer import Timer +from callflow import SuperGraph + +# ------------------------------------------------------------------------------ +# CCT Rendering class. +class CCT(SuperGraph): + def __init__(self, supergraphs={}, tag="", props={}, callsite_count=50): + # Call the SuperGraph class init. + super(CCT, self).__init__(props=props, tag=tag, mode="render") + + # set the current graph being rendered. + self.supergraph = supergraphs[tag] + + # Number of runs in the state. + self.runs = self.supergraph.gf.df["dataset"].unique() + self.columns = ["time (inc)", "time", "name", "module"] + + # callsite count is bounded by the user's input. + if callsite_count == None: + self.callsite_count = len(self.supergraph.gf.df["name"].unique()) + else: + self.callsite_count = int(callsite_count) + + # Put the top callsites into a list. + self.callsites = self.get_top_n_callsites_by_attr( + df=self.supergraph.gf.df, + callsite_count=self.callsite_count, + sort_attr="time (inc)", + ) + + # Filter out the callsites not in the list. + self.supergraph.gf.df = self.supergraph.gf.df[ + self.supergraph.gf.df["name"].isin(self.callsites) + ] + self.datasets = self.supergraph.gf.df["dataset"].unique() + + with self.timer.phase(f"Creating the ensemble CCT: {self.datasets}"): + self.supergraph.gf.nxg = nx.DiGraph() + + # Add paths by "column" = path. + self.add_paths("path") + + # Add node and edge attributes. + with self.timer.phase(f"Add node and edge attributes."): + self.add_node_attributes() + self.add_edge_attributes() + + # Find cycles in the CCT. + with self.timer.phase(f"Find cycles"): + self.supergraph.gf.nxg.cycles = self.find_cycle(self.supergraph.gf.nxg) + + print(self.timer) + + def get_top_n_callsites_by_attr( + self, df=pd.DataFrame([]), callsite_count=50, sort_attr="time (inc)" + ): + """ + Fetches the top n callsites based on attribute (time/time (inc)). 
+ """ + xgroup_df = self.supergraph.gf.df.groupby(["name"]).mean() + sort_xgroup_df = xgroup_df.sort_values(by=[sort_attr], ascending=False) + callsites_df = sort_xgroup_df.nlargest(callsite_count, sort_attr) + return callsites_df.index.values.tolist() + + def ensemble_map(self, df, nodes): + ret = {} + """ + Construct the ensemble map + """ + for callsite in self.supergraph.gf.nxg.nodes(): + if callsite not in self.props["callsite_module_map"]: + module = self.supergraph.gf.df.loc[ + self.supergraph.gf.df["name"] == callsite + ]["module"].unique()[0] + else: + module = self.props["callsite_module_map"][callsite] + + for column in self.columns: + if column not in ret: + ret[column] = {} + if column == "time (inc)": + ret[column][callsite] = self.name_time_inc_map[(module, callsite)] + elif column == "time": + ret[column][callsite] = self.name_time_exc_map[(module, callsite)] + elif column == "name": + ret[column][callsite] = callsite + elif column == "module": + ret[column][callsite] = module + + return ret + + def dataset_map(self, nodes, run): + """ + Construct maps for each dataset. + """ + ret = {} + for callsite in self.supergraph.gf.nxg.nodes(): + if callsite not in self.props["callsite_module_map"]: + module = self.supergraph.gf.df.loc[ + self.supergraph.gf.df["name"] == callsite + ]["module"].unique()[0] + else: + module = self.props["callsite_module_map"][callsite] + + if callsite in self.target_module_callsite_map[run].keys(): + if callsite not in ret: + ret[callsite] = {} + + for column in self.columns: + if column == "time (inc)": + ret[callsite][column] = self.target_module_time_inc_map[run][ + module + ] + + elif column == "time": + ret[callsite][column] = self.target_module_time_exc_map[run][ + module + ] + + elif column == "module": + ret[callsite][column] = module + + elif column == "name": + ret[callsite][column] = callsite + + return ret + + def add_node_attributes(self): + ensemble_mapping = self.ensemble_map( + self.supergraph.gf.df, self.supergraph.gf.nxg.nodes() + ) + + for idx, key in enumerate(ensemble_mapping): + nx.set_node_attributes( + self.supergraph.gf.nxg, name=key, values=ensemble_mapping[key] + ) + + dataset_mapping = {} + for run in self.runs: + dataset_mapping[run] = self.dataset_map(self.supergraph.gf.nxg.nodes(), run) + nx.set_node_attributes( + self.supergraph.gf.nxg, name=run, values=dataset_mapping[run] + ) + + def add_edge_attributes(self): + num_of_calls_mapping = self.edge_map( + self.supergraph.gf.nxg.edges(), "component_path" + ) + nx.set_edge_attributes( + self.supergraph.gf.nxg, name="count", values=num_of_calls_mapping + ) + + def edge_map(self, edges, attr, source=None, orientation=None): + counter = {} + if not self.supergraph.gf.nxg.is_directed() or orientation in ( + None, + "original", + ): + + def tailhead(edge): + return edge[:2] + + elif orientation == "reverse": + + def tailhead(edge): + return edge[1], edge[0] + + elif orientation == "ignore": + + def tailhead(edge): + if edge[-1] == "reverse": + return edge[1], edge[0] + return edge[:2] + + ret = {} + explored = [] + for start_node in self.supergraph.gf.nxg.nbunch_iter(source): + if start_node in explored: + # No loop is possible. + continue + + edges = [] + # All nodes seen in this iteration of edge_dfs + seen = {start_node} + # Nodes in active path. 
+ active_nodes = {start_node} + previous_head = None + + for edge in nx.edge_dfs(self.supergraph.gf.nxg, start_node, orientation): + tail, head = tailhead(edge) + if edge not in counter: + counter[edge] = 0 + if tail == head: + counter[edge] += 1 + else: + counter[edge] = 1 + + return counter + + def create_source_targets(self, path): + module = "" + edges = [] + + for idx, callsite in enumerate(path): + if idx == len(path) - 1: + break + + source = callflow.utils.sanitize_name(path[idx]) + target = callflow.utils.sanitize_name(path[idx + 1]) + + edges.append( + {"source": source, "target": target,} + ) + return edges + + def add_paths(self, path): + paths = self.supergraph.gf.df[path].tolist() + + for idx, path in enumerate(paths): + if isinstance(path, float): + return [] + path = make_tuple(path) + source_targets = self.create_source_targets(path) + for edge in source_targets: + source = edge["source"] + target = edge["target"] + if not self.supergraph.gf.nxg.has_edge(source, target): + self.supergraph.gf.nxg.add_edge(source, target) + + def find_cycle(self, G, source=None, orientation=None): + if not G.is_directed() or orientation in (None, "original"): + + def tailhead(edge): + return edge[:2] + + elif orientation == "reverse": + + def tailhead(edge): + return edge[1], edge[0] + + elif orientation == "ignore": + + def tailhead(edge): + if edge[-1] == "reverse": + return edge[1], edge[0] + return edge[:2] + + explored = set() + cycle = [] + count = 0 + final_node = None + for start_node in G.nbunch_iter(source): + if start_node in explored: + # No loop is possible. + continue + + edges = [] + # All nodes seen in this iteration of edge_dfs + seen = {start_node} + # Nodes in active path. + active_nodes = {start_node} + previous_head = None + + for edge in nx.edge_dfs(G, start_node, orientation): + # Determine if this edge is a continuation of the active path. + tail, head = tailhead(edge) + if head in explored: + # Then we've already explored it. No loop is possible. + continue + if previous_head is not None and tail != previous_head: + # This edge results from backtracking. + # Pop until we get a node whose head equals the current tail. + # So for example, we might have: + # (0, 1), (1, 2), (2, 3), (1, 4) + # which must become: + # (0, 1), (1, 4) + while True: + try: + popped_edge = edges.pop() + except IndexError: + edges = [] + active_nodes = {tail} + break + else: + popped_head = tailhead(popped_edge)[1] + active_nodes.remove(popped_head) + + if edges: + last_head = tailhead(edges[-1])[1] + if tail == last_head: + break + edges.append(edge) + + if head in active_nodes: + # We have a loop! + cycle.extend(edges) + final_node = head + break + else: + seen.add(head) + active_nodes.add(head) + previous_head = head + + if cycle: + count += 1 + break + else: + explored.update(seen) + + else: + assert len(cycle) == 0 + # raise nx.exception.NetworkXNoCycle('No cycle found.') + + # We now have a list of edges which ends on a cycle. + # So we need to remove from the beginning edges that are not relevant. + i = 0 + for i, edge in enumerate(cycle): + tail, head = tailhead(edge) + if tail == final_node: + break + return cycle[i:] diff --git a/callflow/datastructures/cct_ensemble.py b/callflow/datastructures/cct_ensemble.py deleted file mode 100644 index d4431937..00000000 --- a/callflow/datastructures/cct_ensemble.py +++ /dev/null @@ -1,372 +0,0 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. 
-# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. -############################################################################## - -import pandas as pd -import networkx as nx -from ast import literal_eval as make_tuple -import math -from callflow.timer import Timer -from callflow.utils import sanitizeName - - -class EnsembleCCT: - def __init__(self, state, functionsInCCT, config): - self.timer = Timer() - self.config = config - number_of_nodes = len(state.new_gf.df["name"].unique()) - self.functionsInCCT = int(functionsInCCT) - - # self.entire_graph = state.g - # self.entire_df = state.df - self.entire_graph = state.new_gf.g - self.entire_df = state.new_gf.df - - self.runs = self.entire_df["dataset"].unique() - self.columns = ["time (inc)", "time", "name", "module"] - # 'imbalance_perc'] - - self.sort_attr = "time" - self.callsites = self.get_top_n_callsites_by(self.functionsInCCT) - - self.fdf = self.entire_df[self.entire_df["name"].isin(self.callsites)] - - self.datasets = self.fdf["dataset"].unique() - with self.timer.phase(f"Creating the ensemble CCT: {self.datasets}"): - self.g = nx.DiGraph() - self.add_paths("path") - - with self.timer.phase(f"Creating the data maps."): - self.cct_df = self.entire_df[self.entire_df["name"].isin(self.g.nodes())] - self.create_ensemble_maps(self.cct_df) - self.create_target_maps(self.cct_df) - - with self.timer.phase(f"Add node and edge attributes."): - self.add_node_attributes() - self.add_edge_attributes() - - with self.timer.phase(f"Find cycles"): - self.g.cycles = self.find_cycle(self.g) - - print(self.timer) - - def get_top_n_callsites_by(self, count): - xgroup_df = self.entire_df.groupby(["name"]).mean() - sort_xgroup_df = xgroup_df.sort_values(by=[self.sort_attr], ascending=False) - callsites_df = sort_xgroup_df.nlargest(self.functionsInCCT, "time (inc)") - - return callsites_df.index.values.tolist() - - def create_target_maps(self, df): - self.target_df = {} - self.target_modules = {} - self.target_module_group_df = {} - self.target_module_name_group_df = {} - self.target_module_callsite_map = {} - self.target_module_time_inc_map = {} - self.target_module_time_exc_map = {} - self.target_name_time_inc_map = {} - self.target_name_time_exc_map = {} - - for run in self.runs: - # Reduce the entire_df to respective target dfs. - self.target_df[run] = df.loc[df["dataset"] == run] - - # Unique modules in the target run - self.target_modules[run] = self.target_df[run]["module"].unique() - - # Group the dataframe in two ways. - # 1. by module - # 2. by module and callsite - self.target_module_group_df[run] = self.target_df[run].groupby(["module"]) - self.target_module_name_group_df[run] = self.target_df[run].groupby( - ["name"] - ) - - # Module map for target run {'module': [Array of callsites]} - self.target_module_callsite_map[run] = self.target_module_group_df[run][ - "name" - ].unique() - - # Inclusive time maps for the module level and callsite level. - self.target_module_time_inc_map[run] = ( - self.target_module_group_df[run]["time (inc)"].max().to_dict() - ) - self.target_name_time_inc_map[run] = ( - self.target_module_name_group_df[run]["time (inc)"].max().to_dict() - ) - - # Exclusive time maps for the module level and callsite level. 
- self.target_module_time_exc_map[run] = ( - self.target_module_group_df[run]["time"].max().to_dict() - ) - self.target_name_time_exc_map[run] = ( - self.target_module_name_group_df[run]["time"].max().to_dict() - ) - - def create_ensemble_maps(self, df): - self.modules = df["module"].unique() - - self.module_name_group_df = df.groupby(["module", "name"]) - self.module_group_df = df.groupby(["module"]) - - # Module map for ensemble {'module': [Array of callsites]} - self.module_callsite_map = df["name"].unique() - - # Inclusive time maps for the module level and callsite level. - self.module_time_inc_map = self.module_group_df["time (inc)"].max().to_dict() - self.name_time_inc_map = self.module_name_group_df["time (inc)"].max().to_dict() - - # Exclusive time maps for the module level and callsite level. - self.module_time_exc_map = self.module_group_df["time"].max().to_dict() - self.name_time_exc_map = self.module_name_group_df["time"].max().to_dict() - - def ensemble_map(self, df, nodes): - ret = {} - - # loop through the nodes - for callsite in self.g.nodes(): - if callsite not in self.config.callsite_module_map: - module = self.entire_df.loc[self.entire_df["name"] == callsite][ - "module" - ].unique()[0] - else: - module = self.config.callsite_module_map[callsite] - - for column in self.columns: - if column not in ret: - ret[column] = {} - if column == "time (inc)": - ret[column][callsite] = self.name_time_inc_map[(module, callsite)] - elif column == "time": - ret[column][callsite] = self.name_time_exc_map[(module, callsite)] - elif column == "name": - ret[column][callsite] = callsite - elif column == "module": - ret[column][callsite] = module - - return ret - - def dataset_map(self, nodes, run): - ret = {} - for callsite in self.g.nodes(): - if callsite not in self.config.callsite_module_map: - module = self.entire_df.loc[self.entire_df["name"] == callsite][ - "module" - ].unique()[0] - else: - module = self.config.callsite_module_map[callsite] - - if callsite in self.target_module_callsite_map[run].keys(): - if callsite not in ret: - ret[callsite] = {} - - for column in self.columns: - if column == "time (inc)": - ret[callsite][column] = self.target_module_time_inc_map[run][ - module - ] - - elif column == "time": - ret[callsite][column] = self.target_module_time_exc_map[run][ - module - ] - - elif column == "module": - ret[callsite][column] = module - - elif column == "name": - ret[callsite][column] = callsite - - return ret - - def add_node_attributes(self): - ensemble_mapping = self.ensemble_map(self.entire_df, self.g.nodes()) - - for idx, key in enumerate(ensemble_mapping): - nx.set_node_attributes(self.g, name=key, values=ensemble_mapping[key]) - - # dataset_mapping = {} - # for run in self.runs: - # dataset_mapping[run] = self.dataset_map(self.g.nodes(), run) - - # nx.set_node_attributes(self.g, name=run, values=dataset_mapping[run]) - - def add_edge_attributes(self): - num_of_calls_mapping = self.edge_map(self.g.edges(), "component_path") - nx.set_edge_attributes(self.g, name="count", values=num_of_calls_mapping) - - def edge_map(self, edges, attr, source=None, orientation=None): - counter = {} - if not self.g.is_directed() or orientation in (None, "original"): - - def tailhead(edge): - return edge[:2] - - elif orientation == "reverse": - - def tailhead(edge): - return edge[1], edge[0] - - elif orientation == "ignore": - - def tailhead(edge): - if edge[-1] == "reverse": - return edge[1], edge[0] - return edge[:2] - - ret = {} - explored = [] - for start_node in 
self.g.nbunch_iter(source): - if start_node in explored: - # No loop is possible. - continue - - edges = [] - # All nodes seen in this iteration of edge_dfs - seen = {start_node} - # Nodes in active path. - active_nodes = {start_node} - previous_head = None - - for edge in nx.edge_dfs(self.g, start_node, orientation): - tail, head = tailhead(edge) - if edge not in counter: - counter[edge] = 0 - if tail == head: - counter[edge] += 1 - else: - counter[edge] = 1 - - return counter - - def create_source_targets(self, path): - module = "" - edges = [] - - for idx, callsite in enumerate(path): - if idx == len(path) - 1: - break - - source = sanitizeName(path[idx]) - target = sanitizeName(path[idx + 1]) - - edges.append( - {"source": source, "target": target,} - ) - return edges - - def add_paths(self, path): - paths = self.fdf[path].tolist() - - for idx, path in enumerate(paths): - if isinstance(path, float): - return [] - path = make_tuple(path) - source_targets = self.create_source_targets(path) - for edge in source_targets: - source = edge["source"] - target = edge["target"] - if not self.g.has_edge(source, target): - self.g.add_edge(source, target) - - def find_cycle(self, G, source=None, orientation=None): - if not G.is_directed() or orientation in (None, "original"): - - def tailhead(edge): - return edge[:2] - - elif orientation == "reverse": - - def tailhead(edge): - return edge[1], edge[0] - - elif orientation == "ignore": - - def tailhead(edge): - if edge[-1] == "reverse": - return edge[1], edge[0] - return edge[:2] - - explored = set() - cycle = [] - count = 0 - final_node = None - for start_node in G.nbunch_iter(source): - if start_node in explored: - # No loop is possible. - continue - - edges = [] - # All nodes seen in this iteration of edge_dfs - seen = {start_node} - # Nodes in active path. - active_nodes = {start_node} - previous_head = None - - for edge in nx.edge_dfs(G, start_node, orientation): - # Determine if this edge is a continuation of the active path. - tail, head = tailhead(edge) - if head in explored: - # Then we've already explored it. No loop is possible. - continue - if previous_head is not None and tail != previous_head: - # This edge results from backtracking. - # Pop until we get a node whose head equals the current tail. - # So for example, we might have: - # (0, 1), (1, 2), (2, 3), (1, 4) - # which must become: - # (0, 1), (1, 4) - while True: - try: - popped_edge = edges.pop() - except IndexError: - edges = [] - active_nodes = {tail} - break - else: - popped_head = tailhead(popped_edge)[1] - active_nodes.remove(popped_head) - - if edges: - last_head = tailhead(edges[-1])[1] - if tail == last_head: - break - edges.append(edge) - - if head in active_nodes: - # We have a loop! - cycle.extend(edges) - final_node = head - break - else: - seen.add(head) - active_nodes.add(head) - previous_head = head - - if cycle: - count += 1 - break - else: - explored.update(seen) - - else: - assert len(cycle) == 0 - # raise nx.exception.NetworkXNoCycle('No cycle found.') - - # We now have a list of edges which ends on a cycle. - # So we need to remove from the beginning edges that are not relevant. 
- i = 0 - for i, edge in enumerate(cycle): - tail, head = tailhead(edge) - if tail == final_node: - break - return cycle[i:] diff --git a/callflow/datastructures/cct_single.py b/callflow/datastructures/cct_single.py deleted file mode 100644 index 7c6206bb..00000000 --- a/callflow/datastructures/cct_single.py +++ /dev/null @@ -1,280 +0,0 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. -############################################################################## - -import pandas as pd -import networkx as nx -from ast import literal_eval as make_tuple -import math -from callflow.timer import Timer -from callflow.utils import sanitizeName - - -class SingleCCT: - def __init__(self, state, functionsInCCT, config): - self.timer = Timer() - - self.g = state.new_gf.nxg - self.df = state.new_gf.df - self.functionsInCCT = int(functionsInCCT) - self.config = config - - self.columns = ["time (inc)", "time", "name", "module"] - # 'imbalance_perc'] - - self.sort_attr = "time" - - print(f"Total callsite in CCT: {len(self.df['name'].unique())}") - - with self.timer.phase("Creating data maps"): - self.create_ensemble_maps() - - self.callsites = self.get_top_n_callsites_by(self.functionsInCCT) - self.fdf = self.df[self.df["name"].isin(self.callsites)] - - self.dataset = self.fdf["dataset"].unique() - with self.timer.phase(f"Creating the single CCT {self.dataset}"): - self.run() - print(self.timer) - - def get_top_n_callsites_by(self, count): - xgroup_df = self.df.groupby(["name"]).mean() - sort_xgroup_df = xgroup_df.sort_values(by=[self.sort_attr], ascending=False) - callsites_df = sort_xgroup_df.nlargest(self.functionsInCCT, self.sort_attr) - - return callsites_df.index.values.tolist() - - def create_ensemble_maps(self): - self.modules = self.df["module"].unique() - - self.module_name_group_df = self.df.groupby(["module", "name"]) - self.module_group_df = self.df.groupby(["module"]) - - # Module map for ensemble {'module': [Array of callsites]} - self.module_callsite_map = self.module_group_df["name"].unique() - - # Inclusive time maps for the module level and callsite level. - self.module_time_inc_map = self.module_group_df["time (inc)"].max().to_dict() - self.name_time_inc_map = self.module_name_group_df["time (inc)"].max().to_dict() - - # Exclusive time maps for the module level and callsite level. 
- self.module_time_exc_map = self.module_group_df["time"].max().to_dict() - self.name_time_exc_map = self.module_name_group_df["time"].max().to_dict() - - def dataset_map(self): - ret = {} - for callsite in self.g.nodes(): - if callsite not in self.config.callsite_module_map: - module = self.df.loc[self.df["name"] == callsite]["module"].unique()[0] - else: - module = self.config.callsite_module_map[callsite] - - for column in self.columns: - if column not in ret: - ret[column] = {} - - if column == "time (inc)": - ret[column][callsite] = self.name_time_inc_map[(module, callsite)] - - elif column == "time": - ret[column][callsite] = self.name_time_exc_map[(module, callsite)] - - elif column == "module": - ret[column][callsite] = module - - elif column == "name": - ret[column][callsite] = callsite - - return ret - - def add_node_attributes(self): - dataset_mapping = self.dataset_map() - - for idx, key in enumerate(dataset_mapping): - nx.set_node_attributes(self.g, name=key, values=dataset_mapping[key]) - - def add_edge_attributes(self): - num_of_calls_mapping = self.edge_map(self.g.edges(), "component_path") - nx.set_edge_attributes(self.g, name="count", values=num_of_calls_mapping) - - def edge_map(self, edges, attr, source=None, orientation=None): - counter = {} - if not self.g.is_directed() or orientation in (None, "original"): - - def tailhead(edge): - return edge[:2] - - elif orientation == "reverse": - - def tailhead(edge): - return edge[1], edge[0] - - elif orientation == "ignore": - - def tailhead(edge): - if edge[-1] == "reverse": - return edge[1], edge[0] - return edge[:2] - - ret = {} - explored = [] - for start_node in self.g.nbunch_iter(source): - if start_node in explored: - # No loop is possible. - continue - - edges = [] - # All nodes seen in this iteration of edge_dfs - seen = {start_node} - # Nodes in active path. - active_nodes = {start_node} - previous_head = None - - for edge in nx.edge_dfs(self.g, start_node, orientation): - tail, head = tailhead(edge) - if edge not in counter: - counter[edge] = 0 - if tail == head: - counter[edge] += 1 - else: - counter[edge] = 1 - - return counter - - def create_source_targets(self, path): - module = "" - edges = [] - - for idx, callsite in enumerate(path): - if idx == len(path) - 1: - break - - source = sanitizeName(path[idx]) - target = sanitizeName(path[idx + 1]) - - edges.append( - {"source": source, "target": target,} - ) - return edges - - def add_paths(self, path): - paths = self.fdf[path].tolist() - - for idx, path in enumerate(paths): - if isinstance(path, float): - return [] - path = make_tuple(path) - source_targets = self.create_source_targets(path) - for edge in source_targets: - source = edge["source"] - target = edge["target"] - if not self.g.has_edge(source, target): - self.g.add_edge(source, target) - - def find_cycle(self, G, source=None, orientation=None): - if not G.is_directed() or orientation in (None, "original"): - - def tailhead(edge): - return edge[:2] - - elif orientation == "reverse": - - def tailhead(edge): - return edge[1], edge[0] - - elif orientation == "ignore": - - def tailhead(edge): - if edge[-1] == "reverse": - return edge[1], edge[0] - return edge[:2] - - explored = set() - cycle = [] - count = 0 - final_node = None - for start_node in G.nbunch_iter(source): - if start_node in explored: - # No loop is possible. - continue - - edges = [] - # All nodes seen in this iteration of edge_dfs - seen = {start_node} - # Nodes in active path. 
- active_nodes = {start_node} - previous_head = None - - for edge in nx.edge_dfs(G, start_node, orientation): - # Determine if this edge is a continuation of the active path. - tail, head = tailhead(edge) - if head in explored: - # Then we've already explored it. No loop is possible. - continue - if previous_head is not None and tail != previous_head: - # This edge results from backtracking. - # Pop until we get a node whose head equals the current tail. - # So for example, we might have: - # (0, 1), (1, 2), (2, 3), (1, 4) - # which must become: - # (0, 1), (1, 4) - while True: - try: - popped_edge = edges.pop() - except IndexError: - edges = [] - active_nodes = {tail} - break - else: - popped_head = tailhead(popped_edge)[1] - active_nodes.remove(popped_head) - - if edges: - last_head = tailhead(edges[-1])[1] - if tail == last_head: - break - edges.append(edge) - - if head in active_nodes: - # We have a loop! - cycle.extend(edges) - final_node = head - break - else: - seen.add(head) - active_nodes.add(head) - previous_head = head - - if cycle: - count += 1 - break - else: - explored.update(seen) - - else: - assert len(cycle) == 0 - # raise nx.exception.NetworkXNoCycle('No cycle found.') - - # We now have a list of edges which ends on a cycle. - # So we need to remove from the beginning edges that are not relevant. - i = 0 - for i, edge in enumerate(cycle): - tail, head = tailhead(edge) - if tail == final_node: - break - return cycle[i:] - - def run(self): - self.g = nx.DiGraph() - self.add_paths("path") - self.add_node_attributes() - self.add_edge_attributes() - self.g.cycles = self.find_cycle(self.g) diff --git a/callflow/datastructures/ensemblegraph.py b/callflow/datastructures/ensemblegraph.py new file mode 100644 index 00000000..21a667e7 --- /dev/null +++ b/callflow/datastructures/ensemblegraph.py @@ -0,0 +1,140 @@ +import networkx as nx +import pandas as pd +import callflow +from callflow import GraphFrame, SuperGraph + +LOGGER = callflow.get_logger(__name__) + + +class EnsembleGraph(SuperGraph): + """ + TODO: Clean this up. + SuperGraph that handles the ensemble processing. + """ + + def __init__(self, props={}, tag="", mode="process", supergraphs={}): + # this stores the mapping for each run's data (i.e., Dataset) + self.supergraphs = supergraphs + + super().__init__(props, tag, mode) + + # For each callsite we store the vector here. + self.vector = {} + + def create_gf(self, data=None): + """ + Create the graphframes for the ensemble operation. + """ + # Set the gf as first of the dataset's gf + if data: + self.gf = callflow.GraphFrame.from_data(data) + else: + first_dataset = list(self.supergraphs.keys())[0] + LOGGER.debug(f"Base for the union operation is: {first_dataset}") + + # TODO: do a deep copy. + # Instead of a deep copy, create a new graphframe and return it. + self.gf = self.supergraphs[first_dataset].gf + self.gf.df = self.union_df() + # There is no way to convert networkX to hatchet graph yet. So we are setting this to None. + self.gf.graph = None + self.gf.nxg = self.union_nxg() + + assert isinstance(self.gf, callflow.GraphFrame) + + def union_df(self): + """ + Union the dataframes. + """ + df = pd.DataFrame([]) + for idx, tag in enumerate(self.supergraphs): + gf = self.supergraphs[tag].gf + + df = pd.concat([df, gf.df], sort=True) + + assert isinstance(df, pd.DataFrame) + return df + + def union_nxg(self): + """ + Union the netwprkX graph. 
+ """ + nxg = nx.DiGraph() + for idx, tag in enumerate(self.supergraphs): + LOGGER.debug("-=========================-") + LOGGER.debug(tag) + self.union_nxg_recurse(nxg, self.supergraphs[tag].gf.nxg) + + return nxg + + # Return the union of graphs G and H. + def union_nxg_recurse(self, nxg_1, nxg_2, name=None, rename=(None, None)): + """ + Iterative concatenation of nodes from nxg_2 to nxg_1. + """ + if not nxg_1.is_multigraph() == nxg_2.is_multigraph(): + raise nx.NetworkXError("G and H must both be graphs or multigraphs.") + + nxg_1.update(nxg_2) + + renamed_nodes = self.add_prefix(nxg_1, rename[1]) + + is_same = set(nxg_1) == set(nxg_2) + LOGGER.debug(f"Nodes in Graph 1 and Graph 2 are same? : {is_same}") + if set(nxg_1) != set(nxg_2): + LOGGER.debug(f"Difference is { list(set(nxg_1) - set(nxg_2))}") + LOGGER.debug(f"Nodes in Graph 1: {set(nxg_1)}") + LOGGER.debug(f"Nodes in Graph 2: {set(nxg_2)}") + LOGGER.debug("-=========================-") + + if nxg_2.is_multigraph(): + new_edges = nxg_2.edges(keys=True, data=True) + else: + new_edges = nxg_2.edges(data=True) + + # add nodes and edges. + nxg_1.add_nodes_from(nxg_2) + nxg_1.add_edges_from(new_edges) + + # # add node attributes for each run + # for n in renamed_nodes: + # self.add_node_attributes(nxg_1, n, name) + + return nxg_1 + + # rename graph to obtain disjoint node labels + def add_prefix(self, graph, prefix): + if prefix is None: + return graph + + def label(x): + if is_string_like(x): + name = prefix + x + else: + name = prefix + repr(x) + return name + + return nx.relabel_nodes(graph, label) + + def add_edge_attributes(self): + number_of_runs_mapping = self.number_of_runs() + nx.set_edge_attributes( + self.union, name="number_of_runs", values=number_of_runs_mapping + ) + + def number_of_runs(self): + ret = {} + for idx, name in enumerate(self.unionuns): + for edge in self.unionuns[name].edges(): + if edge not in ret: + ret[edge] = 0 + ret[edge] += 1 + return ret + + def add_node_attributes(self, H, node, dataset_name): + """ + TODO: Hoist this information to the df directly. + """ + for idx, (key, val) in enumerate(H.nodes.items()): + if dataset_name not in self.nxg.nodes[node]: + self.nxg.nodes[node] = self.vector[node] diff --git a/callflow/datastructures/graphframe.py b/callflow/datastructures/graphframe.py index 159421f6..abe2fc10 100644 --- a/callflow/datastructures/graphframe.py +++ b/callflow/datastructures/graphframe.py @@ -1,13 +1,28 @@ -import os +# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT +# ------------------------------------------------------------------------------ +# Library imports +import os +import pandas as pd import hatchet as ht +import networkx as nx + +# ------------------------------------------------------------------------------ +# CallFlow imports import callflow LOGGER = callflow.get_logger(__name__) - +# ------------------------------------------------------------------------------ +# GraphFrame Class class GraphFrame(ht.GraphFrame): def __init__(self, graph=None, dataframe=None, exc_metrics=None, inc_metrics=None): + """ + + """ # TODO: will we ever want to create a graphframe without data? 
if graph is not None and dataframe is not None: @@ -17,45 +32,183 @@ def __init__(self, graph=None, dataframe=None, exc_metrics=None, inc_metrics=Non self.df = self.dataframe # save a networkx graph - self.nxg = None + if graph: + self.nxg = self.hatchet_graph_to_nxg(graph) # -------------------------------------------------------------------------- - # promote a hatchet graph frame to callflow graph frame + # Hatchet's GraphFrame utilities. + @staticmethod def from_hatchet(gf): - + """ + Promotes a hatchet graph frame to callflow graph frame + """ assert isinstance(gf, ht.GraphFrame) return GraphFrame(gf.graph, gf.dataframe, gf.exc_metrics, gf.inc_metrics) # create a graph frame directly from the config @staticmethod def from_config(config, name): - + """ + Uses config file to create a graphframe. + """ LOGGER.info(f"Creating graphframes: {name}") - LOGGER.info(f"Data path: {config.data_path}") + LOGGER.info(f"Data path: {config['data_path']}") - if config.format[name] == "hpctoolkit": - gf = ht.GraphFrame.from_hpctoolkit(config.data_path) + if config["format"][name] == "hpctoolkit": + gf = ht.GraphFrame.from_hpctoolkit(config["data_path"]) - elif config.format[name] == "caliper": - gf = ht.GraphFrame.from_caliper(config.data_path) + elif config["format"][name] == "caliper": + gf = ht.GraphFrame.from_caliper(config["data_path"]) - elif config.format[name] == "caliper_json": - data_path = os.path.join(config.data_path, config.paths[name]) + elif config["format"][name] == "caliper_json": + data_path = os.path.join(config["data_path"], config["paths"][name]) gf = ht.GraphFrame.from_caliper(data_path, query="") - elif config.format[name] == "gprof": - gf = ht.GraphFrame.from_grof_dot(config.data_path) + elif config["format"][name] == "gprof": + gf = ht.GraphFrame.from_grof_dot(config["data_path"]) - elif config.format[name] == "literal": - gf = ht.GraphFrame.from_literal(config.data_path) + elif config["format"][name] == "literal": + gf = ht.GraphFrame.from_literal(config["data_path"]) - elif config.format[name] == "lists": - gf = ht.GraphFrame.from_lists(config.data_path) + elif config["format"][name] == "lists": + gf = ht.GraphFrame.from_lists(config["data_path"]) return GraphFrame.from_hatchet(gf) + @staticmethod + def from_data(data): + """ + Create GraphFrame from 3 sets of information : df, graph, nxg. + """ + # Hatchet requires node and rank to be indexes. + data["df"] = data["df"].set_index(["node", "rank"]) + + # Create a graphframe using Hatchet. + gf = GraphFrame(dataframe=data["df"], graph=data["graph"]) + + # Store the nxg. + gf.nxg = data["nxg"] + + # remove the set indexes to maintain consistency. + gf.df = gf.df.reset_index(drop=False) + return gf + # -------------------------------------------------------------------------- + # callflow.graph utilities. + # + @staticmethod + def hatchet_graph_to_nxg(hatchet_graph): + """ + Constructs a networkX graph from hatchet graph. + """ + nxg = nx.DiGraph() + for root in hatchet_graph.roots: + node_gen = root.traverse() + + root_dict = callflow.utils.node_dict_from_frame(root.frame) + root_name = root_dict["name"] + root_paths = root.paths() + node = root + + try: + while node: + # `getNodeDictFromFrame` converts the hatchet's frame to + node_dict = callflow.utils.node_dict_from_frame(node.frame) + node_name = node_dict["name"] + + # Get all node paths from hatchet. + node_paths = node.paths() + + # Loop through all the node paths. 
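To give a feel for what the loop below produces, a toy sketch (frame contents are invented): for every call path of length two or more, one edge is added between the labels of its last two frames, with the callsite line number appended whenever it is known. callflow.utils.sanitize_name is simplified away here.

import networkx as nx

def label(frame):
    # Simplified stand-in for sanitize_name plus the ":line" suffix used below.
    return frame["name"] if frame["line"] == "NA" else f'{frame["name"]}:{frame["line"]}'

nxg = nx.DiGraph()
node_path = [
    {"name": "main", "line": "NA"},
    {"name": "physics", "line": 12},
    {"name": "solve", "line": 42},
]
if len(node_path) >= 2:
    nxg.add_edge(label(node_path[-2]), label(node_path[-1]))

print(list(nxg.edges()))  # [('physics:12', 'solve:42')]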
+ for node_path in node_paths: + if len(node_path) >= 2: + + source_node_dict = callflow.utils.node_dict_from_frame( + node_path[-2] + ) + target_node_dict = callflow.utils.node_dict_from_frame( + node_path[-1] + ) + + if source_node_dict["line"] != "NA": + source_node_name = ( + callflow.utils.sanitize_name( + source_node_dict["name"] + ) + + ":" + + str(source_node_dict["line"]) + ) + else: + source_node_name = callflow.utils.sanitize_name( + source_node_dict["name"] + ) + if target_node_dict["line"] != "NA": + target_node_name = ( + callflow.utils.sanitize_name( + target_node_dict["name"] + ) + + ":" + + str(target_node_dict["line"]) + ) + else: + target_node_name = callflow.utils.sanitize_name( + target_node_dict["name"] + ) + + nxg.add_edge(source_node_name, target_node_name) + + node = next(node_gen) + + except StopIteration: + pass + finally: + del root + + return nxg + + # -------------------------------------------------------------------------- + # callflow.nxg utilities. + + @staticmethod + def add_prefix(graph, prefix): + """ + Rename graph to obtain disjoint node labels + """ + if prefix is None: + return graph + + def label(x): + if is_string_like(x): + name = prefix + x + else: + name = prefix + repr(x) + return name + + return nx.relabel_nodes(graph, label) + + @staticmethod + def tailhead(edge): + return (edge[0], edge[1]) + + @staticmethod + def tailheadDir(edge): + return (str(edge[0]), str(edge[1]), self.edge_direction[edge]) + + @staticmethod + def leaves_below(nxg, node): + return set( + sum( + ( + [vv for vv in v if nxg.out_degree(vv) == 0] + for k, v in nx.dfs_successors(nxg, node).items() + ), + [], + ) + ) + + # -------------------------------------------------------------------------- + # callflow.df utilities def lookup(self, node): return self.df.loc[ (self.df["name"] == node.callpath[-1]) & (self.df["nid"] == node.nid) @@ -70,30 +223,7 @@ def lookup_with_name(self, name): def lookup_with_vis_nodeName(self, name): return self.df.loc[self.df["vis_node_name"] == name] - """ - def lookup_by_column(self, _hash, col_name): - ret = [] - node_df = self.df.loc[self.df["node"] == self.map[str(_hash)]] - node_df_T = node_df.T.squeeze() - node_df_T_attr = node_df_T.loc[col_name] - if node_df_T_attr is not None: - if type(node_df_T_attr) is str or type(node_df_T_attr) is float: - ret.append(node_df_T_attr) - else: - ret = node_df_T_attr.tolist() - return ret - """ - # -------------------------------------------------------------------------- def update_df(self, col_name, mapping): self.df[col_name] = self.df["name"].apply( lambda node: mapping[node] if node in mapping.keys() else "" ) - - def grouped_df(self, attr): - pass - """ - self.gdf[attr] = self.df.groupby(attr, as_index=True, squeeze=True) - self.gdfKeys = self.gdf[attr].groups.keys() - """ - - # -------------------------------------------------------------------------- diff --git a/callflow/datastructures/supergraph.py b/callflow/datastructures/supergraph.py index 525a26ca..50d4ae45 100644 --- a/callflow/datastructures/supergraph.py +++ b/callflow/datastructures/supergraph.py @@ -1,21 +1,418 @@ +# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. 
+#
+# SPDX-License-Identifier: MIT
+
+# ------------------------------------------------------------------------------
+# Library imports
+import os
+import json
+import copy
+import numpy as np
+import pandas as pd
+import networkx as nx
+from networkx.readwrite import json_graph
+from ast import literal_eval as make_list
+
+# ------------------------------------------------------------------------------
+# CallFlow imports
+import callflow
+from callflow.timer import Timer
+from callflow.operations import Process, Group, Filter
+from callflow.modules import EnsembleAuxiliary, SingleAuxiliary
+
+LOGGER = callflow.get_logger(__name__)
+
+# ------------------------------------------------------------------------------
+# SuperGraph Class
+class SuperGraph(object):
+    def __init__(self, props={}, tag="", mode="process"):
+        self.timer = Timer()
+
+        # Props is the information contained in the config object.
+        # We duplicate it so we can attach more information without modifying the config as a side effect.
+        self.props = props
+        self.dirname = self.props["save_path"]
+
+        # it appears we're using name as "union", "filter", etc.
+        # this is not a data set name!
+        self.tag = tag
+
+        # Mode is either process or render.
+        self.mode = mode
+
+        # Variables used by `create_target_maps`.
+        self.target_df = {}
+        self.target_modules = {}
+        self.target_module_group_df = {}
+        self.target_module_name_group_df = {}
+        self.target_module_callsite_map = {}
+        self.target_module_time_inc_map = {}
+        self.target_module_time_exc_map = {}
+        self.target_name_time_inc_map = {}
+        self.target_name_time_exc_map = {}
+
+        # Create a graphframe based on the mode.
+        if mode == "process":
+            self.create_gf()
+        elif mode == "render":
+            data = self.read_gf(read_parameter=self.props["read_parameter"])
+            self.create_gf(data=data)
+            self.auxiliary_data = self.read_auxiliary_data()
+
+            with self.timer.phase(f"Creating the data maps."):
+                self.cct_df = self.gf.df[self.gf.df["name"].isin(self.gf.nxg.nodes())]
+                self.create_ensemble_maps()
+                for dataset in self.props["dataset_names"]:
+                    self.create_target_maps(dataset)
+
+        self.projection_data = {}
+
+    def _getter(self):
+        """
+        Getter for graphframe. Returns the graphframe.
+        """
+        return self.gf
+
+    def _setter(self, gf):
+        """
+        Setter for graphframe. Hooks the graphframe.
+        """
+        assert isinstance(gf, callflow.GraphFrame)
+
+        self.gf = gf
+
+    def create_gf(self, data=None):
+        """
+        Creates a graphframe from the config and builds a networkX graph from the hatchet graph.
+        Each graphframe is tagged by a unique identifier,
+        e.g., the runName from the config file or JSON.
+        """
+        if data:
+            self.gf = callflow.GraphFrame.from_data(data)
+        else:
+            gf = callflow.GraphFrame.from_config(self.props, self.tag)
+            self.gf = copy.deepcopy(gf)
+
+    def process_gf(self):
+        """
+        Process graphframe to add properties depending on the format.
+        Current processing is supported for hpctoolkit and caliper.
+ """ + gf = self._getter() + if self.props["format"][self.tag] == "hpctoolkit": + process = ( + Process.Builder(gf, self.tag) + .add_path() + .create_name_module_map() + .add_callers_and_callees() + .add_dataset_name() + .add_imbalance_perc() + .add_module_name_hpctoolkit() + .add_vis_node_name() + .build() + ) + elif self.props["format"][self.tag] == "caliper_json": + process = ( + Process.Builder(gf, self.tag) + .add_time_columns() + .add_rank_column() + .add_callers_and_callees() + .add_dataset_name() + .add_imbalance_perc() + .add_module_name_caliper(self.props["callsite_module_map"]) + .create_name_module_map() + .add_vis_node_name() + .add_path() + .build() + ) + + self._setter(process.gf) + + def group_gf(self, group_by="module"): + """ + Group the graphframe based on `group_by` parameter. + """ + gf = self._getter() + group = Group(gf, group_by) + + self._setter(group.gf) + + def filter_gf(self, mode="single"): + """ + Filter the graphframe. + """ + gf = self._getter() + filter_res = Filter( + gf=gf, + mode=mode, + filter_by=self.props["filter_by"], + filter_perc=self.props["filter_perc"], + ) + self._setter(filter_res.gf) + + def ensemble_gf(self, supergraphs): + + EnsembleGraph( + self.props, "ensemble", mode="process", supergraphs=single_supergraphs + ) + + def ensemble_auxiliary( + self, datasets, MPIBinCount=20, RunBinCount=20, process=True, write=True + ): + gf = self._getter() + EnsembleAuxiliary( + gf, + datasets=datasets, + props=self.props, + MPIBinCount=MPIBinCount, + RunBinCount=RunBinCount, + process=process, + write=write, + ) + + def single_auxiliary(self, dataset="", binCount=20, process=True): + gf = self._getter() + SingleAuxiliary( + gf, + dataset=dataset, + props=self.props, + MPIBinCount=binCount, + process=process, + ) + + # ------------------------------------------------------------------------------ + # Utilities. + + def get_top_n_callsites_by_attr(self, count, sort_attr): + """ + Returns an array of callsites (sorted by `sort_attr`) + """ + xgroup_df = self.entire_df.groupby(["name"]).mean() + sort_xgroup_df = xgroup_df.sort_values(by=[sort_attr], ascending=False) + callsites_df = sort_xgroup_df.nlargest(count, sort_attr) + return callsites_df.index.values.tolist() + + def read_gf(self, read_parameter=True, read_graph=False): + """ + # Read a single dataset stored in .callflow directory. 
+ """ + LOGGER.info("Reading the dataset: {0}".format(self.tag)) + + df_file_name = "df.csv" + df_file_path = os.path.join(self.dirname, self.tag, df_file_name) + df = pd.read_csv(df_file_path) + if df.empty: + raise ValueError(f"{df_file_path} is empty.") + + nxg_file_name = "nxg.json" + nxg_file_path = os.path.join(self.dirname, self.tag, nxg_file_name) + with open(nxg_file_path, "r") as nxg_file: + graph = json.load(nxg_file) + nxg = json_graph.node_link_graph(graph) + assert nxg != None + + graph = {} + if read_graph: + graph_file_name = "hatchet_tree.txt" + graph_file_path = os.path.join(self.dirname, self.tag, graph_file_name) + with open(graph_file_path, "r") as graph_file: + graph = json.load(graph_file) + assert isinstance(graph, ht.GraphFrame.Graph) + + parameters = {} + if read_parameter: + parameters_filepath = os.path.join(self.dirname, self.tag, "env_params.txt") + for line in open(parameters_filepath, "r"): + s = 0 + for num in line.strip().split(","): + split_num = num.split("=") + parameters[split_num[0]] = split_num[1] + + return {"df": df, "nxg": nxg, "graph": graph, "parameters": parameters} + + def write_gf(self, write_df=True, write_graph=False, write_nxg=True): + """ + # Write the dataset to .callflow directory. + """ + # Get the save path. + dirname = self.props["save_path"] + + gf = self.gf + # dump the filtered dataframe to csv if write_df is true. + if write_df: + df_file_name = "df.csv" + df_file_path = os.path.join(dirname, self.tag, df_file_name) + gf.df.to_csv(df_file_path) + + # TODO: Writing fails. + if write_nxg: + nxg_file_name = "nxg.json" + nxg_file_path = os.path.join(dirname, self.tag, nxg_file_name) + nxg_data = json_graph.node_link_data(self.gf.nxg) + with open(nxg_file_path, "w") as nxg_file: + json.dump(nxg_data, nxg_file) + + if write_graph: + graph_filepath = os.path.join(dirname, self.tag, "hatchet_tree.txt") + with open(graph_filepath, "a") as hatchet_graphFile: + hatchet_graphFile.write(self.gf.tree(color=False)) + + def write_similarity(self, datasets, states, type): + """ + # Write the pair-wise graph similarities into .callflow directory. + """ + ret = {} + for idx, dataset in enumerate(datasets): + ret[dataset] = [] + for idx_2, dataset2 in enumerate(datasets): + union_similarity = Similarity(states[dataset2].g, states[dataset].g) + ret[dataset].append(union_similarity.result) + + dirname = self.config.callflow_dir + name = self.config.runName + # similarity_filepath = dirname + "/" + "similarity.json" + similarity_filepath = os.path.join(dirname, "similarity.json") + with open(similarity_filepath, "w") as json_file: + json.dump(ret, json_file) + + def read_auxiliary_data(self): + """ + # Read the auxiliary data from all_data.json. + """ + all_data_filepath = os.path.join( + self.props["save_path"], self.tag, "auxiliary_data.json" + ) + LOGGER.info(f"[Read] {all_data_filepath}") + with open(all_data_filepath, "r") as filter_graphFile: + data = json.load(filter_graphFile) + return data + + # ------------------------------------------------------------------------------ + # NetworkX graph utility functions. + def create_target_maps(self, dataset): + # Reduce the entire_df to respective target dfs. + self.target_df[dataset] = self.gf.df.loc[self.gf.df["dataset"] == dataset] + + # Unique modules in the target run + self.target_modules[dataset] = self.target_df[dataset]["module"].unique() + + # Group the dataframe in two ways. + # 1. by module + # 2. 
by module and callsite + self.target_module_group_df[dataset] = self.target_df[dataset].groupby( + ["module"] + ) + self.target_module_name_group_df[dataset] = self.target_df[dataset].groupby( + ["module", "name"] + ) + + # Module map for target run {'module': [Array of callsites]} + self.target_module_callsite_map[dataset] = ( + self.target_module_group_df[dataset]["name"].unique().to_dict() + ) + + # Inclusive time maps for the module level and callsite level. + self.target_module_time_inc_map[dataset] = ( + self.target_module_group_df[dataset]["time (inc)"].max().to_dict() + ) + self.target_name_time_inc_map[dataset] = ( + self.target_module_name_group_df[dataset]["time (inc)"].max().to_dict() + ) + + # Exclusive time maps for the module level and callsite level. + self.target_module_time_exc_map[dataset] = ( + self.target_module_group_df[dataset]["time"].max().to_dict() + ) + self.target_name_time_exc_map[dataset] = ( + self.target_module_name_group_df[dataset]["time"].max().to_dict() + ) + def create_ensemble_maps(self): + self.modules = self.gf.df["module"].unique() -class SuperGraph(ht.GraphFrame): - def __init__(self, graph=None, dataframe=None, exc_metrics=None, inc_metrics=None): + self.module_name_group_df = self.gf.df.groupby(["module", "name"]) + self.module_group_df = self.gf.df.groupby(["module"]) + self.name_group_df = self.gf.df.groupby(["name"]) - # TODO: will we ever want to create a graphframe without data? - if graph is not None and dataframe is not None: - super().__init__(graph, dataframe, exc_metrics, inc_metrics) + # Module map for ensemble {'module': [Array of callsites]} + self.module_callsite_map = self.module_group_df["name"].unique().to_dict() - # shortcut! - self.df = self.dataframe + # Inclusive time maps for the module level and callsite level. + self.module_time_inc_map = self.module_group_df["time (inc)"].max().to_dict() + self.name_time_inc_map = self.module_name_group_df["time (inc)"].max().to_dict() - # save a networkx graph - self.nxg = None + # Exclusive time maps for the module level and callsite level. 
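The exclusive-time maps just below follow the same pattern as the inclusive ones above. A rough illustration of what these groupby maps hold, using an invented three-row frame with the usual CallFlow columns:

import pandas as pd

df = pd.DataFrame({
    "module":     ["MPI", "MPI", "lulesh"],
    "name":       ["MPI_Send", "MPI_Recv", "CalcForce"],
    "time (inc)": [4.0, 3.0, 9.0],
    "time":       [1.5, 1.0, 6.0],
})

# Module-level and callsite-level maps, keyed the same way as above.
print(df.groupby(["module"])["time (inc)"].max().to_dict())
# {'MPI': 4.0, 'lulesh': 9.0}
print(df.groupby(["module", "name"])["time"].max().to_dict())
# {('MPI', 'MPI_Recv'): 1.0, ('MPI', 'MPI_Send'): 1.5, ('lulesh', 'CalcForce'): 6.0}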
+ self.module_time_exc_map = self.module_group_df["time"].max().to_dict() + self.name_time_exc_map = self.module_name_group_df["time"].max().to_dict() + + def remove_cycles_in_paths(self, path): + ret = [] + moduleMapper = {} + dataMap = {} + + if isinstance(path, float): + return [] + path = make_list(path) + for idx, elem in enumerate(path): + callsite = elem.split("=")[1] + module = elem.split("=")[0] + if module not in dataMap: + moduleMapper[module] = 0 + dataMap[module] = [ + {"callsite": callsite, "module": module, "level": idx} + ] + else: + flag = [p["level"] == idx for p in dataMap[module]] + if np.any(np.array(flag)): + moduleMapper[module] += 1 + dataMap[module].append( + { + "callsite": callsite, + "module": module + "=" + callsite, + "level": idx, + } + ) + else: + dataMap[module].append( + {"callsite": callsite, "module": module, "level": idx} + ) + ret.append(dataMap[module][-1]) + + return ret + + def print_information(self): + LOGGER.info("Modules: {0}".format(self.supergraph.gf.df["module"].unique())) + LOGGER.info("Top 10 Inclusive time: ") + top = 10 + rank_df = self.supergraph.gf.df.groupby(["name", "nid"]).mean() + top_inclusive_df = rank_df.nlargest(top, "time (inc)", keep="first") + for name, row in top_inclusive_df.iterrows(): + LOGGER.info("{0} [{1}]".format(name, row["time (inc)"])) + + LOGGER.info("Top 10 Enclusive time: ") + top_exclusive_df = rank_df.nlargest(top, "time", keep="first") + for name, row in top_exclusive_df.iterrows(): + LOGGER.info("{0} [{1}]".format(name, row["time"])) + + for node in self.supergraph.gf.nxg.nodes(data=True): + LOGGER.info("Node: {0}".format(node)) + for edge in self.supergraph.gf.nxg.edges(): + LOGGER.info("Edge: {0}".format(edge)) + + LOGGER.info("Nodes in the tree: {0}".format(len(self.supergraph.gf.nxg.nodes))) + LOGGER.info("Edges in the tree: {0}".format(len(self.supergraph.gf.nxg.edges))) + LOGGER.info("Is it a tree? : {0}".format(nx.is_tree(self.supergraph.gf.nxg))) + LOGGER.info( + "Flow hierarchy: {0}".format(nx.flow_hierarchy(self.supergraph.gf.nxg)) + ) + + # ------------------------------------------------------------------------------ + # Module hierarchy. + # TODO: we might have to delete the module hierarchy file in modules later. + # TODO: This might fail. @staticmethod - def _create_source_targets(self, path): + def _create_source_targets(path): module = "" edges = [] @@ -30,7 +427,7 @@ def _create_source_targets(self, path): return edges @staticmethod - def _check_cycles(self, hierarchy, G): + def _check_cycles(hierarchy, G): try: cycles = list(nx.find_cycle(self.hierarchy, orientation="ignore")) except: @@ -39,7 +436,7 @@ def _check_cycles(self, hierarchy, G): return cycles @staticmethod - def _remove_cycles(self, hierarchy, G, cycles): + def _remove_cycles(hierarchy, G, cycles): for cycle in cycles: source = cycle[0] target = cycle[1] @@ -93,3 +490,239 @@ def module_hierarchy(self, module=None): print(f"cycles: {cycles}") return hierarchy + + # ------------------------------------------------------------------------------ + # Add paths according to what input is provided. + # Should be implemented by the child classes. + def add_paths(self, path): + pass + + def add_node_attributes(self): + pass + + def add_edge_attribtues(self): + pass + + # ------------------------------------------------------------------------------ + # Reveal a callsite's path + # TODO: not tested. Could break. 
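Before the helper itself, a compact self-contained sketch (module and callsite names are invented) of the edge chain that create_source_targets below builds from a component path, where the first entry is the module and the remaining entries are its callsites:

component_path = ["libphysics", "entry_fn", "kernel_a", "kernel_b"]
module = component_path[0]

edges = [{"source": module, "target": f"{module}={component_path[1]}"}]
for idx in range(1, len(component_path) - 1):
    edges.append({
        "source": f"{module}={component_path[idx]}",
        "target": f"{module}={component_path[idx + 1]}",
    })

for e in edges:
    print(e["source"], "->", e["target"])
# libphysics -> libphysics=entry_fn
# libphysics=entry_fn -> libphysics=kernel_a
# libphysics=kernel_a -> libphysics=kernel_b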
+ def create_source_targets(self, component_path): + module = "" + edges = [] + for idx, callsite in enumerate(component_path): + if idx == 0: + module = component_path[0] + edges.append( + { + "module": module, + "source": module, + "target": module + "=" + component_path[idx + 1], + } + ) + pass + elif idx == len(component_path) - 1: + pass + else: + edges.append( + { + "module": module, + "source": module + "=" + component_path[idx], + "target": module + "=" + component_path[idx + 1], + } + ) + + return edges + + def callsite_paths(self, callsites): + paths = [] + for callsite in callsites: + df = self.name_group_df.get_group(callsite) + paths.append( + { + "group_path": make_list(df["group_path"].unique()[0]), + "path": make_list(df["path"].unique()[0]), + "component_path": make_list(df["component_path"].unique()[0]), + } + ) + return paths + + def add_reveal_paths(self, reveal_callsites): + paths = self.callsite_paths(reveal_callsites) + + for path in paths: + component_edges = self.create_source_targets(path["component_path"]) + for idx, edge in enumerate(component_edges): + module = edge["module"] + + # format module + '=' + callsite + source = edge["source"] + target = edge["target"] + + if not self.supergraph.gf.nxg.has_edge(source, target): + if idx == 0: + source_callsite = source + source_df = self.module_group_df.get_group((module)) + source_node_type = "super-node" + else: + source_callsite = source.split("=")[1] + source_df = self.module_name_group_df.get_group( + (module, source_callsite) + ) + source_node_type = "component-node" + + target_callsite = target.split("=")[1] + target_df = self.module_name_group_df.get_group( + (module, target_callsite) + ) + target_node_type = "component-node" + + source_weight = source_df["time (inc)"].max() + target_weight = target_df["time (inc)"].max() + + edge_type = "normal" + + print(f"Adding edge: {source_callsite}, {target_callsite}") + self.supergraph.gf.nxg.add_node( + source, attr_dict={"type": source_node_type} + ) + self.supergraph.gf.nxg.add_node( + target, attr_dict={"type": target_node_type} + ) + self.supergraph.gf.nxg.add_edge( + source, + target, + attr_dict=[ + { + "source_callsite": source_callsite, + "target_callsite": target_callsite, + "edge_type": edge_type, + "weight": target_weight, + "edge_type": "reveal_edge", + } + ], + ) + + def add_exit_callsite(): + # TODO: This code is missing for some reason. + pass + + # ------------------------------------------------------------------------------ + # Create a module hierarchy for a chosen module. + # Not fully tested. Might break. 
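A toy picture (modules and callsites invented) of the entry-function map that module_entry_functions_map below derives from the super graph's edge attributes: every edge contributes its target_callsite to the list kept for the edge's target module.

import networkx as nx

g = nx.DiGraph()
g.add_edge("libA", "libB", attr_dict=[{"target_callsite": "foo"}])
g.add_edge("libC", "libB", attr_dict=[{"target_callsite": "bar"}])

entry_functions = {}
for source, target, data in g.edges(data=True):
    for edge_attr in data["attr_dict"]:
        entry_functions.setdefault(target, []).append(edge_attr["target_callsite"])

print(entry_functions)  # {'libB': ['foo', 'bar']}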
+ def module_entry_functions_map(self, graph): + entry_functions = {} + for edge in graph.edges(data=True): + attr_dict = edge[2]["attr_dict"] + edge_tuple = (edge[0], edge[1]) + for edge_attr in attr_dict: + if edge_tuple[1] not in entry_functions: + entry_functions[edge_tuple[1]] = [] + entry_functions[edge_tuple[1]].append(edge_attr["target_callsite"]) + return entry_functions + + def create_source_targets_from_group_path(self, path): + module = "" + edges = [] + for idx, callsite in enumerate(path): + if idx == len(path) - 1: + break + source = path[idx].split("=") + target = path[idx + 1].split("=") + edges.append( + { + "source": source[0], + "target": target[0], + "source_callsite": source[1], + "target_callsite": target[1], + } + ) + return edges + + def same_source_edges(self, component_edges, reveal_module): + ret = [] + for idx, edge in enumerate(component_edges): + source = edge["source"] + target = edge["target"] + + if source == reveal_module: + ret.append(edge) + return ret + + def same_target_edges(self, component_edges, reveal_module): + ret = [] + for idx, edge in enumerate(component_edges): + source = edge["source"] + target = edge["target"] + + if target == reveal_module: + ret.append(edge) + return ret + + def add_entry_callsite(self, reveal_module): + entry_functions_map = self.module_entry_functions_map(self.supergraph.gf.nxg) + reveal_callsites = entry_functions_map[reveal_module] + paths = self.callsitePathInformation(reveal_callsites) + + for path in paths: + component_edges = self.create_source_targets_from_group_path( + path["group_path"] + ) + source_edges_to_remove = self.same_source_edges( + component_edges, reveal_module + ) + target_edges_to_remove = self.same_target_edges( + component_edges, reveal_module + ) + + if len(source_edges_to_remove) != 0: + for edge in source_edges_to_remove: + if self.supergraph.gf.nxg.has_edge(edge["source"], edge["target"]): + self.supergraph.gf.nxg.remove_edge( + (edge["source"], edge["target"]) + ) + self.supergraph.gf.nxg.add_node( + reveal_module + "=" + edge["source_callsite"], + attr_dict={"type": "component-node"}, + ) + self.supergraph.gf.nxg.add_edge( + (reveal_module + "=" + edge["source_callsite"], edge["target"]), + attr_dict=[ + { + "source_callsite": edge["source_callsite"], + "target_callsite": edge["target_callsite"], + "edge_type": "normal", + "weight": self.module_name_group_df.get_group( + (reveal_module, edge["source_callsite"]) + )["time (inc)"].max(), + "edge_type": "reveal_edge", + } + ], + ) + + if len(target_edges_to_remove) != 0: + for edge in target_edges_to_remove: + if self.supergraph.gf.nxg.has_edge(edge["source"], edge["target"]): + self.supergraph.gf.nxg.remove_edge( + edge["source"], edge["target"] + ) + self.supergraph.gf.nxg.add_node( + reveal_module + "=" + edge["target_callsite"], + attr_dict={"type": "component-node"}, + ) + self.supergraph.gf.nxg.add_edge( + edge["source"], + reveal_module + "=" + edge["target_callsite"], + attr_dict=[ + { + "source_callsite": edge["source_callsite"], + "target_callsite": edge["target_callsite"], + "edge_type": "normal", + "weight": self.module_name_group_df.get_group( + (edge["target"], edge["target_callsite"]) + )["time (inc)"].max(), + "edge_type": "reveal_edge", + } + ], + ) + + self.supergraph.gf.nxg.remove_node(reveal_module) diff --git a/callflow/datastructures/supergraph_ensemble.py b/callflow/datastructures/supergraph_ensemble.py index 116c6f50..d54aaa6e 100644 --- a/callflow/datastructures/supergraph_ensemble.py +++ 
b/callflow/datastructures/supergraph_ensemble.py @@ -1,52 +1,53 @@ +# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. +# +# SPDX-License-Identifier: MIT + +# ------------------------------------------------------------------------------ +# Library imports import networkx as nx import numpy as np import pandas as pd import math, json from ast import literal_eval as make_list +# ------------------------------------------------------------------------------ +# CallFlow imports import callflow LOGGER = callflow.get_logger(__name__) -from callflow.timer import Timer - +from callflow import SuperGraph -class EnsembleSuperGraph(nx.Graph): - # Attributes: - # 1. State => Pass the state which needs to be handled. - # 2. path => '', 'path', 'group_path' or 'component_path' - # 3. construct_graph -> To decide if we should construct graph from path - # 4. add_data => To +# ------------------------------------------------------------------------------ +# Ensemble Super Graph class. +class EnsembleSuperGraph(SuperGraph): def __init__( self, - states, - path, + supergraphs={}, + tag="", + path="path", group_by_attr="module", + props={}, construct_graph=True, add_data=False, reveal_callsites=[], split_entry_module="", split_callee_module="", ): - super(EnsembleSuperGraph, self).__init__() - self.states = states - self.timer = Timer() - - # Store the ensemble graph (Since it is already processed.) - self.state_entire = self.states["ensemble_entire"] - self.state_filter = self.states["ensemble_filter"] - self.state_group = self.states["ensemble_group"] - self.ensemble_g = self.state_group.new_gf.nxg - self.node_list = np.array(list(self.ensemble_g.nodes())) - - # Path type to group by - # TODO: Generalize to any group the user provides. + # Call the SuperGraph class init. + super(EnsembleSuperGraph, self).__init__(props=props, tag=tag, mode="render") + + # Stores all the SuperGraphs using a Map. + self.supergraphs = supergraphs + self.path = path self.group_by = group_by_attr - self.entire_df = self.state_entire.new_gf.df - self.group_df = self.state_group.new_gf.df + # Need to remove. + self.ensemble_supergraph = self.supergraphs["ensemble"] + self.group_df = self.ensemble_supergraph.gf.df + # Columns to consider. - # TODO: Generalize it either all columns or let user specify the value using config.json self.columns = [ "time (inc)", "module", @@ -57,13 +58,7 @@ def __init__( "actual_time", ] - # Store all the names of runs in self.runs. 
- # TODO: Change name in the df from 'dataset' to 'run' - self.runs = self.entire_df["dataset"].unique() - - with self.timer.phase("Creating data maps"): - self.create_ensemble_maps() - self.create_target_maps() + self.runs = self.group_df["dataset"].unique() self.reveal_callsites = reveal_callsites self.split_entry_module = split_entry_module @@ -72,11 +67,11 @@ def __init__( with self.timer.phase("Construct Graph"): if construct_graph: LOGGER.info( - "Creating a SuperGraph for {0}.".format(self.state_group.name) + "Creating a SuperGraph for {0}.".format(self.supergraphs.keys()) ) self.cct = nx.DiGraph() - self.agg_g = nx.DiGraph() + self.agg_nxg = nx.DiGraph() self.add_paths(path) self.add_reveal_paths(self.reveal_callsites) if self.split_entry_module != "": @@ -84,331 +79,18 @@ def __init__( if self.split_callee_module != "": self.add_exit_callees_paths() else: - print("Using the existing graph from state {0}".format(self.state.name)) + LOGGER.debug(f"Using the existing graph from state {self.state.name}") - add_data = True with self.timer.phase("Add graph attributes"): - if add_data == True: - self.add_node_attributes() - self.add_edge_attributes() - # else: - # print("Creating a Graph without node or edge attributes.") + self.add_node_attributes() + self.add_edge_attributes() print(self.timer) - def create_target_maps(self): - self.target_df = {} - self.target_modules = {} - self.target_module_group_df = {} - self.target_module_name_group_df = {} - self.target_module_callsite_map = {} - self.target_module_time_inc_map = {} - self.target_module_time_exc_map = {} - self.target_name_time_inc_map = {} - self.target_name_time_exc_map = {} - - for run in self.runs: - # Reduce the entire_df to respective target dfs. - self.target_df[run] = self.entire_df.loc[self.entire_df["dataset"] == run] - - # Unique modules in the target run - self.target_modules[run] = self.target_df[run]["module"].unique() - - # Group the dataframe in two ways. - # 1. by module - # 2. by module and callsite - self.target_module_group_df[run] = self.target_df[run].groupby(["module"]) - self.target_module_name_group_df[run] = self.target_df[run].groupby( - ["module", "name"] - ) - - # Module map for target run {'module': [Array of callsites]} - self.target_module_callsite_map[run] = ( - self.target_module_group_df[run]["name"].unique().to_dict() - ) - - # Inclusive time maps for the module level and callsite level. - self.target_module_time_inc_map[run] = ( - self.target_module_group_df[run]["time (inc)"].max().to_dict() - ) - self.target_name_time_inc_map[run] = ( - self.target_module_name_group_df[run]["time (inc)"].max().to_dict() - ) - - # Exclusive time maps for the module level and callsite level. - self.target_module_time_exc_map[run] = ( - self.target_module_group_df[run]["time"].max().to_dict() - ) - self.target_name_time_exc_map[run] = ( - self.target_module_name_group_df[run]["time"].max().to_dict() - ) - - def create_ensemble_maps(self): - self.modules = self.entire_df["module"].unique() - - self.module_name_group_df = self.entire_df.groupby(["module", "name"]) - self.module_group_df = self.entire_df.groupby(["module"]) - self.name_group_df = self.entire_df.groupby(["name"]) - - # Module map for ensemble {'module': [Array of callsites]} - self.module_callsite_map = self.module_group_df["name"].unique().to_dict() - - # Inclusive time maps for the module level and callsite level. 
- self.module_time_inc_map = self.module_group_df["time (inc)"].max().to_dict() - self.name_time_inc_map = self.module_name_group_df["time (inc)"].max().to_dict() - - # Exclusive time maps for the module level and callsite level. - self.module_time_exc_map = self.module_group_df["time"].max().to_dict() - self.name_time_exc_map = self.module_name_group_df["time"].max().to_dict() - - def construct_cycle_free_paths(self, path): - ret = [] - moduleMapper = {} - dataMap = {} - - if isinstance(path, float): - return [] - path = make_list(path) - for idx, elem in enumerate(path): - callsite = elem.split("=")[1] - module = elem.split("=")[0] - if module not in dataMap: - moduleMapper[module] = 0 - dataMap[module] = [ - {"callsite": callsite, "module": module, "level": idx} - ] - else: - flag = [p["level"] == idx for p in dataMap[module]] - if np.any(np.array(flag)): - moduleMapper[module] += 1 - dataMap[module].append( - { - "callsite": callsite, - "module": module + "=" + callsite, - "level": idx, - } - ) - else: - dataMap[module].append( - {"callsite": callsite, "module": module, "level": idx} - ) - ret.append(dataMap[module][-1]) - - return ret - - def create_source_targets(self, component_path): - module = "" - edges = [] - for idx, callsite in enumerate(component_path): - if idx == 0: - module = component_path[0] - edges.append( - { - "module": module, - "source": module, - "target": module + "=" + component_path[idx + 1], - } - ) - pass - elif idx == len(component_path) - 1: - pass - else: - edges.append( - { - "module": module, - "source": module + "=" + component_path[idx], - "target": module + "=" + component_path[idx + 1], - } - ) - - return edges - - def callsitePathInformation(self, callsites): - paths = [] - for callsite in callsites: - df = self.name_group_df.get_group(callsite) - paths.append( - { - "group_path": make_list(df["group_path"].unique()[0]), - "path": make_list(df["path"].unique()[0]), - "component_path": make_list(df["component_path"].unique()[0]), - } - ) - return paths - - def add_reveal_paths(self, reveal_callsites): - paths = self.callsitePathInformation(reveal_callsites) - - for path in paths: - component_edges = self.create_source_targets(path["component_path"]) - for idx, edge in enumerate(component_edges): - module = edge["module"] - - # format module + '=' + callsite - source = edge["source"] - target = edge["target"] - - if not self.agg_g.has_edge(source, target): - if idx == 0: - source_callsite = source - source_df = self.module_group_df.get_group((module)) - source_node_type = "super-node" - else: - source_callsite = source.split("=")[1] - source_df = self.module_name_group_df.get_group( - (module, source_callsite) - ) - source_node_type = "component-node" - - target_callsite = target.split("=")[1] - target_df = self.module_name_group_df.get_group( - (module, target_callsite) - ) - target_node_type = "component-node" - - source_weight = source_df["time (inc)"].max() - target_weight = target_df["time (inc)"].max() - - edge_type = "normal" - - print(f"Adding edge: {source_callsite}, {target_callsite}") - self.agg_g.add_node(source, attr_dict={"type": source_node_type}) - self.agg_g.add_node(target, attr_dict={"type": target_node_type}) - self.agg_g.add_edge( - source, - target, - attr_dict=[ - { - "source_callsite": source_callsite, - "target_callsite": target_callsite, - "edge_type": edge_type, - "weight": target_weight, - "edge_type": "reveal_edge", - } - ], - ) - - ######################### Entry function interaction ################################ - - 
def module_entry_functions_map(self, graph): - entry_functions = {} - for edge in graph.edges(data=True): - attr_dict = edge[2]["attr_dict"] - edge_tuple = (edge[0], edge[1]) - print(edge_tuple) - for edge_attr in attr_dict: - if edge_tuple[1] not in entry_functions: - entry_functions[edge_tuple[1]] = [] - entry_functions[edge_tuple[1]].append(edge_attr["target_callsite"]) - return entry_functions - - def create_source_targets_from_group_path(self, path): - module = "" - edges = [] - for idx, callsite in enumerate(path): - if idx == len(path) - 1: - break - source = path[idx].split("=") - target = path[idx + 1].split("=") - edges.append( - { - "source": source[0], - "target": target[0], - "source_callsite": source[1], - "target_callsite": target[1], - } - ) - return edges - - def same_source_edges(self, component_edges, reveal_module): - ret = [] - for idx, edge in enumerate(component_edges): - source = edge["source"] - target = edge["target"] - - if source == reveal_module: - ret.append(edge) - return ret - - def same_target_edges(self, component_edges, reveal_module): - ret = [] - for idx, edge in enumerate(component_edges): - source = edge["source"] - target = edge["target"] - - if target == reveal_module: - ret.append(edge) - return ret - - def add_entry_callsite_paths(self, reveal_module): - entry_functions_map = self.module_entry_functions_map(self.agg_g) - reveal_callsites = entry_functions_map[reveal_module] - paths = self.callsitePathInformation(reveal_callsites) - - for path in paths: - component_edges = self.create_source_targets_from_group_path( - path["group_path"] - ) - source_edges_to_remove = self.same_source_edges( - component_edges, reveal_module - ) - target_edges_to_remove = self.same_target_edges( - component_edges, reveal_module - ) - - if len(source_edges_to_remove) != 0: - for edge in source_edges_to_remove: - if self.agg_g.has_edge(edge["source"], edge["target"]): - self.agg_g.remove_edge((edge["source"], edge["target"])) - self.agg_g.add_node( - reveal_module + "=" + edge["source_callsite"], - attr_dict={"type": "component-node"}, - ) - self.agg_g.add_edge( - (reveal_module + "=" + edge["source_callsite"], edge["target"]), - attr_dict=[ - { - "source_callsite": edge["source_callsite"], - "target_callsite": edge["target_callsite"], - "edge_type": "normal", - "weight": self.module_name_group_df.get_group( - (reveal_module, edge["source_callsite"]) - )["time (inc)"].max(), - "edge_type": "reveal_edge", - } - ], - ) - - if len(target_edges_to_remove) != 0: - for edge in target_edges_to_remove: - if self.agg_g.has_edge(edge["source"], edge["target"]): - self.agg_g.remove_edge(edge["source"], edge["target"]) - self.agg_g.add_node( - reveal_module + "=" + edge["target_callsite"], - attr_dict={"type": "component-node"}, - ) - self.agg_g.add_edge( - edge["source"], - reveal_module + "=" + edge["target_callsite"], - attr_dict=[ - { - "source_callsite": edge["source_callsite"], - "target_callsite": edge["target_callsite"], - "edge_type": "normal", - "weight": self.module_name_group_df.get_group( - (edge["target"], edge["target_callsite"]) - )["time (inc)"].max(), - "edge_type": "reveal_edge", - } - ], - ) - - self.agg_g.remove_node(reveal_module) - def add_paths(self, path): paths_df = self.group_df.groupby(["name", "group_path"]) for (callsite, path_str), path_df in paths_df: - path_list = self.construct_cycle_free_paths(path_str) + path_list = self.remove_cycles_in_paths(path_str) for callsite_idx, callsite in enumerate(path_list): if callsite_idx != len(path_list) - 1: 
source = path_list[callsite_idx] @@ -427,8 +109,10 @@ def add_paths(self, path): (target_module, target_callsite) ) - has_caller_edge = self.agg_g.has_edge(source_module, target_module) - has_callback_edge = self.agg_g.has_edge( + has_caller_edge = self.agg_nxg.has_edge( + source_module, target_module + ) + has_callback_edge = self.agg_nxg.has_edge( target_module, source_module ) has_cct_edge = self.cct.has_edge(source_callsite, target_callsite) @@ -468,21 +152,21 @@ def add_paths(self, path): print( f"Add {edge_type} edge for : {source_module}--{target_module}" ) - self.agg_g.add_node(source_module, attr_dict=node_dict) - self.agg_g.add_node(target_module, attr_dict=node_dict) - self.agg_g.add_edge( + self.agg_nxg.add_node(source_module, attr_dict=node_dict) + self.agg_nxg.add_node(target_module, attr_dict=node_dict) + self.agg_nxg.add_edge( source_module, target_module, attr_dict=[edge_dict] ) elif not has_cct_edge and not has_callback_edge: # print(f"Edge already exists for : {source_module}--{target_module}") - edge_data = self.agg_g.get_edge_data( + edge_data = self.agg_nxg.get_edge_data( *(source_module, target_module) ) - self.agg_g[source_module][target_module]["attr_dict"].append( + self.agg_nxg[source_module][target_module]["attr_dict"].append( edge_dict ) - # print(agg_g[source_module][target_module]) + # print(agg_nxg[source_module][target_module]) if not has_cct_edge: self.cct.add_edge( @@ -492,21 +176,23 @@ def add_paths(self, path): ) def add_edge_attributes(self): - # runs_mapping = self.run_counts(self.agg_g) - # nx.set_edge_attributes(self.agg_g, name="number_of_runs", values=runs_mapping) - edge_type_mapping = self.edge_type(self.agg_g) - nx.set_edge_attributes(self.agg_g, name="edge_type", values=edge_type_mapping) - flow_mapping = self.flows(self.agg_g) - nx.set_edge_attributes(self.agg_g, name="weight", values=flow_mapping) - # target_flow_mapping = self.target_flows(self.agg_g) - # nx.set_edge_attributes(self.agg_g, name="target_weight", values=target_flow_mapping) - entry_functions_mapping = self.entry_functions(self.agg_g) + # runs_mapping = self.run_counts(self.agg_nxg) + # nx.set_edge_attributes(self.agg_nxg, name="number_of_runs", values=runs_mapping) + + edge_type_mapping = self.edge_type(self.agg_nxg) + nx.set_edge_attributes(self.agg_nxg, name="edge_type", values=edge_type_mapping) + + flow_mapping = self.flows(self.agg_nxg) + nx.set_edge_attributes(self.agg_nxg, name="weight", values=flow_mapping) + + entry_functions_mapping = self.entry_functions(self.agg_nxg) nx.set_edge_attributes( - self.agg_g, name="entry_callsites", values=entry_functions_mapping + self.agg_nxg, name="entry_callsites", values=entry_functions_mapping ) - exit_functions_mapping = self.exit_functions(self.agg_g) + + exit_functions_mapping = self.exit_functions(self.agg_nxg) nx.set_edge_attributes( - self.agg_g, name="exit_callsites", values=exit_functions_mapping + self.agg_nxg, name="exit_callsites", values=exit_functions_mapping ) def run_counts(self, graph): @@ -523,7 +209,7 @@ def edge_type(self, graph): def flows(self, graph): self.weight_map = {} - for edge in self.agg_g.edges(data=True): + for edge in self.agg_nxg.edges(data=True): if (edge[0], edge[1]) not in self.weight_map: self.weight_map[(edge[0], edge[1])] = 0 @@ -537,7 +223,6 @@ def flows(self, graph): if edge_tuple not in self.weight_map: # Check if it s a reveal edge attr_dict = edge[2]["attr_dict"] - print(attr_dict) if attr_dict["edge_type"] == "reveal_edge": self.weight_map[edge_tuple] = attr_dict["weight"] 
ret[edge_tuple] = self.weight_map[edge_tuple] @@ -548,9 +233,10 @@ def flows(self, graph): return ret + # Not used. def target_flows(self, graph): self.weight_map = {} - for edge in self.agg_g.edges(data=True): + for edge in self.agg_nxg.edges(data=True): if (edge[0], edge[1]) not in self.weight_map: self.weight_map[(edge[0], edge[1])] = 0 @@ -564,7 +250,6 @@ def target_flows(self, graph): if edge_tuple not in self.weight_map: # Check if it s a reveal edge attr_dict = edge[2]["attr_dict"] - print(attr_dict) if attr_dict["edge_type"] == "reveal_edge": self.weight_map[edge_tuple] = attr_dict["weight"] ret[edge_tuple] = self.weight_map[edge_tuple] @@ -608,16 +293,16 @@ def exit_functions(self, graph): return exit_functions def add_node_attributes(self): - ensemble_mapping = self.ensemble_map(self.agg_g.nodes()) + ensemble_mapping = self.ensemble_map(self.agg_nxg.nodes()) for idx, key in enumerate(ensemble_mapping): - nx.set_node_attributes(self.agg_g, name=key, values=ensemble_mapping[key]) + nx.set_node_attributes(self.agg_nxg, name=key, values=ensemble_mapping[key]) dataset_mapping = {} for run in self.runs: - dataset_mapping[run] = self.dataset_map(self.agg_g.nodes(), run) + dataset_mapping[run] = self.dataset_map(self.agg_nxg.nodes(), run) - nx.set_node_attributes(self.agg_g, name=run, values=dataset_mapping[run]) + nx.set_node_attributes(self.agg_nxg, name=run, values=dataset_mapping[run]) def callsite_time(self, group_df, module, callsite): callsite_df = group_df.get_group((module, callsite)) @@ -642,13 +327,11 @@ def ensemble_map(self, nodes): ret = {} # loop through the nodes - for node in self.agg_g.nodes(data=True): + for node in self.agg_nxg.nodes(data=True): node_name = node[0] node_dict = node[1]["attr_dict"] - print(node_name, node_dict) if node_dict["type"] == "component-node": - print(node_name, node_dict) module = node_name.split("=")[0] callsite = node_name.split("=")[1] actual_time = self.callsite_time( @@ -696,7 +379,7 @@ def ensemble_map(self, nodes): def dataset_map(self, nodes, run): ret = {} - for node in self.agg_g.nodes(data=True): + for node in self.agg_nxg.nodes(data=True): node_name = node[0] node_dict = node[1]["attr_dict"] if node_name in self.target_module_callsite_map[run].keys(): diff --git a/callflow/datastructures/supergraph_single.py b/callflow/datastructures/supergraph_single.py index 06756e1f..0c11f1c9 100644 --- a/callflow/datastructures/supergraph_single.py +++ b/callflow/datastructures/supergraph_single.py @@ -1,92 +1,67 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. +# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. # -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. 
-############################################################################## +# SPDX-License-Identifier: MIT +# ------------------------------------------------------------------------------ +# Library imports import sys import networkx as nx import math import json from ast import literal_eval as make_tuple + +# ------------------------------------------------------------------------------ +# CallFlow imports +import callflow from callflow.timer import Timer +from callflow import SuperGraph +LOGGER = callflow.get_logger(__name__) -class SingleSuperGraph(nx.Graph): +# ------------------------------------------------------------------------------ +# Single Super Graph class. +class SingleSuperGraph(SuperGraph): def __init__( self, - states, + supergraphs, + tag, dataset, path, group_by_attr="module", construct_graph=True, add_data=True, - debug=True, ): - super(SingleSuperGraph, self).__init__() - self.log = Log("supergraph") - self.state = states[dataset] - self.dataset = dataset - self.timer = Timer() + super(SingleSuperGraph, self).__init__(props=props, tag=tag, mode="render") - self.graph = state.new_gf.graph - self.df = state.new_gf.df - self.g = state.new_gf.nxg + self.ensemble_supergraph = self.supergraphs[tag] + self.group_df = self.ensemble_supergraph.gf.df + self.path = path self.group_by = group_by_attr + # Columns to consider. self.columns = [ "time (inc)", - "group_path", + "module", "name", "time", - "callers", - "callees", - "vis_name", + "type", "module", - "show_node", + "actual_time", ] with self.timer.phase("Construct Graph"): if construct_graph: - log.info("Creating the SuperGraph for {0}.".format(self.state.name)) + LOGGER.info("Creating the SuperGraph for {0}.".format(self.state.name)) self.mapper = {} self.g = nx.DiGraph() self.add_paths(path) + self.add_callback_paths() else: print("Using the existing graph from state {0}".format(self.state.name)) - if debug: - log.warn("Modules: {0}".format(self.df["module"].unique())) - log.warn("Top 10 Inclusive time: ") - top = 10 - rank_df = self.df.groupby(["name", "nid"]).mean() - top_inclusive_df = rank_df.nlargest(top, "time (inc)", keep="first") - for name, row in top_inclusive_df.iterrows(): - log.info("{0} [{1}]".format(name, row["time (inc)"])) - - log.warn("Top 10 Enclusive time: ") - top_exclusive_df = rank_df.nlargest(top, "time", keep="first") - for name, row in top_exclusive_df.iterrows(): - log.info("{0} [{1}]".format(name, row["time"])) - - for node in self.g.nodes(data=True): - log.info("Node: {0}".format(node)) - for edge in self.g.edges(): - log.info("Edge: {0}".format(edge)) - - log.warn("Nodes in the tree: {0}".format(len(self.g.nodes))) - log.warn("Edges in the tree: {0}".format(len(self.g.edges))) - log.warn("Is it a tree? : {0}".format(nx.is_tree(self.g))) - log.warn("Flow hierarchy: {0}".format(nx.flow_hierarchy(self.g))) - - # Variables to control the data properties globally. + # Remove. 
self.callbacks = [] self.edge_direction = {} @@ -94,30 +69,12 @@ def __init__( if add_data == True: self.add_node_attributes() self.add_edge_attributes() - # else: - # print("Creating a Graph without node or edge attributes.") - - log.info(self.timer) - - def no_cycle_path(self, path): - ret = [] - moduleMapper = {} - for idx, elem in enumerate(path): - call_site = elem.split("=")[1] - module = self.df.loc[self.df.name == call_site]["module"].tolist()[0] - if module not in moduleMapper and elem in self.mapper: - self.mapper[elem] += 1 - moduleMapper[module] = True - ret.append(elem) - elif elem not in self.mapper: - self.mapper[elem] = 0 else: - self.mapper[elem] += 1 - return tuple(ret) + LOGGER.info("Creating a Graph without node or edge attributes.") + + LOGGER.debug(self.timer) def add_paths(self, path): - # path_df = self.df[path].fillna("()") - # paths = path_df.drop_duplicates().tolist() paths = self.df[path].unique() for idx, path_str in enumerate(paths): if not isinstance(path_str, float): @@ -138,6 +95,7 @@ def add_paths(self, path): }, ) + # TODO: remove this if not needed. def add_callback_paths(self): for from_module, to_modules in self.callbacks.items(): for idx, to_module in enumerate(to_modules): @@ -196,23 +154,6 @@ def calculate_flows(self, graph): return ret - def tailhead(self, edge): - return (edge[0], edge[1]) - - def tailheadDir(self, edge): - return (str(edge[0]), str(edge[1]), self.edge_direction[edge]) - - def leaves_below(self, graph, node): - return set( - sum( - ( - [vv for vv in v if graph.out_degree(vv) == 0] - for k, v in nx.dfs_successors(graph, node).items() - ), - [], - ) - ) - def dataset_map(self, nodes, dataset): ret = {} for node in self.g.nodes(): diff --git a/callflow/datastructures/uniongraph.py b/callflow/datastructures/uniongraph.py deleted file mode 100644 index 426f4c90..00000000 --- a/callflow/datastructures/uniongraph.py +++ /dev/null @@ -1,76 +0,0 @@ -import networkx as nx - - -class UnionGraph: - def __init__(self): - self.R = nx.DiGraph() - self.runs = {} - self.diffset = {} - - # Return the union of graphs G and H. - def unionize(self, H, name=None, rename=(None, None)): - if not self.R.is_multigraph() == H.is_multigraph(): - raise nx.NetworkXError("G and H must both be graphs or multigraphs.") - - self.R.graph.update(H.graph) - - renamed_nodes = self.add_prefix(H, rename[1]) - - debug = False - if debug: - print("-=========================-") - print("Nodes in R and H are same? ", set(self.R) == set(H)) - if set(self.R) != set(H): - print("Difference is ", list(set(H) - set(self.R))) - print("Nodes in R", set(self.R)), - print("Nodes in H", set(H)) - print("-=========================-") - - if H.is_multigraph(): - H_edges = H.edges(keys=True, data=True) - else: - H_edges = H.edges(data=True) - - # add nodes and edges. 
- self.R.add_nodes_from(H) - self.R.add_edges_from(H_edges) - - # add node attributes for each run - for n in renamed_nodes: - self.add_node_attributes(H, n, name) - - # rename graph to obtain disjoint node labels - def add_prefix(self, graph, prefix): - if prefix is None: - return graph - - def label(x): - if is_string_like(x): - name = prefix + x - else: - name = prefix + repr(x) - return name - - return nx.relabel_nodes(graph, label) - - def add_edge_attributes(self): - number_of_runs_mapping = self.number_of_runs() - nx.set_edge_attributes( - self.R, name="number_of_runs", values=number_of_runs_mapping - ) - - def number_of_runs(self): - ret = {} - for idx, name in enumerate(self.runs): - for edge in self.runs[name].edges(): - if edge not in ret: - ret[edge] = 0 - ret[edge] += 1 - return ret - - def add_node_attributes(self, H, node, dataset_name): - for idx, (key, val) in enumerate(H.nodes.items()): - if dataset_name not in self.R.nodes[node]: - self.R.nodes[node][dataset_name] = 0 - if key == node: - self.R.nodes[node][dataset_name] = 1 diff --git a/callflow/logger.py b/callflow/logger.py index c94fc790..39f0a94e 100644 --- a/callflow/logger.py +++ b/callflow/logger.py @@ -25,9 +25,6 @@ def init_logger(**kwargs): level = int(kwargs.get("level", 2)) do_color = str(kwargs.get("color", True)) - # print ('level = ({})'.format(level)) - # print ('do_color = ({})'.format(do_color)) - # -------------------------------------------------------------------------- # get logging level in "logging" format assert level >= 1 and level <= 5 @@ -44,18 +41,6 @@ def init_logger(**kwargs): # -------------------------------------------------------------------------- # get loging format - """ - aliases = { - logging.DEBUG: "%(log_color)s(%(name)s.py) %(msg)s ", - logging.ERROR: "\033 %(log_color)s(%(name)s.py) ERROR: %(msg)s", - logging.CRITICAL: "\033 %(log_color)s(%(name)s.py) CRITICAL: %(msg)s", - logging.WARNING: "\033 %(log_color)s(%(name)s.py) WARN: %(msg)s", - logging.INFO: "%(log_color)s%(msg)s", - } - LOG_FMT = aliases[level] - """ - - # Harsh's suggestion # here, the initialization of the format doesnt depend upon "level" LOG_FMT = ( "%(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(levelname)s - %(message)s" diff --git a/callflow/modules/auxiliary_ensemble.py b/callflow/modules/auxiliary_ensemble.py index 7077526c..5fbbd69d 100644 --- a/callflow/modules/auxiliary_ensemble.py +++ b/callflow/modules/auxiliary_ensemble.py @@ -30,27 +30,57 @@ class EnsembleAuxiliary: def __init__( self, - states, + gf=callflow.GraphFrame, + datasets=[], + props={}, MPIBinCount="20", RunBinCount="20", - datasets=[], - config={}, process=True, write=False, ): - self.timer = Timer() - self.df = self.select_rows(states["ensemble_entire"].new_gf.df, datasets) + self.gf = gf self.MPIBinCount = MPIBinCount self.RunBinCount = RunBinCount - self.config = config - self.states = states + self.timer = Timer() + self.props = props + self.datasets = self.props["dataset_names"] + + self.df = self.select_rows(self.gf.df, self.datasets) + self.process = process self.write = write - self.datasets = datasets - self.props = ["rank", "name", "dataset", "all_ranks"] + self.hist_props = ["rank", "name", "dataset", "all_ranks"] self.filter = True + if process: + self.compute() + else: + self.read() + print(self.timer) + + def compute(self): + ret = {} + path = os.path.join(self.props["save_path"], "ensemble/auxiliary_data.json") + + LOGGER.info("Calculating Gradients, Mean runtime variations, and Distribution.") + with 
self.timer.phase("Process data"): + self.group_frames() + with self.timer.phase("Collect Callsite data"): + ret["callsite"] = self.callsite_data() + with self.timer.phase("Collect Module data"): + ret["module"] = self.module_data() + with self.timer.phase("Module callsite map data"): + ret["moduleCallsiteMap"] = self.get_module_callsite_map() + # with self.timer.phase("Callsite module map data"): + # ret['callsiteModuleMap'] = self.get_callsite_module_map() + # if self.write: + with self.timer.phase("Writing data"): + with open(path, "w") as f: + json.dump(ret, f) + + return ret + def filter_dict(self, result): ret = {} @@ -61,14 +91,14 @@ def filter_dict(self, result): ret["callsite"] = {} group_df = self.df.groupby(["name"]).mean() - if self.config.filter_by == "time": + if self.props["filter_by"] == "time": f_group_df = group_df.loc[ - group_df[self.config.filter_by] > self.config.filter_below + group_df[self.props["filter_by"]] > self.props["filter_below"] ] - elif self.config.filter_by == "time (inc)": + elif self.props["filter_by"] == "time (inc)": f_group_df = group_df.loc[ - group_df[self.config.filter_by] - > 0.01 * self.config.filter_perc * group_df["time (inc)"].max() + group_df[self.props["filter_by"]] + > 0.01 * self.props["filter_perc"] * group_df["time (inc)"].max() ] callsites = f_group_df.index.values.tolist() @@ -85,13 +115,12 @@ def filter_dict(self, result): def group_frames(self): if self.filter: - # self.df = self.df.loc[self.df['time'] > 0.01*self.config.filter_perc*self.df['time'].max() ] - # self.df = self.df.loc[self.df['time (inc)'] > self.config.filter_perc]['name'].unique() xgroup_df = self.df.groupby(["name"]).mean() sort_xgroup_df = xgroup_df.sort_values(by=["time (inc)"], ascending=False) top100callsites = sort_xgroup_df.nlargest(50, "time (inc)") self.df = self.df[self.df["name"].isin(top100callsites.index.values)] + self.df.drop(["rank"], axis=1) self.module_name_group_df = self.df.groupby(["module", "name"]) self.module_group_df = self.df.groupby(["module"]) self.name_group_df = self.df.groupby(["name"]) @@ -112,6 +141,122 @@ def group_frames(self): ["name"] ) + # Callsite grouped information + def callsite_data(self): + ret = {} + + # Create the data dict. + ensemble = {} + for callsite, callsite_df in self.name_group_df: + callsite_ensemble_df = self.name_group_df.get_group(callsite) + hists = {} + hists["Inclusive"] = {} + hists["Exclusive"] = {} + for prop in self.hist_props: + prop_histograms = self.histogram_by_property_ensemble( + callsite_ensemble_df, prop + ) + hists["Inclusive"][prop] = prop_histograms["Inclusive"] + hists["Exclusive"][prop] = prop_histograms["Exclusive"] + + gradients = Gradients(self.target_df, binCount=self.RunBinCount).run( + columnName="name", callsiteOrModule=callsite + ) + boxplot = BoxPlot(callsite_df) + ensemble[callsite] = self.pack_json( + callsite_df, + callsite, + gradients=gradients, + q=boxplot.q, + outliers=boxplot.outliers, + prop_hists=hists, + ) + + ret["ensemble"] = ensemble + + ## Target data. + # Loop through datasets and group the callsite by name. 
+ for dataset in self.datasets: + name_grouped = self.target_name_group_df[dataset] + target = {} + for callsite, callsite_df in name_grouped: + callsite_ensemble_df = self.name_group_df.get_group(callsite) + callsite_target_df = callsite_df + + if not callsite_df.empty: + hists = {} + hists["Inclusive"] = {} + hists["Exclusive"] = {} + for prop in self.hist_props: + prop_histograms = self.histogram_by_property( + callsite_ensemble_df, callsite_target_df, prop + ) + hists["Inclusive"][prop] = prop_histograms["Inclusive"] + hists["Exclusive"][prop] = prop_histograms["Exclusive"] + + boxplot = BoxPlot(callsite_df) + target[callsite] = self.pack_json( + df=callsite_target_df, + name=callsite, + prop_hists=hists, + q=boxplot.q, + outliers=boxplot.outliers, + ) + ret[dataset] = target + + return ret + + def module_data(self): + ret = {} + # Module grouped information + modules = self.df["module"].unique() + ensemble = {} + for module, module_df in self.module_group_df: + module_ensemble_df = self.module_group_df.get_group(module) + hists = {"Inclusive": {}, "Exclusive": {}} + for prop in self.hist_props: + prop_histograms = self.histogram_by_property_ensemble( + module_ensemble_df, prop + ) + hists["Inclusive"][prop] = prop_histograms["Inclusive"] + hists["Exclusive"][prop] = prop_histograms["Exclusive"] + + # Calculate gradients + gradients = Gradients(self.target_df, binCount=self.RunBinCount).run( + columnName="module", callsiteOrModule=module + ) + ensemble[module] = self.pack_json( + df=module_df, name=module, gradients=gradients, prop_hists=hists + ) + + ret["ensemble"] = ensemble + + for dataset in self.datasets: + target = {} + module_group_df = self.target_module_group_df[dataset] + for module, module_df in module_group_df: + module_ensemble_df = self.module_group_df.get_group(module) + module_target_df = module_df + gradients = {"Inclusive": {}, "Exclusive": {}} + hists = {"Inclusive": {}, "Exclusive": {}} + if not module_target_df.empty: + for prop in self.hist_props: + prop_histograms = self.histogram_by_property( + module_ensemble_df, module_target_df, prop + ) + hists["Inclusive"][prop] = prop_histograms["Inclusive"] + hists["Exclusive"][prop] = prop_histograms["Exclusive"] + target[module] = self.pack_json( + df=module_target_df, + name=module, + gradients=gradients, + prop_hists=hists, + ) + + ret[dataset] = target + + return ret + def select_rows(self, df, search_strings): unq, IDs = np.unique(df["dataset"], return_inverse=True) unqIDs = np.searchsorted(unq, search_strings) @@ -188,8 +333,8 @@ def pack_json( "id": "node-" + str(df["nid"].tolist()[0]), "dataset": df["dataset"].unique().tolist(), "module": df["module"].tolist()[0], - "callers": df["callers"].unique().tolist(), - "callees": df["callees"].unique().tolist(), + # "callers": df["callers"].unique().tolist(), + # "callees": df["callees"].unique().tolist(), "component_path": df["component_path"].unique().tolist(), "component_level": df["component_level"].unique().tolist(), "Inclusive": { @@ -243,6 +388,7 @@ def histogram_by_property_ensemble(self, ensemble_df, prop): time_ensemble_exclusive_arr = np.array(ensemble_df["time"].tolist()) elif prop == "rank": + ensemble_df.reset_index(drop=True, inplace=True) ensemble_prop = ensemble_df.groupby(["dataset", prop])[ ["time", "time (inc)"] ].mean() @@ -287,9 +433,12 @@ def histogram_by_property(self, ensemble_df, target_df, prop): time_target_inclusive_arr = np.array(target_df["time (inc)"].tolist()) time_target_exclusive_arr = np.array(target_df["time"].tolist()) elif prop == 
"rank": + ensemble_df.reset_index(drop=True, inplace=True) ensemble_prop = ensemble_df.groupby(["dataset", prop])[ ["time", "time (inc)"] ].mean() + + target_df.reset_index(drop=True, inplace=True) target_prop = target_df.groupby(["dataset", prop])[ ["time", "time (inc)"] ].mean() @@ -344,146 +493,3 @@ def histogram_by_property(self, ensemble_df, target_df, prop): "target": self.histogram_format(histogram_target_exclusive_grid), } return ret - - # Callsite grouped information - def callsite_data(self): - ret = {} - - # Create the data dict. - ensemble = {} - for callsite, callsite_df in self.name_group_df: - callsite_ensemble_df = self.name_group_df.get_group(callsite) - hists = {} - hists["Inclusive"] = {} - hists["Exclusive"] = {} - for prop in self.props: - prop_histograms = self.histogram_by_property_ensemble( - callsite_ensemble_df, prop - ) - hists["Inclusive"][prop] = prop_histograms["Inclusive"] - hists["Exclusive"][prop] = prop_histograms["Exclusive"] - - gradients = Gradients(self.target_df, binCount=self.RunBinCount).run( - columnName="name", callsiteOrModule=callsite - ) - boxplot = BoxPlot(callsite_df) - ensemble[callsite] = self.pack_json( - callsite_df, - callsite, - gradients=gradients, - q=boxplot.q, - outliers=boxplot.outliers, - prop_hists=hists, - ) - - ret["ensemble"] = ensemble - - ## Target data. - # Loop through datasets and group the callsite by name. - for dataset in self.datasets: - name_grouped = self.target_name_group_df[dataset] - target = {} - for callsite, callsite_df in name_grouped: - callsite_ensemble_df = self.name_group_df.get_group(callsite) - callsite_target_df = callsite_df - - if not callsite_df.empty: - hists = {} - hists["Inclusive"] = {} - hists["Exclusive"] = {} - for prop in self.props: - prop_histograms = self.histogram_by_property( - callsite_ensemble_df, callsite_target_df, prop - ) - hists["Inclusive"][prop] = prop_histograms["Inclusive"] - hists["Exclusive"][prop] = prop_histograms["Exclusive"] - - boxplot = BoxPlot(callsite_df) - target[callsite] = self.pack_json( - df=callsite_target_df, - name=callsite, - prop_hists=hists, - q=boxplot.q, - outliers=boxplot.outliers, - ) - ret[dataset] = target - - return ret - - def module_data(self): - ret = {} - # Module grouped information - modules = self.df["module"].unique() - ensemble = {} - for module, module_df in self.module_group_df: - module_ensemble_df = self.module_group_df.get_group(module) - hists = {"Inclusive": {}, "Exclusive": {}} - for prop in self.props: - prop_histograms = self.histogram_by_property_ensemble( - module_ensemble_df, prop - ) - hists["Inclusive"][prop] = prop_histograms["Inclusive"] - hists["Exclusive"][prop] = prop_histograms["Exclusive"] - - # Calculate gradients - gradients = Gradients(self.target_df, binCount=self.RunBinCount).run( - columnName="module", callsiteOrModule=module - ) - ensemble[module] = self.pack_json( - df=module_df, name=module, gradients=gradients, prop_hists=hists - ) - - ret["ensemble"] = ensemble - - for dataset in self.datasets: - target = {} - module_group_df = self.target_module_group_df[dataset] - for module, module_df in module_group_df: - module_ensemble_df = self.module_group_df.get_group(module) - module_target_df = module_df - gradients = {"Inclusive": {}, "Exclusive": {}} - hists = {"Inclusive": {}, "Exclusive": {}} - if not module_target_df.empty: - for prop in self.props: - prop_histograms = self.histogram_by_property( - module_ensemble_df, module_target_df, prop - ) - hists["Inclusive"][prop] = prop_histograms["Inclusive"] - 
hists["Exclusive"][prop] = prop_histograms["Exclusive"] - target[module] = self.pack_json( - df=module_target_df, - name=module, - gradients=gradients, - prop_hists=hists, - ) - - ret[dataset] = target - - return ret - - def run(self): - ret = {} - path = os.path.join(self.config.save_path, "all_data.json") - - if self.process: - LOGGER.info( - "Calculating Gradients, Mean runtime variations, and Distribution." - ) - with self.timer.phase("Process data"): - self.group_frames() - with self.timer.phase("Collect Callsite data"): - ret["callsite"] = self.callsite_data() - with self.timer.phase("Collect Module data"): - ret["module"] = self.module_data() - with self.timer.phase("Module callsite map data"): - ret["moduleCallsiteMap"] = self.get_module_callsite_map() - # with self.timer.phase("Callsite module map data"): - # ret['callsiteModuleMap'] = self.get_callsite_module_map() - if self.write: - with self.timer.phase("Writing data"): - with open(path, "w") as f: - json.dump(ret, f) - - LOGGER.debug(self.timer) - - return ret diff --git a/callflow/modules/auxiliary_single.py b/callflow/modules/auxiliary_single.py index ecf9982e..8ff8f859 100644 --- a/callflow/modules/auxiliary_single.py +++ b/callflow/modules/auxiliary_single.py @@ -21,13 +21,13 @@ class SingleAuxiliary: - def __init__(self, state, binCount="20", dataset="", config={}, process=True): - self.graph = state.new_gf.graph - self.df = state.new_gf.df - self.config = config + def __init__(self, gf, dataset="", MPIBinCount=20, props={}, process=True): + self.graph = gf.graph + self.df = gf.df + self.props = props self.process = process self.dataset = dataset - self.binCount = binCount + self.binCount = MPIBinCount ret_df = pd.DataFrame([]) self.timer = Timer() @@ -98,6 +98,9 @@ def pack_json(self, group_df, node_name, data_type): hist_inc_grid = self.histogram(time_inc_target_arr) hist_exc_grid = self.histogram(time_exc_target_arr) + if "rank" not in group_df.keys(): + group_df = group_df.reset_index(drop=False) + result = { "name": node_name, "time (inc)": group_df["time (inc)"].tolist(), @@ -168,11 +171,7 @@ def module_data(self): def run(self): ret = {} - path = ( - self.config.processed_path - + f"/{self.config.runName}" - + f"/{self.dataset}/all_data.json" - ) + path = self.props["save_path"] + f"/{self.dataset}/auxiliary_data.json" # self.process = True if os.path.exists(path) and not self.process: diff --git a/callflow/modules/function_list.py b/callflow/modules/function_list.py index 2d695da9..30faf082 100644 --- a/callflow/modules/function_list.py +++ b/callflow/modules/function_list.py @@ -37,7 +37,6 @@ def add_paths(self, path_name): for idx, row in self.df.iterrows(): # if row.show_node: path = row[path_name] - print(path) # TODO: Sometimes the path becomes a string. Find why it happens. # If it becomes a string if isinstance(path, str): diff --git a/callflow/modules/module_hierarchy.py b/callflow/modules/module_hierarchy.py index 40cdb0a7..ea3d8e31 100644 --- a/callflow/modules/module_hierarchy.py +++ b/callflow/modules/module_hierarchy.py @@ -11,13 +11,11 @@ LOGGER = callflow.get_logger(__name__) from callflow.timer import Timer -from callflow.utils import sanitizeName class ModuleHierarchy: - def __init__(self, state, module, config={}): - self.df = state.new_gf.df - self.config = config + def __init__(self, supergraph, module): + self.df = supergraph.gf.df self.module = module # Create the Super node's hierarchy. 
@@ -34,8 +32,8 @@ def create_source_targets(self, path): if idx == len(path) - 1: break - source = sanitizeName(path[idx]) - target = sanitizeName(path[idx + 1]) + source = callflow.utils.sanitize_name(path[idx]) + target = callflow.utils.sanitize_name(path[idx + 1]) edges.append({"source": source, "target": target}) return edges diff --git a/callflow/modules/parameter_projection.py b/callflow/modules/parameter_projection.py index cdecd85b..ac112c99 100644 --- a/callflow/modules/parameter_projection.py +++ b/callflow/modules/parameter_projection.py @@ -24,23 +24,22 @@ class ParameterProjection: - def __init__(self, state, similarities={}, targetDataset="", n_cluster=3): - # self.similarities = similarities[targetDataset] - # self.datasetOrder = {k: idx for idx, (k, v) in enumerate(similarities.items())} - self.state = state - self.df = state.new_gf.df - self.datasets = state.new_gf.df["dataset"].unique().tolist() + def __init__(self, supergraph, similarities={}, targetDataset="", n_cluster=3): + + self.df = supergraph.gf.df + self.datasets = self.df["dataset"].unique().tolist() self.projection = "MDS" self.clustering = "k_means" self.n_cluster = int(n_cluster) self.targetDataset = targetDataset - if len(self.datasets) > self.n_cluster: + if len(self.datasets) >= self.n_cluster: self.result = self.run() else: self.result = pd.DataFrame({}) def add_df_params(self, dataset): ret = {} + print(self.df) ret["max_inclusive_time"] = self.df.loc[self.df["dataset"] == dataset][ "time (inc)" ].max() diff --git a/callflow/operations/__init__.py b/callflow/operations/__init__.py new file mode 100644 index 00000000..7d815be2 --- /dev/null +++ b/callflow/operations/__init__.py @@ -0,0 +1,5 @@ +from .process import Process +from .group import Group +from .union_delete import Union +from .filter import Filter +from .read_config import ConfigFileReader diff --git a/callflow/pipeline/filter_networkx.py b/callflow/operations/filter.py similarity index 58% rename from callflow/pipeline/filter_networkx.py rename to callflow/operations/filter.py index 36dea19b..75731c8f 100644 --- a/callflow/pipeline/filter_networkx.py +++ b/callflow/operations/filter.py @@ -7,35 +7,41 @@ LOGGER = callflow.get_logger(__name__) -class FilterNetworkX: - def __init__(self, state): - self.df = state.new_gf.df - self.dataset_df = self.df.groupby(["dataset"]) - self.dataset_idx = {} +class Filter: + def __init__( + self, gf=None, mode="single", filter_by="time (inc)", filter_perc="10" + ): + self.gf = gf + self.filter_perc = filter_perc + self.set_max_min_times() + if filter_by == "time (inc)": + self.gf.df = self.df_by_time_inc() + self.gf.nxg = self.graph_by_time_inc() + elif filter_by == "time": + self.gf.df = self.df_by_time() + self.gf.nxg = self.graph_by_time() + def set_max_min_times(self): self.max_time_inc_list = np.array([]) self.min_time_inc_list = np.array([]) self.max_time_exc_list = np.array([]) self.min_time_exc_list = np.array([]) - count = 0 - for dataset, df in self.dataset_df: - self.dataset_idx[dataset] = count - self.max_time_inc_list = np.hstack( - [self.max_time_inc_list, df["time (inc)"].max()] - ) - self.min_time_inc_list = np.hstack( - [self.min_time_inc_list, df["time (inc)"].min()] - ) - self.max_time_exc_list = np.hstack( - [self.max_time_exc_list, df["time"].max()] - ) - self.min_time_exc_list = np.hstack( - [self.min_time_exc_list, df["time"].min()] - ) - count += 1 - LOGGER.info("Dataset idx: ", self.dataset_idx) + + self.max_time_inc_list = np.hstack( + [self.max_time_inc_list, self.gf.df["time 
(inc)"].max()] + ) + self.min_time_inc_list = np.hstack( + [self.min_time_inc_list, self.gf.df["time (inc)"].min()] + ) + self.max_time_exc_list = np.hstack( + [self.max_time_exc_list, self.gf.df["time"].max()] + ) + self.min_time_exc_list = np.hstack( + [self.min_time_exc_list, self.gf.df["time"].min()] + ) + LOGGER.info(f"Min. time (inc): {self.min_time_inc_list}") LOGGER.info(f"Max. time (inc): {self.max_time_inc_list}") LOGGER.info(f"Min. time (exc): {self.min_time_exc_list}") @@ -46,35 +52,33 @@ def set_max_min_times(self): self.max_time_exc = np.max(self.max_time_exc_list) self.min_time_exc = np.min(self.min_time_exc_list) - def filter_df_by_time_inc(self, perc): - LOGGER.debug(f"[Filter] By Inclusive time : {perc}") - df = self.df.loc[(self.df["time (inc)"] > perc * 0.01 * self.max_time_inc)] + def df_by_time_inc(self): + LOGGER.debug(f"[Filter] By Inclusive time : {self.filter_perc}") + df = self.gf.df.loc[ + (self.gf.df["time (inc)"] > self.filter_perc * 0.01 * self.max_time_inc) + ] filter_call_sites = df["name"].unique() return df[df["name"].isin(filter_call_sites)] - def filter_df_by_time(self, perc): - LOGGER.debug(f"[Filter] By Exclusive time : {perc}") - # df = self.df.loc[self.df['time'] > perc * 0.01 * self.max_time_exc] - df = self.df.loc[self.df["time"] > perc] + def df_by_time(self, perc): + LOGGER.debug(f"[Filter] By Exclusive time : {self.filter_perc}") + df = self.gf.df.loc[self.gf.df["time"] > self.filter_perc] filter_call_sites = df["name"].unique() - print(filter_call_sites) return df[df["name"].isin(filter_call_sites)] - def filter_graph_by_time_inc(self, df, g): - callsites = df["name"].unique() + def graph_by_time_inc(self): + callsites = self.gf.df["name"].unique() ret = nx.DiGraph() - - for edge in g.edges(): + for edge in self.gf.nxg.edges(): # If source is present in the callsites list if edge[0] in callsites and edge[1] in callsites: ret.add_edge(edge[0], edge[1]) else: - LOGGER.info(f"Removing the edge: {edge}") + LOGGER.debug(f"Removing the edge: {edge}") return ret - # Refer https://stackoverflow.com/questions/28095646/finding-all-paths-walks-of-given-length-in-a-networkx-graph def findPaths(self, g, u, n, excludeSet=None): if excludeSet == None: excludeSet = set([u]) @@ -96,7 +100,7 @@ def findPaths(self, g, u, n, excludeSet=None): excludeSet.remove(u) return paths - def filter_graph_by_time(self, df, g): + def graph_by_time(self, df, g): callsites = df["name"].unique() ret = nx.DiGraph() diff --git a/callflow/pipeline/group_by_module.py b/callflow/operations/group.py similarity index 70% rename from callflow/pipeline/group_by_module.py rename to callflow/operations/group.py index b9432630..a3df6881 100644 --- a/callflow/pipeline/group_by_module.py +++ b/callflow/operations/group.py @@ -3,57 +3,119 @@ import networkx as nx from ast import literal_eval as make_list +import callflow -class Callsite: - def __init__(self, name, module): - self.name = name - self.module = module +LOGGER = callflow.get_logger(__name__) -class groupBy: - def __init__(self, state, group_by): - self.state = state - # self.g = state.g - # self.df = self.state.df - self.g = self.state.new_gf.nxg - self.df = self.state.new_gf.df +class Group(callflow.GraphFrame): + def __init__(self, gf=None, group_by="name"): + self.gf = gf self.group_by = group_by - self.eliminate_funcs = [] + + # Data. + self.callsite_module_map = self.gf.df.set_index("name")["module"].to_dict() + self.callsite_path_map = self.gf.df.set_index("name")["path"].to_dict() + + # Variables used by grouping operation. 
self.entry_funcs = {} - self.module_func_map = {} self.other_funcs = {} - self.module_id_map = {} - - self.drop_eliminate_funcs() - self.name_module_map = self.df.set_index("name")["module"].to_dict() - self.name_path_map = self.df.set_index("name")["path"].to_dict() - - self.run() - self.df = self.state.new_gf.df - self.graph = self.state.new_gf.graph - # self.df = self.state.df - # self.graph = self.state.graph - - # Drop all entries user does not want to see. - def drop_eliminate_funcs(self): - for idx, func in enumerate(self.eliminate_funcs): - # self.state.df = self.state.df[self.state.df["module"] != func] - self.state.new_gf.df = self.state.new_gf.df[ - self.state.new_gf.df["module"] != func - ] + # TODO: remove this. + # self.module_id_map = {} + + self.compute() + + def compute(self): + group_path = {} + component_path = {} + component_level = {} + entry_func = {} + show_node = {} + node_name = {} + module = {} + change_name = {} + + # module_idx = {} + # module_id_map = {} + # module_count = 0 + + LOGGER.debug( + f"Nodes: {len(self.gf.nxg.nodes())}, Edges: {len(self.gf.nxg.edges())}" + ) + + for idx, edge in enumerate(self.gf.nxg.edges()): + snode = edge[0] + tnode = edge[1] + + if "/" in snode: + snode = snode.split("/")[-1] + if "/" in tnode: + tnode = tnode.split("/")[-1] + + spath = self.callsite_path_map[snode] + tpath = self.callsite_path_map[tnode] + + stage1 = time.perf_counter() + temp_group_path_results = self.create_group_path(spath) + group_path[snode] = temp_group_path_results + stage2 = time.perf_counter() + + stage3 = time.perf_counter() + component_path[snode] = self.create_component_path(spath, group_path[snode]) + component_level[snode] = len(component_path[snode]) + stage4 = time.perf_counter() + + temp_group_path_results = self.create_group_path(tpath) + group_path[tnode] = temp_group_path_results + + component_path[tnode] = self.create_component_path(tpath, group_path[tnode]) + component_level[tnode] = len(component_path[tnode]) + + if component_level[snode] == 2: + entry_func[snode] = True + show_node[snode] = True + else: + entry_func[snode] = False + show_node[snode] = False + + node_name[snode] = self.callsite_module_map[snode] + "=" + snode + + # TODO: remove if not used. + # if module[tnode] not in module_id_map: + # module_count += 1 + # module_id_map[module[tnode]] = module_count + # module_idx[tnode] = module_id_map[module[tnode]] + # else: + # module_idx[tnode] = module_id_map[module[tnode]] + + if component_level[tnode] == 2: + entry_func[tnode] = True + show_node[tnode] = True + else: + entry_func[tnode] = False + show_node[tnode] = False + + node_name[tnode] = self.callsite_module_map[snode] + "=" + tnode + + self.update_df("group_path", group_path) + self.update_df("component_path", component_path) + self.update_df("show_node", entry_func) + self.update_df("vis_name", node_name) + self.update_df("component_level", component_level) + # self.update_df("mod_index", module_idx) + self.update_df("entry_function", entry_func) def create_group_path(self, path): if isinstance(path, str): path = make_list(path) - group_path = [] prev_module = None for idx, callsite in enumerate(path): if idx == 0: # Assign the first callsite as from_callsite and not push into an array. 
from_callsite = callsite - - from_module = self.name_module_map[from_callsite] + # from_module = self.entire_df.loc[self.entire_df['name'] == from_callsite]['module'].unique()[0] + from_module = self.callsite_module_map[from_callsite] # Store the previous module to check the hierarchy later. prev_module = from_module @@ -75,8 +137,8 @@ def create_group_path(self, path): to_callsite = callsite if "/" in to_callsite: to_callsite = to_callsite.split("/")[-1] - # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0] - to_module = self.name_module_map[to_callsite] + + to_module = self.callsite_module_map[to_callsite] if prev_module != to_module: group_path.append(to_module + "=" + to_callsite) @@ -96,12 +158,8 @@ def create_group_path(self, path): from_callsite = path[idx - 1] to_callsite = callsite - # Get their modules. - # from_module = self.entire_df.loc[self.entire_df['name'] == from_callsite]['module'].unique()[0] - # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0] - - from_module = self.name_module_map[from_callsite] - to_module = self.name_module_map[to_callsite] + from_module = self.callsite_module_map[from_callsite] + to_module = self.callsite_module_map[to_callsite] # Create the entry function and other function dict if not already present. if to_module not in self.entry_funcs: @@ -123,7 +181,7 @@ def create_group_path(self, path): elif to_module == prev_module: to_callsite = callsite # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0] - to_module = self.name_module_map[to_callsite] + to_module = self.callsite_module_map[to_callsite] prev_module = to_module @@ -140,7 +198,7 @@ def create_component_path(self, path, group_path): node_func = node if "/" in node: node = node.split("/")[-1] - module = self.name_module_map[node] + module = self.callsite_module_map[node] if component_module == module: component_path.append(node_func) @@ -148,63 +206,6 @@ def create_component_path(self, path, group_path): return tuple(component_path) def update_df(self, col_name, mapping): - self.df[col_name] = self.df["name"].apply( + self.gf.df[col_name] = self.gf.df["name"].apply( lambda node: mapping[node] if node in mapping.keys() else "" ) - - def run(self): - group_path = {} - component_path = {} - component_level = {} - entry_func = {} - show_node = {} - node_name = {} - module = {} - change_name = {} - module_idx = {} - source_nid = {} - - module_id_map = {} - module_count = 0 - - edge_count = 0 - - for edge in self.g.edges(): - edge_count += 1 - snode = edge[0] - tnode = edge[1] - - spath = self.name_path_map[snode] - tpath = self.name_path_map[tnode] - - temp_group_path_results = self.create_group_path(spath) - group_path[snode] = temp_group_path_results - - component_path[snode] = self.create_component_path(spath, group_path[snode]) - component_level[snode] = len(component_path[snode]) - module[snode] = self.name_module_map[snode] - - temp_group_path_results = self.create_group_path(tpath) - group_path[tnode] = temp_group_path_results - - component_path[tnode] = self.create_component_path(tpath, group_path[tnode]) - component_level[tnode] = len(component_path[tnode]) - module[tnode] = self.name_module_map[tnode] - - if component_level[snode] == 2: - entry_func[snode] = True - show_node[snode] = True - else: - entry_func[snode] = False - show_node[snode] = False - - node_name[snode] = self.name_module_map[snode] + "=" + snode - - self.update_df("group_path", group_path) - 
self.update_df("component_path", component_path) - self.update_df("show_node", entry_func) - self.update_df("vis_name", node_name) - self.update_df("component_level", component_level) - self.update_df("change_name", change_name) - self.update_df("mod_index", module_idx) - self.update_df("entry_function", entry_func) diff --git a/callflow/pipeline/process.py b/callflow/operations/process.py similarity index 58% rename from callflow/pipeline/process.py rename to callflow/operations/process.py index d6bf34fd..9961b3f8 100644 --- a/callflow/pipeline/process.py +++ b/callflow/operations/process.py @@ -16,79 +16,28 @@ import numpy as np from scipy.stats import kurtosis, skew -from callflow.utils import ( - sanitizeName, - visModuleCallsiteName, - getNodeDictFromFrame, - getPathListFromFrames, -) - -# from callflow.logger import Log import callflow LOGGER = callflow.get_logger(__name__) -""" -# no need for this decorator -def logger(func): - @wraps(func) - def tmp(*args, **kwargs): - log = Log("process") - log.info("Preprocessing : {0}".format(func.__name__)) - return func(*args, **kwargs) - return tmp -""" - - -class PreProcess: +class Process: """ Preprocess the dataframe Builder object Preprocess.add_X().add_Y()..... """ - def __init__(self, builder): - self.gf = builder.gf - self.df = builder.df - self.graph = builder.graph + def __init__(self, gf, tag): + self.gf = gf + self.tag = tag class Builder(object): - def __init__(self, state, gf_type="entire"): - # self.log = Log("process") - self.state = state + def __init__(self, gf, tag): + self.gf = gf + self.tag = tag - self.callers = {} - self.callees = {} - self.frames = {} - self.paths = {} - self.hatchet_nodes = {} - - if gf_type == "filter": - # self.gf = state.gf - # self.df = state.df - # self.graph = state.entire_graph - self.gf = state.new_gf - self.df = state.new_gf.df - self.graph = state.new_entire_gf.graph - elif gf_type == "entire": - # self.gf = state.entire_gf - # self.df = state.entire_df - # self.graph = state.entire_graph - self.gf = state.new_entire_gf - self.df = state.new_entire_gf.df - self.graph = state.new_entire_gf.graph - - # Logger Information - self.cct_nodes = [] - self.callgraph_nodes = [] - self.supergraph_nodes = [] - self.unmapped_targets = [] - - self.callgraph_nodes_np = np.array([]) - self.cct_nodes_np = np.array([]) self.graphMapper() - self.map = {} def convertFrameList(self, nodes): ret = [] @@ -97,18 +46,23 @@ def convertFrameList(self, nodes): return ret def graphMapper(self): - graph = self.graph + self.callers = {} + self.callees = {} + self.paths = {} + self.hatchet_nodes = {} - for node in graph.traverse(): - node_dict = getNodeDictFromFrame(node.frame) + for node in self.gf.graph.traverse(): + node_dict = callflow.utils.node_dict_from_frame(node.frame) if node_dict["type"] == "loop": - node_name = "Loop@" + sanitizeName( + node_name = "Loop@" + callflow.utils.sanitize_name( node_dict["name"] + ":" + str(node_dict["line"]) ) elif node_dict["type"] == "statement": node_name = ( - sanitizeName(node_dict["name"]) + ":" + str(node_dict["line"]) + callflow.utils.sanitize_name(node_dict["name"]) + + ":" + + str(node_dict["line"]) ) else: node_name = node_dict["name"] @@ -120,19 +74,19 @@ def graphMapper(self): self.hatchet_nodes[node_name] = node def build(self): - return PreProcess(self) + return Process(self.gf, self.tag) # Add the path information from the node object - # @logger def add_path(self): self.raiseExceptionIfNodeCountNotEqual(self.paths) - self.df["path"] = self.df["name"].apply( - lambda 
node_name: getPathListFromFrames(self.paths[node_name]) + self.gf.df["path"] = self.gf.df["name"].apply( + lambda node_name: callflow.utils.path_list_from_frames( + self.paths[node_name] + ) ) return self # Imbalance percentage Series in the dataframe - # @logger def add_imbalance_perc(self): inclusive = {} exclusive = {} @@ -145,8 +99,8 @@ def add_imbalance_perc(self): kurtosis_inclusive = {} kurtosis_exclusive = {} - for node_name in self.df["name"].unique(): - node_df = self.df.loc[self.df["name"] == node_name] + for node_name in self.gf.df["name"].unique(): + node_df = self.gf.df.loc[self.gf.df["name"] == node_name] max_incTime = node_df["time"].mean() mean_incTime = node_df["time (inc)"].mean() @@ -173,112 +127,112 @@ def add_imbalance_perc(self): kurtosis_inclusive[node_name] = kurtosis(node_df["time (inc)"].tolist()) kurtosis_exclusive[node_name] = kurtosis(node_df["time"].tolist()) - self.df["imbalance_perc_inclusive"] = self.df["name"].apply( + self.gf.df["imbalance_perc_inclusive"] = self.gf.df["name"].apply( lambda name: inclusive[name] ) - self.df["imbalance_perc_exclusive"] = self.df["name"].apply( + self.gf.df["imbalance_perc_exclusive"] = self.gf.df["name"].apply( lambda name: exclusive[name] ) - self.df["std_deviation_inclusive"] = self.df["name"].apply( + self.gf.df["std_deviation_inclusive"] = self.gf.df["name"].apply( lambda name: std_deviation_inclusive[name] ) - self.df["std_deviation_exclusive"] = self.df["name"].apply( + self.gf.df["std_deviation_exclusive"] = self.gf.df["name"].apply( lambda name: std_deviation_exclusive[name] ) - self.df["skewness_inclusive"] = self.df["name"].apply( + self.gf.df["skewness_inclusive"] = self.gf.df["name"].apply( lambda name: skewness_inclusive[name] ) - self.df["skewness_exclusive"] = self.df["name"].apply( + self.gf.df["skewness_exclusive"] = self.gf.df["name"].apply( lambda name: skewness_exclusive[name] ) - self.df["kurtosis_inclusive"] = self.df["name"].apply( + self.gf.df["kurtosis_inclusive"] = self.gf.df["name"].apply( lambda name: kurtosis_inclusive[name] ) - self.df["kurtosis_exclusive"] = self.df["name"].apply( + self.gf.df["kurtosis_exclusive"] = self.gf.df["name"].apply( lambda name: kurtosis_exclusive[name] ) return self - # @logger def add_callers_and_callees(self): - self.df["callees"] = self.df["name"].apply(lambda node: self.callees[node]) - self.df["callers"] = self.df["name"].apply(lambda node: self.callers[node]) + self.gf.df["callees"] = self.gf.df["name"].apply( + lambda node: self.callees[node] + ) + self.gf.df["callers"] = self.gf.df["name"].apply( + lambda node: self.callers[node] + ) return self # node_name is different from name in dataframe. So creating a copy of it. 
- # @logger def add_vis_node_name(self): - self.module_group_df = self.df.groupby(["module"]) + self.module_group_df = self.gf.df.groupby(["module"]) self.module_callsite_map = self.module_group_df["name"].unique() - self.name_group_df = self.df.groupby(["name"]) + self.name_group_df = self.gf.df.groupby(["name"]) self.callsite_module_map = self.name_group_df["module"].unique().to_dict() - self.df["vis_node_name"] = self.df["name"].apply( - lambda name: sanitizeName(self.callsite_module_map[name][0]) + self.gf.df["vis_node_name"] = self.gf.df["name"].apply( + lambda name: callflow.utils.sanitize_name( + self.callsite_module_map[name][0] + ) + "=" + name ) return self - # @logger def add_node_name_hpctoolkit(self, node_name_map): - self.df["node_name"] = self.df["name"].apply( + self.gf.df["node_name"] = self.gf.df["name"].apply( lambda name: node_name_map[name] ) return self - # @logger def add_module_name_hpctoolkit(self): - self.df["module"] = self.df["module"].apply(lambda name: sanitizeName(name)) + self.gf.df["module"] = self.gf.df["module"].apply( + lambda name: callflow.utils.sanitize_name(name) + ) return self - # @logger def add_node_name_caliper(self, node_module_map): - self.df["node_name"] = self.df["name"].apply( + self.gf.df["node_name"] = self.gf.df["name"].apply( lambda name: name_module_map[name] ) - # @logger def add_module_name_caliper(self, module_map): - self.df["module"] = self.df["name"].apply(lambda name: module_map[name]) + self.gf.df["module"] = self.gf.df["name"].apply( + lambda name: module_map[name] + ) return self - # @logger def add_dataset_name(self): - self.df["dataset"] = self.state.name + self.gf.df["dataset"] = self.tag return self - # @logger def add_rank_column(self): - if "rank" not in self.df.columns: - self.df["rank"] = 0 + if "rank" not in self.gf.df.columns: + self.gf.df["rank"] = 0 return self - # @logger def add_time_columns(self): - if "time (inc)" not in self.df.columns: - self.df["time (inc)"] = self.df["inclusive#time.duration"] + if "time (inc)" not in self.gf.df.columns: + self.gf.df["time (inc)"] = self.gf.df["inclusive#time.duration"] - if "time" not in self.df.columns: - self.df["time"] = self.df["sum#time.duration"] + if "time" not in self.gf.df.columns: + self.gf.df["time"] = self.gf.df["sum#time.duration"] return self - # @logger def create_name_module_map(self): self.name_module_map = ( - self.df.groupby(["name"])["module"].unique().to_dict() + self.gf.df.groupby(["name"])["module"].unique().to_dict() ) return self def raiseExceptionIfNodeCountNotEqual(self, attr): map_node_count = len(attr.keys()) - df_node_count = len(self.df["name"].unique()) + df_node_count = len(self.gf.df["name"].unique()) LOGGER.debug( f"[Validation] Map contains: {map_node_count} callsites, graph contains: {df_node_count} callsites" ) @@ -287,9 +241,8 @@ def raiseExceptionIfNodeCountNotEqual(self, attr): f"Unmatched Preprocessing maps: Map contains: {map_node_count} nodes, graph contains: {df_node_count} nodes" ) - # @logger def logInformation(self): LOGGER.info(f"CCT node count : {len(self.cct_nodes)}") LOGGER.info(f"CallGraph node count: {len(self.callgraph_nodes)}") - LOGGER.info(f"SuperGraph node count: {len(self.df['module'].unique())}") + LOGGER.info(f"SuperGraph node count: {len(self.gf.df['module'].unique())}") return self diff --git a/callflow/pipeline/read_config.py b/callflow/operations/read_config.py similarity index 98% rename from callflow/pipeline/read_config.py rename to callflow/operations/read_config.py index 2bf6719f..73ba8b9a 100644 --- 
a/callflow/pipeline/read_config.py +++ b/callflow/operations/read_config.py @@ -39,6 +39,7 @@ def __init__(self, filepath=None, config_json=None): self.datasets = self.json["datasets"] self.runName = self.json["run_name"] self.save_path = os.path.join(self.data_path, ".callflow") + self.read_parameter = self.json["read_parameter"] self.run() diff --git a/callflow/operations/union_delete.py b/callflow/operations/union_delete.py new file mode 100644 index 00000000..4878ba75 --- /dev/null +++ b/callflow/operations/union_delete.py @@ -0,0 +1,74 @@ +import networkx as nx + +# Mostly derive from supergraph. +# Should contain the vector that stores the properties as explained in paper. +# should contain a function `create` which contains the +class Union(nx.DiGraph): + def __init__(self): + self.union = nx.DiGraph() + + # Return the union of graphs G and H. + def unionize(self, nxg, name=None, rename=(None, None)): + if not self.union.is_multigraph() == H.is_multigraph(): + raise nx.NetworkXError("G and H must both be graphs or multigraphs.") + + self.union.graph.update(nxg) + + renamed_nodes = self.add_prefix(nxg, rename[1]) + + LOGGER.debug("-=========================-") + LOGGER.debug("Nodes in R and H are same? ", set(self.union) == set(nxg)) + if set(self.union) != set(H): + LOGGER.debug("Difference is ", list(set(H) - set(self.union))) + LOGGER.debug("Nodes in R", set(self.union)), + LOGGER.debug("Nodes in H", set(nxg)) + LOGGER.debug("-=========================-") + + if nxg.is_multigraph(): + new_edges = nxg.edges(keys=True, data=True) + else: + new_edges = nxg.edges(data=True) + + # add nodes and edges. + self.union.add_nodes_from(nxg) + self.union.add_edges_from(new_edges) + + # add node attributes for each run + for n in renamed_nodes: + self.add_node_attributes(nxg, n, name) + + # rename graph to obtain disjoint node labels + def add_prefix(self, graph, prefix): + if prefix is None: + return graph + + def label(x): + if is_string_like(x): + name = prefix + x + else: + name = prefix + repr(x) + return name + + return nx.relabel_nodes(graph, label) + + def add_edge_attributes(self): + number_of_runs_mapping = self.number_of_runs() + nx.set_edge_attributes( + self.union, name="number_of_runs", values=number_of_runs_mapping + ) + + def number_of_runs(self): + ret = {} + for idx, name in enumerate(self.unionuns): + for edge in self.unionuns[name].edges(): + if edge not in ret: + ret[edge] = 0 + ret[edge] += 1 + return ret + + def add_node_attributes(self, H, node, dataset_name): + for idx, (key, val) in enumerate(H.nodes.items()): + if dataset_name not in self.union.nodes[node]: + self.union.nodes[node][dataset_name] = 0 + if key == node: + self.union.nodes[node][dataset_name] = 1 diff --git a/callflow/pipeline/__init__.py b/callflow/pipeline/__init__.py deleted file mode 100644 index 9e43ccf6..00000000 --- a/callflow/pipeline/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .read_config import ConfigFileReader -from .filter_networkx import FilterNetworkX -from .group_by_module import groupBy -from .process import PreProcess -from .index import Pipeline -from .state import State -from .convert_hatchet_to_networkx import HatchetToNetworkX diff --git a/callflow/pipeline/convert_hatchet_to_networkx.py b/callflow/pipeline/convert_hatchet_to_networkx.py deleted file mode 100644 index 20d5dd33..00000000 --- a/callflow/pipeline/convert_hatchet_to_networkx.py +++ /dev/null @@ -1,144 +0,0 @@ -import networkx as nx -import math -import json -from ast import literal_eval as make_tuple - -import 
callflow - -LOGGER = callflow.get_logger(__name__) -from callflow.utils import getNodeDictFromFrame, sanitizeName - - -class HatchetToNetworkX(nx.Graph): - # Attributes: - # 1. State => Pass the state which needs to be handled. - # 2. path => '', 'path', 'group_path' or 'component_path' - # 3. construct_graph -> To decide if we should construct graph from path - # 4. add_data => To - def __init__( - self, - state, - graph_type="entire", - path_column_name="path", - construct_graph=True, - add_data=True, - ): - super(HatchetToNetworkX, self).__init__() - self.path_column_name = path_column_name - self.state = state - - if graph_type == "entire": - self.df = state.new_entire_gf.df - self.graph = state.new_entire_gf.graph - else: - self.df = state.new_gf.df - self.graph = state.new_gf.graph - - if construct_graph: - LOGGER.info("Creating a Graph for {0}.".format(self.state.name)) - self.nxg = nx.DiGraph() - self.add_paths_from_graph() - else: - print("Using the existing graph from state {0}".format(state.name)) - self.nxg = state.new_gf.nxg - - self.adj_matrix = nx.adjacency_matrix(self.nxg) - self.dense_adj_matrix = self.adj_matrix.todense() - - # TODO: Store the adjacency matrix also somewhere. - - if add_data: - self.add_node_attributes() - self.add_edge_attributes() - else: - pass - - # TODO: Need to raise exception when the state.g is incorrect. - # self.raiseExceptionIfNetworkXGraphIsIncorrect() - - def no_cycle_path(self, path): - ret = [] - mapper = {} - for idx, elem in enumerate(path): - if elem not in mapper: - mapper[elem] = 1 - ret.append(elem) - else: - ret.append(elem + "_" + str(mapper[elem])) - mapper[elem] += 1 - - return tuple(ret) - - # This is really slow for large dataframes. - def add_paths_from_df(self): - for idx, row in self.df.iterrows(): - if row.show_node: - if isinstance(row[self.path_column_name], list): - path_tuple = row[self.path_column_name] - else: - path_tuple = make_tuple(row[self.path_column_name]) - corrected_path = self.no_cycle_path(path_tuple) - self.nxg.add_path(corrected_path) - - def add_paths_from_graph(self): - graph = self.graph - - for root in graph.roots: - node_gen = root.traverse() - - root_dict = getNodeDictFromFrame(root.frame) - root_name = root_dict["name"] - root_paths = root.paths() - node = root - - try: - while node: - node_dict = getNodeDictFromFrame(node.frame) - node_name = node_dict["name"] - - # Get all node paths from hatchet. 
- node_paths = node.paths() - - # - for node_path in node_paths: - if len(node_path) >= 2: - - source_node_dict = getNodeDictFromFrame(node_path[-2]) - target_node_dict = getNodeDictFromFrame(node_path[-1]) - - if source_node_dict["line"] != "NA": - source_node_name = ( - sanitizeName(source_node_dict["name"]) - + ":" - + str(source_node_dict["line"]) - ) - else: - source_node_name = sanitizeName( - source_node_dict["name"] - ) - if target_node_dict["line"] != "NA": - target_node_name = ( - sanitizeName(target_node_dict["name"]) - + ":" - + str(target_node_dict["line"]) - ) - else: - target_node_name = sanitizeName( - target_node_dict["name"] - ) - self.nxg.add_edge(source_node_name, target_node_name) - node = next(node_gen) - - except StopIteration: - pass - finally: - del root - - def add_node_attributes(self): - pass - - def add_edge_attributes(self): - pass - - def raiseExceptionIfNetworkXGraphIsIncorrect(self): - print(len(self.graph), len(self.nxg.nodes)) diff --git a/callflow/pipeline/filter_hatchet.py b/callflow/pipeline/filter_hatchet.py deleted file mode 100644 index e8a85bae..00000000 --- a/callflow/pipeline/filter_hatchet.py +++ /dev/null @@ -1,90 +0,0 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. -############################################################################## -import pandas as pd -import time - -import callflow - -LOGGER = callflow.get_logger(__name__) - - -class FilterHatchet: - """ - Filter the graphframe. - Input: State object, parameter to filterBy (could be inclusive/exclusive, - filterPerc: user provided filter percentage (1-100)) - """ - - def __init__(self, state, filterBy, filterPerc): - self.state = state - - self.graph = state.new_entire_gf.graph - self.df = state.new_entire_gf.df - self.gf = state.new_entire_gf - - # self.df.set_index(['node', 'rank'], drop=False, inplace=True) - - # self.df = pd.MultiIndex.from_frame(self.df, names=['node', 'rank']) - self.gf.dataframe = self.df - - self.filterBy = filterBy - self.filterPercInDecimals = int(1) / 100 - # self.filterPercInDecimals = 0.001 - - self.fgf = self.run() - self.fgf = self.graft() - - # update df and graph after filtering. - self.df = self.fgf.dataframe - self.graph = self.fgf.graph - - def run(self): - LOGGER.info("Filtering the graph.") - t = time.time() - if self.filterBy == "Inclusive": - max_inclusive_time = utils.getMaxIncTime_from_gf(self.graph, self.df) - filter_gf = self.gf.filter( - lambda x: True - if (x["time (inc)"] > self.filterPercInDecimals * max_inclusive_time) - else False - ) - elif filterBy == "Exclusive": - max_exclusive_time = utils.getMaxExcTime_from_gf(self.graph, self.df) - LOGGER.info("[Filter] By Exclusive time = {0})".format(max_exclusive_time)) - filter_gf = self.gf.filter( - lambda x: True - if (x["time"] >= self.filterPercInDecimals * max_exclusive_time) - else False - ) - else: - LOGGER.warn("Not filtering.... Can take forever. Thou were warned") - filter_gf = self.gf - - LOGGER.info( - "[Filter] Removed {0} rows. 
(time={1})".format( - self.gf.dataframe.shape[0] - filter_gf.dataframe.shape[0], - time.time() - t, - ) - ) - - return filter_gf - - def graft(self): - LOGGER.info("Squashing the graph.") - t = time.time() - fgf = self.fgf.squash() - LOGGER.info( - "[Squash] {1} rows in dataframe (time={0})".format( - time.time() - t, fgf.dataframe.shape[0] - ) - ) - return fgf diff --git a/callflow/pipeline/group_by_module_ensemble.py b/callflow/pipeline/group_by_module_ensemble.py deleted file mode 100644 index 41a33242..00000000 --- a/callflow/pipeline/group_by_module_ensemble.py +++ /dev/null @@ -1,266 +0,0 @@ -import pandas as pd -import time -import networkx as nx -from ast import literal_eval as make_list - -import callflow - -LOGGER = callflow.get_logger(__name__) - - -class ensembleGroupBy: - def __init__(self, state_entire, state_filter, group_by): - self.state_filter = state_filter - self.state_entire = state_entire - self.entire_df = self.state_entire.new_gf.df - self.filter_df = self.state_filter.new_gf.df - self.filter_g = self.state_filter.new_gf.nxg - - self.group_by = group_by - self.eliminate_funcs = [] - self.entry_funcs = {} - self.module_func_map = {} - self.other_funcs = {} - self.module_id_map = {} - - self.drop_eliminate_funcs() - self.name_module_map = self.entire_df.set_index("name")["module"].to_dict() - self.entire_df["path"] = self.entire_df["path"].apply( - lambda path: make_list(path) - ) - self.name_path_map = self.entire_df.set_index("name")["path"].to_dict() - - # Drop all entries user does not want to see. - def drop_eliminate_funcs(self): - for idx, func in enumerate(self.eliminate_funcs): - self.state.new_gf.df = self.state.new_gf.df[ - self.state.new_gf.df["module"] != func - ] - - def create_group_path_time(self, path): - if isinstance(path, str): - path = make_list(path) - group_path = [] - prev_module = None - for idx, callsite in enumerate(path): - if idx == 0: - # Assign the first callsite as from_callsite and not push into an array. - from_callsite = callsite - # from_module = self.entire_df.loc[self.entire_df['name'] == from_callsite]['module'].unique()[0] - from_module = self.name_module_map[from_callsite] - - # Store the previous module to check the hierarchy later. - prev_module = from_module - - # Create the entry function and other functions dict. - if from_module not in self.entry_funcs: - self.entry_funcs[from_module] = [] - if from_module not in self.other_funcs: - self.other_funcs[from_module] = [] - - # Push into entry function dict since it is the first callsite. - self.entry_funcs[from_module].append(from_callsite) - - # Append to the group path. - group_path.append(from_module + "=" + from_callsite) - - elif idx == len(path) - 1: - # Final callsite in the path. - to_callsite = callsite - if "/" in to_callsite: - to_callsite = to_callsite.split("/")[-1] - # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0] - to_module = self.name_module_map[to_callsite] - - if prev_module != to_module: - group_path.append(to_module + "=" + to_callsite) - - if to_module not in self.entry_funcs: - self.entry_funcs[to_module] = [] - if to_module not in self.other_funcs: - self.other_funcs[to_module] = [] - - if to_callsite not in self.other_funcs[to_module]: - self.other_funcs[to_module].append(to_callsite) - - if to_callsite not in self.entry_funcs[to_module]: - self.entry_funcs[to_module].append(to_callsite) - else: - # Assign the from and to callsite. 
- from_callsite = path[idx - 1] - to_callsite = callsite - - # Get their modules. - # from_module = self.entire_df.loc[self.entire_df['name'] == from_callsite]['module'].unique()[0] - # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0] - - from_module = self.name_module_map[from_callsite] - to_module = self.name_module_map[to_callsite] - - # Create the entry function and other function dict if not already present. - if to_module not in self.entry_funcs: - self.entry_funcs[to_module] = [] - if to_module not in self.other_funcs: - self.other_funcs[to_module] = [] - - # if previous module is not same as the current module. - if to_module != prev_module: - # TODO: Come back and check if it is in the path. - if to_module in group_path: - prev_module = to_module - else: - group_path.append(to_module + "=" + to_callsite) - prev_module = to_module - if to_callsite not in self.entry_funcs[to_module]: - self.entry_funcs[to_module].append(to_callsite) - - elif to_module == prev_module: - to_callsite = callsite - # to_module = self.entire_df.loc[self.entire_df['name'] == to_callsite]['module'].unique()[0] - to_module = self.name_module_map[to_callsite] - - prev_module = to_module - - if to_callsite not in self.other_funcs[to_module]: - self.other_funcs[to_module].append(to_callsite) - - return group_path - - def create_component_path(self, path, group_path): - component_path = [] - component_module = group_path[len(group_path) - 1].split("=")[0] - - for idx, node in enumerate(path): - node_func = node - if "/" in node: - node = node.split("/")[-1] - module = self.name_module_map[node] - if component_module == module: - component_path.append(node_func) - - component_path.insert(0, component_module) - return tuple(component_path) - - def find_all_paths(self, df): - ret = [] - unique_paths = df["path"].unique() - for idx, path in enumerate(unique_paths): - ret.append(df.loc[df["path"] == path]) - return ret - - def update_df(self, col_name, mapping): - self.filter_df[col_name] = self.filter_df["name"].apply( - lambda node: mapping[node] if node in mapping.keys() else "" - ) - - def run(self): - group_path = {} - component_path = {} - component_level = {} - entry_func = {} - show_node = {} - node_name = {} - module = {} - change_name = {} - module_idx = {} - source_nid = {} - - module_id_map = {} - module_count = 0 - - LOGGER.debug( - f"Nodes: {len(self.filter_g.nodes())}, Edges: {len(self.filter_g.edges())}" - ) - - for idx, edge in enumerate(self.filter_g.edges()): - snode = edge[0] - tnode = edge[1] - - if "/" in snode: - snode = snode.split("/")[-1] - if "/" in tnode: - tnode = tnode.split("/")[-1] - - spath = self.name_path_map[snode] - tpath = self.name_path_map[tnode] - - stage1 = time.perf_counter() - temp_group_path_results = self.create_group_path_time(spath) - group_path[snode] = temp_group_path_results - stage2 = time.perf_counter() - # print(f"Group path: {stage2 - stage1}") - - stage3 = time.perf_counter() - component_path[snode] = self.create_component_path(spath, group_path[snode]) - component_level[snode] = len(component_path[snode]) - stage4 = time.perf_counter() - # print(f"Component path: {stage3 - stage2}") - - temp_group_path_results = self.create_group_path_time(tpath) - group_path[tnode] = temp_group_path_results - - component_path[tnode] = self.create_component_path(tpath, group_path[tnode]) - component_level[tnode] = len(component_path[tnode]) - - # if module[snode] not in module_id_map: - # module_count += 1 - # 
module_id_map[module[snode]] = module_count - # module_idx[snode] = module_id_map[module[snode]] - # else: - # module_idx[snode] = module_id_map[module[snode]] - - if component_level[snode] == 2: - entry_func[snode] = True - show_node[snode] = True - else: - entry_func[snode] = False - show_node[snode] = False - - node_name[snode] = self.name_module_map[snode] + "=" + snode - - # if module[tnode] not in module_id_map: - # module_count += 1 - # module_id_map[module[tnode]] = module_count - # module_idx[tnode] = module_id_map[module[tnode]] - # else: - # module_idx[tnode] = module_id_map[module[tnode]] - - if component_level[tnode] == 2: - entry_func[tnode] = True - show_node[tnode] = True - else: - entry_func[tnode] = False - show_node[tnode] = False - - node_name[tnode] = self.name_module_map[snode] + "=" + tnode - - # print('Node: ', snode) - # print("entry function:", entry_func[snode]) - # print("node path: ", spath) - # print("group path: ", group_path[snode]) - # print("component path: ", component_path[snode]) - # print("component level: ", component_level[snode]) - # print("Show node: ", show_node[snode]) - # print("name: ", node_name[snode]) - # print('Module: ', module[snode]) - # print("=================================") - # print('Node: ', tnode) - # print("entry function:", entry_func[tnode]) - # print("node path: ", tpath) - # print("group path: ", group_path[tnode]) - # print("component path: ", component_path[tnode]) - # print("component level: ", component_level[tnode]) - # print("Show node: ", show_node[tnode]) - # print("name: ", node_name[tnode]) - # print('Module: ', module[tnode]) - # print('#################################') - - self.update_df("group_path", group_path) - self.update_df("component_path", component_path) - self.update_df("show_node", entry_func) - self.update_df("vis_name", node_name) - self.update_df("component_level", component_level) - self.update_df("mod_index", module_idx) - self.update_df("entry_function", entry_func) - - return {"df": self.filter_df, "g": self.filter_g} diff --git a/callflow/pipeline/index.py b/callflow/pipeline/index.py deleted file mode 100644 index c60ef471..00000000 --- a/callflow/pipeline/index.py +++ /dev/null @@ -1,363 +0,0 @@ -import json -from networkx.readwrite import json_graph -import pandas as pd -import os - -# from .create_graphframe import CreateGraphFrame - -from .group_by_module import groupBy -from .group_by_module_ensemble import ensembleGroupBy -from .filter_hatchet import FilterHatchet -from .filter_networkx import FilterNetworkX -from callflow.pipeline.convert_hatchet_to_networkx import HatchetToNetworkX -from callflow.datastructures.uniongraph import UnionGraph -from callflow.algorithms.deltacon_similarity import DeltaConSimilarity -from callflow.modules.auxiliary_ensemble import EnsembleAuxiliary -from .process import PreProcess -from .state import State -from callflow import GraphFrame - -# from callflow.logger import Log -import callflow - -LOGGER = callflow.get_logger(__name__) - - -class Pipeline: - def __init__(self, config): - # self.log = Log("pipeline") - self.config = config - self.dirname = self.config.save_path - self.debug = True - - ##################### Pipeline Functions ########################### - # All pipeline functions avoid the state being mutated by reference to create separate instances of State variables. - - # Create the State from the hatchet's graphframe. 
- def create_gf(self, name): - - state = State(name) - state.new_entire_gf = GraphFrame.from_config(self.config, name) - - print(state.new_entire_gf) - print(type(state.new_entire_gf)) - # state.entire_gf = state.new_entire_gf - # state.entire_df = state.new_entire_gf.df - # state.entire_graph = state.new_entire_gf.graph - - """ - create = CreateGraphFrame(self.config, name) - #state.entire_gf = create.gf - #state.entire_df = create.df - #state.entire_graph = create.graph - """ - - LOGGER.info( - f"Number of call sites in CCT (From dataframe): {len(state.new_entire_gf.df['name'].unique())}" - ) - - return state - - # Pre-process the dataframe and Graph to add attributes to the networkX graph. - # PreProcess class is a builder. Additional attributes can be added by chained calls. - def process_gf(self, state, gf_type): - if self.config.format[state.name] == "hpctoolkit": - preprocess = ( - PreProcess.Builder(state, gf_type) - .add_path() - .create_name_module_map() - .add_callers_and_callees() - .add_dataset_name() - .add_imbalance_perc() - .add_module_name_hpctoolkit() - .add_vis_node_name() - .build() - ) - elif self.config.format[state.name] == "caliper_json": - preprocess = ( - PreProcess.Builder(state, gf_type) - .add_time_columns() - .add_rank_column() - .add_callers_and_callees() - .add_dataset_name() - .add_imbalance_perc() - .add_module_name_caliper(self.config.callsite_module_map) - .create_name_module_map() - .add_vis_node_name() - .add_path() - .build() - ) - - print(preprocess.gf) - state.new_gf = preprocess.gf - # state.df = preprocess.new_gf.df - # state.graph = preprocess.new_gf.graph - self.entire_df = state.new_gf.df - return state - - # Converts a hatchet graph to networkX graph. - def hatchetToNetworkX(self, state, path): - convert = HatchetToNetworkX(state, path, construct_graph=True, add_data=False) - - # state.g = convert.g - state.new_entire_gf.nxg = convert.nxg - state.new_gf.nxg = convert.nxg - - return state - - # Uses the hatchet's filter method. - # Filter by hatchet graphframe. - def filterHatchet(self, state, filterBy, filterPerc): - filter_obj = Filter(state, filterBy, filterPerc) - - state.new_gf = filter_obj.gf - state.new_gf.df = filter_obj.df - state.new_gf.graph = filter_obj.graph - - return state - - # Union of all the networkX graphs. - def union(self, states): - u_graph = UnionGraph() - u_df = pd.DataFrame() - for idx, dataset in enumerate(states): - u_graph.unionize(states[dataset].new_gf.nxg, dataset) - u_df = pd.concat([u_df, states[dataset].new_gf.df], sort=True) - - state = State("union") - state.new_gf = GraphFrame() - state.new_gf.df = u_df - state.new_gf.nxg = u_graph.R - - # state.df = state.new_gf.df - # state.g = state.new_gf.nxg - - """ - #state.df = u_df - #state.g = u_graph.R - """ - - if True: # self.debug: - LOGGER.debug("Done with Union.") - LOGGER.debug( - f"Number of callsites in dataframe: {len(state.new_gf.df['name'].unique())}" - ) - LOGGER.debug( - f"Number of callsites in the graph: {len(state.new_gf.nxg.nodes())}" - ) - LOGGER.debug( - f"Number of modules in the graph: {len(state.new_gf.df['module'].unique())}" - ) - - return state - - # Filter the networkX graph based on the attribute specified in the config file. 
- def filterNetworkX(self, state, perc): - filter_obj = FilterNetworkX(state) - if self.config.filter_by == "time (inc)": - df = filter_obj.filter_df_by_time_inc(perc) - g = filter_obj.filter_graph_by_time_inc(df, state.new_gf.nxg) - elif self.config.filter_by == "time": - df = filter_obj.filter_df_by_time(perc) - g = filter_obj.filter_graph_by_time(df, state.new_gf.nxg) - - state = State("filter_union") - state.new_gf = GraphFrame() - state.new_gf.df = df - state.new_gf.nxg = g - - # state.df = state.new_gf.df - # state.g = state.new_gf.nxg - - """ - #state.df = df - #state.g = g - """ - - if True: # self.debug: - LOGGER.debug("Done with Filtering the Union graph.") - LOGGER.debug( - f"Number of callsites in dataframe: {len(state.new_gf.df['name'].unique())}" - ) - LOGGER.debug( - f"Number of callsites in the graph: {len(state.new_gf.nxg.nodes())}" - ) - LOGGER.debug( - f"Number of modules in the graph: {len(state.new_gf.df['module'].unique())}" - ) - - return state - - def group(self, state, attr): - print(state.new_gf.nxg) - grouped_graph = groupBy(state, attr) - - # state.new_gf = groupBy(state, attr) - - state.new_gf.nxg = grouped_graph.g - state.new_gf.df = grouped_graph.df - return state - - def ensemble_group(self, state, attr): - grouped_graph = ensembleGroupBy( - state["ensemble_entire"], state["ensemble_filter"], attr - ).run() - - state = State("ensemble_union") - state.new_gf = GraphFrame() - state.new_gf.df = grouped_graph["df"] - state.new_gf.nxg = grouped_graph["g"] - # state.g = state.new_gf.nxg - # state.df = state.new_gf.df - - """ - #state.g = grouped_graph["g"] - #state.df = grouped_graph["df"] - """ - - if True: # self.debug: - LOGGER.debug( - f"Number of callsites in dataframe: {len(state.new_gf.df['name'].unique())}" - ) - LOGGER.debug( - f"Number of callsites in the graph: {len(state.new_gf.nxg.nodes())}" - ) - LOGGER.debug(f"Modules in the graph: {state.new_gf.df['module'].unique()}") - - return state - - ##################### Write Functions ########################### - # Write the dataset's graphframe to the file. - def write_dataset_gf(self, state, state_name, format_of_df, write_graph=True): - # dump the filtered dataframe to csv. - - df_filepath = os.path.join(self.dirname, state_name, format_of_df + "_df.csv") - graph_filepath = os.path.join( - self.dirname, state_name, format_of_df + "_graph.json" - ) - - state.new_gf.df.to_csv(df_filepath) - - g_data = json_graph.node_link_data(state.new_gf.nxg) - with open(graph_filepath, "w") as graphFile: - json.dump(g_data, graphFile) - - # Write the ensemble State to the file. - def write_ensemble_gf(self, states, state_name): - state = states[state_name] - - # dump the filtered dataframe to csv. - df_filepath = os.path.join(self.dirname, state_name + "_df.csv") - graph_filepath = os.path.join(self.dirname, state_name + "_graph.json") - - state.new_gf.df.to_csv(df_filepath) - - g_data = json_graph.node_link_data(state.new_gf.nxg) - with open(graph_filepath, "w") as graphFile: - json.dump(g_data, graphFile) - - # Write the hatchet graph to a text file. - def write_hatchet_graph(self, states, state_name): - state = states[state_name] - gf = state.new_gf - - graph_filepath = os.path.join(self.dirname, state_name, "hatchet_graph.txt") - with open(graph_filepath, "a") as hatchet_graphFile: - hatchet_graphFile.write(gf.tree(color=False)) - - # TODO: why are the filenames hardcoded? 
- graph_filepath = os.path.join( - self.dirname, state_name, "hatchet_graph_10_percent.txt" - ) - with open(graph_filepath, "a") as hatchet_graphFile: - hatchet_graphFile.write(gf.tree(color=False, threshold=0.10)) - - ##################### Read Functions ########################### - # Read the ensemble graph and dataframe. - def read_ensemble_gf(self, name): - LOGGER.info(f"[Process] Reading the union dataframe and graph : {name}") - state = State(name) - dirname = self.config.save_path - - union_df_filepath = os.path.join(dirname, name + "_df.csv") - union_graph_filepath = os.path.join(dirname, name + "_graph.json") - - with open(union_graph_filepath, "r") as union_graphFile: - union_graph = json.load(union_graphFile) - - state.new_gf = GraphFrame() - state.new_gf.nxg = json_graph.node_link_graph(union_graph) - state.new_gf.df = pd.read_csv(union_df_filepath) - - # state.g = state.new_gf.nxg - # state.df = state.new_gf.df - - """ - #state.g = json_graph.node_link_graph(union_graph) - #state.df = pd.read_csv(union_df_filepath) - """ - - return state - - # Read a single dataset, pass the dataset name as a parameter. - def read_dataset_gf(self, name): - state = State(name) - LOGGER.info( - "[Process] Reading the dataframe and graph of state: {0}".format(name) - ) - dataset_dirname = os.path.abspath(os.path.join(__file__, "../../..")) + "/data" - - df_filepath = os.path.join(self.dirname, name, "entire_df.csv") - entire_df_filepath = os.path.join(self.dirname, name, "entire_df.csv") - graph_filepath = os.path.join(self.dirname, name, "entire_graph.json") - entire_graph_filepath = os.path.join(self.dirname, name, "entire_graph.json") - - parameters_filepath = os.path.join( - dataset_dirname, self.config.runName, name, "env_params.txt" - ) - - state.new_gf = GraphFrame() - state.new_gf.df = pd.read_csv(df_filepath) - # state.df = state.new_gf.df - - # state.df = pd.read_csv(df_filepath) - with open(graph_filepath, "r") as filter_graphFile: - graph = json.load(filter_graphFile) - - state.new_gf.nxg = json_graph.node_link_graph(graph) - # state.g = state.new_gf.nxg - # state.g = json_graph.node_link_graph(graph) - - if self.config.runName.split("_")[0] == "osu_bcast": - state.projection_data = {} - for line in open(parameters_filepath, "r"): - s = 0 - for num in line.strip().split(","): - split_num = num.split("=") - state.projection_data[split_num[0]] = split_num[1] - - return state - - # Write the graph similarities to a file. 
- def deltaconSimilarity(self, datasets, states, type): - ret = {} - for idx, dataset in enumerate(datasets): - ret[dataset] = [] - for idx_2, dataset2 in enumerate(datasets): - union_similarity = Similarity(states[dataset2].g, states[dataset].g) - ret[dataset].append(union_similarity.result) - - dirname = self.config.callflow_dir - name = self.config.runName - # similarity_filepath = dirname + "/" + "similarity.json" - similarity_filepath = os.path.join(dirname, "similarity.json") - with open(similarity_filepath, "w") as json_file: - json.dump(ret, json_file) - - def read_all_data(self): - # dirname = self.config.callflow_path - all_data_filepath = os.path.join(self.config.save_path, "all_data.json") - LOGGER.info(f"[Read] {all_data_filepath}") - with open(all_data_filepath, "r") as filter_graphFile: - data = json.load(filter_graphFile) - return data diff --git a/callflow/pipeline/state.py b/callflow/pipeline/state.py deleted file mode 100644 index 11179435..00000000 --- a/callflow/pipeline/state.py +++ /dev/null @@ -1,82 +0,0 @@ -import os - -# from hatchet import * - -from callflow import GraphFrame - - -class State(object): - - # TODO: Assign self.g, self.root... - def __init__(self, dataset_name): - - # it appears we're using name as "union", "filter", etc. - # this is not a data set name! - self.name = dataset_name - - # instead of the old variables, we will use these new ones. - # these are callflow.graphframe object (has gf, df, and networkx) - self.new_gf = None - self.new_entire_gf = None - - # these are the old variables - # self.entire_g = None - # self.entire_df = None - # self.entire_graph = None - # self.g = None - # self.df = None - # self.gf = None - # self.graph = None - - # I cant see where these are used.. - # self.roots = None - # self.map = None - # self.node_hash_map = {} - self.projection_data = {} - - """ - def lookup_by_column(self, _hash, col_name): - # dont think this is used anywhere - assert False - - ret = [] - node_df = self.df.loc[self.df["node"] == self.map[str(_hash)]] - node_df_T = node_df.T.squeeze() - node_df_T_attr = node_df_T.loc[col_name] - if node_df_T_attr is not None: - if type(node_df_T_attr) is str or type(node_df_T_attr) is float: - ret.append(node_df_T_attr) - else: - ret = node_df_T_attr.tolist() - return ret - """ - - def lookup(self, node): - return self.new_gf.lookup(node) - # return self.df.loc[ - # (self.df["name"] == node.callpath[-1]) & (self.df["nid"] == node.nid) - # ] - - def lookup_with_node(self, node): - return self.new_gf.lookup_with_node(node) - # return self.df.loc[self.df["name"] == node.callpath[-1]] - - def lookup_with_name(self, name): - return self.new_gf.lookup_with_name(node) - # return self.df.loc[self.df["name"] == name] - - def lookup_with_vis_nodeName(self, name): - return self.new_gf.lookup_with_name(node) - # return self.df.loc[self.df["vis_node_name"] == name] - - def update_df(self, col_name, mapping): - return self.new_gf.update_df(col_name, mapping) - """ - self.df[col_name] = self.df["name"].apply( - lambda node: mapping[node] if node in mapping.keys() else "" - ) - """ - - def grouped_df(self, attr): - self.gdf[attr] = self.new_gf.df.groupby(attr, as_index=True, squeeze=True) - self.gdfKeys = self.gdf[attr].groups.keys() diff --git a/callflow/pipeline/to_delete_create_graphframe.py b/callflow/pipeline/to_delete_create_graphframe.py deleted file mode 100644 index 294f707a..00000000 --- a/callflow/pipeline/to_delete_create_graphframe.py +++ /dev/null @@ -1,67 +0,0 @@ 
-############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. -############################################################################## - -## TODO: this file is not needed anymore. -# its functionality is not in graphframe.from_config() - -# the functionality has been moved to datastructures/graphframe.py -print("WARNING: ({}) is deprecated and should be deleted!".format(__file__)) - -import pandas as pd -import time -from callflow.utils.logger import Log - -import os -import hatchet as ht - - -class CreateGraphFrame: - """ - Creates a graph frame. - Input : config variable, and run name - Output : State object containing components of graphframe as separate object variables. - """ - - def __init__(self, config, name): - self.log = Log("create_graphframe") - LOGGER.info(f"Creating graphframes: {name}") - self.config = config - self.callflow_path = config.callflow_path - self.name = name - self.run() - - def run(self): - data_path = os.path.abspath( - os.path.join(self.callflow_path, self.config.paths[self.name]) - ) - LOGGER.info(f"Data path: {data_path}") - - if self.config.format[self.name] == "hpctoolkit": - self.gf = ht.GraphFrame.from_hpctoolkit(data_path) - - elif self.config.format[self.name] == "caliper": - self.gf = ht.GraphFrame.from_caliper(data_path) - - elif self.config.format[self.name] == "caliper_json": - self.gf = ht.GraphFrame.from_caliper(data_path, query="") - - elif self.config.format[self.name] == "gprof": - self.gf = ht.GraphFrame.from_grof_dot(data_path) - - elif self.config.format[self.name] == "literal": - self.gf = ht.GraphFrame.from_literal(data_path) - - elif self.config.format[self.name] == "lists": - self.gf = ht.GraphFrame.from_lists(data_path) - - self.df = self.gf.dataframe - self.graph = self.gf.graph diff --git a/callflow/pipeline/unused_gradients.py b/callflow/pipeline/unused_gradients.py deleted file mode 100644 index c0994ce8..00000000 --- a/callflow/pipeline/unused_gradients.py +++ /dev/null @@ -1,223 +0,0 @@ -import numpy as np -from scipy import stats -import statsmodels.nonparametric.api as smnp -import matplotlib.pyplot as plt -import math - -print("WARNING: ({}) is unused in the code and should be deleted!".format(__file__)) - - -class Gradients: - def __init__(self, dfs, binCount="20"): - self.dfs = dfs - self.binCount = binCount - - # Find the rank information. - self.num_of_ranks = {} - max_ranks = 0 - for dataset in self.dfs: - self.num_of_ranks[dataset] = len(self.dfs[dataset]["rank"].unique()) - max_ranks = max(max_ranks, self.num_of_ranks[dataset]) - self.max_ranks = max_ranks - - def iqr(self, arr): - """Calculate the IQR for an array of numbers.""" - a = np.asarray(arr) - self.q1 = stats.scoreatpercentile(a, 25) - self.q2 = stats.scoreatpercentile(a, 50) - self.q3 = stats.scoreatpercentile(a, 75) - - def freedman_diaconis_bins(self, arr): - """Calculate number of hist bins using Freedman-Diaconis rule.""" - # From https://stats.stackexchange.com/questions/798/ - a = np.asarray(arr) - if len(arr) < 2: - return 1 - # Calculate the iqr ranges. 
- self.iqr(arr) - # Calculate the h - h = 2 * (self.q3 - self.q1) / (len(arr) ** (1 / 3)) - # fall back to sqrt(a) bins if iqr is 0 - if h == 0: - return int(np.sqrt(arr.size)) - else: - return int(np.ceil((arr.max() - arr.min()) / h)) - - def convert_dictmean_to_list(self, dictionary): - mean = [] - dataset = {} - for state in dictionary: - d = list(dictionary[state].values()) - # ret.append(max(d)) - mean.append(np.mean(np.array(d))) - dataset[state] = np.mean(np.array(d)) - return [mean, dataset] - - def kde( - self, - data, - gridsize=10, - fft=True, - kernel="gau", - bw="scott", - cut=3, - clip=(-np.inf, np.inf), - ): - if bw == "scott": - bw = stats.gaussian_kde(data).scotts_factor() * data.std(ddof=1) - # print("biwidth is: ", bw) - - kde = smnp.KDEUnivariate(data) - - # create the grid to fit the estimation. - support_min = min(max(data.min() - bw * cut, clip[0]), 0) - support_max = min(data.max() + bw * cut, clip[1]) - # print(support_max, support_min) - x = np.linspace(support_min, support_max, gridsize) - - kde.fit("gau", bw, fft, gridsize=gridsize, cut=cut, clip=clip) - y = kde.density - # print("Y is: ", y.shape) - - return x, y - - def histogram( - self, data, dataset_dict={}, data_min=np.nan, data_max=np.nan, - ): - if np.isnan(data_min) or np.isnan(data_max): - data_min = data.min() - data_max = data.max() - - h, b = np.histogram(data, range=[data_min, data_max], bins=int(self.binCount)) - - # Map the datasets to their histogram indexes. - dataset_position_dict = {} - for dataset in dataset_dict: - mean = dataset_dict[dataset] - for idx, x in np.ndenumerate(b): - if x > float(mean): - dataset_position_dict[dataset] = idx[0] - 1 - break - if idx[0] == len(b) - 1: - dataset_position_dict[dataset] = len(b) - 2 - - return 0.5 * (b[1:] + b[:-1]), h, dataset_position_dict - - def clean_dict(self, in_dict): - ret = {k: in_dict[k] for k in in_dict if not math.isnan(in_dict[k])} - return np.array(tuple(ret)) - - def packByRankDistribution(self, df, metric): - ret = {} - if df.empty: - ret = dict((rank, 0) for rank in range(0, self.max_ranks)) - else: - ranks = df["rank"].tolist() - metric_vals = df[metric].tolist() - # metric_vals = df.groupby("rank").max()[metric].tolist() - ret = dict(zip(ranks, metric_vals)) - return ret - - def get_runtime_data(self, df, column_name, debug=False): - time_df = df[column_name] - time_list = time_df.tolist() - - if len(time_list) == 0: - time_list = [0] * self.max_ranks - - ret = self.packByRankDistribution(df, column_name) - return ret - - def run(self, columnName="name", callsiteOrModule="", targetDataset=""): - dist_inc = {} - dist_exc = {} - mean_inc_dist = {} - max_inc_dist = {} - mean_exc_dist = {} - max_exc_dist = {} - mean_time_inc_map = {} - num_of_bins = {} - kde_grid = {} - hist_inc_grid = {} - hist_exc_grid = {} - - # Get the runtimes for all the runs. - for idx, dataset in enumerate(self.dfs): - node_df = self.dfs[dataset].loc[ - (self.dfs[dataset][columnName] == callsiteOrModule) - ] - debug = False - dist_inc[dataset] = self.get_runtime_data(node_df, "time (inc)", debug) - dist_exc[dataset] = self.get_runtime_data(node_df, "time", debug) - - # convert the dictionary of values to list of values. - temp_inc = self.convert_dictmean_to_list(dist_inc) - dist_inc_list = temp_inc[0] - dataset_inc_list = temp_inc[1] - - temp_exc = self.convert_dictmean_to_list(dist_exc) - dist_exc_list = temp_exc[0] - dataset_exc_list = temp_exc[1] - - # Calculate appropriate number of bins automatically. 
- num_of_bins = self.binCount - - hist_inc_grid = self.histogram(np.array(dist_inc_list), dataset_inc_list) - hist_exc_grid = self.histogram(np.array(dist_exc_list), dataset_exc_list) - - # max_num_of_bins = min(self.freedman_diaconis_bins(np.array(dist_list)), 50) - - # Calculate the KDE grid (x, y) - # kde_grid[vis_node_name] = self.kde(np.array(dist_list), 10) - # kde_x_min = np.min(kde_grid[vis_node_name][0]) - # kde_x_max = np.max(kde_grid[vis_node_name][0]) - # kde_y_min = np.min(kde_grid[vis_node_name][1]) - # kde_y_max = np.max(kde_grid[vis_node_name][1]) - - # print("hist ranges = {} {} {} {}\n" - # .format(hist_x_min, hist_x_max, hist_y_min, hist_y_max)) - - results = { - "Inclusive": { - "bins": num_of_bins, - "dataset": {"mean": dataset_inc_list, "position": hist_inc_grid[2]}, - # "kde": { - # "x": kde_grid[vis_node_name][0].tolist(), - # "y": kde_grid[vis_node_name][1].tolist(), - # "x_min": kde_x_min, - # "x_max": kde_x_max, - # "y_min": kde_y_min, - # "y_max": kde_y_max, - # }, - "hist": { - "x": hist_inc_grid[0].tolist(), - "y": hist_inc_grid[1].tolist(), - "x_min": hist_inc_grid[0][0], - "x_max": hist_inc_grid[0][-1], - "y_min": np.min(hist_inc_grid[1]).astype(np.float64), - "y_max": np.max(hist_inc_grid[1]).astype(np.float64), - }, - }, - "Exclusive": { - "bins": num_of_bins, - "dataset": {"mean": dataset_exc_list, "position": hist_exc_grid[2]}, - # "kde": { - # "x": kde_grid[vis_node_name][0].tolist(), - # "y": kde_grid[vis_node_name][1].tolist(), - # "x_min": kde_x_min, - # "x_max": kde_x_max, - # "y_min": kde_y_min, - # "y_max": kde_y_max, - # }, - "hist": { - "x": hist_exc_grid[0].tolist(), - "y": hist_exc_grid[1].tolist(), - "x_min": hist_exc_grid[0][0], - "x_max": hist_exc_grid[0][-1], - "y_min": np.min(hist_exc_grid[1]).astype(np.float64), - "y_max": np.max(hist_exc_grid[1]).astype(np.float64), - }, - }, - } - - return results diff --git a/callflow/pipeline/unused_trees_to_literal.py b/callflow/pipeline/unused_trees_to_literal.py deleted file mode 100644 index 500b9ffe..00000000 --- a/callflow/pipeline/unused_trees_to_literal.py +++ /dev/null @@ -1,116 +0,0 @@ -############################################################################## -# Copyright (c) 2017-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. -# -# This file is part of Hatchet. -# Created by Abhinav Bhatele . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/hatchet -# Please also read the LICENSE file for the MIT License notice. 
-############################################################################## - -import numpy as np -import json - -print("WARNING: ({}) is unused in the code and should be deleted!".format(__file__)) - - -def trees_to_literal(graph, dataframe): - """ Calls to_json in turn for each tree in the graph/forest - """ - print("DFS on the graph") - # print("============================================") - # dfs(graph, dataframe, 100) - print("Number of nodes in graph", len(graph)) - print("============================================") - print("Dataframe Information") - print("Size:", dataframe.shape) - nodes = dataframe.groupby(["name", "nid"]).groups.keys() - print("Number of nodes in dataframe: ", len(nodes)) - # print("Nodes: {0}".format(nodes)) - literal = [] - nodes = dataframe["name"].unique() - adj_idx_map = {} - for idx, node in enumerate(nodes): - adj_idx_map[node] = idx - - num_of_nodes = len(nodes) - adj_matrix = np.zeros(shape=(num_of_nodes, num_of_nodes)) - - mapper = {} - - def add_nodes_and_children(hnode): - node_df = dataframe.loc[ - (dataframe["name"] == hnode.callpath[-1]) & (dataframe["nid"] == hnode.nid) - ] - node_id = node_df["nid"].unique()[0] - node_name = hnode.callpath[-1] - children = [] - - for child in hnode.children: - # print(child, child.nid) - child_df = dataframe.loc[ - (dataframe["name"] == child.callpath[-1]) - & (dataframe["nid"] == child.nid) - ] - - if not child_df.empty: - child_name = child_df["name"].unique()[0] - # print(child_name) - if child_name in adj_idx_map and node_name in adj_idx_map: - source_idx = adj_idx_map[node_name] - target_idx = adj_idx_map[child_name] - if adj_matrix[source_idx][target_idx] == 0.0: - adj_matrix[source_idx, target_idx] = 1.0 - children.append(add_nodes_and_children(child)) - - return { - "name": node_name, - "children": children, - "nid": int(node_id), - "metrics": { - "time (inc)": node_df["time (inc)"].mean(), - "time": node_df["time"].mean(), - }, - } - - for root in graph.roots: - literal.append(add_nodes_and_children(root)) - - return literal - - -def dfs(graph, dataframe, limit): - def dfs_recurse(root, level): - for node in root.children: - result = "" - if level < limit: - for i in range(0, level): - result += "- " - node_df = dataframe.loc[ - (dataframe["nid"] == node.nid) - & (dataframe["name"] == node.callpath[-1]) - ] - inclusive_runtime = " time (inc) = " + str(node_df["time (inc)"].mean()) - exclusive_runtime = " time = " + str(node_df["time"].mean()) - module = "Module = " + str(node_df["module"].unique()[0]) - result += ( - "Node = " - + node.callpath[-1] - + "[" - + module - + ":" - + str(node.nid) - + "]" - + inclusive_runtime - + exclusive_runtime - ) - print(result) - level += 1 - dfs_recurse(node, level) - - level = 0 - for root in graph.roots: - print("Root = {0} [{1}]".format(root, root.nid)) - dfs_recurse(root, level) diff --git a/callflow/server/__init__.py b/callflow/server/__init__.py new file mode 100644 index 00000000..8a452354 --- /dev/null +++ b/callflow/server/__init__.py @@ -0,0 +1 @@ +from .main import CallFlowServer diff --git a/callflow/server.py b/callflow/server/main.py similarity index 73% rename from callflow/server.py rename to callflow/server/main.py index 8ebd5575..c96f0447 100644 --- a/callflow/server.py +++ b/callflow/server/main.py @@ -1,14 +1,7 @@ -############################################################################## -# Copyright (c) 2018-2019, Lawrence Livermore National Security, LLC. -# Produced at the Lawrence Livermore National Laboratory. 
+# Copyright 2017-2020 Lawrence Livermore National Security, LLC and other +# CallFlow Project Developers. See the top-level LICENSE file for details. # -# This file is part of Callflow. -# Created by Suraj Kesavan . -# LLNL-CODE-741008. All rights reserved. -# -# For details, see: https://github.com/LLNL/Callflow -# Please also read the LICENSE file for the MIT License notice. -############################################################################## +# SPDX-License-Identifier: MIT # ------------------------------------------------------------------------------ @@ -28,14 +21,14 @@ import argparse from networkx.readwrite import json_graph +# ------------------------------------------------------------------------------ +# CallFlow imports. import callflow -from callflow import SingleCallFlow, EnsembleCallFlow -from callflow.pipeline import ConfigFileReader - +from callflow import CallFlow +from callflow.operations import ConfigFileReader LOGGER = callflow.get_logger(__name__) - # ------------------------------------------------------------------------------ # Create a Flask server. app = Flask(__name__, static_url_path="/public") @@ -53,22 +46,27 @@ def __init__(self): self.debug = args.verbose or True self.production = args.production or False - configFile = args.config self.process = args.process # Read the config file using config file reader. - self.config = ConfigFileReader(configFile) + self.config = ConfigFileReader(args.config) # Call the version of callflow corresponding to number of datasets. if len(self.config.datasets) == 1: - self.callflow = SingleCallFlow(config=self.config, process=self.process) + self.callflow = callflow.CallFlow( + config=self.config, process=self.process, ensemble=False + ) else: - self.callflow = EnsembleCallFlow(config=self.config, process=self.process) + self.callflow = callflow.CallFlow( + config=self.config, process=self.process, ensemble=True + ) # Create server if not processing. if not self.process: self._create_server() + # ------------------------------------------------------------------------------ + # Private methods. @staticmethod def _create_parser(): """ @@ -106,6 +104,14 @@ def _verify_parser(args): raise Exception() def _create_server(self): + """ + Create server's request handler and starts the server. + Current version abstracts the requests into 3 categores: + General: common requests for both ensemble and single. + Single: requests for single dataset processing. + Ensemble: requests for ensemble dataset processing. + """ + # Socket request handlers self._request_handler_general() if len(self.config.datasets) == 1: @@ -125,14 +131,12 @@ def _request_handler_general(self): General socket requests. """ - # TODO: Find a better way to debug. @sockets.on("reset", namespace="/") def reset(data): """ # TODO: This might have to be deleted. """ - if self.debug: - LOGGER.debug("[Socket request] reset: {}".format(data)) + LOGGER.debug("[Socket request] reset: {}".format(data)) dataset = data["dataset"] filterBy = data["filterBy"] filterPerc = data["filterPerc"] @@ -148,17 +152,16 @@ def reset(data): @sockets.on("init", namespace="/") def init(data): """ - # TODO: Change request tag to "config". - # TODO: Remove case study. Essential data house for single callflow. :return: Config file (JSON Format). 
""" - if self.debug: - LOGGER.debug(f"[Socket request] init: {data}") - - caseStudy = data["caseStudy"] - result = json.dumps(self.config, default=lambda o: o.__dict__) - emit("init", result, json=True) + LOGGER.debug(f"[Socket request] init: {data}") + if data["mode"] == "Ensemble": + result = self.callflow.request_ensemble({"name": "init"}) + elif data["mode"] == "Single": + result = self.callflow.request_single({"name": "init"}) + json_result = json.dumps(result) + emit("init", json_result, json=True) @sockets.on("reveal_callsite", namespace="/") def reveal_callsite(data): @@ -166,8 +169,7 @@ def reveal_callsite(data): Reveal the callpaths of selected callsites. :return: networkx graph (JSON) """ - if self.debug: - LOGGER.debug(f"[Socket request] reveal_callsite: {data}") + LOGGER.debug(f"[Socket request] reveal_callsite: {data}") nxg = self.callflow.request( { "name": "supergraph", @@ -186,8 +188,7 @@ def split_by_entry_callsites(data): Reveal the entry callsite of selected module. :return: networkx graph (JSON) """ - if self.debug: - LOGGER.debug("Split by entry: {}".format(data)) + LOGGER.debug("Split by entry: {}".format(data)) nxg = self.callflow.request( { "name": "supergraph", @@ -206,8 +207,7 @@ def split_by_callees(data): Reveal the callees of selected module. :return: networkx graph (JSON) """ - if self.debug: - LOGGER.debug("Split by callees: {}".format(data)) + LOGGER.debug("Split by callees: {}".format(data)) nxg = self.callflow.request( { "name": "supergraph", @@ -220,33 +220,15 @@ def split_by_callees(data): json_result = json.dumps(result) emit("ensemble_supergraph", json_result, json=True) - # @sockets.on("mpi_range_data", namespace="/") - # def mpi_range_data(data): - # if self.debug: - # LOGGER.debug("MPI range data: {}".format(data)) - # nx_graph = self.callflow.request( - # { - # "name": "mpi_range_data", - # "datasets": data["datasets"], - # "range_from": data["range_from"], - # "range_to": data["range_to"], - # } - # ) - def _request_handler_single(self): @sockets.on("single_callsite_data", namespace="/") def single_callsite_data(data): """ - TODO: Not sure if we can merge this with init. - TODO: Needs discussion and a better naming convention. - Data house for single callflow. :return: Auxiliary data. """ - if self.debug: - LOGGER.debug("[Socket request] single_callsite_data. {}".format(data)) - - result = self.callflow.request( + LOGGER.debug("[Socket request] single_callsite_data. {}".format(data)) + result = self.callflow.request_single( { "name": "auxiliary", "dataset": data["dataset"], @@ -263,10 +245,8 @@ def single_cct(data): Single CCT. :return: CCT networkx graph (JSON format). """ - if self.debug: - LOGGER.debug("[Socket request] Single CCT: {}".format(data)) - - nxg = self.callflow.request( + LOGGER.debug("[Socket request] Single CCT: {}".format(data)) + nxg = self.callflow.request_single( { "name": "cct", "dataset": data["dataset"], @@ -274,6 +254,8 @@ def single_cct(data): } ) result = json_graph.node_link_data(nxg) + json_result = json.dumps(result) + emit("single_cct", result, json=True) @sockets.on("single_supergraph", namespace="/") @@ -282,31 +264,25 @@ def single_supergraph(data): Single SuperGraph. :return: both SuperGraph networkx graphs (JSON format). 
""" - if self.debug: - LOGGER.debug("[Socket request] single_supergraph: {}".format(data)) - + LOGGER.debug("[Socket request] single_supergraph: {}".format(data)) dataset = data["dataset"] groupBy = data["groupBy"].lower() - nxg = self.callflow.request( + nxg = self.callflow.request_single( {"name": "supergraph", "groupBy": groupBy, "dataset": dataset} ) result = json_graph.node_link_data(nxg) - # json_result = json.dumps(result) - emit("single_supergraph", result, json=True) + json_result = json.dumps(result) + emit("single_supergraph", json_result, json=True) def _request_handler_ensemble(self): @sockets.on("ensemble_callsite_data", namespace="/") def ensemble_callsite_data(data): """ - TODO: Not sure if we can merge this with init. - TODO: Needs discussion and a better naming convention. - - Essential data house for ensemble callflow. + Data house for ensemble callflow. :return: Auxiliary data. """ - if self.debug: - LOGGER.debug("[Socket request] ensemble_callsite_data: {}".format(data)) - result = self.callflow.request( + LOGGER.debug("[Socket request] ensemble_callsite_data: {}".format(data)) + result = self.callflow.request_ensemble( { "name": "auxiliary", "datasets": data["datasets"], @@ -325,9 +301,8 @@ def ensemble_cct(data): Union of all CCTs. :return: CCT networkx graph (JSON format). """ - if self.debug: - LOGGER.debug("[Socket request] ensemble_cct: {}".format(data)) - nxg = self.callflow.request( + LOGGER.debug("[Socket request] ensemble_cct: {}".format(data)) + nxg = self.callflow.request_ensemble( { "name": "ensemble_cct", "datasets": data["datasets"], @@ -335,6 +310,7 @@ def ensemble_cct(data): } ) result = json_graph.node_link_data(nxg) + # json_result = json.dumps(result) emit("ensemble_cct", result, json=True) @sockets.on("ensemble_supergraph", namespace="/") @@ -343,17 +319,15 @@ def ensemble_supergraph(data): Ensemble SuperGraph. :return: both SuperGraph networkx graphs (JSON format). """ - if self.debug: - Logger.debug("[Socket request] ensemble_supergraph: {}".format(data)) - + LOGGER.debug("[Socket request] ensemble_supergraph: {}".format(data)) datasets = data["datasets"] groupBy = data["groupBy"].lower() - nxg = self.callflow.request( + nxg = self.callflow.request_ensemble( {"name": "supergraph", "groupBy": groupBy, "datasets": datasets} ) result = json_graph.node_link_data(nxg) - # json_result = json.dumps(result) - emit("ensemble_supergraph", result, json=True) + json_result = json.dumps(result) + emit("ensemble_supergraph", json_result, json=True) @sockets.on("ensemble_similarity", namespace="/") def ensemble_similarity(data): @@ -361,9 +335,7 @@ def ensemble_similarity(data): Similarity Matrix for all callgraphs in ensemble. :return: Pair-wise similarity matrix """ - if self.debug: - LOGGER.debug("ensemble_similarity: {data}") - + LOGGER.debug("ensemble_similarity: {data}") result = self.callflow.request( { "name": "similarity", @@ -380,9 +352,8 @@ def module_hierarchy(data): Module hierarchy of the supergraph. :return: CCT networkx graph (JSON format). """ - if self.debug: - LOGGER.debug(f"module_hierarchy {data}") - nxg = self.callflow.request( + LOGGER.debug(f"module_hierarchy {data}") + nxg = self.callflow.request_ensemble( { "name": "hierarchy", "datasets": data["datasets"], @@ -400,9 +371,8 @@ def parameter_projection(data): Parameter projection of the datasets. :return: PCs. I guess. 
""" - if self.debug: - LOGGER.debug(f"parameter_projection: {data}") - result = self.callflow.request( + LOGGER.debug(f"parameter_projection: {data}") + result = self.callflow.request_ensemble( { "name": "projection", "datasets": data["datasets"], @@ -412,6 +382,7 @@ def parameter_projection(data): ) emit("parameter_projection", result, json=True) + # Not used now. But lets keep it. Will be useful. @sockets.on("parameter_information", namespace="/") def parameter_information(data): """ @@ -419,9 +390,7 @@ def parameter_information(data): Parameter information :return: { "parameter1": [Array], "parameter2": [Array] ... }. """ - if self.debug: - LOGGER.debug(f"[Socket request] parameter_information: {data}") - + LOGGER.debug(f"[Socket request] parameter_information: {data}") result = self.callflow.request( {"name": "run-information", "datasets": data["datasets"]} ) @@ -430,12 +399,10 @@ def parameter_information(data): @sockets.on("compare", namespace="/") def compare(data): """ - TODO: Verify the return type. Compare two super-graphs. :return: Gradients in some JSON format. """ - if self.debug: - LOGGER.debug("[Socket request] compare_supergraph {data}") + LOGGER.debug("[Socket request] compare_supergraph {data}") result = self.callflow.request( { "name": "compare", @@ -446,18 +413,8 @@ def compare(data): ) emit("compare", result, json=True) - def create_server(self): - app.debug = True - app.__dir__ = os.path.join(os.path.dirname(os.getcwd()), "") - # CallFlowServer routes - @app.route("/") - def root(): - print("CallFlowServer directory", app.__dir__) - return send_from_directory(app.__dir__, "index.html") - if __name__ == "__main__": - # if verbose, level = 1 # else, level = 2 callflow.init_logger(level=1) diff --git a/callflow/timer.py b/callflow/timer.py index 52729106..7f754414 100644 --- a/callflow/timer.py +++ b/callflow/timer.py @@ -16,7 +16,9 @@ class Timer(object): - """Simple phase timer with a context manager.""" + """ + Simple phase timer with a context manager. 
+ """ def __init__(self): self._phase = None diff --git a/callflow/utils.py b/callflow/utils.py index 0f55a9f6..de4c0298 100644 --- a/callflow/utils.py +++ b/callflow/utils.py @@ -16,7 +16,7 @@ def lookup_with_name(df, name): # ------------------------------------------------------------------------------ # a similar function in utils/hatchet.py -def sanitizeName(name): +def sanitize_name(name): ret_name = "" if name is None: ret_name = "Unknown" @@ -199,10 +199,10 @@ def string_to_list(string: str, sep: str): # ------------------------------------------------------------------------------ -# networx utilities +# networkx utilities # ------------------------------------------------------------------------------ # not sure if this is used anywhere -# Also, why is this not consistent with the rest of the stlye (ie, actions) +# Also, why is this not consistent with the rest of the style (ie, actions) def dfs(graph, dataframe, limit): def _dfs_recurse(root, level): for node in root.children: @@ -266,33 +266,6 @@ def graphmltojson(graphfile, outfile): # ------------------------------------------------------------------------------ - - -def getPathListFromFrames(frames): - paths = [] - for frame in frames: - path = [] - for f in frame: - if f["type"] == "function": - path.append(f["name"]) - elif f["type"] == "statement": - path.append(f["file"] + ":" + str(f["line"])) - elif f["type"] == "loop": - path.append(f["file"] + ":" + str(f["line"])) - paths.append(path) - return path - - -def framesToPathLists(paths): - all_paths = [] - for path in paths: - curr_path = [] - for frame in path: - curr_path.append(frame["name"]) - all_paths.append(curr_path) - return all_paths - - def bfs_hatchet(graph): ret = {} node_count = 0 @@ -330,7 +303,7 @@ def getNodeParents(node): return node.parents -def getNodeName(node): +def get_callsite_name_from_frame(node): name = node.frame.get("name") if name != None: return node.frame.get("name") @@ -338,18 +311,10 @@ def getNodeName(node): return node.frame.get("file") -def sanitizeName(name): - if name is None: - return "Unknown" - if "/" in name: - name_split = name.split("/") - return name_split[len(name_split) - 1] - else: - return name - - -# Return the Callsite name from frame. -def getNodeDictFromFrame(frame): +def node_dict_from_frame(frame): + """ + Constructs callsite's name from Hatchet's frame. + """ if frame["type"] == "function": return {"name": frame["name"], "line": "NA", "type": "function"} elif frame["type"] == "statement": @@ -358,3 +323,21 @@ def getNodeDictFromFrame(frame): return {"name": frame["file"], "line": frame["line"], "type": "loop"} else: return {} + + +def path_list_from_frames(frames): + """ + Constructs callsite's path from Hatchet's frame. 
+ """ + paths = [] + for frame in frames: + path = [] + for f in frame: + if f["type"] == "function": + path.append(f["name"]) + elif f["type"] == "statement": + path.append(f["file"] + ":" + str(f["line"])) + elif f["type"] == "loop": + path.append(f["file"] + ":" + str(f["line"])) + paths.append(path) + return path diff --git a/data/caliper-cali/config.callflow.json b/data/caliper-cali/config.callflow.json index c0e877bb..e2926c61 100644 --- a/data/caliper-cali/config.callflow.json +++ b/data/caliper-cali/config.callflow.json @@ -1,6 +1,7 @@ { "run_name": "caliper-cali", "save_path": "./data/caliper-cali/.callflow", + "read_parameter": false, "datasets": [ { "name": "caliper-ex", diff --git a/data/caliper-cpi-json/config.callflow.json b/data/caliper-cpi-json/config.callflow.json index 3ff1d871..8e078cb2 100644 --- a/data/caliper-cpi-json/config.callflow.json +++ b/data/caliper-cpi-json/config.callflow.json @@ -1,6 +1,7 @@ { "run_name": "caliper-cpi-json", "save_path": "./data/caliper-cpi-json/.callflow", + "read_parameter": false, "datasets": [ { "name": "caliper-ex", diff --git a/data/caliper-lulesh-json/config.callflow.json b/data/caliper-lulesh-json/config.callflow.json index 13e51ae6..e841b811 100644 --- a/data/caliper-lulesh-json/config.callflow.json +++ b/data/caliper-lulesh-json/config.callflow.json @@ -1,6 +1,7 @@ { "run_name": "caliper-lulesh-json", "save_path": "data/caliper-lulesh-json/.callflow", + "read_parameter": false, "datasets": [ { "name": "lulesh", diff --git a/data/gprof2dot-cpi/config.callflow.json b/data/gprof2dot-cpi/config.callflow.json index 6d90b23b..6bbc1faa 100644 --- a/data/gprof2dot-cpi/config.callflow.json +++ b/data/gprof2dot-cpi/config.callflow.json @@ -1,6 +1,7 @@ { "run_name": "gprof-cpi", "save_path": "./data/gprof2dot-cpi/.callflow", + "read_parameter": false, "datasets": [ { "name": "calc-pi", diff --git a/data/hpctoolkit-cpi-database/config.callflow.json b/data/hpctoolkit-cpi-database/config.callflow.json index 598275a5..5fbe4820 100644 --- a/data/hpctoolkit-cpi-database/config.callflow.json +++ b/data/hpctoolkit-cpi-database/config.callflow.json @@ -1,6 +1,7 @@ { "run_name": "hpctoolkit-cpi-database", "save_path": "./data/hpctoolkit-cpi-database/.callflow", + "read_parameter": false, "datasets": [ { "name": "calc-pi",