diff --git a/environment.yml b/environment.yml
index 2fc9532..9c671c3 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,25 +1,26 @@
 name: floatcsep
 channels:
-  - conda-forge
-  - defaults
+  - conda-forge
+  - defaults
 dependencies:
-  - python <= 3.11
-  - numpy
-  - pycsep
-  - dateparser
-  - docker-py
-  - flake8
-  - gitpython
-  - h5py
-  - matplotlib
-  - pip
-  - pyshp
-  - pyyaml
-  - requests
-  - seaborn
-  - sphinx
-  - sphinx-autoapi
-  - sphinx-gallery
-  - sphinx-rtd-theme
-  - pytables
-  - xmltodict
+  - python <= 3.11
+  - numpy
+  - pycsep
+  - dateparser
+  - docker-py
+  - flake8
+  - gitpython
+  - h5py
+  - matplotlib
+  - pip
+  - pyshp
+  - pyyaml
+  - requests
+  - scipy
+  - seaborn
+  - sphinx
+  - sphinx-autoapi
+  - sphinx-gallery
+  - sphinx-rtd-theme
+  - pytables
+  - xmltodict
diff --git a/examples/case_g/config.yml b/examples/case_g/config.yml
index b36f4f1..82dfdca 100644
--- a/examples/case_g/config.yml
+++ b/examples/case_g/config.yml
@@ -20,4 +20,4 @@
 model_config: models.yml
 test_config: tests.yml
 postprocess:
-  plot_custom: plot_script.py:main
\ No newline at end of file
+  plot_custom: custom_plot_script.py:main
\ No newline at end of file
diff --git a/examples/case_g/plot_script.py b/examples/case_g/custom_plot_script.py
similarity index 100%
rename from examples/case_g/plot_script.py
rename to examples/case_g/custom_plot_script.py
diff --git a/examples/case_h/config.yml b/examples/case_h/config.yml
index 846ee44..ca5c7f9 100644
--- a/examples/case_h/config.yml
+++ b/examples/case_h/config.yml
@@ -17,4 +17,8 @@ region_config:
   force_rerun: True
 catalog: catalog.csv
 model_config: models.yml
-test_config: tests.yml
\ No newline at end of file
+test_config: tests.yml
+
+postprocess:
+  plot_catalog: False
+  report: custom_report.py:main
\ No newline at end of file
diff --git a/examples/case_h/custom_report.py b/examples/case_h/custom_report.py
new file mode 100644
index 0000000..219b6d2
--- /dev/null
+++ b/examples/case_h/custom_report.py
@@ -0,0 +1,47 @@
+from floatcsep.report import MarkdownReport
+from floatcsep.utils import timewindow2str
+
+
+def main(experiment):
+
+    timewindow = experiment.timewindows[-1]
+    timestr = timewindow2str(timewindow)
+
+    report = MarkdownReport()
+    report.add_title(f"Experiment Report - {experiment.name}", "")
+    report.add_heading("Objectives", level=2)
+
+    objs = [
+        f"Comparison of ETAS, pyMock-Poisson and pyMock-NegativeBinomial models for the "
+        f"day after the Amatrice earthquake, for events with M>{min(experiment.magnitudes)}.",
+    ]
+    report.add_list(objs)
+
+    report.add_figure(
+        f"Input catalog",
+        [
+            experiment.registry.get_figure("main_catalog_map"),
+            experiment.registry.get_figure("main_catalog_time"),
+        ],
+        level=3,
+        ncols=1,
+        caption=f"Evaluation catalog of {experiment.start_date}. 
" + f"Earthquakes are filtered above Mw" + f" {min(experiment.magnitudes)}.", + add_ext=True, + ) + + # Include results from Experiment + test = experiment.tests[0] + for model in experiment.models: + fig_path = experiment.registry.get_figure(timestr, f"{test.name}_{model.name}") + report.add_figure( + f"{test.name}: {model.name}", + fig_path, + level=3, + caption="Catalog-based N-test", + add_ext=True, + width=200, + ) + + report.save(experiment.registry.abs(experiment.registry.run_dir)) diff --git a/floatcsep/cmd/main.py b/floatcsep/cmd/main.py index efeac17..7e449c8 100644 --- a/floatcsep/cmd/main.py +++ b/floatcsep/cmd/main.py @@ -2,10 +2,10 @@ import logging from floatcsep import __version__ -from floatcsep.experiment import Experiment +from floatcsep.experiment import Experiment, ExperimentComparison from floatcsep.logger import setup_logger, set_console_log_level -from floatcsep.utils import ExperimentComparison from floatcsep.postprocess import plot_results, plot_forecasts, plot_catalogs, plot_custom +from floatcsep.report import generate_report, reproducibility_report setup_logger() log = logging.getLogger("floatLogger") @@ -34,7 +34,7 @@ def run(config, **kwargs): plot_results(experiment=exp) plot_custom(experiment=exp) - exp.generate_report() + generate_report(experiment=exp) exp.make_repr() log.info("Finalized") @@ -54,7 +54,7 @@ def plot(config, **kwargs): plot_results(experiment=exp) plot_custom(experiment=exp) - exp.generate_report() + generate_report(experiment=exp) log.debug("") @@ -76,6 +76,7 @@ def reproduce(config, **kwargs): comp = ExperimentComparison(original_exp, reproduced_exp) comp.compare_results() + reproducibility_report(exp_comparison=comp) log.info("Finalized") log.debug("") diff --git a/floatcsep/environments.py b/floatcsep/environments.py index 14cf095..fc7b506 100644 --- a/floatcsep/environments.py +++ b/floatcsep/environments.py @@ -169,8 +169,7 @@ def create_environment(self, force=False): ] ) log.info(f"\tSub-conda environment created: {self.env_name}") - - self.install_dependencies() + self.install_dependencies() def env_exists(self) -> bool: """ diff --git a/floatcsep/experiment.py b/floatcsep/experiment.py index b4b8efa..3b3187f 100644 --- a/floatcsep/experiment.py +++ b/floatcsep/experiment.py @@ -1,4 +1,6 @@ import datetime +import filecmp +import hashlib import logging import os import shutil @@ -7,8 +9,9 @@ import numpy import yaml +import scipy + -from floatcsep import report from floatcsep.evaluation import Evaluation from floatcsep.logger import add_fhandler from floatcsep.model import Model, TimeDependentModel @@ -559,13 +562,6 @@ def read_results(self, test: Evaluation, window: str) -> List: return test.read_results(window, self.models) - def generate_report(self) -> None: - """Creates a report summarizing the Experiment's results.""" - - log.info(f"Saving report into {self.registry.run_dir}") - - report.generate_report(self) - def make_repr(self): log.info("Creating reproducibility config file") @@ -700,3 +696,144 @@ def from_yml(cls, config_yml: str, repr_dir=None, **kwargs): kwargs.pop("logging") return cls(**_dict, **kwargs) + + +class ExperimentComparison: + + def __init__(self, original, reproduced, **kwargs): + """""" + self.original = original + self.reproduced = reproduced + + self.num_results = {} + self.file_comp = {} + + @staticmethod + def obs_diff(obs_orig, obs_repr): + + return numpy.abs( + numpy.divide((numpy.array(obs_orig) - numpy.array(obs_repr)), numpy.array(obs_orig)) + ) + + @staticmethod + def test_stat(test_orig, 
test_repr): + + if isinstance(test_orig[0], str): + if not isinstance(test_orig[1], str): + stats = numpy.array( + [0, numpy.divide((test_repr[1] - test_orig[1]), test_orig[1]), 0, 0] + ) + else: + stats = None + else: + stats_orig = numpy.array( + [numpy.mean(test_orig), numpy.std(test_orig), scipy.stats.skew(test_orig)] + ) + stats_repr = numpy.array( + [numpy.mean(test_repr), numpy.std(test_repr), scipy.stats.skew(test_repr)] + ) + + ks = scipy.stats.ks_2samp(test_orig, test_repr) + stats = [*numpy.divide(numpy.abs(stats_repr - stats_orig), stats_orig), ks.pvalue] + return stats + + def get_results(self): + + win_orig = timewindow2str(self.original.timewindows) + win_repr = timewindow2str(self.reproduced.timewindows) + + tests_orig = self.original.tests + tests_repr = self.reproduced.tests + + models_orig = [i.name for i in self.original.models] + models_repr = [i.name for i in self.reproduced.models] + + results = dict.fromkeys([i.name for i in tests_orig]) + + for test in tests_orig: + if test.type in ["consistency", "comparative"]: + results[test.name] = dict.fromkeys(win_orig) + for tw in win_orig: + results_orig = self.original.read_results(test, tw) + results_repr = self.reproduced.read_results(test, tw) + results[test.name][tw] = { + models_orig[i]: { + "observed_statistic": self.obs_diff( + results_orig[i].observed_statistic, + results_repr[i].observed_statistic, + ), + "test_statistic": self.test_stat( + results_orig[i].test_distribution, + results_repr[i].test_distribution, + ), + } + for i in range(len(models_orig)) + } + + else: + results_orig = self.original.read_results(test, win_orig[-1]) + results_repr = self.reproduced.read_results(test, win_orig[-1]) + results[test.name] = { + models_orig[i]: { + "observed_statistic": self.obs_diff( + results_orig[i].observed_statistic, + results_repr[i].observed_statistic, + ), + "test_statistic": self.test_stat( + results_orig[i].test_distribution, results_repr[i].test_distribution + ), + } + for i in range(len(models_orig)) + } + + return results + + @staticmethod + def get_hash(filename): + + with open(filename, "rb") as f: + bytes_file = f.read() + readable_hash = hashlib.sha256(bytes_file).hexdigest() + return readable_hash + + def get_filecomp(self): + + win_orig = timewindow2str(self.original.timewindows) + win_repr = timewindow2str(self.reproduced.timewindows) + + tests_orig = self.original.tests + tests_repr = self.reproduced.tests + + models_orig = [i.name for i in self.original.models] + models_repr = [i.name for i in self.reproduced.models] + + results = dict.fromkeys([i.name for i in tests_orig]) + + for test in tests_orig: + if test.type in ["consistency", "comparative"]: + results[test.name] = dict.fromkeys(win_orig) + for tw in win_orig: + results[test.name][tw] = dict.fromkeys(models_orig) + for model in models_orig: + orig_path = self.original.registry.get_result(tw, test, model) + repr_path = self.reproduced.registry.get_result(tw, test, model) + + results[test.name][tw][model] = { + "hash": (self.get_hash(orig_path) == self.get_hash(repr_path)), + "byte2byte": filecmp.cmp(orig_path, repr_path), + } + else: + results[test.name] = dict.fromkeys(models_orig) + for model in models_orig: + orig_path = self.original.registry.get_result(win_orig[-1], test, model) + repr_path = self.reproduced.registry.get_result(win_orig[-1], test, model) + results[test.name][model] = { + "hash": (self.get_hash(orig_path) == self.get_hash(repr_path)), + "byte2byte": filecmp.cmp(orig_path, repr_path), + } + return results + + def 
compare_results(self): + + self.num_results = self.get_results() + self.file_comp = self.get_filecomp() diff --git a/floatcsep/model.py b/floatcsep/model.py index 1d748a4..5a792c5 100644 --- a/floatcsep/model.py +++ b/floatcsep/model.py @@ -309,7 +309,7 @@ def __init__( self.registry, model_class=self.__class__.__name__, **kwargs ) self.build = kwargs.get("build", None) - + self.force_build = kwargs.get("force_build", False) if self.func: self.environment = EnvironmentFactory.get_env( self.build, self.name, self.registry.abs(model_path) @@ -329,7 +329,7 @@ def stage(self, timewindows=None) -> None: self.get_source(self.zenodo_id, self.giturl, branch=self.repo_hash) if hasattr(self, "environment"): - self.environment.create_environment() + self.environment.create_environment(force=self.force_build) self.registry.build_tree( timewindows=timewindows, diff --git a/floatcsep/postprocess.py b/floatcsep/postprocess.py index a07fbab..aeecb35 100644 --- a/floatcsep/postprocess.py +++ b/floatcsep/postprocess.py @@ -136,7 +136,7 @@ def plot_catalogs(experiment: "Experiment") -> None: def plot_custom(experiment: "Experiment"): - plot_config = parse_plot_config(experiment.postprocess.get("plot_custom", None)) + plot_config = parse_plot_config(experiment.postprocess.get("plot_custom", False)) if plot_config is None: return script_path, func_name = plot_config @@ -152,7 +152,7 @@ def plot_custom(experiment: "Experiment"): log.error(f"Script {script_path} is not in the configuration file directory.") log.info( "\t Skipping plotting. Script can be reallocated and re-run the plotting only" - " by typing 'floatcsep run {config}'" + " by typing 'floatcsep plot {config}'" ) return @@ -168,7 +168,7 @@ def plot_custom(experiment: "Experiment"): log.error(f"Function {func_name} not found in {script_path}") log.info( "\t Skipping plotting. Plot script can be modified and re-run the plotting only" - " by typing 'floatcsep run {config}'" + " by typing 'floatcsep plot {config}'" ) return @@ -178,7 +178,7 @@ def plot_custom(experiment: "Experiment"): log.error(f"Error executing {func_name} from {script_path}: {e}") log.info( "\t Skipping plotting. Plot script can be modified and re-run the plotting only" - " by typing 'floatcsep run {config}'" + " by typing 'floatcsep plot {config}'" ) return @@ -207,7 +207,7 @@ def parse_plot_config(plot_config: Union[dict, str, bool]): ) log.info( "\t Skipping plotting. The script can be modified and re-run the plotting only " - "by typing 'floatcsep run {config}'" + "by typing 'floatcsep plot {config}'" ) return diff --git a/floatcsep/report.py b/floatcsep/report.py index 51aa9a1..85ef608 100644 --- a/floatcsep/report.py +++ b/floatcsep/report.py @@ -1,4 +1,19 @@ -from floatcsep.utils import MarkdownReport, timewindow2str +import importlib.util +import itertools +import logging +import os +from typing import TYPE_CHECKING + +import numpy + +from floatcsep.experiment import ExperimentComparison +from floatcsep.utils import timewindow2str, str2timewindow + +if TYPE_CHECKING: + from floatcsep.experiment import Experiment + + +log = logging.getLogger("floatLogger") """ Use the MarkdownReport class to create output for the experiment. 
@@ -15,13 +30,18 @@ def generate_report(experiment, timewindow=-1): + report_function = experiment.postprocess.get("report") + if report_function: + custom_report(report_function, experiment) + return + timewindow = experiment.timewindows[timewindow] timestr = timewindow2str(timewindow) - hooks = experiment.report_hook - report = MarkdownReport() - report.add_title(f"Experiment Report - {experiment.name}", hooks.get("title_text", "")) + log.info(f"Saving report into {experiment.registry.run_dir}") + report = MarkdownReport() + report.add_title(f"Experiment Report - {experiment.name}", "") report.add_heading("Objectives", level=2) objs = [ @@ -30,10 +50,6 @@ def generate_report(experiment, timewindow=-1): f" M>{min(experiment.magnitudes)}.", ] - if hooks.get("objectives", None): - for i in hooks.get("objectives"): - objs.append(i) - report.add_list(objs) report.add_heading("Authoritative Data", level=2) @@ -54,16 +70,6 @@ def generate_report(experiment, timewindow=-1): f" {min(experiment.magnitudes)}.", add_ext=True, ) - - report.add_heading( - "Results", - level=2, - text="The following tests are applied to each of the experiment's " - "forecasts. More information regarding the tests can be found " - "[here]" - "(https://docs.cseptesting.org/getting_started/theory.html).", - ) - test_names = [test.name for test in experiment.tests] report.add_list(test_names) @@ -90,3 +96,372 @@ def generate_report(experiment, timewindow=-1): pass report.table_of_contents() report.save(experiment.registry.abs(experiment.registry.run_dir)) + + +def reproducibility_report(exp_comparison: "ExperimentComparison"): + + numerical = exp_comparison.num_results + data = exp_comparison.file_comp + outname = os.path.join("reproducibility_report.md") + save_path = os.path.dirname( + os.path.join( + exp_comparison.reproduced.registry.workdir, + exp_comparison.reproduced.registry.run_dir, + ) + ) + report = MarkdownReport(out_name=outname) + report.add_title(f"Reproducibility Report - {exp_comparison.original.name}", "") + + report.add_heading("Objectives", level=2) + objs = [ + "Analyze the statistic reproducibility and data reproducibility of" + " the experiment. 
Compares the differences between " + "(i) the original and reproduced scores," + " (ii) the statistical descriptors of the test distributions," + " (iii) The p-value of a Kolmogorov-Smirnov test -" + " values beneath 0.1 means we can't reject the distributions are" + " similar -," + " (iv) Hash (SHA-256) comparison between the results' files and " + "(v) byte-to-byte comparison" + ] + + report.add_list(objs) + for num, dat in zip(numerical.items(), data.items()): + + res_keys = list(num[1].keys()) + is_time = False + try: + str2timewindow(res_keys[0]) + is_time = True + except ValueError: + pass + if is_time: + report.add_heading(num[0], level=2) + for tw in res_keys: + rows = [ + [ + tw, + "Score difference", + "Test Mean diff.", + "Test Std diff.", + "Test Skew diff.", + "KS-test p value", + "Hash (SHA-256) equal", + "Byte-to-byte equal", + ] + ] + + for model_stat, model_file in zip(num[1][tw].items(), dat[1][tw].items()): + obs = model_stat[1]["observed_statistic"] + test = model_stat[1]["test_statistic"] + rows.append( + [ + model_stat[0], + obs, + *[f"{i:.1e}" for i in test[:-1]], + f"{test[-1]:.1e}", + model_file[1]["hash"], + model_file[1]["byte2byte"], + ] + ) + report.add_table(rows) + else: + report.add_heading(num[0], level=2) + rows = [ + [ + res_keys[-1], + "Max Score difference", + "Hash (SHA-256) equal", + "Byte-to-byte equal", + ] + ] + + for model_stat, model_file in zip(num[1].items(), dat[1].items()): + obs = numpy.nanmax(model_stat[1]["observed_statistic"]) + + rows.append( + [ + model_stat[0], + f"{obs:.1e}", + model_file[1]["hash"], + model_file[1]["byte2byte"], + ] + ) + + report.add_table(rows) + report.table_of_contents() + report.save(save_path) + + +def custom_report(report_function: str, experiment: "Experiment"): + + try: + script_path, func_name = report_function.split(".py:") + script_path += ".py" + + except ValueError: + log.error( + f"Invalid format for custom plot function: {report_function}. " + "Try {script_name}.py:{func}" + ) + log.info( + "\t Skipping reporting. The configuration script can be modified and re-run the" + " reporting (and plots) only by typing 'floatcsep plot {config}'" + ) + return + + log.info(f"Creating report from script {script_path} and function {func_name}") + script_abs_path = experiment.registry.abs(script_path) + allowed_directory = os.path.dirname(experiment.registry.abs(experiment.config_file)) + + if not os.path.isfile(script_path) or ( + os.path.dirname(script_abs_path) != os.path.realpath(allowed_directory) + ): + + log.error(f"Script {script_path} is not in the configuration file directory.") + log.info( + "\t Skipping reporting. The script can be reallocated and re-run the reporting only" + " by typing 'floatcsep plot {config}'" + ) + return + + module_name = os.path.splitext(os.path.basename(script_abs_path))[0] + spec = importlib.util.spec_from_file_location(module_name, script_abs_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + try: + func = getattr(module, func_name) + + except AttributeError: + log.error(f"Function {func_name} not found in {script_path}") + log.info( + "\t Skipping reporting. Report script can be modified and re-run the report only" + " by typing 'floatcsep plot {config}'" + ) + return + + try: + func(experiment) + except Exception as e: + log.error(f"Error executing {func_name} from {script_path}: {e}") + log.info( + "\t Skipping reporting. 
Report script can be modified and re-run the report only"
+            " by typing 'floatcsep plot {config}'"
+        )
+        return
+
+
+class MarkdownReport:
+    """Class to generate a Markdown report from a study."""
+
+    def __init__(self, out_name="report.md"):
+        self.out_name = out_name
+        self.toc = []
+        self.has_title = True
+        self.has_introduction = False
+        self.markdown = []
+
+    def add_introduction(self, adict):
+        """Generate document header from dictionary."""
+        first = (
+            f"# CSEP Testing Results: {adict['simulation_name']} \n"
+            f"**Forecast Name:** {adict['forecast_name']} \n"
+            f"**Simulation Start Time:** {adict['origin_time']} \n"
+            f"**Evaluation Time:** {adict['evaluation_time']} \n"
+            f"**Catalog Source:** {adict['catalog_source']} \n"
+            f"**Number Simulations:** {adict['num_simulations']}\n"
+        )
+
+        # used to determine to place TOC at beginning of document or after
+        # introduction.
+
+        self.has_introduction = True
+        self.markdown.append(first)
+        return first
+
+    def add_text(self, text):
+        """
+        Text should be a list of strings where each string will be on its own
+        line. Each add_text command represents a paragraph.
+
+        Args:
+            text (list): lines to write
+        Returns:
+        """
+        self.markdown.append(" ".join(text) + "\n\n")
+
+    def add_figure(
+        self,
+        title,
+        relative_filepaths,
+        level=2,
+        ncols=1,
+        add_ext=False,
+        text="",
+        caption="",
+        width=None,
+    ):
+        """
+        This function expects a list of filepaths.
+
+        If you want the output stacked, select a value of ncols. ncols should
+        be divisible by filepaths.
+
+        Args:
+            width:
+            caption:
+            text:
+            add_ext:
+            ncols:
+            title: name of the figure
+            level (int): value 1-6 depending on the heading
+            relative_filepaths (str or List[Tuple[str]]): list of paths in
+                order to make table
+        Returns:
+        """
+        # verify filepaths have proper extension should always be png
+        is_single = False
+        paths = []
+        if isinstance(relative_filepaths, str):
+            is_single = True
+            paths.append(relative_filepaths)
+        else:
+            paths = relative_filepaths
+
+        correct_paths = []
+        if add_ext:
+            for fp in paths:
+                correct_paths.append(fp + ".png")
+        else:
+            correct_paths = paths
+
+        # generate new lists with size ncols
+        formatted_paths = [correct_paths[i : i + ncols] for i in range(0, len(paths), ncols)]
+
+        # convert str into a list, where each potential row is an iter not str
+        def build_header(_row):
+            top = "|"
+            bottom = "|"
+            for i, _ in enumerate(_row):
+                if i == ncols:
+                    break
+                top += " |"
+                bottom += " --- |"
+            return top + "\n" + bottom
+
+        size_ = bool(width) * f"width={width}"
+
+        def add_to_row(_row):
+            if len(_row) == 1:
+                return f'<img src="{_row[0]}" {size_}/>'
+            string = "| "
+            for item in _row:
+                string = string + f'<img src="{item}" width={int(100 / ncols)}%/>'
+            return string
+
+        level_string = f"{level * '#'}"
+        result_cell = []
+        locator = title.lower().replace(" ", "_")
+        result_cell.append(f'{level_string} {title} <a name="{locator}"></a>\n')
+        result_cell.append(f"{text}\n")
+
+        for i, row in enumerate(formatted_paths):
+            if i == 0 and not is_single and ncols > 1:
+                result_cell.append(build_header(row))
+            result_cell.append(add_to_row(row))
+        result_cell.append("\n")
+        result_cell.append(f"{caption}")
+
+        self.markdown.append("\n".join(result_cell) + "\n")
+
+        # generate metadata for TOC
+        self.toc.append((title, level, locator))
+
+    def add_heading(self, title, level=1, text="", add_toc=True):
+        # multiplying char simply repeats it
+        if isinstance(text, str):
+            text = [text]
+        cell = []
+        level_string = f"{level * '#'}"
+        locator = title.lower().replace(" ", "_")
+        sub_heading = f'{level_string} {title} <a name="{locator}"></a>\n'
+        cell.append(sub_heading)
+        try:
+            for item in list(text):
+                cell.append(item)
+        except Exception as ex:
+            raise RuntimeWarning(f"Unable to add document subhead, text must be iterable. {ex}")
+        self.markdown.append("\n".join(cell) + "\n")
+
+        # generate metadata for TOC
+        if add_toc:
+            self.toc.append((title, level, locator))
+
+    def add_list(self, _list):
+        cell = []
+        for item in _list:
+            cell.append(f"* {item}")
+        self.markdown.append("\n".join(cell) + "\n\n")
+
+    def add_title(self, title, text):
+        self.has_title = True
+        self.add_heading(title, 1, text, add_toc=False)
+
+    def table_of_contents(self):
+        """Generates table of contents based on contents of document."""
+        if len(self.toc) == 0:
+            return
+        toc = ["# Table of Contents"]
+
+        for i, elem in enumerate(self.toc):
+            title, level, locator = elem
+            space = " " * (level - 1)
+            toc.append(f"{space}1. [{title}](#{locator})")
+        insert_loc = 1 if self.has_title else 0
+        self.markdown.insert(insert_loc, "\n".join(toc) + "\n\n")
+
+    def add_table(self, data, use_header=True):
+        """
+        Generates table from HTML and styles using bootstrap class.
+
+        Args:
+            data List[Tuple[str]]: should be (nrows, ncols) in size. all rows
+                should be the same sizes
+        Returns:
+            table (str): this can be added to subheading or other cell if
+                desired.
+        """
+        table = ['<div class="table table-striped">', f"<table>"]
+
+        def make_header(row_):
+            header = ["<tr>"]
+            for item in row_:
+                header.append(f"<th>{item}</th>")
+            header.append("</tr>")
+            return "\n".join(header)
+
+        def add_row(row_):
+            table_row = ["<tr>"]
+            for item in row_:
+                table_row.append(f"<td>{item}</td>")
+            table_row.append("</tr>")
+            return "\n".join(table_row)
+
+        for i, row in enumerate(data):
+            if i == 0 and use_header:
+                table.append(make_header(row))
+            else:
+                table.append(add_row(row))
+        table.append("</table>")
+        table.append("</div>")
+        table = "\n".join(table)
+        self.markdown.append(table + "\n\n")
+
+    def save(self, save_dir):
+        output = list(itertools.chain.from_iterable(self.markdown))
+        full_md_fname = os.path.join(save_dir, self.out_name)
+        with open(full_md_fname, "w") as f:
+            f.writelines(output)
diff --git a/floatcsep/utils.py b/floatcsep/utils.py
index 27e9f79..a540c9e 100644
--- a/floatcsep/utils.py
+++ b/floatcsep/utils.py
@@ -1,8 +1,6 @@
 # python libraries
 import copy
-import filecmp
 import functools
-import hashlib
 import itertools
 import logging
 import multiprocessing
@@ -23,7 +21,6 @@
 import shapely.geometry
 
 # pyCSEP libraries
-import six
 import yaml
 from csep.core.forecasts import GriddedForecast
 from csep.core.regions import CartesianGrid2D, compute_vertices
@@ -587,455 +584,12 @@ def check_exist(self):
         pass
 
 
-class MarkdownReport:
-    """Class to generate a Markdown report from a study."""
-
-    def __init__(self, outname="report.md"):
-        self.outname = outname
-        self.toc = []
-        self.has_title = True
-        self.has_introduction = False
-        self.markdown = []
-
-    def add_introduction(self, adict):
-        """Generate document header from dictionary."""
-        first = (
-            f"# CSEP Testing Results: {adict['simulation_name']} \n"
-            f"**Forecast Name:** {adict['forecast_name']} \n"
-            f"**Simulation Start Time:** {adict['origin_time']} \n"
-            f"**Evaluation Time:** {adict['evaluation_time']} \n"
-            f"**Catalog Source:** {adict['catalog_source']} \n"
-            f"**Number Simulations:** {adict['num_simulations']}\n"
-        )
-
-        # used to determine to place TOC at beginning of document or after
-        # introduction.
-
-        self.has_introduction = True
-        self.markdown.append(first)
-        return first
-
-    def add_text(self, text):
-        """
-        Text should be a list of strings where each string will be on its own.
-
-        line. Each add_text command represents a paragraph.
-
-        Args:
-            text (list): lines to write
-        Returns:
-        """
-        self.markdown.append(" ".join(text) + "\n\n")
-
-    def add_figure(
-        self,
-        title,
-        relative_filepaths,
-        level=2,
-        ncols=1,
-        add_ext=False,
-        text="",
-        caption="",
-        width=None,
-    ):
-        """
-        This function expects a list of filepaths.
-
-        If you want the output
-        stacked, select a value of ncols. ncols should be divisible by
-        filepaths. todo: modify formatted_paths to work when not divis.
- - Args: - width: - caption: - text: - add_ext: - ncols: - title: name of the figure - level (int): value 1-6 depending on the heading - relative_filepaths (str or List[Tuple[str]]): list of paths in - order to make table - Returns: - """ - # verify filepaths have proper extension should always be png - is_single = False - paths = [] - if isinstance(relative_filepaths, six.string_types): - is_single = True - paths.append(relative_filepaths) - else: - paths = relative_filepaths - - correct_paths = [] - if add_ext: - for fp in paths: - correct_paths.append(fp + ".png") - else: - correct_paths = paths - - # generate new lists with size ncols - formatted_paths = [correct_paths[i : i + ncols] for i in range(0, len(paths), ncols)] - - # convert str into a list, where each potential row is an iter not str - def build_header(_row): - top = "|" - bottom = "|" - for i, _ in enumerate(_row): - if i == ncols: - break - top += " |" - bottom += " --- |" - return top + "\n" + bottom - - size_ = bool(width) * f"width={width}" - - def add_to_row(_row): - if len(_row) == 1: - return f'' - string = "| " - for item in _row: - string = string + f'' - return string - - level_string = f"{level * '#'}" - result_cell = [] - locator = title.lower().replace(" ", "_") - result_cell.append(f'{level_string} {title} \n') - result_cell.append(f"{text}\n") - - for i, row in enumerate(formatted_paths): - if i == 0 and not is_single and ncols > 1: - result_cell.append(build_header(row)) - result_cell.append(add_to_row(row)) - result_cell.append("\n") - result_cell.append(f"{caption}") - - self.markdown.append("\n".join(result_cell) + "\n") - - # generate metadata for TOC - self.toc.append((title, level, locator)) - - def add_heading(self, title, level=1, text="", add_toc=True): - # multiplying char simply repeats it - if isinstance(text, str): - text = [text] - cell = [] - level_string = f"{level * '#'}" - locator = title.lower().replace(" ", "_") - sub_heading = f'{level_string} {title} \n' - cell.append(sub_heading) - try: - for item in list(text): - cell.append(item) - except Exception as ex: - raise RuntimeWarning("Unable to add document subhead, text must be iterable.") - self.markdown.append("\n".join(cell) + "\n") - - # generate metadata for TOC - if add_toc: - self.toc.append((title, level, locator)) - - def add_list(self, _list): - cell = [] - for item in _list: - cell.append(f"* {item}") - self.markdown.append("\n".join(cell) + "\n\n") - - def add_title(self, title, text): - self.has_title = True - self.add_heading(title, 1, text, add_toc=False) - - def table_of_contents(self): - """Generates table of contents based on contents of document.""" - if len(self.toc) == 0: - return - toc = ["# Table of Contents"] - - for i, elem in enumerate(self.toc): - title, level, locator = elem - space = " " * (level - 1) - toc.append(f"{space}1. [{title}](#{locator})") - insert_loc = 1 if self.has_title else 0 - self.markdown.insert(insert_loc, "\n".join(toc) + "\n\n") - - def add_table(self, data, use_header=True): - """ - Generates table from HTML and styles using bootstrap class. - - Args: - data List[Tuple[str]]: should be (nrows, ncols) in size. all rows - should be the same sizes - Returns: - table (str): this can be added to subheading or other cell if - desired. - """ - table = ['
', f""] - - def make_header(row): - header = [""] - for item in row: - header.append(f"") - header.append("") - return "\n".join(header) - - def add_row(row): - table_row = [""] - for item in row: - table_row.append(f"") - table_row.append("") - return "\n".join(table_row) - - for i, row in enumerate(data): - if i == 0 and use_header: - table.append(make_header(row)) - else: - table.append(add_row(row)) - table.append("
{item}
{item}
") - table.append("
") - table = "\n".join(table) - self.markdown.append(table + "\n\n") - - def save(self, save_dir): - output = list(itertools.chain.from_iterable(self.markdown)) - full_md_fname = os.path.join(save_dir, self.outname) - with open(full_md_fname, "w") as f: - f.writelines(output) - - class NoAliasLoader(yaml.Loader): @staticmethod def ignore_aliases(self): return True -class ExperimentComparison: - - def __init__(self, original, reproduced, **kwargs): - """""" - self.original = original - self.reproduced = reproduced - - self.num_results = {} - self.file_comp = {} - - @staticmethod - def obs_diff(obs_orig, obs_repr): - - return numpy.abs( - numpy.divide((numpy.array(obs_orig) - numpy.array(obs_repr)), numpy.array(obs_orig)) - ) - - @staticmethod - def test_stat(test_orig, test_repr): - - if isinstance(test_orig[0], str): - if not isinstance(test_orig[1], str): - stats = numpy.array( - [0, numpy.divide((test_repr[1] - test_orig[1]), test_orig[1]), 0, 0] - ) - else: - stats = None - else: - stats_orig = numpy.array( - [numpy.mean(test_orig), numpy.std(test_orig), scipy.stats.skew(test_orig)] - ) - stats_repr = numpy.array( - [numpy.mean(test_repr), numpy.std(test_repr), scipy.stats.skew(test_repr)] - ) - - ks = scipy.stats.ks_2samp(test_orig, test_repr) - stats = [*numpy.divide(numpy.abs(stats_repr - stats_orig), stats_orig), ks.pvalue] - return stats - - def get_results(self): - - win_orig = timewindow2str(self.original.timewindows) - win_repr = timewindow2str(self.reproduced.timewindows) - - tests_orig = self.original.tests - tests_repr = self.reproduced.tests - - models_orig = [i.name for i in self.original.models] - models_repr = [i.name for i in self.reproduced.models] - - results = dict.fromkeys([i.name for i in tests_orig]) - - for test in tests_orig: - if test.type in ["consistency", "comparative"]: - results[test.name] = dict.fromkeys(win_orig) - for tw in win_orig: - results_orig = self.original.read_results(test, tw) - results_repr = self.reproduced.read_results(test, tw) - results[test.name][tw] = { - models_orig[i]: { - "observed_statistic": self.obs_diff( - results_orig[i].observed_statistic, - results_repr[i].observed_statistic, - ), - "test_statistic": self.test_stat( - results_orig[i].test_distribution, - results_repr[i].test_distribution, - ), - } - for i in range(len(models_orig)) - } - - else: - results_orig = self.original.read_results(test, win_orig[-1]) - results_repr = self.reproduced.read_results(test, win_orig[-1]) - results[test.name] = { - models_orig[i]: { - "observed_statistic": self.obs_diff( - results_orig[i].observed_statistic, - results_repr[i].observed_statistic, - ), - "test_statistic": self.test_stat( - results_orig[i].test_distribution, results_repr[i].test_distribution - ), - } - for i in range(len(models_orig)) - } - - return results - - @staticmethod - def get_hash(filename): - - with open(filename, "rb") as f: - bytes_file = f.read() - readable_hash = hashlib.sha256(bytes_file).hexdigest() - return readable_hash - - def get_filecomp(self): - - win_orig = timewindow2str(self.original.timewindows) - win_repr = timewindow2str(self.reproduced.timewindows) - - tests_orig = self.original.tests - tests_repr = self.reproduced.tests - - models_orig = [i.name for i in self.original.models] - models_repr = [i.name for i in self.reproduced.models] - - results = dict.fromkeys([i.name for i in tests_orig]) - - for test in tests_orig: - if test.type in ["consistency", "comparative"]: - results[test.name] = dict.fromkeys(win_orig) - for tw in win_orig: - 
results[test.name][tw] = dict.fromkeys(models_orig) - for model in models_orig: - orig_path = self.original.registry.get_result(tw, test, model) - repr_path = self.reproduced.registry.get_result(tw, test, model) - - results[test.name][tw][model] = { - "hash": (self.get_hash(orig_path) == self.get_hash(repr_path)), - "byte2byte": filecmp.cmp(orig_path, repr_path), - } - else: - results[test.name] = dict.fromkeys(models_orig) - for model in models_orig: - orig_path = self.original.registry.get_result(win_orig[-1], test, model) - repr_path = self.reproduced.registry.get_result(win_orig[-1], test, model) - results[test.name][model] = { - "hash": (self.get_hash(orig_path) == self.get_hash(repr_path)), - "byte2byte": filecmp.cmp(orig_path, repr_path), - } - return results - - def compare_results(self): - - self.num_results = self.get_results() - self.file_comp = self.get_filecomp() - self.write_report() - - def write_report(self): - - numerical = self.num_results - data = self.file_comp - outname = os.path.join("reproducibility_report.md") - save_path = os.path.dirname( - os.path.join(self.reproduced.registry.workdir, self.reproduced.registry.run_dir) - ) - report = MarkdownReport(outname=outname) - report.add_title(f"Reproducibility Report - {self.original.name}", "") - - report.add_heading("Objectives", level=2) - objs = [ - "Analyze the statistic reproducibility and data reproducibility of" - " the experiment. Compares the differences between " - "(i) the original and reproduced scores," - " (ii) the statistical descriptors of the test distributions," - " (iii) The p-value of a Kolmogorov-Smirnov test -" - " values beneath 0.1 means we can't reject the distributions are" - " similar -," - " (iv) Hash (SHA-256) comparison between the results' files and " - "(v) byte-to-byte comparison" - ] - - report.add_list(objs) - for num, dat in zip(numerical.items(), data.items()): - - res_keys = list(num[1].keys()) - is_time = False - try: - str2timewindow(res_keys[0]) - is_time = True - except ValueError: - pass - if is_time: - report.add_heading(num[0], level=2) - for tw in res_keys: - rows = [ - [ - tw, - "Score difference", - "Test Mean diff.", - "Test Std diff.", - "Test Skew diff.", - "KS-test p value", - "Hash (SHA-256) equal", - "Byte-to-byte equal", - ] - ] - - for model_stat, model_file in zip(num[1][tw].items(), dat[1][tw].items()): - obs = model_stat[1]["observed_statistic"] - test = model_stat[1]["test_statistic"] - rows.append( - [ - model_stat[0], - obs, - *[f"{i:.1e}" for i in test[:-1]], - f"{test[-1]:.1e}", - model_file[1]["hash"], - model_file[1]["byte2byte"], - ] - ) - report.add_table(rows) - else: - report.add_heading(num[0], level=2) - rows = [ - [tw, "Max Score difference", "Hash (SHA-256) equal", "Byte-to-byte equal"] - ] - - for model_stat, model_file in zip(num[1].items(), dat[1].items()): - obs = numpy.nanmax(model_stat[1]["observed_statistic"]) - - rows.append( - [ - model_stat[0], - f"{obs:.1e}", - model_file[1]["hash"], - model_file[1]["byte2byte"], - ] - ) - - report.add_table(rows) - report.table_of_contents() - report.save(save_path) - - ####################### # Perhaps add to pycsep ####################### diff --git a/requirements.txt b/requirements.txt index 81d20f8..eefaee8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ pycsep pyshp pyyaml requests +scipy seaborn tables xmltodict \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index fb356c6..ee8bd06 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,6 +33,7 @@ install_requires = 
     pyshp
     pyyaml
     requests
+    scipy
     seaborn
     tables
     xmltodict
diff --git a/tests/qa/test_data.py b/tests/qa/test_data.py
index 872b4be..541f3e2 100644
--- a/tests/qa/test_data.py
+++ b/tests/qa/test_data.py
@@ -34,10 +34,10 @@ def get_eval_dist(self):
         pass
 
 
-@patch.object(Experiment, "generate_report")
 @patch("floatcsep.cmd.main.plot_forecasts")
 @patch("floatcsep.cmd.main.plot_catalogs")
 @patch("floatcsep.cmd.main.plot_custom")
+@patch("floatcsep.cmd.main.generate_report")
 class RunExamples(DataTest):
 
     def test_case_a(self, *args):
@@ -79,6 +79,7 @@ def test_case_g(self, *args):
 @patch("floatcsep.cmd.main.plot_forecasts")
 @patch("floatcsep.cmd.main.plot_catalogs")
 @patch("floatcsep.cmd.main.plot_custom")
+@patch("floatcsep.cmd.main.generate_report")
 class ReproduceExamples(DataTest):
 
     def test_case_c(self, *args):
diff --git a/tests/unit/test_environments.py b/tests/unit/test_environments.py
index dfe90f6..29cd4ba 100644
--- a/tests/unit/test_environments.py
+++ b/tests/unit/test_environments.py
@@ -96,9 +96,10 @@ def test_create_environment(self, mock_exists, mock_run):
     @patch("subprocess.run")
     def test_create_environment_force(self, mock_run):
         manager = CondaManager("test_base", "/path/to/model")
-        manager.env_exists = MagicMock(return_value=True)
+        manager.env_exists = MagicMock()
+        manager.env_exists.side_effect = [True, False]
         manager.create_environment(force=True)
-        self.assertEqual(mock_run.call_count, 2)  # One for remove, one for create
+        self.assertEqual(mock_run.call_count, 3)  # One each for remove, create, and install_dependencies
 
     @patch("subprocess.run")
     @patch.object(CondaManager, "detect_package_manager", return_value="conda")