diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
new file mode 100644
index 000000000..8164f174e
--- /dev/null
+++ b/.github/workflows/pipeline.yaml
@@ -0,0 +1,25 @@
+name: Test pipeline
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install poetry==1.4.0
+          poetry install
+      - name: Test with pre-commit
+        run: poetry run pre-commit run --all-files --show-diff-on-failure
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..45dc504b0
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+ci:
+  autoupdate_branch: "main"
+  autoupdate_schedule: monthly
+repos:
+  - repo: local
+    hooks:
+      - id: pylint
+        name: pylint
+        entry: pylint
+        language: system
+        types: [python]
+        files: "^express/"
+        args:
+          [
+            "-rn", # Only display messages
+            "-sn", # Don't display the score
+            "--disable=fixme"
+          ]
diff --git a/.pylintrc b/.pylintrc
deleted file mode 100644
index c3b5c5064..000000000
--- a/.pylintrc
+++ /dev/null
@@ -1,587 +0,0 @@
-[MASTER]
-
-# A comma-separated list of package or module names from where C extensions may
-# be loaded. Extensions are loading into the active Python interpreter and may
-# run arbitrary code.
-extension-pkg-whitelist=cv2
-
-# Add files or directories to the blacklist. They should be base names, not
-# paths.
-ignore=CVS
-
-# Add files or directories matching the regex patterns to the blacklist. The
-# regex matches against base names, not paths.
-ignore-patterns=
-
-# Python code to execute, usually for sys.path manipulation such as
-# pygtk.require().
-#init-hook=
-
-# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
-# number of processors available to use.
-jobs=1
-
-# Control the amount of potential inferred values when inferring a single
-# object. This can help the performance when dealing with large functions or
-# complex, nested conditions.
-limit-inference-results=100
-
-# List of plugins (as comma separated values of python module names) to load,
-# usually to register additional checkers.
-load-plugins=
-
-# Pickle collected data for later comparisons.
-persistent=yes
-
-# Specify a configuration file.
-#rcfile=
-
-# When enabled, pylint would attempt to guess common misconfiguration and emit
-# user-friendly hints instead of false-positive error messages.
-suggestion-mode=yes
-
-# Allow loading of arbitrary C extensions. Extensions are imported into the
-# active Python interpreter and may run arbitrary code.
-unsafe-load-any-extension=no
-
-
-[MESSAGES CONTROL]
-
-# Only show warnings with the listed confidence levels. Leave empty to show
-# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
-confidence=
-
-# Disable the message, report, category or checker with the given id(s). You
-# can either give multiple identifiers separated by comma (,) or put this
-# option multiple times (only on the command line, not in the configuration
-# file where it should appear only once). You can also use "--disable=all" to
-# disable everything first and then reenable specific checks. For example, if
-# you want to run only the similarities checker, you can use "--disable=all
-# --enable=similarities". If you want to run only the classes checker, but have
-# no Warning level messages displayed, use "--disable=all --enable=classes
-# --disable=W".
-disable=print-statement,
-        parameter-unpacking,
-        unpacking-in-except,
-        old-raise-syntax,
-        backtick,
-        long-suffix,
-        old-ne-operator,
-        old-octal-literal,
-        import-star-module-level,
-        non-ascii-bytes-literal,
-        raw-checker-failed,
-        bad-inline-option,
-        locally-disabled,
-        file-ignored,
-        suppressed-message,
-        useless-suppression,
-        deprecated-pragma,
-        use-symbolic-message-instead,
-        apply-builtin,
-        basestring-builtin,
-        buffer-builtin,
-        cmp-builtin,
-        coerce-builtin,
-        execfile-builtin,
-        file-builtin,
-        long-builtin,
-        raw_input-builtin,
-        reduce-builtin,
-        standarderror-builtin,
-        unicode-builtin,
-        xrange-builtin,
-        coerce-method,
-        delslice-method,
-        getslice-method,
-        setslice-method,
-        no-absolute-import,
-        old-division,
-        dict-iter-method,
-        dict-view-method,
-        next-method-called,
-        metaclass-assignment,
-        indexing-exception,
-        raising-string,
-        reload-builtin,
-        oct-method,
-        hex-method,
-        nonzero-method,
-        cmp-method,
-        input-builtin,
-        round-builtin,
-        intern-builtin,
-        unichr-builtin,
-        map-builtin-not-iterating,
-        zip-builtin-not-iterating,
-        range-builtin-not-iterating,
-        filter-builtin-not-iterating,
-        using-cmp-argument,
-        eq-without-hash,
-        div-method,
-        idiv-method,
-        rdiv-method,
-        exception-message-attribute,
-        invalid-str-codec,
-        sys-max-int,
-        bad-python3-import,
-        deprecated-string-function,
-        deprecated-str-translate-call,
-        deprecated-itertools-function,
-        deprecated-types-field,
-        next-method-defined,
-        dict-items-not-iterating,
-        dict-keys-not-iterating,
-        dict-values-not-iterating,
-        deprecated-operator-function,
-        deprecated-urllib-function,
-        xreadlines-attribute,
-        deprecated-sys-function,
-        exception-escape,
-        comprehension-escape,
-        no-else-return,  # Explicit elif / else can improve readability
-        bad-continuation,  # Checked by pycodestyle
-        missing-final-newline,  # Too strict to enforce
-        trailing-newlines  # Too strict to enforce
-
-# Enable the message, report, category or checker with the given id(s). You can
-# either give multiple identifier separated by comma (,) or put this option
-# multiple time (only on the command line, not in the configuration file where
-# it should appear only once). See also the "--disable" option for examples.
-enable=c-extension-no-member
-
-
-[REPORTS]
-
-# Python expression which should return a score less than or equal to 10. You
-# have access to the variables 'error', 'warning', 'refactor', and 'convention'
-# which contain the number of messages in each category, as well as 'statement'
-# which is the total number of statements analyzed. This score is used by the
-# global evaluation report (RP0004).
-evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
-
-# Template used to display messages. This is a python new-style format string
-# used to format the message information. See doc for all details.
-#msg-template=
-
-# Set the output format. Available formats are text, parseable, colorized, json
-# and msvs (visual studio). You can also give a reporter class, e.g.
-# mypackage.mymodule.MyReporterClass.
-output-format=text
-
-# Tells whether to display a full report or only the messages.
-reports=no
-
-# Activate the evaluation score.
-score=yes
-
-
-[REFACTORING]
-
-# Maximum number of nested blocks for function / method body
-max-nested-blocks=5
-
-# Complete name of functions that never returns. When checking for
-# inconsistent-return-statements if a never returning function is called then
-# it will be considered as an explicit return statement and no message will be
-# printed.
-never-returning-functions=sys.exit
-
-
-[VARIABLES]
-
-# List of additional names supposed to be defined in builtins. Remember that
-# you should avoid defining new builtins when possible.
-additional-builtins=
-
-# Tells whether unused global variables should be treated as a violation.
-allow-global-unused-variables=yes
-
-# List of strings which can identify a callback function by name. A callback
-# name must start or end with one of those strings.
-callbacks=cb_,
-          _cb
-
-# A regular expression matching the name of dummy variables (i.e. expected to
-# not be used).
-dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
-
-# Argument names that match this expression will be ignored. Default to name
-# with leading underscore.
-ignored-argument-names=_.*|^ignored_|^unused_
-
-# Tells whether we should check for unused import in __init__ files.
-init-import=no
-
-# List of qualified module names which can have objects that can redefine
-# builtins.
-redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
-
-
-[MISCELLANEOUS]
-
-# List of note tags to take in consideration, separated by a comma.
-notes=FIXME,
-      XXX
-
-
-[SPELLING]
-
-# Limits count of emitted suggestions for spelling mistakes.
-max-spelling-suggestions=4
-
-# Spelling dictionary name. Available dictionaries: none. To make it work,
-# install the python-enchant package.
-spelling-dict=
-
-# List of comma separated words that should not be checked.
-spelling-ignore-words=
-
-# A path to a file that contains the private dictionary; one word per line.
-spelling-private-dict-file=
-
-# Tells whether to store unknown words to the private dictionary (see the
-# --spelling-private-dict-file option) instead of raising a message.
-spelling-store-unknown-words=no
-
-
-[TYPECHECK]
-
-# List of decorators that produce context managers, such as
-# contextlib.contextmanager. Add to this list to register other decorators that
-# produce valid context managers.
-contextmanager-decorators=contextlib.contextmanager
-
-# List of members which are set dynamically and missed by pylint inference
-# system, and so shouldn't trigger E1101 when accessed. Python regular
-# expressions are accepted.
-generated-members=
-
-# Tells whether missing members accessed in mixin class should be ignored. A
-# mixin class is detected if its name ends with "mixin" (case insensitive).
-ignore-mixin-members=yes
-
-# Tells whether to warn about missing members when the owner of the attribute
-# is inferred to be None.
-ignore-none=yes
-
-# This flag controls whether pylint should warn about no-member and similar
-# checks whenever an opaque object is returned when inferring. The inference
-# can return multiple potential results while evaluating a Python object, but
-# some branches might not be evaluated, which results in partial inference. In
-# that case, it might be useful to still emit no-member and other checks for
-# the rest of the inferred objects.
-ignore-on-opaque-inference=yes
-
-# List of class names for which member attributes should not be checked (useful
-# for classes with dynamically set attributes). This supports the use of
-# qualified names.
-ignored-classes=optparse.Values,thread._local,_thread._local
-
-# List of module names for which member attributes should not be checked
-# (useful for modules/projects where namespaces are manipulated during runtime
-# and thus existing member attributes cannot be deduced by static analysis). It
-# supports qualified module names, as well as Unix pattern matching.
-ignored-modules=cv2
-
-# Show a hint with possible names when a member name was not found. The aspect
-# of finding the hint is based on edit distance.
-missing-member-hint=yes
-
-# The minimum edit distance a name should have in order to be considered a
-# similar match for a missing member name.
-missing-member-hint-distance=1
-
-# The total number of similar names that should be taken in consideration when
-# showing a hint for a missing member.
-missing-member-max-choices=1
-
-# List of decorators that change the signature of a decorated function.
-signature-mutators=
-
-
-[BASIC]
-
-# Naming style matching correct argument names.
-argument-naming-style=snake_case
-
-# Regular expression matching correct argument names. Overrides argument-
-# naming-style.
-#argument-rgx=
-
-# Naming style matching correct attribute names.
-attr-naming-style=snake_case
-
-# Regular expression matching correct attribute names. Overrides attr-naming-
-# style.
-#attr-rgx=
-
-# Bad variable names which should always be refused, separated by a comma.
-bad-names=foo,
-          bar,
-          baz,
-          toto,
-          tutu,
-          tata
-
-# Naming style matching correct class attribute names.
-class-attribute-naming-style=any
-
-# Regular expression matching correct class attribute names. Overrides class-
-# attribute-naming-style.
-#class-attribute-rgx=
-
-# Naming style matching correct class names.
-class-naming-style=PascalCase
-
-# Regular expression matching correct class names. Overrides class-naming-
-# style.
-#class-rgx=
-
-# Naming style matching correct constant names.
-const-naming-style=UPPER_CASE
-
-# Regular expression matching correct constant names. Overrides const-naming-
-# style.
-#const-rgx=
-
-# Minimum line length for functions/classes that require docstrings, shorter
-# ones are exempt.
-docstring-min-length=-1
-
-# Naming style matching correct function names.
-function-naming-style=snake_case
-
-# Regular expression matching correct function names. Overrides function-
-# naming-style.
-#function-rgx=
-
-# Good variable names which should always be accepted, separated by a comma.
-good-names=e,
-           f,
-           i,
-           j,
-           k,
-           df,
-           ex,
-           Run,
-           _
-
-# Include a hint for the correct naming format with invalid-name.
-include-naming-hint=no
-
-# Naming style matching correct inline iteration names.
-inlinevar-naming-style=any
-
-# Regular expression matching correct inline iteration names. Overrides
-# inlinevar-naming-style.
-#inlinevar-rgx=
-
-# Naming style matching correct method names.
-method-naming-style=snake_case
-
-# Regular expression matching correct method names. Overrides method-naming-
-# style.
-#method-rgx=
-
-# Naming style matching correct module names.
-module-naming-style=snake_case
-
-# Regular expression matching correct module names. Overrides module-naming-
-# style.
-#module-rgx=
-
-# Colon-delimited sets of names that determine each other's naming style when
-# the name regexes allow several styles.
-name-group=
-
-# Regular expression which should only match function or class names that do
-# not require a docstring.
-no-docstring-rgx=^_
-
-# List of decorators that produce properties, such as abc.abstractproperty. Add
-# to this list to register other decorators that produce valid properties.
-# These decorators are taken in consideration only for invalid-name.
-property-classes=abc.abstractproperty
-
-# Naming style matching correct variable names.
-variable-naming-style=snake_case
-
-# Regular expression matching correct variable names. Overrides variable-
-# naming-style.
-#variable-rgx=
-
-
-[STRING]
-
-# This flag controls whether the implicit-str-concat-in-sequence should
-# generate a warning on implicit string concatenation in sequences defined over
-# several lines.
-check-str-concat-over-line-jumps=no
-
-
-[LOGGING]
-
-# Format style used to check logging format string. `old` means using %
-# formatting, `new` is for `{}` formatting,and `fstr` is for f-strings.
-logging-format-style=old
-
-# Logging modules to check that the string format arguments are in logging
-# function parameter format.
-logging-modules=logging
-
-
-[FORMAT]
-
-# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
-expected-line-ending-format=
-
-# Regexp for a line that is allowed to be longer than the limit.
-ignore-long-lines=^\s*(# )?<?https?://\S+>?$
-
-# Number of spaces of indent required inside a hanging or continued line.
-indent-after-paren=4
-
-# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
-# tab).
-indent-string='    '
-
-# Maximum number of characters on a single line.
-max-line-length=100
-
-# Maximum number of lines in a module.
-max-module-lines=1000
-
-# List of optional constructs for which whitespace checking is disabled. `dict-
-# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
-# `trailing-comma` allows a space between comma and closing bracket: (a, ).
-# `empty-line` allows space-only lines.
-no-space-check=trailing-comma,
-               dict-separator
-
-# Allow the body of a class to be on the same line as the declaration if body
-# contains single statement.
-single-line-class-stmt=no
-
-# Allow the body of an if to be on the same line as the test if there is no
-# else.
-single-line-if-stmt=no
-
-
-[SIMILARITIES]
-
-# Ignore comments when computing similarities.
-ignore-comments=yes
-
-# Ignore docstrings when computing similarities.
-ignore-docstrings=yes
-
-# Ignore imports when computing similarities.
-ignore-imports=no
-
-# Minimum lines number of a similarity.
-min-similarity-lines=7
-
-
-[DESIGN]
-
-# Maximum number of arguments for function / method.
-max-args=5
-
-# Maximum number of attributes for a class (see R0902).
-max-attributes=7
-
-# Maximum number of boolean expressions in an if statement (see R0916).
-max-bool-expr=5
-
-# Maximum number of branch for function / method body.
-max-branches=12
-
-# Maximum number of locals for function / method body.
-max-locals=15
-
-# Maximum number of parents for a class (see R0901).
-max-parents=7
-
-# Maximum number of public methods for a class (see R0904).
-max-public-methods=20
-
-# Maximum number of return / yield for function / method body.
-max-returns=6
-
-# Maximum number of statements in function / method body.
-max-statements=50
-
-# Minimum number of public methods for a class (see R0903).
-min-public-methods=2
-
-
-[IMPORTS]
-
-# List of modules that can be imported at any level, not just the top level
-# one.
-allow-any-import-level=
-
-# Allow wildcard imports from modules that define __all__.
-allow-wildcard-with-all=no
-
-# Analyse import fallback blocks. This can be used to support both Python 2 and
-# 3 compatible code, which means that the block might have code that exists
-# only in one or another interpreter, leading to false positives when analysed.
-analyse-fallback-blocks=no
-
-# Deprecated modules which should not be used, separated by a comma.
-deprecated-modules=optparse,tkinter.tix
-
-# Create a graph of external dependencies in the given file (report RP0402 must
-# not be disabled).
-ext-import-graph=
-
-# Create a graph of every (i.e. internal and external) dependencies in the
-# given file (report RP0402 must not be disabled).
-import-graph=
-
-# Create a graph of internal dependencies in the given file (report RP0402 must
-# not be disabled).
-int-import-graph=
-
-# Force import order to recognize a module as part of the standard
-# compatibility libraries.
-known-standard-library=
-
-# Force import order to recognize a module as part of a third party library.
-known-third-party=enchant
-
-# Couples of modules and preferred modules, separated by a comma.
-preferred-modules=
-
-
-[CLASSES]
-
-# List of method names used to declare (i.e. assign) instance attributes.
-defining-attr-methods=__init__,
-                      __new__,
-                      setUp,
-                      __post_init__
-
-# List of member names, which should be excluded from the protected access
-# warning.
-exclude-protected=_asdict,
-                  _fields,
-                  _replace,
-                  _source,
-                  _make
-
-# List of valid names for the first argument in a class method.
-valid-classmethod-first-arg=cls
-
-# List of valid names for the first argument in a metaclass class method.
-valid-metaclass-classmethod-first-arg=cls
-
-
-[EXCEPTIONS]
-
-# Exceptions that will emit a warning when being caught. Defaults to
-# "BaseException, Exception".
-overgeneral-exceptions=BaseException,
-                       Exception
diff --git a/README.md b/README.md
index 4d0e4092b..55f7f902a 100644
--- a/README.md
+++ b/README.md
@@ -87,5 +87,13 @@ Example use cases of Express include:
 
 Check out the [examples folder](examples) for some illustrations.
 
+## Contributing
 
+We use [poetry](https://python-poetry.org/docs/) and pre-commit to enable a smooth developer flow. Run the following commands to 
+set up your development environment:
 
+```commandline
+pip install poetry
+poetry install
+pre-commit install
+```
\ No newline at end of file
diff --git a/express/components/__init__.py b/express/components/__init__.py
index a9429195d..be2e64770 100644
--- a/express/components/__init__.py
+++ b/express/components/__init__.py
@@ -1 +1,3 @@
-from .pandas_components import PandasLoaderComponent, PandasTransformComponent
\ No newline at end of file
+"""Module holding base components to build on."""
+
+from .pandas_components import PandasLoaderComponent, PandasTransformComponent
diff --git a/express/components/common.py b/express/components/common.py
index 5257b8151..3dbc84889 100644
--- a/express/components/common.py
+++ b/express/components/common.py
@@ -18,11 +18,11 @@
 STORAGE_MODULE_PATH = StorageHandlerModule().to_dict()[os.environ.get('CLOUD_ENV', 'GCP')]
 STORAGE_HANDLER = importlib.import_module(STORAGE_MODULE_PATH).StorageHandler()
 
-TIndex = TypeVar('TIndex')
-TData = TypeVar('TData')
+IndexT = TypeVar('IndexT')
+DataT = TypeVar('DataT')
 
 
-class ExpressDataset(ABC, Generic[TIndex, TData]):
+class ExpressDataset(ABC, Generic[IndexT, DataT]):
     """
     An abstract wrapper class that gives read access to Express Datasets.
     It can be extended to create a draft for a new (output) dataset.
@@ -36,19 +36,19 @@ def __init__(self, manifest: DataManifest):
         self.manifest = manifest
         self._index_data = self.load_index()
 
-    def extend(self) -> 'ExpressDatasetDraft[TIndex, TData]':
+    def extend(self) -> 'ExpressDatasetDraft[IndexT, DataT]':
         """
         Create an `ExpressDatasetDraft` that extends this dataset.
         """
         return ExpressDatasetDraft.extend(self)
 
     @abstractmethod
-    def load_index(self) -> TIndex:
+    def load_index(self) -> IndexT:
         """
         Loads the index data.
         """
 
-    def load(self, data_source: str, for_index: Optional[TIndex] = None) -> TData:
+    def load(self, data_source: str, for_index: Optional[IndexT] = None) -> DataT:
         """
         Load data from a named data source.
 
@@ -70,7 +70,7 @@ def load(self, data_source: str, for_index: Optional[TIndex] = None) -> TData:
 
     @staticmethod
     @abstractmethod
-    def _load_data_source(data_source: DataSource, for_index: Optional[TIndex]) -> TData:
+    def _load_data_source(data_source: DataSource, index_filter: Optional[IndexT]) -> DataT:
         """
         Load data from a (possibly remote) path.
         This method can be subclassed to present data in a specific way. For example, as a local
@@ -78,7 +78,7 @@ def _load_data_source(data_source: DataSource, for_index: Optional[TIndex]) -> T
 
         Args:
             data_source (DataSource): The DataSource to load the data from.
-            for_index (Optional[TIndex]): Pass in an index to filter the data on.
+            index_filter (Optional[TIndex]): Pass in an index to filter the data on.
 
         Returns:
             TData: Data of type TData
@@ -87,7 +87,7 @@ def _load_data_source(data_source: DataSource, for_index: Optional[TIndex]) -> T
     # TODO should we keep track of both input and output in the manifests?
 
 
-class ExpressDatasetDraft(ABC, Generic[TIndex, TData]):
+class ExpressDatasetDraft(ABC, Generic[IndexT, DataT]):
     """
     Draft of an `ExpressDataset`, tracking both preexisting data sources and local data that still
     needs to be uploaded.
@@ -103,11 +103,11 @@ class ExpressDatasetDraft(ABC, Generic[TIndex, TData]):
     """
 
     def __init__(self,
-                 index: Optional[Union[DataSource, TIndex]] = None,
-                 data_sources: Dict[str, Union[DataSource, TData]] = None,
-                 extending_dataset: Optional[ExpressDataset[TIndex, TData]] = None):
+                 index: Optional[Union[DataSource, IndexT]] = None,
+                 data_sources: Dict[str, Union[DataSource, DataT]] = None,
+                 extending_dataset: Optional[ExpressDataset[IndexT, DataT]] = None):
         self.index = index
-        self.data_sources = data_sources or dict()
+        self.data_sources = data_sources or {}
         if not (extending_dataset is None) ^ (index is None):
             raise ValueError(
                 "A dataset draft needs to have a single valid index. Either pass an index "
@@ -124,14 +124,14 @@ def __init__(self,
                 self.with_data_source(name, dataset, replace_ok=False)
 
     @classmethod
-    def extend(cls, dataset: ExpressDataset[TIndex, TData]) -> 'ExpressDatasetDraft[TIndex, TData]':
+    def extend(cls, dataset: ExpressDataset[IndexT, DataT]) -> 'ExpressDatasetDraft[IndexT, DataT]':
         """
         Creates a new Express Dataset draft extending the given dataset, which will take over both
          its index and all data sources.
         """
         return cls(extending_dataset=dataset)
 
-    def with_index(self, index: TData) -> 'ExpressDatasetDraft[TIndex, TData]':
+    def with_index(self, index: DataT) -> 'ExpressDatasetDraft[IndexT, DataT]':
         """
         Replaces the current index with the given index.
 
@@ -141,8 +141,8 @@ def with_index(self, index: TData) -> 'ExpressDatasetDraft[TIndex, TData]':
         self.index = index
         return self
 
-    def with_data_source(self, name: str, data: Union[TData, DataSource], replace_ok=False) \
-            -> 'ExpressDatasetDraft[TIndex, TData]':
+    def with_data_source(self, name: str, data: Union[DataT, DataSource], replace_ok=False) \
+            -> 'ExpressDatasetDraft[IndexT, DataT]':
         """
         Adds a new data source or replaces a preexisting data source with the same name.
 
@@ -166,7 +166,7 @@ def with_data_source(self, name: str, data: Union[TData, DataSource], replace_ok
 
 
 # pylint: disable=too-few-public-methods
-class ExpressDatasetHandler(ABC, Generic[TIndex, TData]):
+class ExpressDatasetHandler(ABC, Generic[IndexT, DataT]):
     """
     Abstract mixin class to read from and write to Express Datasets.
     Can be subclassed to deal with a specific type of parsed data representations, like reading to a
@@ -192,7 +192,7 @@ def _path_for_upload(metadata: Metadata, name: str) -> str:
 
     @classmethod
     @abstractmethod
-    def _load_dataset(cls, input_manifest: DataManifest) -> ExpressDataset[TIndex, TData]:
+    def _load_dataset(cls, input_manifest: DataManifest) -> ExpressDataset[IndexT, DataT]:
         """
         Parses a manifest to an ExpressDataset of a specific type, for downstream use by transform
         components.
@@ -200,7 +200,7 @@ def _load_dataset(cls, input_manifest: DataManifest) -> ExpressDataset[TIndex, T
 
     @classmethod
     @abstractmethod
-    def _upload_index(cls, index: TIndex, remote_path: str) -> DataSource:
+    def _upload_index(cls, index: IndexT, remote_path: str) -> DataSource:
         """
         Uploads index data of a certain type as parquet and creates a new DataSource.
 
@@ -214,7 +214,7 @@ def _upload_index(cls, index: TIndex, remote_path: str) -> DataSource:
 
     @classmethod
     @abstractmethod
-    def _upload_data_source(cls, name: str, data: TData, remote_path: str) -> DataSource:
+    def _upload_data_source(cls, name: str, data: DataT, remote_path: str) -> DataSource:
         """
         Uploads data of a certain type as parquet and creates a new DataSource.
 
@@ -265,7 +265,7 @@ def _update_metadata(cls,
         return Metadata.from_dict(metadata_dict)
 
     @classmethod
-    def _create_output_dataset(cls, draft: ExpressDatasetDraft[TIndex, TData],
+    def _create_output_dataset(cls, draft: ExpressDatasetDraft[IndexT, DataT],
                                metadata: Metadata, save_path: str) -> DataManifest:
         """
         Processes a dataset draft of a specific type, uploading all local data to storage and
@@ -277,7 +277,7 @@ def _create_output_dataset(cls, draft: ExpressDatasetDraft[TIndex, TData],
             remote_path = cls._path_for_upload(metadata, "index")
             index = cls._upload_index(draft.index, remote_path)
 
-        data_sources = dict()
+        data_sources = {}
         for name, dataset in draft.data_sources.items():
             if isinstance(dataset, DataSource):
                 data_sources[name] = dataset
@@ -290,11 +290,11 @@ def _create_output_dataset(cls, draft: ExpressDatasetDraft[TIndex, TData],
             metadata=metadata
         )
         Path(save_path).parent.mkdir(parents=True, exist_ok=True)
-        Path(save_path).write_text(manifest.to_json())
+        Path(save_path).write_text(manifest.to_json(), encoding="utf-8")
         return manifest
 
 
-class ExpressTransformComponent(ExpressDatasetHandler, Generic[TIndex, TData]):
+class ExpressTransformComponent(ExpressDatasetHandler, Generic[IndexT, DataT]):
     """
     An abstract component that facilitates end-to-end transformation of Express Datasets.
     It can be subclassed or used with a mixin to support reading and writing of a specific data
@@ -347,9 +347,9 @@ def _parse_args(cls):
 
     @classmethod
     @abstractmethod
-    def transform(cls, data: ExpressDataset[TIndex, TData],
+    def transform(cls, data: ExpressDataset[IndexT, DataT],
                   extra_args: Optional[Dict[str, Union[str, int, float, bool]]] = None) \
-            -> ExpressDatasetDraft[TIndex, TData]:
+            -> ExpressDatasetDraft[IndexT, DataT]:
         """
         Applies transformations to the input dataset and creates a draft for a new dataset.
         The recommended pattern for a transform is to extend the input dataset with a filtered index
@@ -371,7 +371,7 @@ def transform(cls, data: ExpressDataset[TIndex, TData],
         """
 
 
-class ExpressLoaderComponent(ExpressDatasetHandler, Generic[TIndex, TData]):
+class ExpressLoaderComponent(ExpressDatasetHandler, Generic[IndexT, DataT]):
     """
     An abstract component that facilitates creation of a new Express Dataset.
     This will commonly be the first component in an Express Pipeline. It can be subclassed or used
@@ -421,7 +421,7 @@ def _parse_args(cls):
     @classmethod
     @abstractmethod
     def load(cls, extra_args: Optional[Dict[str, Union[str, int, float, bool]]] = None) -> \
-            ExpressDatasetDraft[TIndex, TData]:
+            ExpressDatasetDraft[IndexT, DataT]:
         """
         Loads data from an arbitrary source to create a draft for a new dataset.
 
@@ -432,4 +432,4 @@ def load(cls, extra_args: Optional[Dict[str, Union[str, int, float, bool]]] = No
         Returns:
             ExpressDatasetDraft[TIndex, TData]: draft of output dataset, to be uploaded after this
              loader completes.
-        """
\ No newline at end of file
+        """
diff --git a/express/components/pandas_components.py b/express/components/pandas_components.py
index f80cd64de..0a44e8fcb 100644
--- a/express/components/pandas_components.py
+++ b/express/components/pandas_components.py
@@ -8,10 +8,15 @@
 
 import pandas as pd
 
-from .common import ExpressDatasetHandler, ExpressDataset, ExpressTransformComponent, \
-    ExpressDatasetDraft, ExpressLoaderComponent
-from express.storage_interface import StorageHandlerModule
+from express.components.common import (
+    ExpressDatasetHandler,
+    ExpressDataset,
+    ExpressTransformComponent,
+    ExpressDatasetDraft,
+    ExpressLoaderComponent
+)
 from express.manifest import DataManifest, DataSource, DataType
+from express.storage_interface import StorageHandlerModule
 
 # Define interface of pandas draft
 # pylint: disable=unsubscriptable-object
@@ -36,7 +41,7 @@ def load_index(self) -> pd.Series:
     @staticmethod
     def _load_data_source(data_source: DataSource,
                           index_filter: Union[pd.DataFrame, pd.Series, List[str]]) -> pd.DataFrame:
-        if data_source.type != DataType.parquet:
+        if data_source.type != DataType.PARQUET:
             raise TypeError("Only reading from parquet is currently supported.")
 
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -78,7 +83,7 @@ def _upload_parquet(data: Union[pd.DataFrame, pd.Series],
                                       destination=fully_qualified_blob_path)
             return DataSource(
                 location=fully_qualified_blob_path,
-                type=DataType.parquet,
+                type=DataType.PARQUET,
                 extensions=["parquet"],
                 n_files=1,
                 n_items=len(data)
@@ -127,4 +132,4 @@ class PandasLoaderComponent(PandasDatasetHandler, ExpressLoaderComponent[List[st
     @abstractmethod
     def load(cls, extra_args: Optional[Dict[str, Union[str, int, float, bool]]] = None) \
             -> PandasDatasetDraft:
-        """Load initial dataset"""
\ No newline at end of file
+        """Load initial dataset"""
diff --git a/express/gcp_storage.py b/express/gcp_storage.py
index 67841f71c..3c1188a9b 100644
--- a/express/gcp_storage.py
+++ b/express/gcp_storage.py
@@ -11,7 +11,7 @@
 from express.storage_interface import StorageHandlerInterface, DecodedBlobPath
 from express import io
 
-LOGGER = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 
 
 class StorageHandler(StorageHandlerInterface):
@@ -87,7 +87,7 @@ def copy_folder(source: str, destination: str, copy_source_content: bool = False
             ["gsutil", '-o', '"GSUtil:use_gcloud_storage=True"', '-q', "-m", "cp", "-r",
              source, destination], check=True)
 
-        LOGGER.info("Copying folder from %s to %s [DONE]", source, destination)
+        logger.info("Copying folder from %s to %s [DONE]", source, destination)
 
         folder_name = io.get_file_name(source)
 
@@ -107,19 +107,19 @@ def copy_files(source_files: List[str], destination: str):
         # Write file paths to a text file before piping
         with tempfile.TemporaryDirectory() as temp_folder:
             upload_text_file = os.path.join(temp_folder, "files_to_upload.txt")
-            with open(upload_text_file, "w") as out_file:
+            with open(upload_text_file, "w", encoding="utf-8") as out_file:
                 for file in source_files:
                     out_file.write(file)
                     out_file.write("\n")
 
             # Write files to tmp director
-            pipe_file_list = subprocess.Popen(["cat", upload_text_file],  # nosec
-                                              stdout=subprocess.PIPE)
-            subprocess.call(  # nosec
-                ['gsutil', '-o', '"GSUtil:use_gcloud_storage=True"', '-q', '-m', 'cp', '-I',
-                 destination], stdin=pipe_file_list.stdout)
+            with subprocess.Popen(["cat", upload_text_file], stdout=subprocess.PIPE) \
+                    as pipe_file_list:
+                subprocess.call(  # nosec
+                    ['gsutil', '-o', '"GSUtil:use_gcloud_storage=True"', '-q', '-m', 'cp', '-I',
+                     destination], stdin=pipe_file_list.stdout)
 
-            LOGGER.info("A total of %s files were copied to %s", len(source_files), destination)
+            logger.info("A total of %s files were copied to %s", len(source_files), destination)
 
     def copy_file(self, source_file: str, destination: str) -> str:
         """
@@ -133,4 +133,4 @@ def copy_file(self, source_file: str, destination: str) -> str:
         """
         self.copy_files([source_file], destination)
         file_name = io.get_file_name(source_file, return_extension=True)
-        return os.path.join(destination, file_name)
\ No newline at end of file
+        return os.path.join(destination, file_name)
diff --git a/express/io.py b/express/io.py
index a8117fa20..9011cb072 100644
--- a/express/io.py
+++ b/express/io.py
@@ -5,7 +5,7 @@
 import logging
 import os
 
-LOGGER = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 
 
 def get_file_extension(file_url: str) -> str:
diff --git a/express/kfp.py b/express/kfp.py
index 0f9451ba8..dc11189c4 100644
--- a/express/kfp.py
+++ b/express/kfp.py
@@ -2,9 +2,7 @@
 import ast
 import logging
 
-import torch
-
-LOGGER = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 
 
 def parse_kfp_list(kfp_parsed_string: str) -> list:
diff --git a/express/logger.py b/express/logger.py
index 3e1dbd945..6ef49da18 100644
--- a/express/logger.py
+++ b/express/logger.py
@@ -19,4 +19,4 @@ def configure_logging() -> None:
     handler = logging.StreamHandler(stream=sys.stdout)
     handler.setFormatter(
         logging.Formatter(fmt='[%(asctime)s | %(name)s | %(levelname)s] %(message)s'))
-    logger.addHandler(handler)
\ No newline at end of file
+    logger.addHandler(handler)
diff --git a/express/manifest.py b/express/manifest.py
index 4c0005757..3e277c356 100644
--- a/express/manifest.py
+++ b/express/manifest.py
@@ -14,8 +14,8 @@ class DataType(str, Enum):
     """
     Supported types for stored data.
     """
-    parquet = 'parquet'
-    blob = 'blob'
+    PARQUET = 'parquet'
+    BLOB = 'blob'
 
     @classmethod
     def is_valid(cls, data_type: str) -> bool:
@@ -90,7 +90,7 @@ class DataManifest:
     metadata: Metadata = field(default_factory=Metadata)  # TODO: make mandatory during construction
 
     def __post_init__(self):
-        if (self.index.type != DataType.parquet) or (not DataType.is_valid(self.index.type)):
+        if (self.index.type != DataType.PARQUET) or (not DataType.is_valid(self.index.type)):
             raise TypeError("Index needs to be of type 'parquet'.")
         for name, dataset in self.data_sources.items():
             if not DataType.is_valid(dataset.type):
@@ -100,7 +100,7 @@ def __post_init__(self):
     @classmethod
     def from_path(cls, manifest_path):
         """Load data manifest from a given manifest path"""
-        with open(manifest_path) as f:
-            manifest_load = json.load(f)
+        with open(manifest_path, encoding="utf-8") as file_:
+            manifest_load = json.load(file_)
             # pylint: disable=no-member
-            return DataManifest.from_dict(manifest_load)
\ No newline at end of file
+            return DataManifest.from_dict(manifest_load)
diff --git a/express/storage_interface.py b/express/storage_interface.py
index b9b656303..c53431561 100644
--- a/express/storage_interface.py
+++ b/express/storage_interface.py
@@ -83,9 +83,8 @@ def copy_folder(source: str, destination: str, copy_source_content: bool = False
             str: the path to the destination copied folder
         """
 
-    @staticmethod
     @abstractmethod
-    def copy_file(source_file: str, destination: str) -> str:
+    def copy_file(self, source_file: str, destination: str) -> str:
         """
         Function that copies source files from a remote/local source to a local/remote
         location respectively
@@ -128,4 +127,4 @@ def copy_parquet(self, parquet_path: str, destination: str) -> str:
                 parquet_path,
                 destination)
 
-        return local_parquet_path
\ No newline at end of file
+        return local_parquet_path
diff --git a/pylint.py b/pylint.py
deleted file mode 100644
index 0e0f05c7c..000000000
--- a/pylint.py
+++ /dev/null
@@ -1,104 +0,0 @@
-"""
-Pylint script to run pylint from the root of a module. This ensures that
-all paths and imports are resolved correctly.
-"""
-
-import os
-import sys
-import logging
-import argparse
-import subprocess  # nosec
-from typing import Any
-from pathlib import Path
-from setuptools import find_packages
-
-# pylint: disable=invalid-name
-parser = argparse.ArgumentParser()
-module_parser = parser.add_mutually_exclusive_group()
-module_parser.add_argument('--module',
-                           help='Module to pylint',
-                           type=str,
-                           action='append')
-module_parser.add_argument('--modules-file',
-                           help='Path to file with modules',
-                           type=str)
-
-GREEN, RED, NC = '\033[0;32m', '\033[0;31m', '\033[0m'
-
-
-def install_requirements(module_path: str) -> None:
-    """Install the requirements.txt file of a module on best effort basis. If
-    the installation fails, the error is printed but ignored.
-
-    Args:
-        module_path: Path to the root of the module.
-
-    """
-    try:
-        subprocess.run([sys.executable, '-m', 'pip', 'install', '-r',  # nosec
-                        'requirements.txt', '--force-reinstall'],
-                       cwd=module_path,
-                       check=True,
-                       stdout=subprocess.PIPE,
-                       stderr=subprocess.PIPE)
-    except subprocess.CalledProcessError as e:
-        logging.error('Failed to install requirements')
-        logging.error(e.stderr.decode())
-
-
-def pylint_module(module_path: str) -> int:
-    """Run pylint on a module with correct path configuration.
-
-    Args:
-        module_path: Path to the module.
-
-    Returns:
-        Pylint return code.
-
-    """
-    print(f'Linting module {module_path}')
-    requirements = Path(os.path.join(module_path, "requirements.txt"))
-    if requirements.exists():
-        install_requirements(module_path)
-
-    dir_path = os.path.dirname(os.path.realpath(__file__))
-    init_hook = f'import sys; sys.path.append(r"{module_path}")'
-
-    process = subprocess.run(['pylint', module_path,  # nosec
-                              f'--rcfile={os.path.join(dir_path, ".pylintrc")}',
-                              f'--init-hook={init_hook}'],
-                             check=False)
-
-    return process.returncode
-
-
-def main(args: Any) -> None:
-    """Run pylint on provided modules."""
-    if args.modules_file:
-        # Read in all modules to be pylinted
-        with open(args.modules_file, 'r') as f:
-            modules = [line.strip('\n') for line in f.readlines()]
-    elif args.module:
-        modules = args.module
-    else:
-        # If no modules were given, detect them automatically
-        logging.warning('No modules were supplied, detecting them automatically')
-        modules = [module for module in find_packages() if '.' not in module]
-
-    logging.warning('Running pylint for %s', modules)
-
-    # Run pylint for each module from the module directory
-    failures = [module for module in modules
-                if pylint_module(os.path.abspath(module)) != 0]
-
-    # Print some encouraging messages with info
-    if failures:
-        print(RED + "Failure :'(" + NC)
-        print("Following modules failed: {}".format(failures))
-        sys.exit(1)
-    else:
-        print(GREEN + 'Success!' + NC)
-
-
-if __name__ == '__main__':
-    main(parser.parse_args())
diff --git a/pyproject.toml b/pyproject.toml
index 681725f9d..71ffcbd69 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,8 @@ pandas = "^1.3.5"
 [tool.poetry.group.test.dependencies]
 bandit = { version = "^1.7.4", extras = ["toml"] }
 liccheck = "^0.7.3"
-pylint = "^2.5.3"
+pylint = "2.16.4"
+pre-commit = "^3.1.1"
 
 [tool.bandit]
 exclude_dirs = ["*/mlpipelines/components/sd_finetuning_component/src/train_text_to_image.py"]