From 97acdb8db163e29b31832d0ccc4219cce1dbf8cd Mon Sep 17 00:00:00 2001
From: Sabrina Yan <9669990+violetbrina@users.noreply.github.com>
Date: Wed, 18 Dec 2024 15:25:25 +1100
Subject: [PATCH] fix(docs/update_readme.py,pyproject.toml-,+7): fix linter
 warnings

---
 docs/update_readme.py          |  10 +-
 pyproject.toml                 | 223 ++++++++++++++++------
 src/cpg_flow/inputs.py         |   1 -
 src/cpg_flow/metamist.py       |  72 +++++------
 src/cpg_flow/stage.py          |  49 +++-----
 src/cpg_flow/status.py         |   9 +-
 src/cpg_flow/targets/cohort.py |   7 +-
 src/cpg_flow/workflow.py       |   3 +-
 tests/test_cohort.py           |   4 +-
 9 files changed, 177 insertions(+), 201 deletions(-)

diff --git a/docs/update_readme.py b/docs/update_readme.py
index 054f7579..18de6714 100644
--- a/docs/update_readme.py
+++ b/docs/update_readme.py
@@ -11,7 +11,7 @@
 
 
 def load_descriptions(description_file):
-    with open(description_file, 'r') as f:
+    with open(description_file) as f:
         descriptions = yaml.safe_load(f)
     return descriptions
 
@@ -22,7 +22,7 @@ def parse_workflows(directory, description_file):
     for filename in os.listdir(directory):
         if filename.endswith('.yaml') or filename.endswith('.yml'):
             filepath = os.path.join(directory, filename)
-            with open(filepath, 'r') as file:
+            with open(filepath) as file:
                 try:
                     data = yaml.load(file, Loader=yaml.BaseLoader)
                     workflow_name = data.get('name', 'Unnamed Workflow')
@@ -57,9 +57,9 @@ def parse_triggers(on_field):
             else:
                 triggers.append(f'`{trigger}`')
         return ' and '.join(triggers)
-    elif isinstance(on_field, list): # Handle `on: [event1, event2]`
+    if isinstance(on_field, list): # Handle `on: [event1, event2]`
         return ', '.join([f'`{event}`' for event in on_field])
-    elif isinstance(on_field, str): # Handle `on: event`
+    if isinstance(on_field, str): # Handle `on: event`
         return f'`{on_field}`'
     return '`manual`' # Default if `on` is missing
 
@@ -77,7 +77,7 @@ def generate_markdown(workflows):
 
 
 def update_readme(markdown, readme_file):
-    with open(readme_file, 'r') as file:
+    with open(readme_file) as file:
         content = file.read()
 
     start_marker = '### 🎢 Workflows'
diff --git a/pyproject.toml b/pyproject.toml
index dbc3bd8f..051c6b94 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,6 @@ dependencies = [
     "grpcio-status>=1.48,<1.50",
    "metamist>=6.9.0",
     "networkx>=3.4.2",
-    "coloredlogs>=15.0.1",
 ]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -144,146 +143,140 @@ fixable = [
 ]
 
 ignore = [
+    "A001",
+    "A002",
+    "ANN001",
+    "ANN002",
+    "ANN003",
     "ANN101", # Missing type annotation for self in method
     "ANN102", # Missing type annotation for `cls` in classmethod
     "ANN201", # Missing return type annotation for public function
+    "ANN202",
     "ANN204", # Missing type annotation for special method `__init__`
+    "ANN205",
     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in `**kwargs`
+    "ARG001", # Unused function argument
+    "ARG002", # Unused method argument
+    "ARG005",
+    "B006",
+    "B007",
+    "B008",
+    "B023",
+    "B904",
+    "BLE001",
+    "C400",
+    "C401",
+    "C403",
+    "C405",
+    "C408",
+    "C416",
+    "C417",
+    "C419",
+    "C901",
+    "COM819",
+    "DTZ002",
+    "DTZ005",
+    "DTZ011",
+    "E401",
+    "E402",
     "E501", # Line length too long
+    "E712",
+    "E713",
     "E731", # Do not assign a lambda expression, use a def
     "E741", # Ambiguous variable name
-    "G004", # Logging statement uses f-string
-    "PLR0911", # Too many return statements
-    "PLR0912", # Too many branches
-    "PLR0913", # Too many arguments to function call
-    "PLR0915", # Too many statements
-    "PLW0603", # Using the global statement to update `` is discouraged
-    "PT018", # Assertion should be broken down into multiple parts
-    "Q000", # Single quotes found but double quotes preferred
-    "S101", # Use of assert detected
-    "SLF001", # Private member accessed: `_preemptible`
-
-    "ARG001", # Unused function argument
-    "ARG002", # Unused method argument
-
-    "PLR2004", # Magic value used
-
-    "ANN001",
-    "ANN202",
-    "C408",
-    "TID252",
-    "RET504",
-    "ERA001",
-    "UP032",
-    "RUF100",
-    "ISC001",
-    "PIE804",
-    "F401",
-    "C901",
-    "W605",
-    "RET505",
-    "ANN003",
-    "RUF013",
-    "UP031",
-    "RUF010",
-    "B006",
-    "ANN002",
-    "B023",
+    "ERA001", # Found commented-out code
     "EXE001",
+    "F401",
+    "F403",
+    "F821",
+    "F841",
     "G001",
-    "SIM108",
-    "RUF005",
     "G002",
-    "PD901",
-    "N999",
-    "SIM118",
-    "SIM102",
-    "PLW2901",
-    "S603",
-    "ARG005",
-    "PGH003",
-    "B904",
-    "N802",
+    "G003",
+    "G004", # Logging statement uses f-string
+    "ISC001",
+    "ISC002",
     "ISC003",
-    "ANN205",
-    "S607",
-    "RUF015",
-    "E701",
-    "N818",
-    "PIE790",
+    "N802",
     "N803",
-    "A002",
-    "RUF012",
-    "W291",
-    "S113",
-    "S311",
+    "N805",
     "N806",
-    "PLR5501",
-    "F403",
-    "SIM115",
-    "B007",
-    "F841",
-    "C405",
-    "C419",
-    "SIM300",
+    "N818",
+    "N999",
+    "PD003",
+    "PD010",
     "PD011",
-    "UP015",
-    "S602",
-    "Q002",
-    "ISC002",
-    "COM819",
-    "C416",
-    "DTZ005",
-    "G003",
-    "S608",
+    "PD901",
+    "PGH003",
+    "PIE790",
+    "PIE804",
     "PIE808",
-    "B008",
-    "S108",
-    "E402",
-    "S605",
-    "F821",
-    "RET507",
-    "RET503",
-    "UP030",
-    "UP026",
-    "PLR1714",
-    "C403",
-    "PLR1711",
     "PIE810",
-    "DTZ011",
-    "S105",
-    "BLE001",
-    "C401",
-    "C400",
     "PLR0402",
-    "SIM201",
-    "RET506",
-    "C417",
-    "PD010",
+    "PLR0911", # Too many return statements
+    "PLR0912", # Too many branches
+    "PLR0913", # Too many arguments to function call
+    "PLR0915", # Too many statements
+    "PLR1711",
+    "PLR1714",
+    "PLR2004", # Magic value used
+    "PLR5501",
+    "PLW0603", # Using the global statement to update `` is discouraged
     "PLW1510",
-    "A001",
-    "W292",
-    "PYI024",
+    "PLW2901",
+    "Q000", # Single quotes found but double quotes preferred
+    "Q001",
+    "Q002",
     "Q003",
-    "S301",
     "RET501",
-    "PD003",
-    "SIM117",
+    "RET503",
+    "RET504",
+    "RET505",
+    "RET506",
+    "RET507",
     "RUF002",
-    "UP027",
-    "SIM105",
-    "E713",
-    "S324",
+    "RUF005",
+    "RUF010",
+    "RUF012",
+    "RUF013",
+    "RUF015",
+    "RUF100",
+    "S101", # Use of assert detected
+    "S105",
+    "S108",
+    "S113",
+    "S301",
     "S310",
-    "Q001",
-    "UP020",
+    "S311",
+    "S324",
     "S506",
-    "N805",
-    "E712",
-    "E401",
+    "S602",
+    "S603",
+    "S605",
+    "S607",
+    "S608",
+    "SIM102",
+    "SIM105",
+    "SIM108",
+    "SIM115",
+    "SIM117",
+    "SIM118",
+    "SIM201",
     "SIM212",
-    "DTZ002",
+    "SIM300",
+    "SLF001", # Private member accessed: `_preemptible`
+    "TID252",
     "UP007",
+    "UP015",
+    "UP020",
+    "UP026",
+    "UP027",
+    "UP030",
+    "UP031",
+    "UP032",
+    "W291",
+    "W292",
+    "W605",
 ]
 
 [tool.ruff.lint.isort]
diff --git a/src/cpg_flow/inputs.py b/src/cpg_flow/inputs.py
index b94f3fda..0a47e2e8 100644
--- a/src/cpg_flow/inputs.py
+++ b/src/cpg_flow/inputs.py
@@ -173,7 +173,6 @@ def _populate_alignment_inputs(
                 f'No reads found for sequencing group {sequencing_group.id} of type {entry["type"]}',
             )
 
-    return None
 
 
 def _populate_analysis(dataset: Dataset) -> None:
diff --git a/src/cpg_flow/metamist.py b/src/cpg_flow/metamist.py
index a2741b95..1524fe50 100644
--- a/src/cpg_flow/metamist.py
+++ b/src/cpg_flow/metamist.py
@@ -140,7 +140,6 @@ class MetamistError(Exception):
     Error while interacting with Metamist.
""" - pass class AnalysisStatus(Enum): @@ -283,7 +282,7 @@ def make_aapi_call(self, api_func: Callable, **kwargv: Any): # log the error and continue traceback.print_exc() LOGGER.error( - f'Error: {e} Call {api_func} failed with payload:\n{str(kwargv)}', + f'Error: {e} Call {api_func} failed with payload:\n{kwargv!s}', ) # TODO: discuss should we catch all here as well? # except Exception as e: @@ -424,15 +423,14 @@ def create_analysis( ) if aid is None: LOGGER.error( - f'Failed to create Analysis(type={type_}, status={status}, output={str(output)}) in {metamist_proj}', + f'Failed to create Analysis(type={type_}, status={status}, output={output!s}) in {metamist_proj}', ) return None - else: - LOGGER.info( - f'Created Analysis(id={aid}, type={type_}, status={status}, ' - f'output={str(output)}) in {metamist_proj}', - ) - return aid + LOGGER.info( + f'Created Analysis(id={aid}, type={type_}, status={status}, ' + f'output={output!s}) in {metamist_proj}', + ) + return aid def get_ped_entries(self, dataset: str | None = None) -> list[dict[str, str]]: """ @@ -595,35 +593,33 @@ def parse_reads( # pylint: disable=too-many-return-statements index_path=index_location, reference_assembly=reference_assembly, ) - else: - assert location.endswith('.bam') - return BamPath(location, index_path=index_location) - - else: - fastq_pairs = FastqPairs() - - for lane_pair in reads_data: - if len(lane_pair) != 2: - raise ValueError( - f'Sequence data for sequencing group {sequencing_group_id} is incorrectly ' - f'formatted. Expecting 2 entries per lane (R1 and R2 fastqs), ' - f'but got {len(lane_pair)}. ' - f'Read data: {pprint.pformat(lane_pair)}', - ) - if check_existence and not exists(lane_pair[0]['location']): - raise MetamistError( - f'{sequencing_group_id}: ERROR: read 1 file does not exist: {lane_pair[0]["location"]}', - ) - if check_existence and not exists(lane_pair[1]['location']): - raise MetamistError( - f'{sequencing_group_id}: ERROR: read 2 file does not exist: {lane_pair[1]["location"]}', - ) + assert location.endswith('.bam') + return BamPath(location, index_path=index_location) + + fastq_pairs = FastqPairs() - fastq_pairs.append( - FastqPair( - to_path(lane_pair[0]['location']), - to_path(lane_pair[1]['location']), - ), + for lane_pair in reads_data: + if len(lane_pair) != 2: + raise ValueError( + f'Sequence data for sequencing group {sequencing_group_id} is incorrectly ' + f'formatted. Expecting 2 entries per lane (R1 and R2 fastqs), ' + f'but got {len(lane_pair)}. 
' + f'Read data: {pprint.pformat(lane_pair)}', + ) + if check_existence and not exists(lane_pair[0]['location']): + raise MetamistError( + f'{sequencing_group_id}: ERROR: read 1 file does not exist: {lane_pair[0]["location"]}', + ) + if check_existence and not exists(lane_pair[1]['location']): + raise MetamistError( + f'{sequencing_group_id}: ERROR: read 2 file does not exist: {lane_pair[1]["location"]}', ) - return fastq_pairs + fastq_pairs.append( + FastqPair( + to_path(lane_pair[0]['location']), + to_path(lane_pair[1]['location']), + ), + ) + + return fastq_pairs diff --git a/src/cpg_flow/stage.py b/src/cpg_flow/stage.py index d06208ca..38dc0fdf 100644 --- a/src/cpg_flow/stage.py +++ b/src/cpg_flow/stage.py @@ -20,7 +20,7 @@ import pathlib from abc import ABC, abstractmethod from collections.abc import Callable, Sequence -from typing import Generic, Optional, TypeVar, Union, cast +from typing import Generic, Optional, TypeVar, cast from cloudpathlib import CloudPath @@ -633,12 +633,11 @@ def _get_action(self, target: TargetT) -> Action: f'workflow/allow_missing_outputs_for_stages)', ) return Action.REUSE - else: - raise WorkflowError( - f'{self.name}: stage is required, but is skipped, and ' - f'the following expected outputs for target {target} do not exist: ' - f'{first_missing_path}', - ) + raise WorkflowError( + f'{self.name}: stage is required, but is skipped, and ' + f'the following expected outputs for target {target} do not exist: ' + f'{first_missing_path}', + ) if reusable and not first_missing_path: if target.forced: @@ -646,16 +645,15 @@ def _get_action(self, target: TargetT) -> Action: f'{self.name}: {target} [QUEUE] (can reuse, but forcing the target to rerun this stage)', ) return Action.QUEUE - elif self.forced: + if self.forced: LOGGER.info( f'{self.name}: {target} [QUEUE] (can reuse, but forcing the stage to rerun)', ) return Action.QUEUE - else: - LOGGER.info( - f'{self.name}: {target} [REUSE] (expected outputs exist: {expected_out})', - ) - return Action.REUSE + LOGGER.info( + f'{self.name}: {target} [REUSE] (expected outputs exist: {expected_out})', + ) + return Action.REUSE LOGGER.info(f'{self.name}: {target} [QUEUE]') @@ -698,13 +696,12 @@ def _is_reusable(self, expected_out: ExpectedResultT) -> tuple[bool, Path | None return False, first_missing_path return True, None - else: - if self.skipped: - # Do not check the files' existence, trust they exist. - # note that for skipped stages, we automatically assume outputs exist - return True, None - # Do not check the files' existence, assume they don't exist: - return False, None + if self.skipped: + # Do not check the files' existence, trust they exist. + # note that for skipped stages, we automatically assume outputs exist + return True, None + # Do not check the files' existence, assume they don't exist: + return False, None def get_job_attrs(self, target: TargetT | None = None) -> dict[str, str]: """ @@ -719,7 +716,7 @@ def get_job_attrs(self, target: TargetT | None = None) -> dict[str, str]: def stage( - cls: Optional[type['Stage']] = None, + cls: type['Stage'] | None = None, *, analysis_type: str | None = None, analysis_keys: list[str | Path] | None = None, @@ -729,7 +726,7 @@ def stage( skipped: bool = False, assume_outputs_exist: bool = False, forced: bool = False, -) -> Union[StageDecorator, Callable[..., StageDecorator]]: +) -> StageDecorator | Callable[..., StageDecorator]: """ Implements a standard class decorator pattern with optional arguments. 
     The goal is to allow declaring workflow stages without requiring to implement
@@ -781,8 +778,7 @@ def wrapper_stage() -> Stage:
 
     if cls is None:
         return decorator_stage
-    else:
-        return decorator_stage(cls)
+    return decorator_stage(cls)
 
 
 class SequencingGroupStage(Stage[SequencingGroup], ABC):
@@ -805,7 +801,6 @@ def queue_jobs(
         """
         Override to add Hail Batch jobs.
         """
-        pass
 
     def queue_for_multicohort(
         self,
@@ -849,7 +844,6 @@ def queue_jobs(self, dataset: Dataset, inputs: StageInput) -> StageOutput | None
         """
         Override to add Hail Batch jobs.
         """
-        pass
 
     def queue_for_multicohort(
         self,
@@ -886,7 +880,6 @@ def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput | None:
         """
         Override to add Hail Batch jobs.
         """
-        pass
 
     def queue_for_multicohort(
         self,
@@ -916,7 +909,6 @@ def expected_outputs(self, multicohort: MultiCohort) -> ExpectedResultT:
         """
         Override to declare expected output paths.
         """
-        pass
 
     @abstractmethod
     def queue_jobs(
@@ -927,7 +919,6 @@ def queue_jobs(
         """
         Override to add Hail Batch jobs.
        """
-        pass
 
     def queue_for_multicohort(
         self,
diff --git a/src/cpg_flow/status.py b/src/cpg_flow/status.py
index 58663a54..0b513232 100644
--- a/src/cpg_flow/status.py
+++ b/src/cpg_flow/status.py
@@ -88,10 +88,9 @@ def complete_analysis_job(
         _msg = f'Creation of Analysis failed (type={analysis_type}, output={output}) in {project_name}'
         print(_msg)
         raise ConnectionError(_msg)
-    else:
-        print(
-            f'Created Analysis(id={a_id}, type={analysis_type}, output={output}) in {project_name}',
-        )
+    print(
+        f'Created Analysis(id={a_id}, type={analysis_type}, output={output}) in {project_name}',
+    )
 
 
 class StatusReporterError(Exception):
@@ -164,7 +163,7 @@ def create_analysis(
         sequencing_group_ids = []
         if target is None:
             raise ValueError('Target is required to create analysis')
-        elif isinstance(target, MultiCohort):
+        if isinstance(target, MultiCohort):
             cohort_ids = target.get_cohort_ids()
         elif isinstance(target, Cohort):
             cohort_ids = [target.get_cohort_id()]
diff --git a/src/cpg_flow/targets/cohort.py b/src/cpg_flow/targets/cohort.py
index 43469d17..f1b69cbd 100644
--- a/src/cpg_flow/targets/cohort.py
+++ b/src/cpg_flow/targets/cohort.py
@@ -103,10 +103,9 @@ def add_sequencing_group_object(
                 f'SequencingGroup {s.id} already exists in the Cohort {self.name}',
             )
             return self._sequencing_group_by_id[s.id]
-        else:
-            raise ValueError(
-                f'SequencingGroup {s.id} already exists in the Cohort {self.name}',
-            )
+        raise ValueError(
+            f'SequencingGroup {s.id} already exists in the Cohort {self.name}',
+        )
         self._sequencing_group_by_id[s.id] = s
 
     def get_sequencing_groups(
diff --git a/src/cpg_flow/workflow.py b/src/cpg_flow/workflow.py
index 5c5a793e..6944f66d 100644
--- a/src/cpg_flow/workflow.py
+++ b/src/cpg_flow/workflow.py
@@ -135,8 +135,7 @@ def wrapper_stage(*args, **kwargs) -> 'Stage':
 
     if _fun is None:
         return decorator_stage
-    else:
-        return decorator_stage(_fun)
+    return decorator_stage(_fun)
 
 
 _workflow: Optional['Workflow'] = None
diff --git a/tests/test_cohort.py b/tests/test_cohort.py
index b00f1408..b53ea1cd 100644
--- a/tests/test_cohort.py
+++ b/tests/test_cohort.py
@@ -96,7 +96,7 @@ def _custom_cohort_config(tmp_path) -> str:
 
 
 def load_mock_data(file_path: str) -> list[dict]:
-    with open(file_path, 'r') as file:
+    with open(file_path) as file:
         return json.load(file)
 
 
@@ -138,7 +138,7 @@ def mock_get_cohort_sgs(cohort_id: str, *args, **kwargs) -> list[dict]:
         'COH5': load_mock_data('tests/assets/test_cohort/COH5.json'),
     }
 
-    if cohort_id not in cohorts.keys():
+    if cohort_id not in cohorts:
         raise MetamistError(f"Error fetching cohort: The provided cohort ID was not valid: '{cohort_id}'")
 
     return cohorts[cohort_id]
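
Note on the ruff rules this patch exercises: the same few rewrites repeat across
every file, so a minimal sketch follows. The `load` helper below is hypothetical
(not code from this repository), the rule codes are taken from ruff's published
rule index, and the PEP 604 union assumes Python 3.10+.

    import json

    # Before -- the shape of code this patch rewrites:
    #
    #     from typing import Optional
    #
    #     def load(path: Optional[str]) -> dict:  # UP007: prefer `str | None`
    #         with open(path, 'r') as f:          # UP015: 'r' is the default mode
    #             data = json.load(f)
    #         if 'key' in data.keys():            # SIM118: test membership on the dict
    #             return data
    #         else:                               # RET505: superfluous else after return
    #             return {}

    # After -- the same logic in the style the patch converges on:
    def load(path: str | None) -> dict:
        # open() defaults to text-mode read, so no mode argument is needed
        with open(path) as f:
            data = json.load(f)
        # membership tests work on the dict directly, and the early return
        # makes the trailing `else` unnecessary
        if 'key' in data:
            return data
        return {}

The metamist.py and stage.py hunks additionally apply RUF010 (f'{str(x)}'
becomes f'{x!s}') and PIE790 (drop a bare `pass` after a docstring), and the
pyproject.toml hunk alphabetizes the `ignore` list so these codes are easier
to audit.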