From 97acdb8db163e29b31832d0ccc4219cce1dbf8cd Mon Sep 17 00:00:00 2001
From: Sabrina Yan <9669990+violetbrina@users.noreply.github.com>
Date: Wed, 18 Dec 2024 15:25:25 +1100
Subject: [PATCH] fix(docs/update_readme.py,pyproject.toml-,+7): fix linter
 warnings

---
 docs/update_readme.py          |  10 +-
 pyproject.toml                 | 223 ++++++++++++++++------
 src/cpg_flow/inputs.py         |   1 -
 src/cpg_flow/metamist.py       |  72 +++++------
 src/cpg_flow/stage.py          |  49 +++-----
 src/cpg_flow/status.py         |   9 +-
 src/cpg_flow/targets/cohort.py |   7 +-
 src/cpg_flow/workflow.py       |   3 +-
 tests/test_cohort.py           |   4 +-
 9 files changed, 177 insertions(+), 201 deletions(-)

diff --git a/docs/update_readme.py b/docs/update_readme.py
index 054f7579..18de6714 100644
--- a/docs/update_readme.py
+++ b/docs/update_readme.py
@@ -11,7 +11,7 @@
 
 
 def load_descriptions(description_file):
-    with open(description_file, 'r') as f:
+    with open(description_file) as f:
         descriptions = yaml.safe_load(f)
     return descriptions
 
@@ -22,7 +22,7 @@ def parse_workflows(directory, description_file):
     for filename in os.listdir(directory):
         if filename.endswith('.yaml') or filename.endswith('.yml'):
             filepath = os.path.join(directory, filename)
-            with open(filepath, 'r') as file:
+            with open(filepath) as file:
                 try:
                     data = yaml.load(file, Loader=yaml.BaseLoader)
                     workflow_name = data.get('name', 'Unnamed Workflow')
@@ -57,9 +57,9 @@ def parse_triggers(on_field):
             else:
                 triggers.append(f'`{trigger}`')
         return ' and '.join(triggers)
-    elif isinstance(on_field, list): # Handle `on: [event1, event2]`
+    if isinstance(on_field, list): # Handle `on: [event1, event2]`
         return ', '.join([f'`{event}`' for event in on_field])
-    elif isinstance(on_field, str): # Handle `on: event`
+    if isinstance(on_field, str): # Handle `on: event`
         return f'`{on_field}`'
     return '`manual`' # Default if `on` is missing
 
@@ -77,7 +77,7 @@ def generate_markdown(workflows):
 
 
 def update_readme(markdown, readme_file):
-    with open(readme_file, 'r') as file:
+    with open(readme_file) as file:
         content = file.read()
 
     start_marker = '### 🎢 Workflows'
diff --git a/pyproject.toml b/pyproject.toml
index dbc3bd8f..051c6b94 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,6 @@ dependencies = [
     "grpcio-status>=1.48,<1.50",
    "metamist>=6.9.0",
     "networkx>=3.4.2",
-    "coloredlogs>=15.0.1",
 ]
 classifiers = [
     "Development Status :: 3 - Alpha",
@@ -144,146 +143,140 @@ fixable = [
 ]
 
 ignore = [
+    "A001",
+    "A002",
+    "ANN001",
+    "ANN002",
+    "ANN003",
     "ANN101", # Missing type annotation for self in method
     "ANN102", # Missing type annotation for `cls` in classmethod
     "ANN201", # Missing return type annotation for public function
+    "ANN202",
     "ANN204", # Missing type annotation for special method `__init__`
+    "ANN205",
     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed in `**kwargs`
+    "ARG001", # Unused function argument
+    "ARG002", # Unused method argument
+    "ARG005",
+    "B006",
+    "B007",
+    "B008",
+    "B023",
+    "B904",
+    "BLE001",
+    "C400",
+    "C401",
+    "C403",
+    "C405",
+    "C408",
+    "C416",
+    "C417",
+    "C419",
+    "C901",
+    "COM819",
+    "DTZ002",
+    "DTZ005",
+    "DTZ011",
+    "E401",
+    "E402",
     "E501", # Line length too long
+    "E712",
+    "E713",
     "E731", # Do not assign a lambda expression, use a def
     "E741", # Ambiguous variable name
-    "G004", # Logging statement uses f-string
-    "PLR0911", # Too many return statements
-    "PLR0912", # Too many branches
-    "PLR0913", # Too many arguments to function call
-    "PLR0915", # Too many statements
-    "PLW0603", # Using the global statement to update `` is discouraged
-    "PT018", # Assertion should be broken down into multiple parts
-    "Q000", # Single quotes found but double quotes preferred
-    "S101", # Use of assert detected
-    "SLF001", # Private member accessed: `_preemptible`
-
-    "ARG001", # Unused function argument
-    "ARG002", # Unused method argument
-
-    "PLR2004", # Magic value used
-
-    "ANN001",
-    "ANN202",
-    "C408",
-    "TID252",
-    "RET504",
-    "ERA001",
-    "UP032",
-    "RUF100",
-    "ISC001",
-    "PIE804",
-    "F401",
-    "C901",
-    "W605",
-    "RET505",
-    "ANN003",
-    "RUF013",
-    "UP031",
-    "RUF010",
-    "B006",
-    "ANN002",
-    "B023",
+    "ERA001", # Found commented-out code
     "EXE001",
+    "F401",
+    "F403",
+    "F821",
+    "F841",
     "G001",
-    "SIM108",
-    "RUF005",
     "G002",
-    "PD901",
-    "N999",
-    "SIM118",
-    "SIM102",
-    "PLW2901",
-    "S603",
-    "ARG005",
-    "PGH003",
-    "B904",
-    "N802",
+    "G003",
+    "G004", # Logging statement uses f-string
+    "ISC001",
+    "ISC002",
     "ISC003",
-    "ANN205",
-    "S607",
-    "RUF015",
-    "E701",
-    "N818",
-    "PIE790",
+    "N802",
     "N803",
-    "A002",
-    "RUF012",
-    "W291",
-    "S113",
-    "S311",
+    "N805",
     "N806",
-    "PLR5501",
-    "F403",
-    "SIM115",
-    "B007",
-    "F841",
-    "C405",
-    "C419",
-    "SIM300",
+    "N818",
+    "N999",
+    "PD003",
+    "PD010",
     "PD011",
-    "UP015",
-    "S602",
-    "Q002",
-    "ISC002",
-    "COM819",
-    "C416",
-    "DTZ005",
-    "G003",
-    "S608",
+    "PD901",
+    "PGH003",
+    "PIE790",
+    "PIE804",
     "PIE808",
-    "B008",
-    "S108",
-    "E402",
-    "S605",
-    "F821",
-    "RET507",
-    "RET503",
-    "UP030",
-    "UP026",
-    "PLR1714",
-    "C403",
-    "PLR1711",
     "PIE810",
-    "DTZ011",
-    "S105",
-    "BLE001",
-    "C401",
-    "C400",
     "PLR0402",
-    "SIM201",
-    "RET506",
-    "C417",
-    "PD010",
+    "PLR0911", # Too many return statements
+    "PLR0912", # Too many branches
+    "PLR0913", # Too many arguments to function call
+    "PLR0915", # Too many statements
+    "PLR1711",
+    "PLR1714",
+    "PLR2004", # Magic value used
+    "PLR5501",
+    "PLW0603", # Using the global statement to update `` is discouraged
     "PLW1510",
-    "A001",
-    "W292",
-    "PYI024",
+    "PLW2901",
+    "Q000", # Single quotes found but double quotes preferred
+    "Q001",
+    "Q002",
     "Q003",
-    "S301",
     "RET501",
-    "PD003",
-    "SIM117",
+    "RET503",
+    "RET504",
+    "RET505",
+    "RET506",
+    "RET507",
     "RUF002",
-    "UP027",
-    "SIM105",
-    "E713",
-    "S324",
+    "RUF005",
+    "RUF010",
+    "RUF012",
+    "RUF013",
+    "RUF015",
+    "RUF100",
+    "S101", # Use of assert detected
+    "S105",
+    "S108",
+    "S113",
+    "S301",
     "S310",
-    "Q001",
-    "UP020",
+    "S311",
+    "S324",
     "S506",
-    "N805",
-    "E712",
-    "E401",
+    "S602",
+    "S603",
+    "S605",
+    "S607",
+    "S608",
+    "SIM102",
+    "SIM105",
+    "SIM108",
+    "SIM115",
+    "SIM117",
+    "SIM118",
+    "SIM201",
     "SIM212",
-    "DTZ002",
+    "SIM300",
+    "SLF001", # Private member accessed: `_preemptible`
+    "TID252",
     "UP007",
+    "UP015",
+    "UP020",
+    "UP026",
+    "UP027",
+    "UP030",
+    "UP031",
+    "UP032",
+    "W291",
+    "W292",
+    "W605",
 ]
 
 [tool.ruff.lint.isort]
diff --git a/src/cpg_flow/inputs.py b/src/cpg_flow/inputs.py
index b94f3fda..0a47e2e8 100644
--- a/src/cpg_flow/inputs.py
+++ b/src/cpg_flow/inputs.py
@@ -173,7 +173,6 @@ def _populate_alignment_inputs(
                 f'No reads found for sequencing group {sequencing_group.id} of type {entry["type"]}',
             )
 
-    return None
 
 
 def _populate_analysis(dataset: Dataset) -> None:
diff --git a/src/cpg_flow/metamist.py b/src/cpg_flow/metamist.py
index a2741b95..1524fe50 100644
--- a/src/cpg_flow/metamist.py
+++ b/src/cpg_flow/metamist.py
@@ -140,7 +140,6 @@ class MetamistError(Exception):
     Error while interacting with Metamist.
""" - pass class AnalysisStatus(Enum): @@ -283,7 +282,7 @@ def make_aapi_call(self, api_func: Callable, **kwargv: Any): # log the error and continue traceback.print_exc() LOGGER.error( - f'Error: {e} Call {api_func} failed with payload:\n{str(kwargv)}', + f'Error: {e} Call {api_func} failed with payload:\n{kwargv!s}', ) # TODO: discuss should we catch all here as well? # except Exception as e: @@ -424,15 +423,14 @@ def create_analysis( ) if aid is None: LOGGER.error( - f'Failed to create Analysis(type={type_}, status={status}, output={str(output)}) in {metamist_proj}', + f'Failed to create Analysis(type={type_}, status={status}, output={output!s}) in {metamist_proj}', ) return None - else: - LOGGER.info( - f'Created Analysis(id={aid}, type={type_}, status={status}, ' - f'output={str(output)}) in {metamist_proj}', - ) - return aid + LOGGER.info( + f'Created Analysis(id={aid}, type={type_}, status={status}, ' + f'output={output!s}) in {metamist_proj}', + ) + return aid def get_ped_entries(self, dataset: str | None = None) -> list[dict[str, str]]: """ @@ -595,35 +593,33 @@ def parse_reads( # pylint: disable=too-many-return-statements index_path=index_location, reference_assembly=reference_assembly, ) - else: - assert location.endswith('.bam') - return BamPath(location, index_path=index_location) - - else: - fastq_pairs = FastqPairs() - - for lane_pair in reads_data: - if len(lane_pair) != 2: - raise ValueError( - f'Sequence data for sequencing group {sequencing_group_id} is incorrectly ' - f'formatted. Expecting 2 entries per lane (R1 and R2 fastqs), ' - f'but got {len(lane_pair)}. ' - f'Read data: {pprint.pformat(lane_pair)}', - ) - if check_existence and not exists(lane_pair[0]['location']): - raise MetamistError( - f'{sequencing_group_id}: ERROR: read 1 file does not exist: {lane_pair[0]["location"]}', - ) - if check_existence and not exists(lane_pair[1]['location']): - raise MetamistError( - f'{sequencing_group_id}: ERROR: read 2 file does not exist: {lane_pair[1]["location"]}', - ) + assert location.endswith('.bam') + return BamPath(location, index_path=index_location) + + fastq_pairs = FastqPairs() - fastq_pairs.append( - FastqPair( - to_path(lane_pair[0]['location']), - to_path(lane_pair[1]['location']), - ), + for lane_pair in reads_data: + if len(lane_pair) != 2: + raise ValueError( + f'Sequence data for sequencing group {sequencing_group_id} is incorrectly ' + f'formatted. Expecting 2 entries per lane (R1 and R2 fastqs), ' + f'but got {len(lane_pair)}. 
' + f'Read data: {pprint.pformat(lane_pair)}', + ) + if check_existence and not exists(lane_pair[0]['location']): + raise MetamistError( + f'{sequencing_group_id}: ERROR: read 1 file does not exist: {lane_pair[0]["location"]}', + ) + if check_existence and not exists(lane_pair[1]['location']): + raise MetamistError( + f'{sequencing_group_id}: ERROR: read 2 file does not exist: {lane_pair[1]["location"]}', ) - return fastq_pairs + fastq_pairs.append( + FastqPair( + to_path(lane_pair[0]['location']), + to_path(lane_pair[1]['location']), + ), + ) + + return fastq_pairs diff --git a/src/cpg_flow/stage.py b/src/cpg_flow/stage.py index d06208ca..38dc0fdf 100644 --- a/src/cpg_flow/stage.py +++ b/src/cpg_flow/stage.py @@ -20,7 +20,7 @@ import pathlib from abc import ABC, abstractmethod from collections.abc import Callable, Sequence -from typing import Generic, Optional, TypeVar, Union, cast +from typing import Generic, Optional, TypeVar, cast from cloudpathlib import CloudPath @@ -633,12 +633,11 @@ def _get_action(self, target: TargetT) -> Action: f'workflow/allow_missing_outputs_for_stages)', ) return Action.REUSE - else: - raise WorkflowError( - f'{self.name}: stage is required, but is skipped, and ' - f'the following expected outputs for target {target} do not exist: ' - f'{first_missing_path}', - ) + raise WorkflowError( + f'{self.name}: stage is required, but is skipped, and ' + f'the following expected outputs for target {target} do not exist: ' + f'{first_missing_path}', + ) if reusable and not first_missing_path: if target.forced: @@ -646,16 +645,15 @@ def _get_action(self, target: TargetT) -> Action: f'{self.name}: {target} [QUEUE] (can reuse, but forcing the target to rerun this stage)', ) return Action.QUEUE - elif self.forced: + if self.forced: LOGGER.info( f'{self.name}: {target} [QUEUE] (can reuse, but forcing the stage to rerun)', ) return Action.QUEUE - else: - LOGGER.info( - f'{self.name}: {target} [REUSE] (expected outputs exist: {expected_out})', - ) - return Action.REUSE + LOGGER.info( + f'{self.name}: {target} [REUSE] (expected outputs exist: {expected_out})', + ) + return Action.REUSE LOGGER.info(f'{self.name}: {target} [QUEUE]') @@ -698,13 +696,12 @@ def _is_reusable(self, expected_out: ExpectedResultT) -> tuple[bool, Path | None return False, first_missing_path return True, None - else: - if self.skipped: - # Do not check the files' existence, trust they exist. - # note that for skipped stages, we automatically assume outputs exist - return True, None - # Do not check the files' existence, assume they don't exist: - return False, None + if self.skipped: + # Do not check the files' existence, trust they exist. + # note that for skipped stages, we automatically assume outputs exist + return True, None + # Do not check the files' existence, assume they don't exist: + return False, None def get_job_attrs(self, target: TargetT | None = None) -> dict[str, str]: """ @@ -719,7 +716,7 @@ def get_job_attrs(self, target: TargetT | None = None) -> dict[str, str]: def stage( - cls: Optional[type['Stage']] = None, + cls: type['Stage'] | None = None, *, analysis_type: str | None = None, analysis_keys: list[str | Path] | None = None, @@ -729,7 +726,7 @@ def stage( skipped: bool = False, assume_outputs_exist: bool = False, forced: bool = False, -) -> Union[StageDecorator, Callable[..., StageDecorator]]: +) -> StageDecorator | Callable[..., StageDecorator]: """ Implements a standard class decorator pattern with optional arguments. 
     The goal is to allow declaring workflow stages without requiring to implement
@@ -781,8 +778,7 @@ def wrapper_stage() -> Stage:
 
     if cls is None:
         return decorator_stage
-    else:
-        return decorator_stage(cls)
+    return decorator_stage(cls)
 
 
 class SequencingGroupStage(Stage[SequencingGroup], ABC):
@@ -805,7 +801,6 @@ def queue_jobs(
         """
         Override to add Hail Batch jobs.
         """
-        pass
 
     def queue_for_multicohort(
         self,
@@ -849,7 +844,6 @@ def queue_jobs(self, dataset: Dataset, inputs: StageInput) -> StageOutput | None
         """
         Override to add Hail Batch jobs.
         """
-        pass
 
     def queue_for_multicohort(
         self,
@@ -886,7 +880,6 @@ def queue_jobs(self, cohort: Cohort, inputs: StageInput) -> StageOutput | None:
         """
         Override to add Hail Batch jobs.
         """
-        pass
 
     def queue_for_multicohort(
         self,
@@ -916,7 +909,6 @@ def expected_outputs(self, multicohort: MultiCohort) -> ExpectedResultT:
         """
         Override to declare expected output paths.
         """
-        pass
 
     @abstractmethod
     def queue_jobs(
@@ -927,7 +919,6 @@ def queue_jobs(
         """
         Override to add Hail Batch jobs.
        """
-        pass
 
     def queue_for_multicohort(
         self,
diff --git a/src/cpg_flow/status.py b/src/cpg_flow/status.py
index 58663a54..0b513232 100644
--- a/src/cpg_flow/status.py
+++ b/src/cpg_flow/status.py
@@ -88,10 +88,9 @@ def complete_analysis_job(
         _msg = f'Creation of Analysis failed (type={analysis_type}, output={output}) in {project_name}'
         print(_msg)
         raise ConnectionError(_msg)
-    else:
-        print(
-            f'Created Analysis(id={a_id}, type={analysis_type}, output={output}) in {project_name}',
-        )
+    print(
+        f'Created Analysis(id={a_id}, type={analysis_type}, output={output}) in {project_name}',
+    )
 
 
 class StatusReporterError(Exception):
@@ -164,7 +163,7 @@ def create_analysis(
         sequencing_group_ids = []
         if target is None:
             raise ValueError('Target is required to create analysis')
-        elif isinstance(target, MultiCohort):
+        if isinstance(target, MultiCohort):
             cohort_ids = target.get_cohort_ids()
         elif isinstance(target, Cohort):
             cohort_ids = [target.get_cohort_id()]
diff --git a/src/cpg_flow/targets/cohort.py b/src/cpg_flow/targets/cohort.py
index 43469d17..f1b69cbd 100644
--- a/src/cpg_flow/targets/cohort.py
+++ b/src/cpg_flow/targets/cohort.py
@@ -103,10 +103,9 @@ def add_sequencing_group_object(
                 f'SequencingGroup {s.id} already exists in the Cohort {self.name}',
             )
             return self._sequencing_group_by_id[s.id]
-        else:
-            raise ValueError(
-                f'SequencingGroup {s.id} already exists in the Cohort {self.name}',
-            )
+        raise ValueError(
+            f'SequencingGroup {s.id} already exists in the Cohort {self.name}',
+        )
         self._sequencing_group_by_id[s.id] = s
 
     def get_sequencing_groups(
diff --git a/src/cpg_flow/workflow.py b/src/cpg_flow/workflow.py
index 5c5a793e..6944f66d 100644
--- a/src/cpg_flow/workflow.py
+++ b/src/cpg_flow/workflow.py
@@ -135,8 +135,7 @@ def wrapper_stage(*args, **kwargs) -> 'Stage':
 
     if _fun is None:
         return decorator_stage
-    else:
-        return decorator_stage(_fun)
+    return decorator_stage(_fun)
 
 
 _workflow: Optional['Workflow'] = None
diff --git a/tests/test_cohort.py b/tests/test_cohort.py
index b00f1408..b53ea1cd 100644
--- a/tests/test_cohort.py
+++ b/tests/test_cohort.py
@@ -96,7 +96,7 @@ def _custom_cohort_config(tmp_path) -> str:
 
 
 def load_mock_data(file_path: str) -> list[dict]:
-    with open(file_path, 'r') as file:
+    with open(file_path) as file:
         return json.load(file)
 
 
@@ -138,7 +138,7 @@ def mock_get_cohort_sgs(cohort_id: str, *args, **kwargs) -> list[dict]:
         'COH5': load_mock_data('tests/assets/test_cohort/COH5.json'),
     }
 
-    if cohort_id not in cohorts.keys():
+    if cohort_id not in cohorts:
         raise MetamistError(f"Error fetching cohort: The provided cohort ID was not valid: '{cohort_id}'")
 
     return cohorts[cohort_id]
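
Note on the ruff rules this patch exercises: the same few rewrites repeat across
every file, so a minimal sketch follows. The `load` helper below is hypothetical
(not code from this repository), the rule codes are taken from ruff's published
rule index, and the PEP 604 union assumes Python 3.10+.

    import json

    # Before -- the shape of code this patch rewrites:
    #
    #     from typing import Optional
    #
    #     def load(path: Optional[str]) -> dict:  # UP007: prefer `str | None`
    #         with open(path, 'r') as f:          # UP015: 'r' is the default mode
    #             data = json.load(f)
    #         if 'key' in data.keys():            # SIM118: test membership on the dict
    #             return data
    #         else:                               # RET505: superfluous else after return
    #             return {}

    # After -- the same logic in the style the patch converges on:
    def load(path: str | None) -> dict:
        # open() defaults to text-mode read, so no mode argument is needed
        with open(path) as f:
            data = json.load(f)
        # membership tests work on the dict directly, and the early return
        # makes the trailing `else` unnecessary
        if 'key' in data:
            return data
        return {}

The metamist.py and stage.py hunks additionally apply RUF010 (f'{str(x)}'
becomes f'{x!s}') and PIE790 (drop a bare `pass` after a docstring), and the
pyproject.toml hunk alphabetizes the `ignore` list so these codes are easier
to audit.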