diff --git a/chemicalx/data/batchgenerator.py b/chemicalx/data/batchgenerator.py index 1bcbfe4..135deb1 100644 --- a/chemicalx/data/batchgenerator.py +++ b/chemicalx/data/batchgenerator.py @@ -33,14 +33,13 @@ def __init__( ): """Initialize a batch generator. - Args: - batch_size: Number of drug pairs per batch. - context_features: Indicator whether the batch should include biological context features. - drug_features: Indicator whether the batch should include drug features. - drug_molecules: Indicator whether the batch should include drug molecules - context_feature_set: A context feature set for feature generation. - drug_feature_set: A drug feature set for feature generation. - labeled_triples: A labeled triples object used to generate batches. + :param batch_size: Number of drug pairs per batch. + :param context_features: Indicator whether the batch should include biological context features. + :param drug_features: Indicator whether the batch should include drug features. + :param drug_molecules: Indicator whether the batch should include drug molecules + :param context_feature_set: A context feature set for feature generation. + :param drug_feature_set: A drug feature set for feature generation. + :param labeled_triples: A labeled triples object used to generate batches. """ self.batch_size = batch_size self.context_features = context_features @@ -53,10 +52,8 @@ def __init__( def _get_context_features(self, context_identifiers: Iterable[str]) -> Optional[torch.FloatTensor]: """Get the context features as a matrix. - Args: - context_identifiers (pd.Series): The context identifiers of interest. - Returns: - context_features (torch.FloatTensor): The matrix of biological context features. + :param context_identifiers: The context identifiers of interest. + :returns: The matrix of biological context features. 
""" if not self.context_features or self.context_feature_set is None: return None @@ -65,10 +62,8 @@ def _get_context_features(self, context_identifiers: Iterable[str]) -> Optional[ def _get_drug_features(self, drug_identifiers: Iterable[str]) -> Optional[torch.FloatTensor]: """Get the global drug features as a matrix. - Args: - drug_identifiers: The drug identifiers of interest. - Returns: - drug_features: The matrix of drug features. + :param drug_identifiers: The drug identifiers of interest. + :returns: The matrix of drug features. """ if not self.drug_features or self.drug_feature_set is None: return None @@ -77,10 +72,8 @@ def _get_drug_features(self, drug_identifiers: Iterable[str]) -> Optional[torch. def _get_drug_molecules(self, drug_identifiers: Iterable[str]) -> Optional[PackedGraph]: """Get the molecular structure of drugs. - Args: - drug_identifiers: The drug identifiers of interest. - Returns: - molecules: The molecules diagonally batched together for message passing. + :param drug_identifiers: The drug identifiers of interest. + :returns: The molecules diagonally batched together for message passing. """ if not self.drug_molecules or self.drug_feature_set is None: return None @@ -90,10 +83,8 @@ def _get_drug_molecules(self, drug_identifiers: Iterable[str]) -> Optional[Packe def _transform_labels(cls, labels: Sequence[float]) -> torch.FloatTensor: """Transform the labels from a chunk of the labeled triples frame. - Args: - labels: The drug pair binary labels. - Returns: - labels : The label target vector as a column vector. + :param labels: The drug pair binary labels. + :returns: The label target vector as a column vector. """ return torch.FloatTensor(np.array(labels).reshape(-1, 1)) @@ -101,10 +92,8 @@ def generate_batch(self, batch_frame: pd.DataFrame) -> DrugPairBatch: """ Generate a batch of drug features, molecules, context features and labels for a set of pairs. - Args: - batch_frame (pd.DataFrame): The labeled pairs of interest. 
- Returns: - batch (DrugPairBatch): A batch of tensors for the pairs. + :param batch_frame: The labeled pairs of interest. + :returns: A batch of tensors for the pairs. """ drug_features_left = self._get_drug_features(batch_frame["drug_1"]) drug_molecules_left = self._get_drug_molecules(batch_frame["drug_1"]) diff --git a/chemicalx/data/contextfeatureset.py b/chemicalx/data/contextfeatureset.py index 5906a1c..df5307e 100644 --- a/chemicalx/data/contextfeatureset.py +++ b/chemicalx/data/contextfeatureset.py @@ -21,9 +21,7 @@ def from_dict(cls, data: Mapping[str, Sequence[float]]) -> "ContextFeatureSet": def get_feature_matrix(self, contexts: Iterable[str]) -> torch.FloatTensor: """Get the feature matrix for a list of contexts. - Args: - contexts: A list of context identifiers. - Return: - features: A matrix of context features. + :param contexts: A list of context identifiers. + :returns: A matrix of context features. """ return torch.cat([self.data[context] for context in contexts]) diff --git a/chemicalx/data/datasetloader.py b/chemicalx/data/datasetloader.py index 8e24467..86d45e1 100644 --- a/chemicalx/data/datasetloader.py +++ b/chemicalx/data/datasetloader.py @@ -75,14 +75,14 @@ def get_generator( ) -> BatchGenerator: """Initialize a batch generator. - Args: - batch_size: Number of drug pairs per batch. - context_features: Indicator whether the batch should include biological context features. - drug_features: Indicator whether the batch should include drug features. - drug_molecules: Indicator whether the batch should include drug molecules - labels: Indicator whether the batch should include drug pair labels. - labeled_triples: A labeled triples object used to generate batches. If none is given, will use - all triples from the dataset. + :param batch_size: Number of drug pairs per batch. + :param context_features: Indicator whether the batch should include biological context features. 
+ :param drug_features: Indicator whether the batch should include drug features. + :param drug_molecules: Indicator whether the batch should include drug molecules + :param labeled_triples: + A labeled triples object used to generate batches. If none is given, will use + all triples from the dataset. + :returns: A batch generator """ return BatchGenerator( batch_size=batch_size, @@ -96,12 +96,7 @@ def get_generator( @abstractmethod def get_context_features(self) -> ContextFeatureSet: - """ - Get the context feature set. - - Returns: - : The ContextFeatureSet of the dataset of interest. - """ + """Get the context feature set.""" @property def num_contexts(self) -> int: @@ -115,12 +110,7 @@ def context_channels(self) -> int: @abstractmethod def get_drug_features(self): - """ - Get the drug feature set. - - Returns: - : The DrugFeatureSet of the dataset of interest. - """ + """Get the drug feature set.""" @property def num_drugs(self) -> int: @@ -134,12 +124,7 @@ def drug_channels(self) -> int: @abstractmethod def get_labeled_triples(self) -> LabeledTriples: - """ - Get the labeled triples file from the storage. - - Returns: - : The labeled triples in the dataset. - """ + """Get the labeled triples file from the storage.""" @property def num_labeled_triples(self) -> int: @@ -168,46 +153,36 @@ class RemoteDatasetLoader(DatasetLoader): def __init__(self, dataset_name: str): """Instantiate the dataset loader. - Args: - dataset_name (str): The name of the dataset. + :param dataset_name: The name of the dataset. """ self.base_url = "https://raw.githubusercontent.com/AstraZeneca/chemicalx/main/dataset" self.dataset_name = dataset_name assert dataset_name in ["drugcombdb", "drugcomb", "twosides", "drugbankddi"] def generate_path(self, file_name: str) -> str: - """ - Generate a complete url for a dataset file. + """Generate a complete url for a dataset file. - Args: - file_name (str): Name of the data file. - Returns: - data_path (str): The complete url to the dataset. 
+ :param file_name: Name of the data file. + :returns: The complete url to the dataset. """ data_path = "/".join([self.base_url, self.dataset_name, file_name]) return data_path def load_raw_json_data(self, path: str) -> Dict: - """ - Load a raw JSON dataset at the given path. + """Load a raw JSON dataset at the given path. - Args: - path (str): The path to the JSON file. - Returns: - raw_data (dict): A dictionary with the data. + :param path: The path to the JSON file. + :returns: A dictionary with the data. """ with urllib.request.urlopen(path) as url: raw_data = json.loads(url.read().decode()) return raw_data def load_raw_csv_data(self, path: str) -> pd.DataFrame: - """ - Load a CSV dataset at the given path. + """Load a CSV dataset at the given path. - Args: - path (str): The path to the triples CSV file. - Returns: - raw_data (pd.DataFrame): A pandas DataFrame with the data. + :param path: The path to the triples CSV file. + :returns: A pandas DataFrame with the data. """ data_bytes = urllib.request.urlopen(path).read() types = {"drug_1": str, "drug_2": str, "context": str, "label": float} @@ -215,26 +190,16 @@ def load_raw_csv_data(self, path: str) -> pd.DataFrame: return raw_data @lru_cache(maxsize=1) - def get_context_features(self): - """ - Get the context feature set. - - Returns: - : The ContextFeatureSet of the dataset of interest. - """ + def get_context_features(self) -> ContextFeatureSet: + """Get the context feature set.""" path = self.generate_path("context_set.json") raw_data = self.load_raw_json_data(path) raw_data = {k: torch.FloatTensor(np.array(v).reshape(1, -1)) for k, v in raw_data.items()} return ContextFeatureSet(raw_data) @lru_cache(maxsize=1) - def get_drug_features(self): - """ - Get the drug feature set. - - Returns: - : The DrugFeatureSet of the dataset of interest. 
- """ + def get_drug_features(self) -> DrugFeatureSet: + """Get the drug feature set.""" path = self.generate_path("drug_set.json") raw_data = self.load_raw_json_data(path) raw_data = { @@ -244,13 +209,8 @@ def get_drug_features(self): return DrugFeatureSet.from_dict(raw_data) @lru_cache(maxsize=1) - def get_labeled_triples(self): - """ - Get the labeled triples file from the storage. - - Returns: - : The labeled triples in the dataset. - """ + def get_labeled_triples(self) -> LabeledTriples: + """Get the labeled triples file from the storage.""" path = self.generate_path("labeled_triples.csv") df = self.load_raw_csv_data(path) return LabeledTriples(df) diff --git a/chemicalx/data/drugfeatureset.py b/chemicalx/data/drugfeatureset.py index 27f7d5b..0f9a3e2 100644 --- a/chemicalx/data/drugfeatureset.py +++ b/chemicalx/data/drugfeatureset.py @@ -30,19 +30,15 @@ def from_dict(cls, data: Dict[str, Dict]) -> "DrugFeatureSet": def get_feature_matrix(self, drugs: Iterable[str]) -> torch.FloatTensor: """Get the drug feature matrix for a list of drugs. - Args: - drugs: A list of drug identifiers. - Return: - : A matrix of drug features. + :param drugs: A list of drug identifiers. + :returns: A matrix of drug features. """ return torch.cat([self.data[drug]["features"] for drug in drugs]) def get_molecules(self, drugs: Iterable[str]) -> PackedGraph: """Get the molecular structures. - Args: - drugs: A list of drug identifiers. - Return: - : The molecules batched together for message passing. + :param drugs: A list of drug identifiers. + :returns: The molecules batched together for message passing. 
""" return Graph.pack([self.data[drug]["molecule"] for drug in drugs]) diff --git a/chemicalx/data/labeledtriples.py b/chemicalx/data/labeledtriples.py index 9ecacf8..2652b2b 100644 --- a/chemicalx/data/labeledtriples.py +++ b/chemicalx/data/labeledtriples.py @@ -32,85 +32,43 @@ def __add__(self, value: "LabeledTriples") -> "LabeledTriples": """ Add the triples in two LabeledTriples objects together - syntactic sugar for '+'. - Args: - value: Another LabeledTriples object for the addition. - Returns: - : A LabeledTriples object after the addition. + :param value: Another LabeledTriples object for the addition. + :returns: A LabeledTriples object after the addition. """ return LabeledTriples(pd.concat([self.data, value.data])) def get_drug_count(self) -> int: - """ - Get the number of drugs in the labeled triples dataset. - - Returns - int: The number of unique compounds in the labeled triples dataset. - """ + """Get the number of drugs in the labeled triples dataset.""" return pd.unique(self.data[["drug_1", "drug_2"]].values.ravel("K")).shape[0] def get_context_count(self) -> int: - """ - Get the number of unique contexts in the labeled triples dataset. - - Returns - int: The number of unique contexts in the labeled triples dataset. - """ + """Get the number of unique contexts in the labeled triples dataset.""" return self.data["context"].nunique() def get_combination_count(self) -> int: - """ - Get the number of unique drug pairs in the labeled triples dataset. - - Returns - int: The number of unique pairs in the labeled triples dataset. - """ + """Get the number of unique drug pairs in the labeled triples dataset.""" combination_count = self.data[["drug_1", "drug_2"]].drop_duplicates().shape[0] return combination_count def get_labeled_triple_count(self) -> int: - """ - Get the number of triples in the labeled triples dataset. - - Returns - int: The number of triples in the labeled triples dataset. 
- """ + """Get the number of triples in the labeled triples dataset.""" triple_count = self.data.shape[0] return triple_count def get_positive_count(self) -> int: - """ - Get the number of positive triples in the dataset. - - Returns - int: The number of positive triples. - """ + """Get the number of positive triples in the dataset.""" return int(self.data["label"].sum()) def get_negative_count(self) -> int: - """ - Get the number of negative triples in the dataset. - - Returns - int: The number of negative triples. - """ + """Get the number of negative triples in the dataset.""" return self.get_labeled_triple_count() - self.get_positive_count() def get_positive_rate(self) -> float: - """ - Get the ratio of positive triples in the dataset. - - Returns - float: The ratio of positive triples. - """ + """Get the ratio of positive triples in the dataset.""" return self.data["label"].mean() def get_negative_rate(self) -> float: - """ - Get the ratio of positive triples in the dataset. - - Returns - float: The ratio of negative triples. - """ + """Get the ratio of negative triples in the dataset.""" return 1.0 - self.data["label"].mean() def train_test_split( @@ -119,12 +77,9 @@ def train_test_split( """ Split the LabeledTriples object for training and testing. - Args: - train_size: The ratio of training triples. Default is 0.8 if None is passed. - random_state: The random seed. Default is 42. Set to none for no fixed seed. - Returns - train_labeled_triples (LabeledTriples): The training triples. - test_labeled_triples (LabeledTriples): The testing triples. + :param train_size: The ratio of training triples. Default is 0.8 if None is passed. + :param random_state: The random seed. Default is 42. Set to none for no fixed seed. 
+ :returns: A pair of training triples and testing triples """ train_data, test_data = train_test_split(self.data, train_size=train_size or 0.8, random_state=random_state) return LabeledTriples(train_data), LabeledTriples(test_data) diff --git a/chemicalx/models/deepsynergy.py b/chemicalx/models/deepsynergy.py index 2061bec..9a227b6 100644 --- a/chemicalx/models/deepsynergy.py +++ b/chemicalx/models/deepsynergy.py @@ -66,15 +66,12 @@ def forward( drug_features_left: torch.FloatTensor, drug_features_right: torch.FloatTensor, ) -> torch.FloatTensor: - """ - Run a forward pass of the DeepSynergy model. + """Run a forward pass of the DeepSynergy model. - Args: - context_features: A matrix of biological context features. - drug_features_left: A matrix of head drug features. - drug_features_right: A matrix of tail drug features. - Returns: - : A column vector of predicted synergy scores. + :param context_features: A matrix of biological context features. + :param drug_features_left: A matrix of head drug features. + :param drug_features_right: A matrix of tail drug features. + :returns: A column vector of predicted synergy scores. """ hidden = torch.cat([context_features, drug_features_left, drug_features_right], dim=1) return self.layers(hidden) diff --git a/chemicalx/models/epgcnds.py b/chemicalx/models/epgcnds.py index 1b0aeb5..31fb0ff 100644 --- a/chemicalx/models/epgcnds.py +++ b/chemicalx/models/epgcnds.py @@ -58,14 +58,11 @@ def _forward_molecules(self, molecules: PackedGraph) -> torch.FloatTensor: return features def forward(self, molecules_left: PackedGraph, molecules_right: PackedGraph) -> torch.FloatTensor: - """ - Run a forward pass of the EPGCN-DS model. + """Run a forward pass of the EPGCN-DS model. - Args: - molecules_left: Batched molecules for the left side drugs. - molecules_right: Batched molecules for the right side drugs. - Returns: - : A column vector of predicted synergy scores. + :param molecules_left: Batched molecules for the left side drugs. 
+ :param molecules_right: Batched molecules for the right side drugs. + :returns: A column vector of predicted synergy scores. """ features_left = self._forward_molecules(molecules_left) features_right = self._forward_molecules(molecules_right) diff --git a/chemicalx/models/matchmaker.py b/chemicalx/models/matchmaker.py index 4d06c45..82b9d4b 100644 --- a/chemicalx/models/matchmaker.py +++ b/chemicalx/models/matchmaker.py @@ -73,15 +73,12 @@ def forward( drug_features_left: torch.FloatTensor, drug_features_right: torch.FloatTensor, ) -> torch.FloatTensor: - """ - Run a forward pass of the MatchMaker model. + """Run a forward pass of the MatchMaker model. - Args: - context_features: A matrix of biological context features. - drug_features_left: A matrix of head drug features. - drug_features_right: A matrix of tail drug features. - Returns: - hidden: A column vector of predicted synergy scores. + :param context_features: A matrix of biological context features. + :param drug_features_left: A matrix of head drug features. + :param drug_features_right: A matrix of tail drug features. + :returns: A column vector of predicted synergy scores. 
""" # The left drug hidden_left = torch.cat([context_features, drug_features_left], dim=1) diff --git a/pull_request_template.md b/pull_request_template.md index 62f41d9..60000b8 100644 --- a/pull_request_template.md +++ b/pull_request_template.md @@ -11,7 +11,7 @@ Please ensure you have the following: _Please provide a high-level summary of the changes for the changes and notes for the reviewers_ - [ ] Unit tests provided for these changes -- [ ] Documentation and docstrings added for these changes +- [ ] Documentation and docstrings added for these changes using the [sphinx style](https://sphinx-rtd-tutorial.readthedocs.io/en/latest/docstrings.html) ## Changes diff --git a/setup.cfg b/setup.cfg index 95b61ac..66d71a3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,3 +4,10 @@ [metadata] long_description = file: README.md long_description_content_type = text/markdown + +########################## +# Darglint Configuration # +########################## +[darglint] +docstring_style = sphinx +strictness = short diff --git a/tox.ini b/tox.ini index a45dc17..7ef4be6 100644 --- a/tox.ini +++ b/tox.ini @@ -40,6 +40,7 @@ description = Run linters. [testenv:flake8] deps = + darglint flake8 flake8-black flake8-bugbear