Skip to content

Commit

Permalink
Merging First official version
Browse files (browse the repository at this point in the history)
Prepare version for official launch with publication
  • Loading branch information
RaulFD-creator authored Aug 23, 2024
2 parents 4b94895 + 6adabe5 commit e76cee0
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 21 deletions.
25 changes: 14 additions & 11 deletions autopeptideml/autopeptideml.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,25 @@
from copy import deepcopy
from multiprocessing import cpu_count
import joblib
import os
from typing import Dict, List, Optional, Union

import joblib
import matplotlib
matplotlib.use('Agg')
from copy import deepcopy
from multiprocessing import cpu_count
from typing import Dict, List, Union

from hestia.partition import ccpart, graph_part
import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import scikitplot.metrics as skplt
import sklearn.metrics

from sklearn.model_selection import StratifiedKFold
from hestia.partition import ccpart, graph_part

from .data.algorithms import SYNONYMS, SUPPORTED_MODELS
from .data.metrics import METRICS, METRIC2FUNCTION, THRESHOLDED_METRICS
from .data.residues import is_canonical
from .utils.embeddings import RepresentationEngine
from .utils.training import FlexibleObjective, UniDL4BioPep_Objective, ModelSelectionObjective
from .utils.training import (FlexibleObjective, UniDL4BioPep_Objective,
ModelSelectionObjective)


class AutoPeptideML:
Expand Down Expand Up @@ -60,7 +59,7 @@ def autosearch_negatives(
self,
df_pos: pd.DataFrame,
positive_tags: List[str],
proportion: float=1.0
proportion: float = 1.0
) -> pd.DataFrame:
"""Method for searching bioactive databases for peptides
Expand Down Expand Up @@ -122,7 +121,7 @@ def autosearch_negatives(
else:
missing += samples_to_draw
df_neg = pd.concat([df_neg, subdf])

get_tags = lambda x: ';'.join(sorted(set(
[column if (x[column] == 1) else df_neg.columns[0]
for column in self.tags]
Expand Down Expand Up @@ -643,6 +642,10 @@ def _fasta2csv(self, dataset: str, outputdir: str) -> pd.DataFrame:
return df

def _make_figures(self, figures_path: str, truths, preds_proba):
import matplotlib
matplotlib.use('Agg')
import scikitplot.metrics as skplt

preds = preds_proba > 0.5
new_preds_proba = np.zeros((len(preds_proba), 2))
new_preds_proba[:, 0] = 1 - preds_proba
Expand Down
17 changes: 9 additions & 8 deletions autopeptideml/data/algorithms.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
from lightgbm import LGBMClassifier
import sklearn.ensemble
import sklearn.neighbors
import sklearn.neural_network
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import sklearn.svm
import sklearn.metrics
from xgboost import XGBClassifier

from ..utils.unidl4biopep.model import Cnn


SUPPORTED_MODELS = {
'knn': sklearn.neighbors.KNeighborsClassifier,
'rfc': sklearn.ensemble.RandomForestClassifier,
'knn': KNeighborsClassifier,
'rfc': RandomForestClassifier,
'svm': sklearn.svm.SVC,
'mlp': sklearn.neural_network.MLPClassifier,
'xgboost': sklearn.ensemble.GradientBoostingClassifier,
'adaboost': sklearn.ensemble.AdaBoostClassifier,
'mlp': MLPClassifier,
'xgboost': XGBClassifier,
'adaboost': AdaBoostClassifier,
'unidl4biopep': Cnn,
'lightgbm': LGBMClassifier
}
Expand Down
1 change: 1 addition & 0 deletions autopeptideml/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def parse_cli():

return parser.parse_args()


def parse_cli_predict():
parser = argparse.ArgumentParser()
parser.add_argument('dataset', type=str)
Expand Down
2 changes: 1 addition & 1 deletion autopeptideml/utils/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
}


class RepresentationEngine(torch.nn.Module):
class RepresentationEngine(torch.nn.Module):
def __init__(self, model: str, batch_size: int):
super().__init__()
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
'transformers',
'lightgbm',
'mdpdf',
'xgboost',
'hestia-ood'
]

Expand Down Expand Up @@ -51,6 +52,6 @@
name='autopeptideml',
packages=find_packages(exclude=['examples']),
url='https://ibm.github.io/AutoPeptideML/',
version='0.3.4',
version='1.0.0',
zip_safe=False,
)
7 changes: 7 additions & 0 deletions tests/test_apml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from autopeptideml import AutoPeptideML


def test_load():
    """Check that curating the bundled example dataset yields 6,583 rows.

    The dataset path is built from this test file's location instead of the
    current working directory, so the outcome does not depend on where
    pytest is launched from.
    """
    import os

    # Assumes the repository layout implied by the paths in this commit:
    # ``tests/`` and ``examples/`` are sibling directories at the repo root.
    repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    dataset = os.path.join(repo_root, 'examples', 'AB_positives.csv')

    apml = AutoPeptideML()
    df = apml.curate_dataset(dataset)
    assert len(df) == 6_583

0 comments on commit e76cee0

Please sign in to comment.