Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify package version per Knime version #9

Merged
merged 12 commits into from
Nov 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
runs-on: ${{ matrix.os }}-latest
strategy:
matrix:
os: ["ubuntu", "macos", "windows"]
os: ["macos", "ubuntu", "windows"]
defaults:
run:
shell: bash -l {0}
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ __pycache__
package.json
package-lock.json
node_modules
.coverage
.coverage
build
*.egg-info
18 changes: 13 additions & 5 deletions retropath2_wrapper/Args.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,23 @@
from retropath2_wrapper._version import __version__


DEFAULT_TIMEOUT = 60 # minutes
DEFAULT_MSC_TIMEOUT = 10 # minutes
DEFAULT_KNIME_VERSION = '4.5.0'
DEFAULT_RP2_VERSION = 'r20220104'
KNIME_PACKAGE = {
'4.5.0': {
'org.knime.features.chem.types.feature.group': '4.5.0.v202107011901',
'org.knime.features.datageneration.feature.group': '4.5.0.v202107011901',
'org.knime.features.python.feature.group': '4.5.2.v202203041212',
'org.rdkit.knime.feature.feature.group': '4.5.0.v202207051536',
},
}
RETCODES = {
'OK': 0,
'NoError': 0,
# Warnings
'SrcInSink': -1,
'NoSolution': -2,
'TimeLimit': -3,
# Errors
'FileNotFound': 1,
'OSError': 2,
Expand Down Expand Up @@ -96,6 +103,7 @@ def _add_arguments(parser):
parser.add_argument(
'--kver',
type=str,
choices=list(KNIME_PACKAGE.keys()),
default=DEFAULT_KNIME_VERSION,
help='version of KNIME (mandatory if --kexec is passed).',
)
Expand All @@ -120,10 +128,10 @@ def _add_arguments(parser):
parser.add_argument('--dmax' , type=int, default=1000)
parser.add_argument('--mwmax_source' , type=int, default=1000)
parser.add_argument(
'--timeout',
'--msc_timeout',
type=int,
default=DEFAULT_TIMEOUT,
help=f'Defines the time after which the program will stop and return calculated results (default: {DEFAULT_TIMEOUT})'
default=DEFAULT_MSC_TIMEOUT,
help=f'Defines the time after which the RDKit MCS Aggregation method will stop searching for best match (default: {DEFAULT_MSC_TIMEOUT}).'
)
# parser.add_argument('--forward' , action='store_true')

Expand Down
131 changes: 72 additions & 59 deletions retropath2_wrapper/RetroPath2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
@description: Python wrapper to run RetroPath2.0 KNIME workflow

"""
import os
from os import (
mkdir as os_mkdir,
path as os_path,
Expand All @@ -22,9 +23,6 @@
rmtree
)
from sys import platform as sys_platform
from subprocess import (
TimeoutExpired
) # nosec
from brs_utils import (
download_and_extract_tar_gz,
download,
Expand Down Expand Up @@ -54,11 +52,14 @@
from csv import reader
from .Args import (
DEFAULT_KNIME_FOLDER,
DEFAULT_TIMEOUT,
DEFAULT_MSC_TIMEOUT,
DEFAULT_KNIME_VERSION,
DEFAULT_RP2_VERSION,
RETCODES
KNIME_PACKAGE,
RETCODES,
)
from retropath2_wrapper.preference import Preference


here = os_path.dirname(os_path.realpath(__file__))

Expand Down Expand Up @@ -115,6 +116,11 @@ def set_vars(
kpath = kexec[:kexec.rfind('/')]
kinstall = kpath[:kpath.rfind('/')]

workflow = os_path.join(
here, 'workflows', f'RetroPath2.0_{rp2_version}.knwf'
)


# Build a dict to store KNIME vars
return {
'kexec' : kexec,
Expand All @@ -123,11 +129,7 @@ def set_vars(
'kpath' : kpath,
'kinstall' : kinstall,
'kpkg_install' : kpkg_install,
'workflow' : os_path.join(
here,
'workflows',
f'RetroPath2.0_{rp2_version}.knwf'
)
'workflow' : workflow,
}


Expand All @@ -142,7 +144,7 @@ def retropath2(
topx: int = 100,
dmin: int = 0, dmax: int = 100,
mwmax_source: int = 1000,
timeout: int = DEFAULT_TIMEOUT,
msc_timeout: int = DEFAULT_MSC_TIMEOUT,
logger: Logger = getLogger(__name__)
) -> Tuple[str, Dict]:

Expand All @@ -161,7 +163,7 @@ def retropath2(
logger.debug(f'dmin: {dmin}')
logger.debug(f'dmax: {dmax}')
logger.debug(f'mwmax_source: {mwmax_source}')
logger.debug(f'timeout: {timeout}')
logger.debug(f'msc_timeout: {msc_timeout}')

if kvars is None:
# Store KNIME vars into a dictionary
Expand Down Expand Up @@ -231,6 +233,9 @@ def retropath2(

logger.info('{attr1}Initializing{attr2}'.format(attr1=attr('bold'), attr2=attr('reset')))

# Preferences
preference = Preference(rdkit_timeout_minutes=msc_timeout)

with TemporaryDirectory() as tempd:

# Format files for KNIME
Expand All @@ -247,13 +252,13 @@ def retropath2(

# Call KNIME
r_code = call_knime(
kvars,
files,
rp2_params,
timeout,
logger
kvars=kvars,
files=files,
params=rp2_params,
preference=preference,
logger=logger,
)
if r_code == RETCODES['TimeLimit'] or r_code == RETCODES['OSError']:
if r_code == RETCODES['OSError']:
return r_code, files

r_code = check_src_in_sink_2(
Expand All @@ -274,14 +279,14 @@ def check_input(

# Check if InChI is well-formed
inchi = check_inchi_from_file(source_file, logger)
if inchi == '':
if inchi == '' or inchi in RETCODES.values():
return RETCODES['InChI'], None

# Check if source is in sink
r_code = check_src_in_sink_1(inchi, sink_file, logger)
if r_code == -1:
if r_code == RETCODES['SrcInSink']:
return RETCODES['SrcInSink'], None
elif r_code == -2:
elif r_code == RETCODES['FileNotFound']:
return RETCODES['FileNotFound'], None

return RETCODES['OK'], inchi
Expand Down Expand Up @@ -465,7 +470,7 @@ def install_knime(

logger.info(' |--url: '+kurl)
logger.info(' |--install_dir: '+kinstall)


def gunzip_to_csv(filename: str, indir: str) -> str:
"""
Expand All @@ -491,6 +496,11 @@ def gunzip_to_csv(filename: str, indir: str) -> str:
return filename


def standardize_path(path: str) -> str:
if sys_platform == 'win32':
path = "/".join(path.split(os.sep))
return path

def format_files_for_knime(
sinkfile: str, sourcefile: str, rulesfile: str,
indir: str, outdir: str,
Expand Down Expand Up @@ -577,21 +587,18 @@ def install_knime_pkgs(
logger.debug(f' + kpkg_install: {kpkg_install}')
logger.debug(f' + kver: {kver}')

args = \
' -application org.eclipse.equinox.p2.director' \
+ ' -nosplash -consolelog' \
+ ' -r http://update.knime.org/community-contributions/trunk,' \
args = [kexec]
args += ['-application', 'org.eclipse.equinox.p2.director']
args += ['-nosplash']
args += ['-consoleLog']
args += ['-r', 'http://update.knime.org/community-contributions/trunk,' \
+ 'http://update.knime.com/community-contributions/trusted/'+kver[:3]+',' \
+ 'http://update.knime.com/analytics-platform/'+kver[:3] \
+ ' -i org.knime.features.chem.types.feature.group,' \
+ 'org.knime.features.datageneration.feature.group,' \
+ 'org.knime.features.python.feature.group,' \
+ 'org.rdkit.knime.feature.feature.group' \
+ ' -bundlepool ' + kpkg_install + ' -d ' + kpkg_install
+ 'http://update.knime.com/analytics-platform/'+kver[:3]]
args += ['-i', ','.join([x + '/' + y for x, y in KNIME_PACKAGE[kver].items()])]
args += ['-bundlepool', kpkg_install]
args += ['-d', kpkg_install]

cmd = f'{kexec} {args}'

returncode = subprocess_call(cmd, logger=logger)
returncode = subprocess_call(" ".join(args), logger=logger)
StreamHandler.terminator = "\n"
logger.info(' OK')
return returncode
Expand All @@ -600,7 +607,7 @@ def call_knime(
kvars: Dict,
files: Dict,
params: Dict,
timeout: int,
preference: Preference,
logger: Logger = getLogger(__name__)
) -> int:
"""
Expand All @@ -614,8 +621,8 @@ def call_knime(
Paths of sink, source, rules files.
params: Dict
Parameters of the workflow to process.
timeout: int
Time after which the run returns.
preference: Preference
A preference object.
logger : Logger
The logger object.

Expand All @@ -628,21 +635,32 @@ def call_knime(
StreamHandler.terminator = ""
logger.info('{attr1}Running KNIME...{attr2}'.format(attr1=attr('bold'), attr2=attr('reset')))

args = ' -nosplash -nosave -reset --launcher.suppressErrors -application org.knime.product.KNIME_BATCH_APPLICATION ' \
+ ' -workflowFile=' + kvars['workflow'] \
+ ' -workflow.variable=input.dmin,"' + str(params['dmin']) + '",int' \
+ ' -workflow.variable=input.dmax,"' + str(params['dmax']) + '",int' \
+ ' -workflow.variable=input.max-steps,"' + str(params['max_steps']) + '",int' \
+ ' -workflow.variable=input.sourcefile,"' + files['source'] + '",String' \
+ ' -workflow.variable=input.sinkfile,"' + files['sink'] + '",String' \
+ ' -workflow.variable=input.rulesfile,"' + files['rules'] + '",String' \
+ ' -workflow.variable=input.topx,"' + str(params['topx']) + '",int' \
+ ' -workflow.variable=input.mwmax-source,"' + str(params['mwmax_source']) + '",int' \
+ ' -workflow.variable=output.dir,"' + files['outdir'] + '",String' \
+ ' -workflow.variable=output.solutionfile,"' + files['results'] + '",String' \
+ ' -workflow.variable=output.sourceinsinkfile,"' + files['src-in-sk'] + '",String'

logger.debug(kvars['kexec'] + ' ' + args)
args = [kvars["kexec"]]
args += ["-nosplash"]
args += ["-nosave"]
args += ["-reset"]
args += ["-consoleLog"]
args += ["--launcher.suppressErrors"]
args += ["-application", "org.knime.product.KNIME_BATCH_APPLICATION"]
args += ["-workflowFile=%s" % (standardize_path(path=kvars['workflow']),)]

args += ['-workflow.variable=input.dmin,"%s",int' % (params['dmin'],)]
args += ['-workflow.variable=input.dmax,"%s",int' % (params['dmax'],)]
args += ['-workflow.variable=input.max-steps,"%s",int' % (params['max_steps'],)]
args += ['-workflow.variable=input.topx,"%s",int' % (params['topx'],)]
args += ['-workflow.variable=input.mwmax-source,"%s",int' % (params['mwmax_source'],)]

args += ['-workflow.variable=input.sourcefile,"%s",String' % (standardize_path(files['source']),)]
args += ['-workflow.variable=input.sinkfile,"%s",String' % (standardize_path(files['sink']),)]
args += ['-workflow.variable=input.rulesfile,"%s",String' % (standardize_path(files['rules']),)]
args += ['-workflow.variable=output.dir,"%s",String' % (standardize_path(files['outdir']),)]
args += ['-workflow.variable=output.solutionfile,"%s",String' % (standardize_path(files['results']),)]
args += ['-workflow.variable=output.sourceinsinkfile,"%s",String' % (standardize_path(files['src-in-sk']),)]
if preference and preference.is_init():
preference.to_file()
args += ["-preferences=" + standardize_path(preference.path)]

logger.debug(" ".join(args))

try:
printout = open(devnull, 'wb') if logger.level > 10 else None
Expand All @@ -654,19 +672,14 @@ def call_knime(
os_environ['CONDA_PREFIX'],
"lib"
)
returncode = subprocess_call(cmd=kvars['kexec'] + args, logger=logger)
returncode = subprocess_call(cmd=" ".join(args), logger=logger)
os_environ['LD_LIBRARY_PATH'] = ':'.join(
os_environ['LD_LIBRARY_PATH'].split(':')[:-1]
)
StreamHandler.terminator = "\n"
logger.info(' {bold}OK{reset}'.format(bold=attr('bold'), reset=attr('reset')))
return returncode

except TimeoutExpired as e:
logger.warning(' |- Time limit ({timeout} min) is reached'.format(timeout=timeout))
logger.warning(' Results collected until now are available')
return RETCODES['TimeLimit']

except OSError as e:
logger.error(e)
return RETCODES['OSError']
6 changes: 2 additions & 4 deletions retropath2_wrapper/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,13 @@ def _cli():
dmax=args.dmax,
mwmax_source=args.mwmax_source,
rp2_version=args.rp2_version,
timeout=args.timeout,
msc_timeout=args.msc_timeout,
logger=logger
)

logger.info('')

if r_code == RETCODES['OK'] or r_code == RETCODES['TimeLimit']:
if r_code == RETCODES['TimeLimit']:
logger.warning('Time limit is reached.')
if r_code == RETCODES['OK']:
logger.info('{attr1}Results{attr2}'.format(attr1=attr('bold'), attr2=attr('reset')))
logger.info(' |- Checking... ')
r_code = check_results(result_files, logger)
Expand Down
26 changes: 26 additions & 0 deletions retropath2_wrapper/preference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import datetime
import tempfile


class Preference(object):
def __init__(self, *args, **kwargs) -> None:
self.path = kwargs.get("path", tempfile.NamedTemporaryFile(suffix=".epf").name)
self.rdkit_timeout_minutes = kwargs.get("rdkit_timeout_minutes")

def is_init(self) -> bool:
if self.rdkit_timeout_minutes:
return True
return False

def to_file(self) -> None:
now = datetime.datetime.now(datetime.timezone.utc)
with open(self.path, "w") as fod:
fod.write("#")
fod.write(now.strftime("%a %b %d %H:%M:%S %Z %Y"))
fod.write("\n")
fod.write("\\!/=")
fod.write("\n")
if self.rdkit_timeout_minutes:
fod.write("/instance/org.rdkit.knime.nodes/mcsAggregation.timeout=")
fod.write(str(int(self.rdkit_timeout_minutes) * 60))
fod.write("\n")
Loading