Skip to content

Commit

Permalink
[SYSTEMDS-3687] Python API startup fixes
Browse files Browse the repository at this point in the history
This PR changes the startup of the Python interface to properly use the
jar files, and fixes a release issue where, if SYSTEMDS_ROOT is not
set, the Python API did not properly hook into the released jar artifacts.
  • Loading branch information
Baunsgaard committed Apr 4, 2024
1 parent 9100584 commit 387b6c1
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 20 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ buildNumber.properties
_site/

# Tutorial data mnist
src/main/python/systemds/examples/tutorials/*/
scripts/nn/examples/data/*

# User configuration files
Expand Down
7 changes: 7 additions & 0 deletions src/main/python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,10 @@ tests/examples/tutorials/model
tests/lineage/temp

python_venv/
venv

# Main Jar location for API communication.
systemds/SystemDS.jar

# tutorial
systemds/examples/tutorials/*
33 changes: 33 additions & 0 deletions src/main/python/conf/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

log4j.rootLogger=ERROR,console

log4j.logger.org.apache.sysds=ERROR
log4j.logger.org.apache.sysds.utils.SettingsChecker=WARN
log4j.logger.org.apache.spark=ERROR
log4j.logger.org.apache.hadoop=OFF
log4j.logger.org.apache.hadoop.util.NativeCodeLoader=INFO

log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
10 changes: 10 additions & 0 deletions src/main/python/pre_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@
shutil.copy(os.path.join(SYSTEMDS_ROOT,'conf', 'log4j.properties'), os.path.join(this_path, PYTHON_DIR, 'conf', 'log4j.properties'))
shutil.copy(os.path.join(SYSTEMDS_ROOT,'conf', 'SystemDS-config-defaults.xml'), os.path.join(this_path, PYTHON_DIR, 'conf', 'SystemDS-config-defaults.xml'))


# Take SystemDS file.
shutil.copy(os.path.join(SYSTEMDS_ROOT, 'target', 'SystemDS.jar'), os.path.join(PYTHON_DIR, 'SystemDS.jar'))

# remove redundant SystemDS file inside lib.
for file in os.listdir(os.path.join(PYTHON_DIR, 'lib')):
if "systemds" in file:
if "extra" not in file:
os.remove(os.path.join(PYTHON_DIR, 'lib', file))

SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
shutil.copyfile(os.path.join(SYSTEMDS_ROOT, 'LICENSE'), 'LICENSE')
shutil.copyfile(os.path.join(SYSTEMDS_ROOT, 'NOTICE'), 'NOTICE')
Expand Down
61 changes: 42 additions & 19 deletions src/main/python/systemds/context/systemds_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,34 +155,47 @@ def __build_startup_command(self, port: int):
"""Build the command line argument for the startup of the JVM
:param port: The port address to use; if -1, choose a random port."""

# Base command
command = ["java", "-cp"]

# Find the operating-system-specific separator; "nt" means it's Windows
cp_separator = ";" if os.name == "nt" else ":"
root = os.environ.get("SYSTEMDS_ROOT")

if root == None:
# If there is no systemds install default to use the PIP packaged java files.
# If there is no systemds install default to use the pip packaged java files.
root = os.path.join(get_module_dir())

# nt means its Windows
cp_separator = ";" if os.name == "nt" else ":"

if os.environ.get("SYSTEMDS_ROOT") != None:
# Find the SystemDS jar file.
if root != None: # root path was set
self._log.debug("SYSTEMDS_ROOT was set, searching for jar file")
lib_release = os.path.join(root, "lib")
lib_cp = os.path.join(root, "target", "lib")
if os.path.exists(lib_release):
classpath = cp_separator.join([os.path.join(lib_release, '*')])
elif os.path.exists(lib_cp):
systemds_cp = os.path.join(root, "target", "SystemDS.jar")
classpath = cp_separator.join(
[os.path.join(lib_cp, '*'), systemds_cp])
systemds_cp = os.path.join(root, "target", "SystemDS.jar")
if os.path.exists(lib_release): # It looks like it was a release path for root.
classpath = os.path.join(root, "SystemDS.jar")
if not os.path.exists(classpath):
for f in os.listdir(root):
if "systemds" in f:
if os.path.exists(classpath):
raise(ValueError("Invalid setup there were multiple conflicting systemds jar fines in" + root))
else:
classpath = os.path.join(root, f)
if not os.path.exists(classpath):
raise ValueError(
"Invalid setup did not find SystemDS jar file in " + root)
elif os.path.exists(systemds_cp):
classpath = cp_separator.join([systemds_cp])
else:
raise ValueError(
"Invalid setup at SYSTEMDS_ROOT env variable path " + lib_cp)
else:
lib1 = os.path.join(root, "lib", "*")
lib2 = os.path.join(root, "lib")
classpath = cp_separator.join([lib1, lib2])
"Invalid setup at SYSTEMDS_ROOT env variable path " + root)
else: # root path was not set use the pip installed SystemDS
self._log.warning("SYSTEMDS_ROOT was unset, defaulting to python packaged jar files")
systemds_cp = os.path.join(root,"SystemDS.jar")
classpath = cp_separator.join([systemds_cp])

command.append(classpath)

# Find the logging configuration file.
if os.environ.get("LOG4JPROP") == None:
files = glob(os.path.join(root, "conf", "log4j*.properties"))
if len(files) > 1:
Expand All @@ -195,16 +208,23 @@ def __build_startup_command(self, port: int):
else:
command.append("-Dlog4j.configuration=file:" + files[0])
else:
command.append("-Dlog4j.configuration=file:" +os.environ.get("LOG4JPROP"))
logging_file = os.environ.get("LOG4JPROP")
if os.path.exists(logging_file):
command.append("-Dlog4j.configuration=file:" +os.environ.get("LOG4JPROP"))
else:
self._log.warning("LOG4JPROP is set but path is invalid: " + str(logging_file))

# Specify the main function inside SystemDS to launch in java.
command.append("org.apache.sysds.api.PythonDMLScript")

# Find the configuration file for systemds.
# TODO: refine the choice of configuration file
files = glob(os.path.join(root, "conf", "SystemDS*.xml"))
if len(files) > 1:
self._log.warning(
"Multiple config files found selecting: " + files[0])
if len(files) == 0:
self._log.warning("No log4j file found at: "
self._log.warning("No xml config file found at: "
+ os.path.join(root, "conf")
+ " therefore using default settings")
else:
Expand All @@ -219,6 +239,9 @@ def __build_startup_command(self, port: int):
command.append("--python")
command.append(str(actual_port))

self._log.info("Command " + str(command))
self._log.info("Port used for communication: " + str(actual_port))

return command, actual_port

def __start(self, port: int, capture_stdout: bool, retry: int = 0):
Expand Down

0 comments on commit 387b6c1

Please sign in to comment.