From 26eae7489357a1529464a6306b61de1a3f9704ec Mon Sep 17 00:00:00 2001 From: Robert Bartel Date: Tue, 21 May 2024 12:48:50 -0400 Subject: [PATCH] Fixing ngen worker entrypoint and image scripts. --- docker/main/ngen/funcs.sh | 16 ++++++++-------- docker/main/ngen/ngen_entrypoint.sh | 22 +++++++++++----------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docker/main/ngen/funcs.sh b/docker/main/ngen/funcs.sh index 3bb99851c..14bc4bd62 100644 --- a/docker/main/ngen/funcs.sh +++ b/docker/main/ngen/funcs.sh @@ -23,10 +23,10 @@ init_script_mpi_vars() init_ngen_executable_paths() { - NGEN_SERIAL_EXECUTABLE="/ngen/ngen/cmake_build_serial/ngen" - NGEN_PARALLEL_EXECUTABLE="/ngen/ngen/cmake_build_parallel/ngen" + NGEN_SERIAL_EXECUTABLE="/dmod/bin/ngen-serial" + NGEN_PARALLEL_EXECUTABLE="/dmod/bin/ngen-parallel" # This will be symlinked to the parallel one currently - NGEN_EXECUTABLE="/ngen/ngen/cmake_build/ngen" + NGEN_EXECUTABLE="/dmod/bin/ngen" } check_for_dataset_dir() @@ -154,11 +154,11 @@ ngen_sanity_checks_and_derived_init() # Run some sanity checks # Use complement of valid range like this in a few places to catch non-integer values if ! [ "${MPI_NODE_COUNT:-1}" -gt 0 ] 2>/dev/null; then - echo "Error: invalid value '${MPI_NODE_COUNT}' given for MPI node count" > 2>&1 + >&2 echo "Error: invalid value '${MPI_NODE_COUNT}' given for MPI node count" exit 1 fi if ! [ "${WORKER_INDEX:-0}" -ge 0 ] 2>/dev/null; then - echo "Error: invalid value '${WORKER_INDEX}' given for MPI worker index/rank" > 2>&1 + >&2 echo "Error: invalid value '${WORKER_INDEX}' given for MPI worker index/rank" exit 1 fi @@ -166,15 +166,15 @@ ngen_sanity_checks_and_derived_init() if [ -n "${MPI_NODE_COUNT:-}" ] || [ -n "${MPI_HOST_STRING:-}" ] || [ -n "${WORKER_INDEX:-}" ]; then # ... and as such, they all must be present if [ -z "${MPI_HOST_STRING:-}" ]; then - echo "Error: MPI host string not provided for job that will utilize MPI" > 2>&1 + >&2 echo "Error: MPI host string not provided for job that will utilize MPI" exit 1 fi if [ -z "${MPI_NODE_COUNT:-}" ]; then - echo "Error: MPI node count not provided for job that will utilize MPI" > 2>&1 + >&2 echo "Error: MPI node count not provided for job that will utilize MPI" exit 1 fi if [ -z "${WORKER_INDEX:-}" ]; then - echo "Error: MPI worker index not provided for job that will utilize MPI" > 2>&1 + >&2 echo "Error: MPI worker index not provided for job that will utilize MPI" exit 1 fi # Also, require a partitioning config for any MPI job diff --git a/docker/main/ngen/ngen_entrypoint.sh b/docker/main/ngen/ngen_entrypoint.sh index fef4c25b0..8aa2a32c6 100755 --- a/docker/main/ngen/ngen_entrypoint.sh +++ b/docker/main/ngen/ngen_entrypoint.sh @@ -4,35 +4,35 @@ while [ ${#} -gt 0 ]; do case "${1}" in --config-dataset) - CONFIG_DATASET_NAME="${2:?}" + declare -x CONFIG_DATASET_NAME="${2:?}" shift ;; --host-string) - MPI_HOST_STRING="${2:?}" + declare -x MPI_HOST_STRING="${2:?}" shift ;; --hydrofabric-dataset) - HYDROFABRIC_DATASET_NAME="${2:?}" + declare -x HYDROFABRIC_DATASET_NAME="${2:?}" shift ;; --job-id) - JOB_ID="${2:?}" + declare -x JOB_ID="${2:?}" shift ;; --node-count) - MPI_NODE_COUNT="${2:?}" + declare -x MPI_NODE_COUNT="${2:?}" shift ;; --output-dataset) - OUTPUT_DATASET_NAME="${2:?}" + declare -x OUTPUT_DATASET_NAME="${2:?}" shift ;; --partition-dataset) - PARTITION_DATASET_NAME="${2:?}" + declare -x PARTITION_DATASET_NAME="${2:?}" shift ;; --worker-index) - WORKER_INDEX="${2:?}" + declare -x WORKER_INDEX="${2:?}" shift ;; esac @@ -40,7 +40,7 @@ while [ ${#} -gt 0 ]; do done # Get some universally applicable functions and constants -source ./funcs.sh +source /ngen/funcs.sh ngen_sanity_checks_and_derived_init init_script_mpi_vars @@ -49,8 +49,8 @@ init_ngen_executable_paths # Move to the output dataset mounted directory cd ${OUTPUT_DATASET_DIR:?Output dataset directory not defined} #Needed for routing -if [ ! -e /dmod/dataset/experiment_output ]; then - ln -s $(pwd) /dmod/dataset/experiment_output +if [ ! -e /dmod/datasets/linked_job_output ]; then + ln -s $(pwd) /dmod/datasets/linked_job_output fi # We can allow worker index to not be supplied when executing serially