diff --git a/docs/source/canfar.md b/docs/source/canfar.md index 7a83e47e..1f490c50 100644 --- a/docs/source/canfar.md +++ b/docs/source/canfar.md @@ -56,16 +56,28 @@ it is recommended to perform some or all of the testing steps (1. - 4.). ## Monitoring + +### Status and output of submitted job + Monitoring of the currently active remote session can be performed using the session IDs `session_IDs.txt` written by the remote session script `curl_canfar_local.sh`. In the patch main directory, run ```bash +curl_canfar_monitor.sh events +``` +to display the remotely started docker image status, and +```bash curl_canfar_monitor.sh logs ``` -to print `stdout` of the remotely run pipeline script, and +to print `stdout` of the remotely run pipeline script. + +### Number of submitted running jobs + +The script ```bash -curl_canfar_monitor.sh events +stats_headless_canfar.py ``` -to display the remotely started docker image status. +returns the number of actively running headless jobs. + ## Post-hoc summary @@ -74,3 +86,9 @@ In the patch main directory, run summary_run PATCH ``` to print a summary with missing image IDs per job and module. + +## Deleting jobs + +```bash + for id in `cat session_IDs.txt`; do echo $id; curl -X DELETE -E /arc/home/kilbinger/.ssl/cadcproxy.pem https://ws-uv.canfar.net/skaha/v0/session/$id; done + ``` diff --git a/environment.yml b/environment.yml index ea42dda0..548c7e61 100644 --- a/environment.yml +++ b/environment.yml @@ -22,6 +22,7 @@ dependencies: - PyQt5 - pyqtgraph - reproject + - skaha - sip_tpv - sf_tools - sqlitedict diff --git a/example/cfis/config_Gie_vos.ini b/example/cfis/config_Gie_vos.ini index a326434e..ee4b92f3 100644 --- a/example/cfis/config_Gie_vos.ini +++ b/example/cfis/config_Gie_vos.ini @@ -98,4 +98,6 @@ RETRIEVE = vos N_TRY = 3 # Retrieve command options, optional -RETRIEVE_OPTIONS = --certfile=$VM_HOME/.ssl/cadcproxy.pem +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem + +CHECK_EXISTING_DIR = $SP_RUN/data_exp diff --git a/example/cfis/config_GitFeGie_vos.ini b/example/cfis/config_GitFeGie_vos.ini index 9ef72f19..75044bf4 100644 --- a/example/cfis/config_GitFeGie_vos.ini +++ b/example/cfis/config_GitFeGie_vos.ini @@ -84,7 +84,7 @@ OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight- RETRIEVE = vos # Copy command options, optional -RETRIEVE_OPTIONS = --certfile=$VM_HOME/.ssl/cadcproxy.pem +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem CHECK_EXISTING_DIR = $SP_RUN/output/run_sp_Git/get_images_runner_run_1/output @@ -152,4 +152,4 @@ RETRIEVE = vos N_TRY = 3 # Retrieve command options, optional -RETRIEVE_OPTIONS = --certfile=$VM_HOME/.ssl/cadcproxy.pem +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem diff --git a/example/cfis/config_GitFe_symlink.ini b/example/cfis/config_GitFe_symlink.ini new file mode 100644 index 00000000..4e7ce75c --- /dev/null +++ b/example/cfis/config_GitFe_symlink.ini @@ -0,0 +1,105 @@ +# ShapePipe configuration file for: get images and find exposures + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_GitFe + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = get_images_runner, find_exposures_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Get tiles +[GET_IMAGES_RUNNER] + +FILE_PATTERN = tile_numbers + +FILE_EXT = .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = + +# Paths + +# Input path where original images are stored. Can be local path or vos url. +# Single string or list of strings +INPUT_PATH = $SP_RUN/data_tiles, $SP_RUN/data_tiles + +# Input file pattern including tile number as dummy template +INPUT_FILE_PATTERN = CFIS.000.000.r, CFIS.000.000.r.weight + +# Input file extensions +INPUT_FILE_EXT = .fits, .fits.fz + +# Input numbering scheme, python regexp +INPUT_NUMBERING = \d{3}\.\d{3} + +# Output file pattern without number +OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight- + +# Copy/download method, one in 'vos', 'symlink' +RETRIEVE = symlink + +# Copy command options, optional +RETRIEVE_OPTIONS = -L + + +[FIND_EXPOSURES_RUNNER] + +INPUT_MODULE = get_images_runner + +FILE_PATTERN = CFIS_image + +FILE_EXT = .fits + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = -000-000 + +# Column number of exposure name in FITS header +COLNUM = 3 + +# Prefix to remove from exposure name +EXP_PREFIX = p diff --git a/example/cfis/config_Git_vos.ini b/example/cfis/config_Git_vos.ini new file mode 100644 index 00000000..26edf3ef --- /dev/null +++ b/example/cfis/config_Git_vos.ini @@ -0,0 +1,93 @@ +# ShapePipe configuration file for: get images + + +## Default ShapePipe options +[DEFAULT] + +# verbose mode (optional), default: True, print messages on terminal +VERBOSE = False + +# Name of run (optional) default: shapepipe_run +RUN_NAME = run_sp_Git + +# Add date and time to RUN_NAME, optional, default: False +RUN_DATETIME = True + + +## ShapePipe execution options +[EXECUTION] + +# Module name, single string or comma-separated list of valid module runner names +MODULE = get_images_runner + +# Parallel processing mode, SMP or MPI +MODE = SMP + + +## ShapePipe file handling options +[FILE] + +# Log file master name, optional, default: shapepipe +LOG_NAME = log_sp + +# Runner log file name, optional, default: shapepipe_runs +RUN_LOG_NAME = log_run_sp + +# Input directory, containing input files, single string or list of names +INPUT_DIR = $SP_RUN + +# Output directory +OUTPUT_DIR = $SP_RUN/output + + +## ShapePipe job handling options +[JOB] + +# Batch size of parallel processing (optional), default is 1, i.e. run all jobs in serial +SMP_BATCH_SIZE = 1 + +# Timeout value (optional), default is None, i.e. no timeout limit applied +TIMEOUT = 96:00:00 + + +## Module options + +# Get tiles +[GET_IMAGES_RUNNER] + +FILE_PATTERN = tile_numbers + +FILE_EXT = .txt + +# NUMBERING_SCHEME (optional) string with numbering pattern for input files +NUMBERING_SCHEME = + +# Paths + +# Input path where original images are stored. Can be local path or vos url. +# Single string or list of strings +INPUT_PATH = vos:cfis/tiles_DR5, vos:cfis/tiles_DR5 + +# Input file pattern including tile number as dummy template +INPUT_FILE_PATTERN = CFIS.000.000.r, CFIS.000.000.r.weight + +# Input file extensions +INPUT_FILE_EXT = .fits, .fits.fz + +# Input numbering scheme, python regexp +INPUT_NUMBERING = \d{3}\.\d{3} + +# Output file pattern without number +OUTPUT_FILE_PATTERN = CFIS_image-, CFIS_weight- + +# Copy/download method, one in 'vos', 'symlink' +RETRIEVE = vos + +# If RETRIEVE=vos, number of attempts to download +# Optional, default=3 +N_TRY = 3 + +# Copy command options, optional +RETRIEVE_OPTIONS = --certfile=$HOME/.ssl/cadcproxy.pem + +CHECK_EXISTING_DIR = $SP_RUN/data_tiles diff --git a/scripts/jupyter/summary_run.ipynb b/scripts/jupyter/summary_run.ipynb index e57c72b1..451c591d 100644 --- a/scripts/jupyter/summary_run.ipynb +++ b/scripts/jupyter/summary_run.ipynb @@ -2,29 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "id": "130112a4-f2ca-4d26-b884-d8b054676f9c", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", - "%autoreload 2" + "%autoreload 3\n", + "%reload_ext autoreload" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 2, "id": "196beca5-10a1-4cf5-9462-be145167cc70", "metadata": { "tags": [] @@ -37,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 3, "id": "ca63c72d-212c-463e-a792-71efbac0b908", "metadata": { "tags": [] @@ -45,14 +37,14 @@ "outputs": [], "source": [ "# Setting\n", - "patch = \"P1\"\n", + "patch = \"P7\"\n", "\n", - "verbose = True" + "verbose = False" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 4, "id": "dcb5604c-d61f-4705-8295-63875455cadb", "metadata": { "tags": [] @@ -60,12 +52,12 @@ "outputs": [], "source": [ "# Load parameters\n", - "%run ~/shapepipe/scripts/python/summary_params_pre_v2 {patch}" + "%run ~/shapepipe/scripts/python/summary_params_pre_v2" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 5, "id": "e69b7dab-1fea-4fcc-a8d9-0720e1d628c3", "metadata": { "tags": [] @@ -75,14 +67,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "Checking main directory = /arc/home/kilbinger/cosmostat/v2/pre_v2/psfex/P1\n" + "Checking main directory = /arc/home/kilbinger/cosmostat/v2/pre_v2/psfex/P7\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Set job info for patch P1\n" + "Set job info for patch P7\n" ] } ], @@ -92,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 6, "id": "2c3a9dde-cf88-493f-926e-7ae7e8e10916", "metadata": { "tags": [] @@ -105,34 +97,20 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 7, "id": "1e9c1487-3cec-4394-9fcf-c12e92a0f984", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "===========\n", - "par_runtime\n", - "-----------\n", - "n_tile_IDs 3527\n", - "list_tile_IDs 3527 entries\n", - "===========\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "print_par_runtime(par_runtime, verbose=verbose)" + "# No effect in notebook\n", + "#print_par_runtime(par_runtime, verbose=verbose)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "id": "b7c63a22-ead1-4d6a-b081-a74ade515439", "metadata": { "tags": [] @@ -154,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 9, "id": "4720ae18-0633-4646-b392-b1b24e0294c3", "metadata": { "tags": [] @@ -165,58 +143,27 @@ "output_type": "stream", "text": [ " (Job 1)\n", - "get_images_runner_run_1 7054 4402 0 2652 1326.0 62.4%\n", - "find_exposures_runner 3527 875 0 2652 2652.0 24.8%\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "** module get_images_runner_run_1\n", - "*** subdirs ['']\n", - "**** base_and_subdir /arc/home/kilbinger/cosmostat/v2/pre_v2/psfex/P1/output//.\n", - "Matching entries: [, ]\n", - "** module find_exposures_runner\n", - "*** subdirs ['']\n", - "**** base_and_subdir /arc/home/kilbinger/cosmostat/v2/pre_v2/psfex/P1/output//.\n", - "Matching entries: [, ]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "get_images_runner_run_2 7119 1509 0 5610 1870.0 21.2%\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "** module get_images_runner_run_2\n", - "*** subdirs ['']\n", - "**** base_and_subdir /arc/home/kilbinger/cosmostat/v2/pre_v2/psfex/P1/output//.\n", - "Matching entries: [, ]\n" + "get_images_runner_run_1 464 462 0 2 1.0 99.6%\n", + "find_exposures_runner 232 231 0 1 1.0 99.6%\n", + "get_images_runner_run_2 537 0 0 537 179.0 0.0%\n" ] } ], "source": [ "for key in \"1\":\n", - " job = jobs[key]\n", - " job.print_intro()\n", - " job.check_numbers(par_runtime=par_runtime, indices=[0, 1])\n", + " jobs[key].print_intro()\n", + " jobs[key].check_numbers(par_runtime=par_runtime, indices=[0, 1])\n", "\n", - " all_exposures = get_all_exposures(job._paths_in_dir[1], verbose=verbose)\n", + " all_exposures = get_all_exposures(jobs[key]._paths_in_dir[1], verbose=verbose)\n", " par_runtime[\"n_exposures\"] = len(all_exposures)\n", " par_runtime[\"list_exposures\"] = all_exposures\n", "\n", - " job.check_numbers(par_runtime, indices=[2])" + " jobs[key].check_numbers(par_runtime, indices=[2])" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 10, "id": "f149f404-64e7-4d92-8f54-f300ed620130", "metadata": { "tags": [] @@ -229,39 +176,7 @@ }, { "cell_type": "code", - "execution_count": 29, - "id": "607fed1b-005a-4d3a-811c-51b1c5049c19", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n", - "===========\n", - "par_runtime\n", - "-----------\n", - "n_tile_IDs 3527\n", - "list_tile_IDs 3527 entries\n", - "n_exposures 2373\n", - "list_exposures 2373 entries\n", - "n_shdus 94920\n", - "list_shdus 94920 entries\n", - "n_3*n_shdus+n_exposures 287133\n", - "===========\n", - "\n" - ] - } - ], - "source": [ - "print_par_runtime(par_runtime, verbose=verbose)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, + "execution_count": 12, "id": "78a9065f-8983-41cf-a34c-21892fc52dd2", "metadata": { "tags": [] @@ -275,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 13, "id": "79e39954-1155-4ca3-b0b2-64bc5670db53", "metadata": { "tags": [] @@ -293,107 +208,156 @@ "mask_runner 1268 1268 0 0 0.0 100.0%\n", " (Job 8)\n", "mask_runner 45600 45600 0 0 0.0 100.0%\n", - " (Job 16)\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipykernel_48/963628441.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mjobs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprint_intro\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_numbers\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpar_runtime\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpar_runtime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/.local/lib/python3.7/site-packages/shapepipe/utilities/summary.py\u001b[0m in \u001b[0;36mcheck_numbers\u001b[0;34m(self, par_runtime, indices)\u001b[0m\n\u001b[1;32m 603\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 605\u001b[0;31m \u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 606\u001b[0m )\n\u001b[1;32m 607\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/.local/lib/python3.7/site-packages/shapepipe/utilities/summary.py\u001b[0m in \u001b[0;36mget_names_in_dir\u001b[0;34m(self, iterable, module, idx)\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"**** base_and_subdir {base_and_subdir}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 516\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 517\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbase_and_subdir\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 518\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0mmatches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/genericpath.py\u001b[0m in \u001b[0;36misdir\u001b[0;34m(s)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0;34m\"\"\"Return true if the pathname refers to an existing directory.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 42\u001b[0;31m \u001b[0mst\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 43\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 44\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + " (Job 16)\n", + "sextractor_runner 2536 2536 0 0 0.0 100.0%\n", + " (Job 32)\n", + "sextractor_runner 91200 91200 0 0 0.0 100.0%\n", + "setools_runner 91200 91032 0 168 84.0 99.8%\n", + "psfex_runner 91200 91032 0 168 84.0 99.8%\n", + " (Job 64)\n", + "psfex_interp_runner 1268 1268 0 0 0.0 100.0%\n", + "vignetmaker_runner_run_1 1268 1268 0 0 0.0 100.0%\n", + "spread_model_runner 1268 1268 0 0 0.0 100.0%\n", + "vignetmaker_runner_run_2 5072 5072 0 0 0.0 100.0%\n", + " (Job 128)\n", + "ngmix_runner 1268 1225 0 43 43.0 96.6%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1217 0 51 51.0 96.0%\n", + "ngmix_runner 1268 1228 0 40 40.0 96.8%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + "ngmix_runner 1268 1216 0 52 52.0 95.9%\n", + " (Job 256)\n", + "merge_sep_cats_runner 1268 0 0 1268 1268.0 0.0%\n", + "make_cat_runner 1268 0 0 1268 1268.0 0.0%\n", + " (Job 1024)\n", + "psfex_interp_runner 45600 41132 0 4468 4468.0 90.2%\n" ] } ], "source": [ "for key in keys:\n", - " job = jobs[key]\n", - " job.print_intro()\n", - " job.check_numbers(par_runtime=par_runtime)" + " jobs[key].print_intro()\n", + " jobs[key].check_numbers(par_runtime=par_runtime)" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "8d2243cb-5aa6-4e82-88f6-8689f15e5b61", - "metadata": {}, + "execution_count": 65, + "id": "b3d51a05-ecca-420b-b8b3-1fb2b1ec9fe3", + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " (Job 64)\n", - "psfex_interp_runner 1268 945 0 323 323.0 74.5%\n", - "vignetmaker_runner_run_1 1268 971 0 297 297.0 76.6%\n", - "spread_model_runner 1268 173 0 1095 1095.0 13.6%\n", - "vignetmaker_runner_run_2 5072 690 0 4382 1095.5 13.6%\n" + " (Job 128)\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1265 0 3 3.0 99.8%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n", + "ngmix_runner 1268 1268 0 0 0.0 100.0%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n" ] } ], "source": [ - "# Update some runs\n", - "for key in [\"64\"]:\n", - " job = jobs[key]\n", - " job.print_intro()\n", - " job.check_numbers(par_runtime=par_runtime)" + "## Update some runs\n", + "for key in [\"128\"]:\n", + " jobs[key].print_intro()\n", + " jobs[key].check_numbers(par_runtime=par_runtime)" ] }, { "cell_type": "code", - "execution_count": 35, - "id": "e8a38747-21b7-4f74-b667-62094f17154e", + "execution_count": 64, + "id": "67b50a61-e3cc-4559-941d-f39c6a200294", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - " (Job 64)\n", - "psfex_interp_runner 1268 965 0 303 303.0 76.1%\n", - "vignetmaker_runner_run_1 1268 993 0 275 275.0 78.3%\n", - "spread_model_runner 1268 187 0 1081 1081.0 14.7%\n", - "vignetmaker_runner_run_2 5072 749 0 4323 1080.8 14.8%\n" + " (Job 128)\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1265 0 3 3.0 99.8%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1267 0 1 1.0 99.9%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n", + "ngmix_runner 1268 1268 0 0 0.0 100.0%\n", + "ngmix_runner 1268 1266 0 2 2.0 99.8%\n" ] } ], "source": [ - "for key in [\"64\"]:\n", - " job = jobs[key]\n", - " job.print_intro()\n", - " job.check_numbers(par_runtime=par_runtime)" + "for key in [\"128\"]:\n", + " jobs[key].print_intro()\n", + " \n", + " \n", + " jobs[key].check_numbers(par_runtime=par_runtime)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "82cb7e0b-a8fa-4a55-840f-55ec0452bf7f", - "metadata": {}, + "execution_count": 19, + "id": "affa8293-daf9-4d2b-9215-fe19f8e2c1e2", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "session = Session()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "269496d1-cd89-4d13-a5e4-41b897669e22", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ids = [session[\"id\"] for session in session.fetch(kind=\"headless\")]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "80af8dff-98c7-4db4-8bcc-06936e1875cf", + "metadata": { + "tags": [] + }, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - " (Job 64)\n" + "ename": "RuntimeError", + "evalue": "This event loop is already running", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_69/559116804.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdestroy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mids\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.7/site-packages/skaha/session.py\u001b[0m in \u001b[0;36mdestroy\u001b[0;34m(self, id)\u001b[0m\n\u001b[1;32m 264\u001b[0m \u001b[0marguments\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"url\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"/\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0mloop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_event_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 266\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_until_complete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marguments\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 267\u001b[0m \u001b[0mresponses\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mDict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbool\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 268\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midentity\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/asyncio/base_events.py\u001b[0m in \u001b[0;36mrun_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 561\u001b[0m \"\"\"\n\u001b[1;32m 562\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_closed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 563\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_runnung\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 564\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 565\u001b[0m \u001b[0mnew_task\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mfutures\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misfuture\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfuture\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/asyncio/base_events.py\u001b[0m in \u001b[0;36m_check_runnung\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 521\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_check_runnung\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 522\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_running\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 523\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mRuntimeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'This event loop is already running'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 524\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mevents\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_running_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 525\u001b[0m raise RuntimeError(\n", + "\u001b[0;31mRuntimeError\u001b[0m: This event loop is already running" ] } ], "source": [ - "for key in [\"64\"]:\n", - " job = jobs[key]\n", - " job.print_intro()\n", - " job.check_numbers(par_runtime=par_runtime)" + "session.destroy(ids[0])" ] }, { "cell_type": "code", "execution_count": null, - "id": "c57c576f-b7e8-4735-a047-5b63e245f9a4", + "id": "66a3ed14-8aaf-4028-b933-10ecb7376d68", "metadata": {}, "outputs": [], "source": [] diff --git a/scripts/python/stats_headless_canfar.py b/scripts/python/stats_headless_canfar.py new file mode 100755 index 00000000..1ede78ee --- /dev/null +++ b/scripts/python/stats_headless_canfar.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python + +# Name: stats_headless_canfar.py + +import sys +from skaha.session import Session + + +def main(argv=None): + session = Session() + + n_headless = session.stats()["instances"]["headless"] + + print(n_headless) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/python/summary_params_pre_v2.py b/scripts/python/summary_params_pre_v2.py index 31b63cb3..5b67ee0c 100644 --- a/scripts/python/summary_params_pre_v2.py +++ b/scripts/python/summary_params_pre_v2.py @@ -46,7 +46,10 @@ def set_jobs_v2_pre_v2(patch, verbose): if not os.path.isdir(path): os.mkdir(path) log_file_name = f"{path}/summary_log.txt" - handlers = [logging.FileHandler(log_file_name), logging.StreamHandler()] + handlers = [ + logging.FileHandler(log_file_name, mode="w"), + logging.StreamHandler() + ] logging.basicConfig( level=logging.INFO, format="%(message)s", handlers=handlers ) @@ -88,7 +91,7 @@ def set_jobs_v2_pre_v2(patch, verbose): # n_mult=[1, 1, 1], jobs["2"] = job_data( 2, - ["run_sp_Uz", "run_sp_exp_SpMh", "run_sp_exp_SpMh_2023-12"], + ["run_sp_Uz", "run_sp_exp_SpMh", "run_sp_exp_SpMh"], ["uncompress_fits_runner", "merge_headers_runner", "split_exp_runner"], ["tile_IDs", 0, "3*n_shdus+n_exposures"], path_main=path_main, @@ -141,12 +144,14 @@ def set_jobs_v2_pre_v2(patch, verbose): "run_sp_exp_SxSePsf", "run_sp_exp_SxSePsf", "run_sp_exp_SxSePsf", - ], # "run_sp_exp_Pi"], + #"run_sp_exp_Pi" + ], [ "sextractor_runner", "setools_runner", "psfex_runner", - ], # "psfex_interp_runner"], + # "psfex_interp_runner"], + ], "shdus", n_mult=[2, 2, 2], # 1], path_main=path_main, @@ -157,17 +162,17 @@ def set_jobs_v2_pre_v2(patch, verbose): ) # For P3 - jobs["33"] = job_data( - 33, - "run_sp_exp_Pi", - ["psfex_interp_runner"], - "shdus", - path_main=path_main, - path_left="exp_runs", - output_subdirs="shdus", - path_right="output", - verbose=verbose, - ) + #jobs["33"] = job_data( + # 33, + # "run_sp_exp_Pi", + # ["psfex_interp_runner"], + # "shdus", + # path_main=path_main, + # path_left="exp_runs", + # output_subdirs="shdus", + # path_right="output", + # verbose=verbose, + #) jobs["64"] = job_data( "64", diff --git a/scripts/python/summary_run.py b/scripts/python/summary_run.py index b790300a..331764b2 100755 --- a/scripts/python/summary_run.py +++ b/scripts/python/summary_run.py @@ -5,47 +5,46 @@ from shapepipe.utilities.summary import * +from summary_params_pre_v2 import * + def main(argv=None): patch = argv[1] verbose = False - - import summary_params_pre_v2 as params - jobs, list_tile_IDs_dot = params.set_jobs_v2_pre_v2(patch, verbose) + jobs, list_tile_IDs_dot = set_jobs_v2_pre_v2(patch, verbose) list_tile_IDs = job_data.replace_dot_dash(list_tile_IDs_dot) # Numbers updated at runtime - par_runtime = params.init_par_runtime(list_tile_IDs) + par_runtime = init_par_runtime(list_tile_IDs) job_data.print_stats_header() for key in "1": - job = jobs[key] - job.print_intro() - job.check_numbers(par_runtime=par_runtime, indices=[0, 1]) + jobs[key].print_intro() + jobs[key].check_numbers(par_runtime=par_runtime, indices=[0, 1]) - all_exposures = get_all_exposures(job._paths_in_dir[1], verbose=True) + all_exposures = get_all_exposures(jobs[key]._paths_in_dir[1], verbose=True) par_runtime["n_exposures"] = len(all_exposures) par_runtime["list_exposures"] = all_exposures - job.check_numbers(par_runtime, indices=[2]) + jobs[key].check_numbers(par_runtime, indices=[2]) - par_runtime = params.update_par_runtime_after_find_exp(par_runtime, all_exposures) + par_runtime = update_par_runtime_after_find_exp(par_runtime, all_exposures) print_par_runtime(par_runtime, verbose=verbose) + # Get all keys after "1" keys = sorted(jobs.keys(), key=int) _ = keys.pop(0) for key in keys: - job = jobs[key] - job.print_intro() - job.check_numbers(par_runtime=par_runtime) + jobs[key].print_intro() + jobs[key].check_numbers(par_runtime=par_runtime) return 0 diff --git a/scripts/sh/curl_canfar_local.sh b/scripts/sh/curl_canfar_local.sh index ed4d21bd..3e03dcd5 100755 --- a/scripts/sh/curl_canfar_local.sh +++ b/scripts/sh/curl_canfar_local.sh @@ -17,6 +17,7 @@ N_SMP=1 kind=-1 version=1.0 cmd_remote="shapepipe/scripts/sh/init_run_exclusive_canfar.sh" +batch_max=200 dry_run=0 # TODO psf @@ -25,7 +26,7 @@ dry_run=0 usage="Usage: $(basename "$0") -j JOB -[e ID |-f file_IDs] -k KIND [OPTIONS] \n\nOptions:\n -h\tthis message\n - -j, --job JOB\tRUnning JOB, bit-coded\n + -j, --job JOB\tRunning JOB, bit-coded\n -e, --exclusive ID \timage ID\n -f, --file_IDs path @@ -40,6 +41,8 @@ usage="Usage: $(basename "$0") -j JOB -[e ID |-f file_IDs] -k KIND [OPTIONS] \tversion of docker image, default='$version'\n -C, --command_remote\n \tremote command to run on canfar, default='$cmd_remote'\n + -b, --batch_max\n + \tmaximum batch size = number of jobs run simultaneously, default=$batch_max\n -n, --dry_run LEVEL\n \tdry run, from LEVEL=2 (no processing) to 0 (full run)\n " @@ -70,13 +73,17 @@ while [ $# -gt 0 ]; do shift ;; -N|--N_SMP) - n_SMP="$2" + N_SMP="$2" shift ;; -k|--kind) kind="$2" shift ;; + -b|--batch_max) + batch_max="$2" + shift + ;; -n|--dry_run) dry_run="$2" shift @@ -119,46 +126,114 @@ RESOURCES="ram=4&cores=$N_SMP" # TODO: dir as command line argument to this script dir=`pwd` -arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" +#arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + + +# Return argument for local script to be called via curl +function set_arg() { + my_arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + echo $my_arg +} + + +function call_curl() { + my_arg=$1 + +} + +function update_session_logs() { + echo $my_session >> session_IDs.txt + echo "$my_session $ID" >> session_image_IDs.txt + +} + +function submit_batch() { + path=$1 + + for ID in `cat $path`; do + my_arg=$(set_arg) + my_session=`curl -E $SSL $SESSION?$RESOURCES -d "image=$IMAGE:$version" -d "name=${NAME}" -d "cmd=$cmd_remote" --data-urlencode "args=$my_arg"` + update_session_logs + done + +} + +batch=20 +sleep=300 + +((n_thresh=batch_max-batch)) if [ "$dry_run" == 2 ]; then + # Do not call curl (dry run = 2) echo "Running command dry run:" if [ "$ID" == "-1" ]; then + # Submit file (dry run = 2) for ID in `cat $file_IDs`; do - arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + arg=$(set_arg) echo curl -E $SSL $SESSION?$RESOURCES -d \"image=$IMAGE:$version\" -d \"name=${NAME}\" -d \"cmd=$cmd_remote\" --data-urlencode \"args=$arg\" done else - arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + # Submit image (dry run = 2) + arg=$(set_arg) echo curl -E $SSL $SESSION?$RESOURCES -d \"image=$IMAGE:$version\" -d \"name=${NAME}\" -d \"cmd=$cmd_remote\" --data-urlencode \"args=$arg\" fi else + # Call curl rm -rf session_IDs.txt session_image_IDs.txt if [ "$ID" == "-1" ]; then - for ID in `cat $file_IDs`; do - arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" - session=`curl -E $SSL $SESSION?$RESOURCES -d "image=$IMAGE:$version" -d "name=${NAME}" -d "cmd=$cmd_remote" --data-urlencode "args=$arg"` - echo $session >> session_IDs.txt - echo "$session $ID" >> session_image_IDs.txt - done + # Submit file + n_jobs=`cat $file_IDs | wc -l` + if [ "$n_jobs" -gt "$batch_max" ]; then + + # Split into batches + prefix="${file_IDs}_split_" + split -d -l $batch $file_IDs $prefix + n_split=`ls -l $prefix* | wc -l` + echo "Split '$file_IDs' into $n_split batches of size $batch" + + count=1 + for batch in $prefix*; do + echo "Number of running jobs = $n_running" + echo "Submitting batch $batch ($count/$n_split)" + echo -ne "\033]0;curl patch=$patch job=$job $count/$n_split\007" + submit_batch $batch + ((count=count+1)) + + n_running=`stats_headless_canfar.py` + + while [ "$n_running" -gt "$n_thresh" ]; do + echo "Wait for #jobs = $n_running jobs to go < $n_thresh ..." + sleep $sleep + n_running=`stats_headless_canfar.py` + done + + done + + else + + # Submit entire file (single batch) + echo "Submit '$file_IDs' in single batch" + submit_batch $file_IDs + + fi else - arg="-j $job -e $ID -N $N_SMP -k $kind $arg_dry_run -d $dir" + # Submit image + arg=$(set_arg) session=`curl -E $SSL $SESSION?$RESOURCES -d "image=$IMAGE:$version" -d "name=${NAME}" -d "cmd=$cmd_remote" --data-urlencode "args=$arg"` - echo $session >> session_IDs.txt - echo "$session $ID" >> session_image_IDs.txt + update_session_logs fi diff --git a/scripts/sh/curl_canfar_monitor_local.sh b/scripts/sh/curl_canfar_monitor_local.sh index 4ba0bf87..793d8026 100755 --- a/scripts/sh/curl_canfar_monitor_local.sh +++ b/scripts/sh/curl_canfar_monitor_local.sh @@ -9,6 +9,14 @@ type=$1 echo "type=$type" +for session_ID in `cat session_IDs.txt`; do + cmd="curl -E $SSL $SESSION/$session_ID?view=$type" + echo $cmd + $cmd +done + +exit 0 + while [ 1 ]; do session_ID=`tail -n 1 session_IDs.txt` cmd="curl -E $SSL $SESSION/$session_ID?view=$type" diff --git a/scripts/sh/init_run_exclusive_canfar.sh b/scripts/sh/init_run_exclusive_canfar.sh index 05ca2a84..65ecfe8c 100755 --- a/scripts/sh/init_run_exclusive_canfar.sh +++ b/scripts/sh/init_run_exclusive_canfar.sh @@ -57,7 +57,7 @@ while [ $# -gt 0 ]; do shift ;; -N|--N_SMP) - n_SMP="$2" + N_SMP="$2" shift ;; -k|--kind) @@ -177,7 +177,6 @@ done if [[ $do_job != 0 ]]; then if [ "$kind" == "tile" ]; then cd ../../.. - #command "rm -rf tile_runs/$ID/output/run_exp_SxSePsf*" $dry_run command "link_to_exp_for_tile.py -t $ID -i tile_runs -I exp_runs" $dry_run cd ${kind}_runs/$ID/output @@ -188,6 +187,9 @@ if [[ $do_job != 0 ]]; then echo "removing $n_remove duplicate old job-32 runs" rm -rf `ls -rt1d run_sp_tile_Sx_* | head -$n_remove` fi + + # Remove previous runs of this job + rm -rf run_sp_tile_PsViSmVi* fi fi diff --git a/scripts/sh/job_curl_canfar_local.sh b/scripts/sh/job_curl_canfar_local.sh deleted file mode 100644 index 665f6ea7..00000000 --- a/scripts/sh/job_curl_canfar_local.sh +++ /dev/null @@ -1,17 +0,0 @@ -# Usage: -# Edit the file "all.txt" -# screen -# bash run_curl.sh kind job - -rm -f session_IDs.txt session_image_IDs.txt - -script_local=~/shapepipe/scripts/sh/curl_canfar_local.sh -version="1.0" -cmd_remote="shapepipe/scripts/sh/init_run_exclusive_canfar.sh" -N_SMP=1 -kind="$1" -job="$2" - -echo $kind $job -cat all.txt | xargs -n 1 -P 1 $script_local -v $version -c $cmd_remote -N $N_SMP -k $kind -j $job -e - diff --git a/scripts/sh/job_sp_canfar.bash b/scripts/sh/job_sp_canfar.bash index 91f67619..f2b9c03d 100755 --- a/scripts/sh/job_sp_canfar.bash +++ b/scripts/sh/job_sp_canfar.bash @@ -160,9 +160,6 @@ export SP_RUN=`pwd` export SP_CONFIG=$SP_RUN/cfis export SP_CONFIG_MOD=$SP_RUN/cfis_mod -# To find ssl certificate -export VM_HOME=$HOME - ## Other variables # Output @@ -341,17 +338,31 @@ fi if [[ $do_job != 0 ]]; then ### Retrieve files - command_sp "shapepipe_run -c $SP_CONFIG/config_GitFeGie_$retrieve.ini" "Retrieve images" + command_cfg_shapepipe \ + "config_GitFeGie_$retrieve.ini" \ + "Retrieve images" \ + -1 \ + $exclusive + + #if [[ ! -d "data_tiles" ]]; then + #echo "Directory or link 'data_tiles' does not exist, exiting" + #exit 1 + #fi + #command_cfg_shapepipe "config_Git_vos.ini" "Retrieve tiles" -1 $n_exclusive ### Retrieve and save star catalogues for masking if [ "$star_cat_for_mask" == "save" ]; then #### For tiles mkdir $SP_RUN/star_cat_tiles - command_sp "create_star_cat $SP_RUN/output/run_sp_GitFeGie_*/get_images_runner_run_1/output $SP_RUN/star_cat_tiles" "Save star cats for masking (tile)" + command_sp \ + "create_star_cat $SP_RUN/output/run_sp_GitFeGie_*/get_images_runner_run_1/output $SP_RUN/star_cat_tiles" \ + "Save star cats for masking (tile)" #### For single-exposures mkdir $SP_RUN/star_cat_exp - command_sp "create_star_cat $SP_RUN/output/run_sp_GitFeGie_*/get_images_runner_run_2/output $SP_RUN/star_cat_exp exp" "Save star cats for masking (exp)" + command_sp \ + "create_star_cat $SP_RUN/output/run_sp_GitFeGie_*/get_images_runner_run_2/output $SP_RUN/star_cat_exp exp" \ + "Save star cats for masking (exp)" fi fi @@ -364,7 +375,11 @@ if [[ $do_job != 0 ]]; then command_cfg_shapepipe "config_tile_Uz.ini" "Run shapepipe (uncompress tile weights)" $n_smp $exclusive ### Split images into single-HDU files, merge headers for WCS info - command_cfg_shapepipe "config_exp_SpMh.ini" "Run shapepipe (split images, merge headers)" $n_smp $exclusive + command_cfg_shapepipe \ + "config_exp_SpMh.ini" \ + "Run shapepipe (split images, merge headers)" \ + $n_smp \ + $exclusive fi @@ -373,7 +388,11 @@ fi if [[ $do_job != 0 ]]; then ### Mask tiles - command_cfg_shapepipe "config_tile_Ma_$star_cat_for_mask.ini" "Run shapepipe (mask tiles)" $n_smp $exclusive + command_cfg_shapepipe \ + "config_tile_Ma_$star_cat_for_mask.ini" \ + "Run shapepipe (mask tiles)" \ + $n_smp \ + $exclusive fi @@ -382,7 +401,11 @@ fi if [[ $do_job != 0 ]]; then ### Mask exposures - command_cfg_shapepipe "config_exp_Ma_$star_cat_for_mask.ini" "Run shapepipe (mask exposures)" $n_smp $exclusive + command_cfg_shapepipe \ + "config_exp_Ma_$star_cat_for_mask.ini" \ + "Run shapepipe (mask exposures)" \ + $n_smp \ + $exclusive fi @@ -392,7 +415,11 @@ fi if [[ $do_job != 0 ]]; then ### Object detection on tiles - command_cfg_shapepipe "config_tile_Sx.ini" "Run shapepipe (tile detection)" $n_smp $exclusive + command_cfg_shapepipe \ + "config_tile_Sx.ini" \ + "Run shapepipe (tile detection)" \ + $n_smp \ + $exclusive fi @@ -403,7 +430,11 @@ if [[ $do_job != 0 ]]; then ### Star detection, selection, PSF model. setools can exit with an error for CCD with insufficient stars, ### the script should continue STOP=0 - command_cfg_shapepipe "config_exp_${psf}.ini" "Run shapepipe (exp $psf)" $n_smp $exclusive + command_cfg_shapepipe \ + "config_exp_${psf}.ini" \ + "Run shapepipe (exp $psf)" \ + $n_smp \ + $exclusive STOP=1 fi @@ -415,7 +446,11 @@ if [[ $do_job != 0 ]]; then ### PSF model letter: 'P' (psfex) or 'M' (mccd) letter=${psf:0:1} Letter=${letter^} - command_cfg_shapepipe "config_tile_${Letter}iViSmVi_canfar.ini" "Run shapepipe (tile PsfInterp=$Letter}: up to ngmix+galsim)" $n_smp $exclusive + command_cfg_shapepipe \ + "config_tile_${Letter}iViSmVi_canfar.ini" \ + "Run shapepipe (tile PsfInterp=$Letter}: up to ngmix+galsim)" \ + $n_smp \ + $exclusive fi @@ -454,13 +489,19 @@ if [[ $do_job != 0 ]]; then ngmix_out="$ngmix_run/output" n_out=`ls -rlt $ngmix_out | wc -l` if [ "$n_out" -lt 2 ]; then - command "rm -rf $OUTPUT/run_sp_tile_ngmix_Ng${k}u" "Re-running existing empty ngmix subrun $k" - command_sp "shapepipe_run -c $SP_CONFIG_MOD/config_tile_Ng${k}u.ini" "Run shapepipe (tile: ngmix $k)" & + command \ + "rm -rf $OUTPUT/run_sp_tile_ngmix_Ng${k}u" \ + "Re-running existing empty ngmix subrun $k" + command_sp \ + "shapepipe_run -c $SP_CONFIG_MOD/config_tile_Ng${k}u.ini" \ + "Run shapepipe (tile: ngmix $k)" & else echo "Skipping existing non-empty ngmix subrun $k" fi else - command_sp "shapepipe_run -c $SP_CONFIG_MOD/config_tile_Ng${k}u.ini" "Run shapepipe (tile: ngmix $k)" & + command_sp \ + "shapepipe_run -c $SP_CONFIG_MOD/config_tile_Ng${k}u.ini" \ + "Run shapepipe (tile: ngmix $k)" & fi done wait @@ -478,9 +519,17 @@ if [[ $do_job != 0 ]]; then > $SP_CONFIG_MOD/config_merge_sep_cats.ini ### Merge separated shapes catalogues - command_sp "shapepipe_run -c $SP_CONFIG_MOD/config_merge_sep_cats.ini" "Run shapepipe (tile: merge sep cats)" "$VERBOSE" "$ID" + command_sp \ + "shapepipe_run -c $SP_CONFIG_MOD/config_merge_sep_cats.ini" \ + "Run shapepipe (tile: merge sep cats)" \ + "$VERBOSE" \ + "$ID" ### Merge all relevant information into final catalogue - command_sp "shapepipe_run -c $SP_CONFIG/config_make_cat_$psf.ini" "Run shapepipe (tile: create final cat $psf)" "$VERBOSE" "$ID" + command_sp \ + "shapepipe_run -c $SP_CONFIG/config_make_cat_$psf.ini" \ + "Run shapepipe (tile: create final cat $psf)" \ + "$VERBOSE" \ + "$ID" fi diff --git a/shapepipe/utilities/summary.py b/shapepipe/utilities/summary.py index 001e975f..42b7ebf9 100755 --- a/shapepipe/utilities/summary.py +++ b/shapepipe/utilities/summary.py @@ -15,6 +15,8 @@ from tqdm import tqdm +print("summaary v1.0") + def get_IDs_from_file(path): """Get IDs From File. @@ -408,16 +410,20 @@ def output_missing( if n_unique > 0: if not self._output_path_missing_IDs: output_path = ( - f"{self._path_main}/summary/missing_job_{self._bit}_{module}.txt" + f"{self._path_main}/summary/missing_job_{self._bit}" + + f"_{module}.txt" ) else: output_path = self._output_path_missing_IDs[idx] + #print("MKDEBUG", missing_IDs_unique) self.write_IDs_to_file(output_path, missing_IDs_unique) return missing_IDs_unique def output_missing_job(self): - output_path = f"{self._path_main}/summary/missing_job_{self._bit}_all.txt" + output_path = ( + f"{self._path_main}/summary/missing_job_{self._bit}_all.txt" + ) missing_IDs_all = set(self._missing_IDs_job) @@ -464,7 +470,7 @@ def get_module_output_dir(self, full_path, module): def get_matches_final(self, directory, idx): - # Look over files + # Loop over files # os.path.whether exists is twice faster than try/except if os.path.exists(directory): @@ -475,6 +481,7 @@ def get_matches_final(self, directory, idx): and ( fnmatch.fnmatch(entry2.name, pattern) ) + and entry2.stat().st_size > 0 ): # Append matching files self._names_in_dir[idx].append(entry2.name) @@ -482,21 +489,6 @@ def get_matches_final(self, directory, idx): os.path.join(directory, entry2.name) ) - #if os.path.exists(directory): - #with os.scandir(directory) as entries2: - #files = [ - #entry2.name - #for entry2 in entries2 - #if entry2.name.startswith(self._pattern[idx]) - #] - - ## Append matching files - #self._names_in_dir[idx].extend(files) - #self._paths_in_dir[idx].extend( - #[os.path.join(directory, file) - #for file in files] - #) - def get_names_in_dir(self, iterable, module, idx): # Initialise output file names and paths @@ -530,7 +522,9 @@ def get_names_in_dir(self, iterable, module, idx): continue if self._verbose: - print("Matching entries: ", matches) + print("**** Matching entries: ", end="") + for match in matches: + print(match.name) full_path = self.get_last_full_path( base_and_subdir, matches @@ -684,7 +678,7 @@ def get_par_runtime(par_runtime, key, kind="n"): def print_par_runtime(par_runtime, verbose=True): # Print runtime parameter values - if verbose: + if True: logging.info("") logging.info("===========") logging.info("par_runtime")