Skip to content

Commit

Permalink
In 24.11 slurm_get_errno() was removed as
Browse files Browse the repository at this point in the history
the errcode of the API call is being the return value
(with SLURM_SUCCESS == no error)

This is also present in in 24.05.X however slurm_get_errno() still exists.
  • Loading branch information
wickhagj committed Dec 2, 2024
1 parent df549c5 commit c27b77a
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 11 deletions.
61 changes: 50 additions & 11 deletions slurm_drmaa/job.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,21 @@ slurmdrmaa_job_control( fsd_job_t *self, int action )
job_id_spec.original = self->job_id;
self->job_id = slurmdrmaa_set_job_id(&job_id_spec);

int _serrno;

switch( action )
{
case DRMAA_CONTROL_SUSPEND:
#if SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(14,10,0)
if(slurm_suspend2(self->job_id, NULL) == -1) {
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
if(( _serrno = slurm_suspend2(self->job_id, NULL)) != SLURM_SUCCESS) {
#elif SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(14,10,0)
if( slurm_suspend2(self->job_id, NULL) == -1) {
int _serrno = slurm_get_errno();
#else
if(slurm_suspend(fsd_atoi(self->job_id)) == -1) {
int _serrno = slurm_get_errno();
#endif
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_suspend error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_suspend error: %s,job_id: %s", slurm_strerror( _serrno ), self->job_id);
}
slurm_self->user_suspended = true;
break;
Expand All @@ -89,17 +95,26 @@ slurmdrmaa_job_control( fsd_job_t *self, int action )
job_desc.job_id = atoi(self->job_id);
job_desc.priority = 0;
job_desc.alloc_sid = 0;
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
if(( _serrno = slurm_update_job(&job_desc)) != SLURM_SUCCESS ) {
#else
if(slurm_update_job(&job_desc) == -1) {
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_update_job error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
_serrno = slurm_get_errno();
#endif
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_update_job error: %s,job_id: %s", slurm_strerror( _serrno ), self->job_id);
}
break;
case DRMAA_CONTROL_RESUME:
#if SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(14,10,0)
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
if(( _serrno = slurm_resume2(self->job_id, NULL)) != SLURM_SUCCESS ) {
#elif SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(14,10,0)
if(slurm_resume2(self->job_id, NULL) == -1) {
_serrno = slurm_get_errno();
#else
if(slurm_resume(fsd_atoi(self->job_id)) == -1) {
_serrno = slurm_get_errno();
#endif
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_resume error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_resume error: %s,job_id: %s", slurm_strerror( _serrno ), self->job_id);
}
slurm_self->user_suspended = false;
break;
Expand All @@ -108,19 +123,29 @@ slurmdrmaa_job_control( fsd_job_t *self, int action )
slurm_init_job_desc_msg(&job_desc);
job_desc.priority = INFINITE;
job_desc.job_id = atoi(self->job_id);
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
if((_serrno = slurm_update_job(&job_desc)) != SLURM_SUCCESS ) {
#else
if(slurm_update_job(&job_desc) == -1) {
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_update_job error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
_serrno = slurm_get_errno();
#endif
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_update_job error: %s,job_id: %s", slurm_strerror( _serrno ), self->job_id);
}
break;
case DRMAA_CONTROL_TERMINATE:
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(21,8,0)
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
if(( _serrno = slurm_kill_job2(self->job_id, SIGKILL, 0, NULL)) != SLURM_SUCCESS ) {
#elif SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(21,8,0)
if(slurm_kill_job2(self->job_id, SIGKILL, 0, NULL) == -1) {
_serrno = slurm_get_errno();
#elif SLURM_VERSION_NUMBER > SLURM_VERSION_NUM(14,10,0)
if(slurm_kill_job2(self->job_id, SIGKILL, 0) == -1) {
_serrno = slurm_get_errno();
#else
if(slurm_kill_job(fsd_atoi(self->job_id), SIGKILL, 0) == -1) {
_serrno = slurm_get_errno();
#endif
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_terminate_job error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_terminate_job error: %s,job_id: %s", slurm_strerror( _serrno ), self->job_id);
}
break;
default:
Expand Down Expand Up @@ -152,16 +177,21 @@ slurmdrmaa_find_job_info( fsd_job_t *self, job_info_msg_t **job_info ) {

if (! (str_i = strchr( self->job_id, '_' ))) {
/* single job */
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
int _slurm_errno;
if (( _slurm_errno = slurm_load_job( job_info, fsd_atoi( self->job_id ), SHOW_ALL)) != SLURM_SUCCESS ) {
#else
if ( slurm_load_job( job_info, fsd_atoi( self->job_id ), SHOW_ALL) ) {
int _slurm_errno = slurm_get_errno();
#endif

if (_slurm_errno == ESLURM_INVALID_JOB_ID) {
self->on_missing(self);
} else if (_slurm_errno == SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT ||
_slurm_errno == SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR) {
fsd_exc_raise_fmt(FSD_ERRNO_DRM_COMMUNICATION_FAILURE, "slurm_load_jobs error: %s,job_id: %s", slurm_strerror(_slurm_errno), self->job_id);
} else {
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_load_jobs error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_load_jobs error: %s,job_id: %s", slurm_strerror(_slurm_errno), self->job_id);
}
}

Expand Down Expand Up @@ -191,8 +221,13 @@ slurmdrmaa_find_job_info( fsd_job_t *self, job_info_msg_t **job_info ) {

fsd_log_debug(( "looking for task (%u) of job (%s)", task_id, parent_job ));

#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
int _slurm_errno;
if (( _slurm_errno = slurm_load_job( job_info, fsd_atoi( parent_job ), SHOW_ALL)) != SLURM_SUCCESS ) {
#else
if ( slurm_load_job( job_info, fsd_atoi( parent_job ), SHOW_ALL) ) {
int _slurm_errno = slurm_get_errno();
#endif

if (_slurm_errno == ESLURM_INVALID_JOB_ID) {
self->on_missing(self);
Expand All @@ -201,7 +236,7 @@ slurmdrmaa_find_job_info( fsd_job_t *self, job_info_msg_t **job_info ) {
_slurm_errno == SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR) {
fsd_exc_raise_fmt(FSD_ERRNO_DRM_COMMUNICATION_FAILURE, "slurm_load_jobs error: %s,job_id: %s", slurm_strerror(_slurm_errno), self->job_id);
} else {
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_load_jobs error: %s,job_id: %s", slurm_strerror(slurm_get_errno()), self->job_id);
fsd_exc_raise_fmt(FSD_ERRNO_INTERNAL_ERROR, "slurm_load_jobs error: %s,job_id: %s", slurm_strerror(_slurm_errno), self->job_id);
}
}

Expand Down Expand Up @@ -444,7 +479,11 @@ slurmdrmaa_job_on_missing( fsd_job_t *self )
job_id_spec_t job_id_spec;
slurmdb_job_cond_t *job_cond = NULL;
slurmdb_job_rec_t *job = NULL;
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
list_t *jobs;
#else
List jobs;
#endif

#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,5,0)
list_itr_t *itr = NULL;
Expand Down
4 changes: 4 additions & 0 deletions slurm_drmaa/slurm_missing.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,16 @@
#ifndef __LL_DRMAA__SLURM_MISSING_H
#define __LL_DRMAA__SLURM_MISSING_H

#if SLURM_VERSION_NUMBER < SLURM_VERSION_NUM(24,11,0)
extern void * slurm_list_peek (List l);
#endif
#if SLURM_VERSION_NUMBER < SLURM_VERSION_NUM(24,5,0)
extern void * slurm_list_remove (ListIterator i);
#endif

#if SLURM_VERSION_NUMBER < SLURM_VERSION_NUM(24,11,0)
extern int slurm_addto_step_list(List step_list, char *names);
#endif

/* --clusters is not supported with Slurm < 15.08, but these are defined to
* avoid compiler warnings
Expand Down
6 changes: 6 additions & 0 deletions slurm_drmaa/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <slurm_drmaa/slurm_missing.h>
#include <slurm_drmaa/slurm_drmaa.h>

#include <slurm/slurm_version.h>

unsigned int
slurmdrmaa_datetime_parse( const char *string )
{
Expand Down Expand Up @@ -690,7 +692,11 @@ slurmdrmaa_unset_job_id(job_id_spec_t *job_id_spec)
void
slurmdrmaa_set_cluster(const char * value)
{
#if SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM(24,11,0)
list_t *cluster_list = NULL;
#else
volatile List cluster_list = NULL;
#endif

fsd_log_enter(( "({value=%s})", value));

Expand Down

0 comments on commit c27b77a

Please sign in to comment.