Skip to content

Commit

Permalink
Merge pull request #167 from chu11/redfishpower_status_polling
Browse files Browse the repository at this point in the history
redfishpower: adapt status polling interval
  • Loading branch information
mergify[bot] committed Apr 8, 2024
2 parents 8a8a848 + 2d295d9 commit 47a7071
Showing 1 changed file with 49 additions and 6 deletions.
55 changes: 49 additions & 6 deletions src/redfishpower/redfishpower.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,7 @@ static zhashx_t *test_power_status;
/* Per documentation, wait incremental time then proceed if timeout < 0 */
#define INCREMENTAL_WAIT 500

/* in usec
*
* status polling interval of 1 second may seem long, but testing
* shows wait ranges from a few seconds to 20 seconds
*/
/* in usec */
#define STATUS_POLLING_INTERVAL_DEFAULT 1000000

#define MS_IN_SEC 1000
Expand Down Expand Up @@ -120,10 +116,13 @@ struct powermsg {
* timeout - when the overall power command times out
*
* delaystart - if message should be sent after a wait
*
* poll_count - number of poll attempts
*/
struct timeval start;
struct timeval timeout;
struct timeval delaystart;
int poll_count;

/* zlistx handle */
void *handle;
Expand Down Expand Up @@ -318,6 +317,7 @@ static struct powermsg *powermsg_create(CURLM *mh,
const char *postdata,
struct timeval *start,
long int delay_usec,
int poll_count,
int output_result,
int state)
{
Expand Down Expand Up @@ -363,6 +363,9 @@ static struct powermsg *powermsg_create(CURLM *mh,
waitdelay.tv_usec = delay_usec;
timeradd(&now, &waitdelay, &pm->delaystart);
}

pm->poll_count = poll_count;

return pm;
}

Expand Down Expand Up @@ -417,6 +420,7 @@ static struct powermsg *stat_cmd_plug(CURLM * mh,
NULL,
NULL,
0,
0,
output_result,
STATE_SEND_POWERCMD);
if (verbose > 1)
Expand Down Expand Up @@ -768,6 +772,7 @@ struct powermsg *power_cmd_plug(CURLM * mh,
postdata,
NULL,
0,
0,
OUTPUT_RESULT,
STATE_SEND_POWERCMD);
if (verbose > 1)
Expand Down Expand Up @@ -939,13 +944,50 @@ static void send_status_poll(struct powermsg *pm)
{
struct powermsg *nextpm;
char *path = NULL;
long int poll_delay;

get_path(CMD_STAT, pm->plugname, &path, NULL);
if (!path) {
printf("%s: %s path not set\n", pm->plugname, CMD_STAT);
return;
}

/* testing a range of hardware shows that the amount of time it
* takes to complete an on/off falls into two bands. Either it
* completes in the 2-8 second range OR it takes 20-60 seconds.
*
* Some example timings from a HPE Cray Supercomputing EX Chassis
*
* - Turn switch off - 1.18 seconds
* - Turn switch on - 4.5 seconds
* - Turn blade off - 1.18 seconds
* - Turn blade on - 3.76 seconds
* - Turn node off - 6.86 seconds
* - Turn node on - 54.53 seconds
*
* (achu: Going off memory, the Supermicro H12DSG-O-CPU took
* around 20 seconds for on/off.)
*
* To get the best turnaround time for the quick end of that range
* and avoid excessive polling on the other end, we will do a slightly
* altered 'exponential backoff' delay.
*
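* We delay 1 second for each of the first 4 polls.
* We delay 2 seconds for the 5th and 6th polls.
* We delay 4 seconds afterwards.
*
* (These are 1x, 2x, and 4x the status polling interval, so the
* values in seconds assume a 1 second interval.)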
*
* Special note, testing shows that powering on an "on" node can
* also lead to a temporary entrance into the "PoweringOn" state.
* So we also want a quick turnaround for that case, which is
* typically only 1-2 seconds.
*/
if (pm->poll_count < 4)
poll_delay = status_polling_interval;
else if (pm->poll_count < 6)
poll_delay = status_polling_interval * 2;
else
poll_delay = status_polling_interval * 4;

/* issue a follow on stat to wait until the on/off is complete.
* note that we set the initial start time of this new command to
* the original on/off, so we can timeout correctly
Expand All @@ -961,7 +1003,8 @@ static void send_status_poll(struct powermsg *pm)
path,
NULL,
&pm->start,
status_polling_interval,
poll_delay,
pm->poll_count + 1,
OUTPUT_RESULT,
STATE_WAIT_UNTIL_ON_OFF);
if (!(nextpm->handle = zlistx_add_end(delayedcmds, nextpm)))
Expand Down

0 comments on commit 47a7071

Please sign in to comment.