-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblcr_checkpoint_script
executable file
·85 lines (71 loc) · 2.7 KB
/
blcr_checkpoint_script
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#! /usr/bin/perl
################################################################################
#
# Usage: checkpoint_script <sessionId> <jobId> <userId> <groupId> <checkpointDir> <checkpointName> <signalNum> <depth>
#
# This script is invoked by pbs_mom to checkpoint a job.
#
################################################################################
use strict;
use warnings;
use Sys::Syslog;
# Log levels:
# 0 = none -- no logging
# 1 = fail -- log only failures
# 2 = info -- log invocations
# 3 = debug -- log all subcommands
my $logLevel = 3;
logPrint(2, "Invoked: $0 " . join(' ', @ARGV) . "\n");

# Positional arguments, listed in the order they are unpacked from @ARGV below.
my $usage = "Usage: $0 <sessionId> <jobId> <userId> <groupId> <checkpointDir> <checkpointName> <signalNum> <depth>\n";

# Note that depth is not used in this script but could control a limit to the number of checkpoint
# image files that are preserved on the disk.
#
# Note also that a request was made to identify whether this script was invoked by the job's
# owner or by a system administrator. While this information is known to pbs_server, it
# is not propagated to pbs_mom and thus it is not possible to pass this to the script.
# Therefore, a workaround is to invoke qmgr and attempt to set a trivial variable.
# This will fail if the invoker is not a manager.
# (This script does not currently perform that qmgr check.)
if (@ARGV != 8)
{
    logDie(1, "Number of arguments is " . scalar(@ARGV) . ". Should be 8.\n$usage");
}
my ($sessionId, $jobId, $userId, $groupId, $checkpointDir, $checkpointName, $signalNum, $depth) = @ARGV;

# Change to the checkpoint directory where we want the checkpoint to be created.
# This must happen regardless of the log level; logDie() itself decides whether
# the failure is written to syslog.
chdir($checkpointDir)
    or logDie(1, "Unable to cd to checkpoint dir ($checkpointDir): $!\n");

# Build the cr_checkpoint command line. --signal is only added when a non-zero
# signal number was supplied.
# NOTE(review): the arguments are interpolated into a shell command line
# unquoted; this presumably relies on pbs_mom passing shell-safe values -- confirm.
my $cmd = "cr_checkpoint";
$cmd .= " --signal $signalNum" if $signalNum;
$cmd .= " --tree $sessionId";
$cmd .= " --file $checkpointName";

# Run the command, capturing stdout and stderr together for the log.
my $output = `$cmd 2>&1`;
my ($status, $osError) = ($?, "$!");
$output = '' unless defined $output;

# $? is -1 when the command could not be started at all.
logDie(1, "Subcommand ($cmd) could not be executed: $osError\n")
    if $status == -1;

# The low 7 bits of $? hold the terminating signal; the exit code alone
# ($? >> 8) would be 0 in that case and hide the failure.
my $termSignal = $status & 127;
logDie(1, "Subcommand ($cmd) was killed by signal $termSignal:\n$output")
    if $termSignal;

# A non-zero exit code is always fatal, independent of the log level, so that
# the caller never sees a successful exit for a failed checkpoint.
my $rc = $status >> 8;
logDie(1, "Subcommand ($cmd) failed with rc=$rc:\n$output")
    if $rc;

logPrint(3, "Subcommand ($cmd) yielded rc=$rc:\n$output");
exit 0;
################################################################################
# logPrint($level, $message)
# Write a message to syslog when $level does not exceed the file-level
# $logLevel. Does not die.
#
#   $level   - 1 (logged as 'warning'), 2 ('info') or 3 ('debug')
#   $message - text to log; written verbatim
#
# Messages whose level has no syslog priority (0 or beyond the table) are
# silently dropped.
################################################################################
sub logPrint
{
    my ($level, $message) = @_;
    my @severity = ('none', 'warning', 'info', 'debug');
    return if $level > $logLevel;
    # 'none' (index 0) is a placeholder, not a syslog priority, and indexes past
    # the end of the table have no priority at all.
    return if $level < 1 || $level > $#severity;
    openlog('checkpoint_script', '', 'user');
    # syslog() treats its second argument as a sprintf-style format, so pass the
    # message as data; otherwise a '%' in captured command output is
    # interpreted as a format directive.
    syslog($severity[$level], '%s', $message);
    closelog();
    return;
}
################################################################################
# logDie($level, $message)
# Hand the message to logPrint() at the given level, then terminate the
# script by die()ing with the same message.
################################################################################
sub logDie
{
    my $severityLevel = shift;
    my $text          = shift;

    # Log first so the failure is recorded before the script terminates.
    logPrint($severityLevel, $text);
    die $text;
}