Skip to content

Commit

Permalink
Merge pull request #305 from artpol84/timing
Browse files Browse the repository at this point in the history
Timing framework improvement
  • Loading branch information
artpol84 committed Dec 16, 2014
2 parents 357daa8 + 8ffad75 commit 01601f3
Show file tree
Hide file tree
Showing 8 changed files with 769 additions and 141 deletions.
8 changes: 5 additions & 3 deletions ompi/runtime/ompi_mpi_finalize.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
#include "ompi/runtime/ompi_cr.h"

extern bool ompi_enable_timing;
extern bool ompi_enable_timing_ext;

int ompi_mpi_finalize(void)
{
Expand Down Expand Up @@ -151,7 +152,7 @@ int ompi_mpi_finalize(void)
opal_progress_event_users_increment();

/* check to see if we want timing information */
OPAL_TIMING_EVENT((&tm,"Start barrier"));
OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier"));

/* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
*all* connected processes. This only means that all processes
Expand Down Expand Up @@ -232,8 +233,9 @@ int ompi_mpi_finalize(void)

/* check for timing request - get stop time and report elapsed
time if so */
OPAL_TIMING_EVENT((&tm,"Finish barrier"));
OPAL_TIMING_REPORT(ompi_enable_timing, &tm, "MPI_Finish");
OPAL_TIMING_MSTOP(&tm);
OPAL_TIMING_DELTAS(ompi_enable_timing, &tm);
OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm);
OPAL_TIMING_RELEASE(&tm);

/*
Expand Down
34 changes: 20 additions & 14 deletions ompi/runtime/ompi_mpi_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ opal_hash_table_t ompi_mpi_f90_complex_hashtable;
*/
opal_list_t ompi_registered_datareps;

bool ompi_enable_timing;
bool ompi_enable_timing, ompi_enable_timing_ext;
extern bool ompi_mpi_yield_when_idle;
extern int ompi_mpi_event_tick_rate;

Expand Down Expand Up @@ -366,6 +366,13 @@ static int ompi_register_mca_variables(void)
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_enable_timing);

ompi_enable_timing_ext = false;
(void) mca_base_var_register("ompi", "ompi", NULL, "timing_ext",
"Request that critical timing loops be measured",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_enable_timing_ext);
return OMPI_SUCCESS;
}

Expand Down Expand Up @@ -436,7 +443,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL);
}

OPAL_TIMING_EVENT((&tm,"Start"));
OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init"));

/* if we were not externally started, then we need to setup
* some envars so the MPI_INFO_ENV can get the cmd name
Expand Down Expand Up @@ -470,7 +477,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_rte_initialized = true;

/* check for timing request - get stop time and report elapsed time if so */
OPAL_TIMING_EVENT((&tm,"rte_init complete"));
OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex"));

#if OPAL_HAVE_HWLOC
/* if hwloc is available but didn't get setup for some
Expand Down Expand Up @@ -615,7 +622,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}

/* check for timing request - get stop time and report elapsed time if so */
OPAL_TIMING_EVENT((&tm,"Start modex"));
OPAL_TIMING_MNEXT((&tm,"time to execute modex"));

/* exchange connection info - this function may also act as a barrier
* if data exchange is required. The modex occurs solely across procs
Expand All @@ -624,7 +631,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
*/
OPAL_FENCE(NULL, 0, NULL, NULL);

OPAL_TIMING_EVENT((&tm,"End modex"));
OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier"));

/* select buffered send allocator component to be used */
if( OMPI_SUCCESS !=
Expand Down Expand Up @@ -773,9 +780,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Do we need to wait for a debugger? */
ompi_rte_wait_for_debugger();

/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */
OPAL_TIMING_EVENT((&tm,"Start barrier"));
/* Next timing measurement */
OPAL_TIMING_MNEXT((&tm,"time to execute barrier"));

/* wait for everyone to reach this point - this is a hard
* barrier requirement at this time, though we hope to relax
Expand All @@ -784,7 +790,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)

/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */
OPAL_TIMING_EVENT((&tm,"End barrier"));
OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init"));

#if OPAL_ENABLE_PROGRESS_THREADS == 0
/* Start setting up the event engine for MPI operations. Don't
Expand Down Expand Up @@ -853,8 +859,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}



/* Check whether we have been spawned or not. We introduce that
at the very end, since we need collectives, datatypes, ptls
etc. up and running here.... */
Expand Down Expand Up @@ -930,9 +934,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)

ompi_mpi_initialized = true;

/* check for timing request - get stop time and report elapsed time if so */
OPAL_TIMING_EVENT((&tm,"Finish"));
OPAL_TIMING_REPORT(ompi_enable_timing, &tm,"MPI Init");
/* Finish last measurement, output results
* and clear timing structure */
OPAL_TIMING_MSTOP(&tm);
OPAL_TIMING_DELTAS(ompi_enable_timing, &tm);
OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm);
OPAL_TIMING_RELEASE(&tm);

return MPI_SUCCESS;
Expand Down
5 changes: 2 additions & 3 deletions ompi/tools/mpisync/mpirun_prof
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/bin/sh
#!/bin/bash
#
# Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>
Expand Down Expand Up @@ -54,8 +53,8 @@ syncfile=`pwd`"/ompi_clock_sync_data.$jobid"
tmp_timings=`pwd`"/ompi_timing_temp_file.$jobid"
tmp_out=`pwd`"/ompi_mpirun_prof.$jobid"

timing_bkp=$OMPI_MCA_opal_timing_file
export OMPI_MCA_opal_timing_file=$tmp_timings
timing_bkp=$OMPI_MCA_opal_timing_output
export OMPI_MCA_opal_timing_output=$tmp_timings

opts=`extract_nodeset_opts $@`
${ompi_instdir}/mpirun --npernode 1 $opts ${ompi_instdir}/mpisync -o $syncfile >$tmp_out 2>&1
Expand Down
76 changes: 40 additions & 36 deletions ompi/tools/mpisync/ompi_timing_post
Original file line number Diff line number Diff line change
Expand Up @@ -10,45 +10,49 @@
#####################################################################
# Evaluate a floating point number expression.

function float_eval(){
float_scale=9
local stat=0
local result=0.0
if [[ $# -gt 0 ]]; then
result=$(echo "scale=$float_scale; $*" | bc -q 2>/dev/null)
stat=$?
if [[ $stat -eq 0 && -z "$result" ]]; then
stat=1;
fi
fi
echo $result
return $stat
}

if [ -z "$1" ] || [ -z "$2" ]; then
echo "Need the name of a timing file and the output file"
exit 0
fi

thefile=$1
initfile=$1
postfile=$2
sed '/^$/d' $thefile > ${thefile}_tmp
sort ${thefile}_tmp > ${postfile}

read line < ${postfile}
first_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'`
prev_ts=$first_ts
echo $first_ts

while read line ; do
cur_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'`
dif1=`float_eval "$cur_ts - $first_ts"`
dif2=`float_eval "$cur_ts - $prev_ts"`
newline=`echo $line | sed -e "s/$cur_ts/$dif1:$dif2/"`
prev_ts=$cur_ts
echo $newline
done < ${postfile} > ${thefile}_tmp

cat ${thefile}_tmp > ${postfile}

rm -f ${thefile}_tmp

# 1. Filter OPAL_TRACE entrieas only
# and put the timestamp to the first place
#.2. Sort considering that we dealing with
# floating point numbers
# 3. Return to initial field order and count relative fields
cat $initfile | \
awk 'BEGIN { FPAT = "([^ ]+)|(\"[^\"]+\")" }
{
if( $0 ~ /\[OPAL_TRACE\]/ ){
x = $NF
$NF = $1
$1 = x
print $0
}
}' | sort --general-numeric-sort | \
awk 'BEGIN {
FPAT = "([^ ]+)|(\"[^\"]+\")"
first = 0
prev = 0
}
{
if( first == 0 ){
first = $1
prev = $1
}
x = $1
$1 = $NF
$NF = x
rel_to_first = x - first
rel_to_prev = x - prev
prev = x
print $0, " ", rel_to_prev, " ", rel_to_first
}' > $postfile





Loading

0 comments on commit 01601f3

Please sign in to comment.