Skip to content

Commit

Permalink
initial possion
Browse files Browse the repository at this point in the history
  • Loading branch information
Kamalavasan committed Oct 9, 2019
1 parent 8bf1383 commit 216e751
Show file tree
Hide file tree
Showing 44 changed files with 22,279 additions and 0 deletions.
186 changes: 186 additions & 0 deletions ops/c/include/ops_checkpointing.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/*
* Open source copyright declaration based on BSD open source template:
* http://www.opensource.org/licenses/bsd-license.php
*
* This file is part of the OPS distribution.
*
* Copyright (c) 2013, Mike Giles and others. Please see the AUTHORS file in
* the main source directory for a full list of copyright holders.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * The name of Mike Giles may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY Mike Giles ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Mike Giles BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** @file
* @brief OPS checkpointing library function declarations
* @author Istvan Reguly
* @details function declarations for checkpointing
*/

#ifndef __OPS_CHECKPOINTING_H
#define __OPS_CHECKPOINTING_H

/**
 * Values taken by the global ::backup_state variable declared in this header.
 *
 * The numbering is spelled out and follows the declaration order (0 to 6).
 */
typedef enum {
  OPS_BACKUP_GATHER = 0,
  OPS_BACKUP_LEADIN = 1,
  OPS_BACKUP_RESTORE = 2,
  OPS_BACKUP_BEGIN = 3,
  OPS_BACKUP_IN_PROCESS = 4,
  OPS_BACKUP_END = 5,
  OPS_NONE = 6
} ops_backup_state;
/**
 * Checkpoint status markers; ::OPS_dat_status points at values of this type.
 */
typedef enum {
  OPS_NOT_SAVED = 0,
  OPS_SAVED = 1,
  OPS_UNDECIDED = 2
} ops_checkpoint_types;

/**
 * Bit flags accepted by the `options` argument of ops_checkpointing_init().
 * Each enumerator occupies its own bit so that several can be combined.
 */
typedef enum {
  /**
   * A number of parallel loops at the very start of the simulation are to be
   * left out of every checkpoint, chiefly because they initialise datasets
   * that stay unchanged during the main body of the execution.
   * In restore mode these loops run as usual.
   * Example: the computation of the mesh geometry, which need not be part of
   * the checkpoint if it is re-computed when a checkpoint is recovered and
   * restored.
   * The end of this initial phase is marked by the API call
   * void ops_checkpointing_initphase_done().
   */
  OPS_CHECKPOINT_INITPHASE = 1 << 0,
  /**
   * The user manually controls where the checkpoint is located and
   * explicitly specifies the list of ::ops_dat s to be saved.
   */
  OPS_CHECKPOINT_MANUAL_DATLIST = 1 << 1,
  /**
   * The user manually controls where the checkpoint is located, and
   * fast-forwarding is enabled as well: execution of the application is
   * skipped up to the checkpoint (none of the parallel loops would actually
   * execute anyway, but there may be significant work outside of them).
   */
  OPS_CHECKPOINT_FASTFW = 1 << 2,
  /**
   * The checkpoint is to be created at the moment the corresponding API
   * function is called.
   * Assumes that the two options listed directly above are present too.
   */
  OPS_CHECKPOINT_MANUAL = 1 << 3
} ops_checkpoint_options;

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Initialises the checkpointing system; has to be called after
 * ops_partition().
 *
 * @param filename  name of the file used for checkpointing.
 *                  In MPI, this is automatically postfixed with the rank ID.
 * @param interval  average time (in seconds) between checkpoints
 * @param options   a combination of flags, as defined by
 *                  ::ops_checkpoint_options
 * @return `true` if the application launches in restore mode,
 *         `false` otherwise.
 */
bool ops_checkpointing_init(const char *filename, double interval, int options);
/**
 * Marks the end of the initial phase that is selected with
 * ::OPS_CHECKPOINT_INITPHASE.
 *
 * The parameter list is written as `(void)` so that C translation units see
 * a real prototype; an empty `()` in C declares a function whose arguments
 * are left unchecked. The meaning for C++ callers is unchanged.
 */
void ops_checkpointing_initphase_done(void);
/*
 * NOTE(review): not documented in this header. Presumably called ahead of a
 * parallel loop, identified by loop_id, with that loop's nargs arguments and
 * iteration range; the meaning of the returned flag is not visible here.
 * Confirm against the implementation.
 */
bool ops_checkpointing_before(ops_arg *args, int nargs, int *range,
int loop_id);
/*
 * NOTE(review): not documented in this header. Takes the same first three
 * parameters as ops_checkpointing_before() but a string s in place of the
 * integer loop id; presumably s names the loop. The meaning of the returned
 * flag is not visible here. Confirm against the implementation.
 */
bool ops_checkpointing_name_before(ops_arg *args, int nargs, int *range,
const char *s);
/**
 * NOTE(review): not documented in this header; presumably shuts down what
 * ops_checkpointing_init() set up. Confirm against the implementation.
 *
 * The parameter list is written as `(void)` so that C translation units see
 * a real prototype; an empty `()` in C declares a function whose arguments
 * are left unchecked. The meaning for C++ callers is unchanged.
 */
void ops_checkpointing_exit(void);
/*
 * NOTE(review): not documented in this header; presumably lets the
 * checkpointing system handle the reduction handle red. Confirm against the
 * implementation.
 */
void ops_checkpointing_reduction(ops_reduction red);

/**
 * Call this routine at a point in the code to mark the location of a
 * checkpoint.
 *
 * At this point, the specified list of datasets will be saved.
 * The checkpointing algorithm does not check the validity of what is saved;
 * it assumes that the user knows which datasets have to be saved for a full
 * recovery.
 * This routine should be called frequently (compared to the checkpointing
 * frequency); it triggers the creation of the checkpoint the first time it
 * is called after the timeout occurs.
 *
 * @param ndats    number of datasets to be saved
 * @param datlist  array of ::ops_dat handles to be saved
 */
void ops_checkpointing_manual_datlist(int ndats, ops_dat *datlist);

/**
 * Call this routine at a point in the code to mark the location of a
 * checkpoint.
 *
 * At this point, the specified payload (e.g. iteration count, simulation
 * time, etc.) will be saved along with the necessary datasets, as determined
 * by the checkpointing algorithm.
 * This routine should be called frequently (compared to the checkpointing
 * frequency); it triggers the creation of the checkpoint the first time it
 * is called after the timeout occurs.
 * In restore mode, it restores all datasets the first time it is called and
 * returns `true`, indicating that the saved payload is returned in payload.
 * Does not save reduction data.
 *
 * @param nbytes   size of the payload in bytes
 * @param payload  pointer to memory into which the payload is packed
 * @return `true` when, in restore mode, the datasets were restored and the
 *         saved payload is returned in payload.
 *         NOTE(review): presumably `false` in every other case; this header
 *         does not say so explicitly.
 */
bool ops_checkpointing_fastfw(int nbytes, char *payload);

/**
 * Combines the ops_checkpointing_manual_datlist() and
 * ops_checkpointing_fastfw() calls.
 *
 * @param ndats    number of datasets to be saved
 * @param datlist  array of ::ops_dat handles to be saved
 * @param nbytes   size of the payload in bytes
 * @param payload  pointer to memory into which the payload is packed
 * @return NOTE(review): not documented here; presumably the same meaning as
 *         the return value of ops_checkpointing_fastfw(). Confirm.
 */
bool ops_checkpointing_manual_datlist_fastfw(int ndats, ops_dat *datlist,
int nbytes, char *payload);

/**
 * Makes it possible to trigger checkpointing manually, instead of relying on
 * the timeout process.
 *
 * It combines the ops_checkpointing_manual_datlist() and
 * ops_checkpointing_fastfw() calls, and triggers the creation of a
 * checkpoint when called.
 *
 * @param ndats    number of datasets to be saved
 * @param datlist  array of ::ops_dat handles to be saved
 * @param nbytes   size of the payload in bytes
 * @param payload  pointer to memory into which the payload is packed
 * @return NOTE(review): not documented here; presumably the same meaning as
 *         the return value of ops_checkpointing_fastfw(). Confirm.
 */
bool ops_checkpointing_manual_datlist_fastfw_trigger(int ndats,
ops_dat *datlist,
int nbytes, char *payload);

/*
 * Global variables of the checkpointing system. They are only declared here;
 * the definitions live elsewhere.
 * NOTE(review): the per-variable remarks below are inferred from names and
 * types only. Confirm against the implementation.
 */
/* Current value from ::ops_backup_state. */
extern ops_backup_state backup_state;
/* Presumably one flag per dataset recording whether it was ever written. */
extern char *OPS_dat_ever_written;
/* Presumably one ::ops_checkpoint_types entry per dataset. */
extern ops_checkpoint_types *OPS_dat_status;
/* Presumably the number of MPI ranks sharing a node. */
extern int OPS_ranks_per_node;
/* Presumably non-zero when checkpoints are kept in memory. */
extern int ops_checkpoint_inmemory;

#ifdef __cplusplus
}
#endif

#endif /* __OPS_CHECKPOINTING_H */
177 changes: 177 additions & 0 deletions ops/c/include/ops_hdf5.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Open source copyright declaration based on BSD open source template:
* http://www.opensource.org/licenses/bsd-license.php
*
* This file is part of the OPS distribution.
*
* Copyright (c) 2013, Mike Giles and others. Please see the AUTHORS file in
* the main source directory for a full list of copyright holders.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * The name of Mike Giles may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY Mike Giles ''AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Mike Giles BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** @file
* @brief Header file for the parallel I/O functions
* @author Gihan Mudalige (started 28-08-2015)
* @details
*/

#ifndef __OPS_HDF5_H
#define __OPS_HDF5_H

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Defines a dataset that is to be read in from a named HDF5 file.
 *
 * @param block      structured block
 * @param dat_size   dimension of the dataset (number of items per grid
 *                   element)
 * @param type       name of the type, used for output diagnostics
 *                   (e.g. "double", "float")
 * @param dat_name   name of the dat, used for output diagnostics
 * @param file_name  HDF5 file to read and obtain the data from
 * @return the ::ops_dat defined by this call
 */
ops_dat ops_decl_dat_hdf5(ops_block block, int dat_size, const char *type,
                          const char *dat_name, const char *file_name);

/**
 * Reads the details of a structured grid block from a named HDF5 file.
 *
 * The routine reads no information about the block from the file beyond
 * what the arguments already specify. It is provided for error checking
 * (e.g. checking that the blocks defined in an HDF5 file match the arguments
 * declared in an application) and for completeness.
 *
 * @param dims        dimension of the block
 * @param block_name  a name used for output diagnostics
 * @param file_name   HDF5 file to read and obtain the block information from
 * @return the ::ops_block declared by this call
 */
ops_block ops_decl_block_hdf5(int dims, const char *block_name,
                              const char *file_name);

/**
 * Declares an ::ops_stencil using a named HDF5 file as the source.
 *
 * @param dims          dimension of loop iteration
 * @param points        number of points in the stencil
 * @param stencil_name  string representing the name of the stencil
 * @param file_name     HDF5 file to read from
 * @return the ::ops_stencil declared by this call
 */
ops_stencil ops_decl_stencil_hdf5(int dims, int points,
                                  const char *stencil_name,
                                  const char *file_name);

/**
 * Declares a strided ::ops_stencil using a named HDF5 file as the source.
 *
 * NOTE(review): how the strides are obtained is not documented in this
 * header; confirm against the implementation.
 *
 * @param dims          dimension of loop iteration
 * @param points        number of points in the stencil
 * @param stencil_name  string representing the name of the stencil
 * @param file_name     HDF5 file to read from
 * @return the ::ops_stencil declared by this call
 */
ops_stencil ops_decl_strided_stencil_hdf5(int dims, int points,
                                          const char *stencil_name,
                                          const char *file_name);

/**
 * Reads in, from a named HDF5 file, a halo relationship between two datasets
 * that are defined on two different blocks.
 *
 * @param from       origin dataset
 * @param to         destination dataset
 * @param file_name  HDF5 file to read and obtain the data from
 * @return the ::ops_halo declared by this call
 */
ops_halo ops_decl_halo_hdf5(ops_dat from, ops_dat to, const char *file_name);

/**
 * Writes the details of an ::ops_dat to a named HDF5 file.
 *
 * Usable over MPI, in which case the data held in the ::ops_dat is put into
 * the HDF5 file using MPI I/O.
 *
 * @param dat        ::ops_dat to be written
 * @param file_name  HDF5 file to write to
 */
void ops_fetch_dat_hdf5_file(ops_dat dat, const char *file_name);

/**
 * Writes the details of an ::ops_block to a named HDF5 file.
 *
 * Usable over MPI, in which case the data held in the ::ops_block is put
 * into the HDF5 file using MPI I/O.
 *
 * @param block      ::ops_block to be written
 * @param file_name  HDF5 file to write to
 */
void ops_fetch_block_hdf5_file(ops_block block, const char *file_name);

/**
 * Writes the details of an ::ops_stencil to a named HDF5 file.
 *
 * Usable over MPI, in which case the data held in the ::ops_stencil is put
 * into the HDF5 file using MPI I/O.
 *
 * @param stencil    ::ops_stencil to be written
 * @param file_name  HDF5 file to write to
 */
void ops_fetch_stencil_hdf5_file(ops_stencil stencil, const char *file_name);

/**
 * Writes the details of an ::ops_halo to a named HDF5 file.
 *
 * Usable over MPI, in which case the data held in the ::ops_halo is put into
 * the HDF5 file using MPI I/O.
 *
 * @param halo       ::ops_halo to be written
 * @param file_name  HDF5 file to write to
 */
void ops_fetch_halo_hdf5_file(ops_halo halo, const char *file_name);

/*
 * Hidden from any build that defines DOXYGEN_SHOULD_SKIP_THIS (presumably
 * the Doxygen documentation run).
 * NOTE(review): presumably reads the contents of dat from an HDF5 file; the
 * source file is not a parameter, so where its name comes from is not
 * visible in this header. Confirm against the implementation.
 */
#ifndef DOXYGEN_SHOULD_SKIP_THIS
void ops_read_dat_hdf5(ops_dat dat);
#endif /* DOXYGEN_SHOULD_SKIP_THIS*/

/**
 * Writes all state (blocks, datasets, stencils) to a named HDF5 file.
 *
 * @param file_name  HDF5 file to write to
 */
void ops_dump_to_hdf5(const char *file_name);

/**
 * NOTE(review): not documented in this header. Presumably writes the
 * constant called name, made of dim items of the given type and stored at
 * const_data, to the HDF5 file file_name. Confirm against the
 * implementation.
 */
void ops_write_const_hdf5(const char *name, int dim, const char *type,
                          char *const_data, const char *file_name);
/**
 * NOTE(review): not documented in this header. Presumably reads the constant
 * called name, made of dim items of the given type, from the HDF5 file
 * file_name into the memory at const_data. Confirm against the
 * implementation.
 */
void ops_get_const_hdf5(const char *name, int dim, const char *type,
                        char *const_data, const char *file_name);

#ifdef __cplusplus
}
#endif
#endif
/* __OPS_HDF5_H */
Loading

0 comments on commit 216e751

Please sign in to comment.