diff --git a/CMakeLists.txt b/CMakeLists.txt index accf04e3..d08c78c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,7 +156,6 @@ else() message(FATAL_ERROR "Unknown RUNTIME_TYPE: ${RUNTIME_TYPE}. Supported values are 'STARPU' or 'PARSEC'.") endif() - # ExaGeoStatCPP depends on LAPACK/BLASPP # ------------------------------- include(ImportBLASPP) diff --git a/cmake/FindHICMA-X.cmake b/cmake/FindHICMA-X.cmake index a0d3ff3c..bb2a045e 100644 --- a/cmake/FindHICMA-X.cmake +++ b/cmake/FindHICMA-X.cmake @@ -30,6 +30,7 @@ if(PKG_CONFIG_FOUND) endif() # TODO: This is not generalized for the case of hicma installed manually set(HICMA_X_SRC_DIR ${HICMA_X_ROOT}/hicma-x-src) + set(HICMA_X_BIN_DIR ${HICMA_X_ROOT}/bin) set(HICMA-X_FOUND TRUE) set(HICMA-X_LIBRARIES ${DPLASMA_PKG_LIBRARIES} ${PARSEC_PKG_LIBRARIES}) set(HICMA-X_LIBRARY_DIRS "${HICMA_X_LIB_PATH}") diff --git a/cmake/ImportHiCMAX.cmake b/cmake/ImportHiCMAX.cmake index 06493016..6bdc2a79 100644 --- a/cmake/ImportHiCMAX.cmake +++ b/cmake/ImportHiCMAX.cmake @@ -16,7 +16,9 @@ set(name "HICMA-X") set(tag "FIX-package-installation-MK") # Flags to configure the build for HiCMA-X, including precision settings for DPLASMA # and disabling GPU support for both CUDA and HIP. -set(flags '-DDPLASMA_PRECISIONS="s;d"' \-DPARSEC_WITH_DEVEL_HEADERS=ON \-DCMAKE_Fortran_FLAGS="-Wno-main" \-DPARSEC_GPU_WITH_HIP=OFF \-DPARSEC_GPU_WITH_CUDA=OFF \-DPARSEC_HAVE_CUDA=OFF \-DPARSEC_DIST_SHORT_LIMIT=0 \-DPARSEC_DIST_COLLECTIVES=ON \-DPARSEC_HAVE_DEV_CUDA_SUPPORT=OFF \-DDPLASMA_HAVE_CUDA=OFF) +set(flags '-DDPLASMA_PRECISIONS="s;d"' \-DPARSEC_WITH_DEVEL_HEADERS=ON \-DCMAKE_Fortran_FLAGS="-Wno-main" + \-DPARSEC_GPU_WITH_HIP=OFF \-DPARSEC_GPU_WITH_CUDA=OFF \-DPARSEC_HAVE_CUDA=OFF \-DPARSEC_DIST_SHORT_LIMIT=0 + \-DPARSEC_DIST_COLLECTIVES=ON \-DPARSEC_HAVE_DEV_CUDA_SUPPORT=OFF \-DDPLASMA_HAVE_CUDA=OFF \-DBLA_VENDOR=${BLA_VENDOR}) # Indicates that HiCMA-X uses CMake for its build system. set(is_cmake ON) # Indicates that HiCMA-X is hosted on a Git repository. diff --git a/cmake/ImportStarsH.cmake b/cmake/ImportStarsH.cmake index 10ec8347..da715722 100644 --- a/cmake/ImportStarsH.cmake +++ b/cmake/ImportStarsH.cmake @@ -31,7 +31,7 @@ elseif(RUNTIME_TYPE STREQUAL "PARSEC") endif() # 'flag' is used for additional build configuration options, specifically disabling StarPU and optionally enabling MPI. -set(flag \-DSTARPU=OFF \-DMPI=${USE_MPI}) +set(flag \-DSTARPU=OFF \-DMPI=${USE_MPI} \-DBLA_VENDOR=${BLA_VENDOR}) # 'is_cmake' indicates that STARSH uses CMake as its build system, set to ON. set(is_cmake ON) # 'is_git' denotes that the source code for STARSH is hosted on a Git repository, set to ON. diff --git a/examples/climate-emulator/ClimateEmulator.cpp b/examples/climate-emulator/ClimateEmulator.cpp index 44f5424f..2ff03177 100644 --- a/examples/climate-emulator/ClimateEmulator.cpp +++ b/examples/climate-emulator/ClimateEmulator.cpp @@ -27,17 +27,12 @@ int main(int argc, char **argv) { configurations.InitializeArguments(argc, argv); // Initialize the ExaGeoStat Hardware auto hardware = ExaGeoStatHardware(configurations); - -// // Create a unique pointer to hold the data. -// std::unique_ptr> data; -// // Transform and prepare the data. -// ExaGeoStat::ExaGeoStatTransformData(configurations, data); -// // Load the data, either by reading from a file or generating synthetic data. -// ExaGeoStat::ExaGeoStatLoadData(configurations, data); -// // Perform data modeling. -// ExaGeoStat::ExaGeoStatDataModeling(configurations, data); -// // Analyze the data. -// ExaGeoStat::ExaGeoStatDataAnalyzer(configurations, data); + // Create a unique pointer to hold the data. + std::unique_ptr> data; + // Load the data, either by reading from a file or generating synthetic data. + ExaGeoStat::ExaGeoStatLoadData(configurations, data); + // Perform data modeling. + ExaGeoStat::ExaGeoStatDataModeling(configurations, data); return 0; } diff --git a/inst/include/api/ExaGeoStat.hpp b/inst/include/api/ExaGeoStat.hpp index 0430cac2..b399b460 100644 --- a/inst/include/api/ExaGeoStat.hpp +++ b/inst/include/api/ExaGeoStat.hpp @@ -14,8 +14,6 @@ #ifndef EXAGEOSTATCPP_EXAGEOSTAT_HPP #define EXAGEOSTATCPP_EXAGEOSTAT_HPP -#include - #include #include @@ -51,17 +49,6 @@ namespace exageostat::api { std::unique_ptr> &aData, T *apMeasurementsMatrix = nullptr); - - /** - * @brief Objective function used in optimization, and following the NLOPT objective function format. - * @param[in] aTheta An array of length n containing the current point in the parameter space. - * @param[in] aGrad An array of length n where you can optionally return the gradient of the objective function. - * @param[in] apInfo pointer containing needed configurations and data. - * @return double MLE results. - * - */ - static double ModelingAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo); - /** * @brief Predict missing measurements values. * @param[in] aConfigurations Reference to Configurations object containing user input data. @@ -76,26 +63,6 @@ namespace exageostat::api { ExaGeoStatPrediction(configurations::Configurations &aConfigurations, std::unique_ptr> &aData, T *apMeasurementsMatrix = nullptr, dataunits::Locations *apTrainLocations = nullptr, dataunits::Locations *apTestLocations = nullptr); - /** - * @brief Transform data into the required format for ExaGeoStat operations. - * @param[in] aConfigurations Reference to Configurations object containing user input data. - * @param[in, out] aData Reference to an ExaGeoStatData object containing the descriptors and locations to be transformed. - * @return void - * - */ - static void - ExaGeoStatTransformData(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); - - /** - * @brief Analyze the data to extract insights and patterns. - * @param[in] aConfigurations Reference to Configurations object containing user input data. - * @param[in, out] aData Reference to an ExaGeoStatData object that contains the data to be analyzed. - * @return void - * - */ - static void - ExaGeoStatDataAnalyzer(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); - }; /** diff --git a/inst/include/common/Definitions.hpp b/inst/include/common/Definitions.hpp index 400e26df..65707b02 100644 --- a/inst/include/common/Definitions.hpp +++ b/inst/include/common/Definitions.hpp @@ -131,7 +131,8 @@ namespace exageostat::common { */ enum DescriptorType { CHAMELEON_DESCRIPTOR = 0, - HICMA_DESCRIPTOR = 1 + HICMA_DESCRIPTOR = 1, + PARSEC_DESCRIPTOR = 2 }; /** @@ -141,7 +142,8 @@ namespace exageostat::common { */ enum DataSourceType { SYNTHETIC = 0, - CSV_FILE = 1 + CSV_FILE = 1, + PARSEC_FILE = 2 }; /** @@ -205,6 +207,21 @@ namespace exageostat::common { DESCRIPTOR_SUM = 52, DESCRIPTOR_R = 53, DESCRIPTOR_R_COPY = 54, + DESCRIPTOR_F_DATA = 55, + DESCRIPTOR_ET1 = 56, + DESCRIPTOR_ET2 = 57, + DESCRIPTOR_EP = 58, + DESCRIPTOR_SLMN = 59, + DESCRIPTOR_IE = 60, + DESCRIPTOR_IO = 61, + DESCRIPTOR_P = 62, + DESCRIPTOR_D = 63, + DESCRIPTOR_FLMERA = 64, + DESCRIPTOR_ZLM = 65, + DESCRIPTOR_SC = 66, + DESCRIPTOR_F_SPATIAL = 67, + DESCRIPTOR_FLM = 68, + DESCRIPTOR_FLMT = 69 }; /** diff --git a/inst/include/data-analyzer/DataAnalyzer.hpp b/inst/include/data-analyzer/DataAnalyzer.hpp index 72cadf02..f9f50379 100644 --- a/inst/include/data-analyzer/DataAnalyzer.hpp +++ b/inst/include/data-analyzer/DataAnalyzer.hpp @@ -27,35 +27,39 @@ namespace exageostat::analyzer{ class DataAnalyzer { public: - /** - * @brief Default constructor. - */ - DataAnalyzer(); /** - * @brief Default destructor. + * @brief Analyzes the given matrix data pre computation. + * @param[in, out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. + * @return void + * */ - ~DataAnalyzer(); + static void PreAnalyzeMatrix(std::unique_ptr> &aData); /** - * @brief Analyzes the given matrix data. - * @param[in] aConfigurations Reference to Configurations object containing needed parameters. + * @brief Analyzes the given matrix data post computation. * @param[in, out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. * @return void * */ - static void AnalyzeMatrix(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + static void PostAnalyzeMatrix(std::unique_ptr> &aData); /** - * @brief Compares betweent two matrices by getting the difference. - * @param[in] aConfigurations Reference to Configurations object containing needed parameters. + * @brief Compares between two matrices by getting the difference. * @param[in, out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. * @return the calculated MSE. * */ - static double CompareMatDifference(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + static double CompareMatDifference(std::unique_ptr> &aData); }; + /** + * @brief Instantiates the ExaGeoStat class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(DataAnalyzer) + }//namespace exageostat #endif // EXAGEOSTATCPP_DATAANALYZER_HPP \ No newline at end of file diff --git a/inst/include/data-generators/DataGenerator.hpp b/inst/include/data-generators/DataGenerator.hpp index 2e5d4344..9f27e933 100644 --- a/inst/include/data-generators/DataGenerator.hpp +++ b/inst/include/data-generators/DataGenerator.hpp @@ -61,8 +61,8 @@ namespace exageostat::generators { protected: - /// Used enum for data generators types. - static common::DataSourceType aDataSourceType; + /// Used flag to determine if data generated is synthetic. + static bool aIsSynthetic; }; /** diff --git a/inst/include/data-loader/DataLoader.hpp b/inst/include/data-loader/DataLoader.hpp index 5f530bd6..931cdec0 100644 --- a/inst/include/data-loader/DataLoader.hpp +++ b/inst/include/data-loader/DataLoader.hpp @@ -67,6 +67,35 @@ namespace exageostat::dataLoader { virtual void WriteData(const T &aMatrixPointer, const int &aProblemSize, const int &aP, std::string &aLoggerPath, exageostat::dataunits::Locations &aLocations) = 0; + + /** + * @brief Abstract method for loading data based on provided configurations and kernel. + * @param[in] aConfigurations Reference to the configurations object that contains parameters for loading data. + * @param[in] aKernel Reference to the kernel object that defines the operations to be applied while loading the data. + * @return A unique pointer to the loaded ExaGeoStatData object. + * + */ + virtual std::unique_ptr> + LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) = 0; + + /** + * @brief Factory method for creating a DataLoader instance based on the given configurations. + * This method dynamically determines the type of data loader to instantiate based on compile-time conditions. + * @param[in] aConfigurations Reference to the configurations object that contains parameters for loading data. + * @return A unique pointer to a DataLoader instance configured as per the specified runtime conditions. + * + */ + static std::unique_ptr> + CreateDataLoader(exageostat::configurations::Configurations &apConfigurations); + + /** + * @brief Releases the singleton instance of the currently active DataLoader. + * This method ensures proper deallocation of the singleton instance of the data loader, + * depending on the selected runtime. + * + */ + static void ReleaseDataLoader(); + }; /** diff --git a/inst/include/data-loader/concrete/CSVLoader.hpp b/inst/include/data-loader/concrete/CSVLoader.hpp index 52b185bd..153d8922 100644 --- a/inst/include/data-loader/concrete/CSVLoader.hpp +++ b/inst/include/data-loader/concrete/CSVLoader.hpp @@ -5,7 +5,7 @@ /** * @file CSVLoader.hpp - * @brief A class for generating synthetic data. + * @brief A class for loading csv format data. * @version 1.1.0 * @author Mahmoud ElKarargy * @author Sameh Abdulah @@ -54,6 +54,14 @@ namespace exageostat::dataLoader::csv { exageostat::dataunits::Locations &aLocations) override; /** + * @brief Loads data based on given configuration. + * @copydoc DataLoader::LoadData() + * + */ + std::unique_ptr> + LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) override; + + /** * @brief Release the singleton instance of the CSVLoader class. * @return void * diff --git a/inst/include/data-loader/concrete/ParsecLoader.hpp b/inst/include/data-loader/concrete/ParsecLoader.hpp new file mode 100644 index 00000000..6d0a1b70 --- /dev/null +++ b/inst/include/data-loader/concrete/ParsecLoader.hpp @@ -0,0 +1,121 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecLoader.hpp + * @brief A class for loading PaRSEC format data. + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-02-04 +**/ + +#ifndef EXAGEOSTAT_CPP_PARSECDATALOADER_HPP +#define EXAGEOSTAT_CPP_PARSECDATALOADER_HPP + +#include + +namespace exageostat::dataLoader::parsec { + + /** + * @class ParsecLoader + * @brief A class for creating data by reading PaRSEC files. + * @tparam T Data Type: float or double + */ + template + class ParsecLoader : public DataLoader { + public: + + /** + * @brief Get a pointer to the singleton instance of the ParsecLoader class. + * @return A pointer to the instance of the ParsecLoader class. + * + */ + static ParsecLoader *GetInstance(); + + /** + * @brief Reads data from external sources into ExaGeoStat format. + * @copydoc DataLoader::ReadData() + * + */ + void ReadData(configurations::Configurations &aConfigurations, std::vector &aMeasurementsMatrix, + std::vector &aXLocations, std::vector &aYLocations, std::vector &aZLocations, + const int &aP) override; + + /** + * @brief Writes a matrix of vectors to disk. + * @copydoc DataLoader::WriteData() + * + */ + void + WriteData(const T &aMatrixPointer, const int &aProblemSize, const int &aP, std::string &aLoggerPath, + exageostat::dataunits::Locations &aLocations) override; + + /** + * @brief Creates the data by synthetically generating it. + * @copydoc DataGenerator::LoadData() + * + */ + std::unique_ptr> + LoadData(configurations::Configurations &aConfigurations, kernels::Kernel &aKernel) override; + + /** + * @brief Release the singleton instance of the ParsecLoader class. + * @return void + * + */ + static void ReleaseInstance(); + + /** + * @brief Reads data from a CSV file into a matrix. + * @param[in] apFilename Name of the CSV file. + * @param[out] apFileContent Pointer to an array where file contents will be stored. + * @param[in] aM Number of rows in the matrix. + * @param[in] aN Number of columns in the matrix. + * @return 0 on success, or a non-zero error code on failure. + * + */ + int ReadCSVFileHelper(const char* apFilename, double *apFileContent, int aM, int aN); + + /** + * @brief Adapter for the matrix compress operation + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * @return void. + * + */ + void CompressMatrixHelper(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + + + private: + /** + * @brief Constructor for the ParsecLoader class. + * @return void + * + */ + ParsecLoader() = default; + + /** + * @brief Default destructor. + * + */ + ~ParsecLoader() override = default; + + /** + * @brief Pointer to the singleton instance of the ParsecLoader class. + * + */ + static ParsecLoader *mpInstance; + + }; + + /** + * @brief Instantiates the PaRSEC Data Generator class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(ParsecLoader) +} +#endif //EXAGEOSTAT_CPP_PARSECDATALOADER_HPP \ No newline at end of file diff --git a/inst/include/data-transformer/DataTransformer.hpp b/inst/include/data-transformer/DataTransformer.hpp index a63e3c01..9cf640cc 100644 --- a/inst/include/data-transformer/DataTransformer.hpp +++ b/inst/include/data-transformer/DataTransformer.hpp @@ -18,7 +18,7 @@ #include #include -namespace exageostat::transformer{ +namespace exageostat::transformers{ /** * @brief Class represents the data transformer for the Climate Emulator. @@ -27,12 +27,13 @@ namespace exageostat::transformer{ template class DataTransformer { + public: /** * @brief Performs the forward spherical harmonics transform (SHT). - * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in] aLSize The size of tile size * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. */ - static void ForwardSHT(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + static void ForwardSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData); /** * @brief Reshapes data during the forward phase of the simulation. @@ -43,10 +44,17 @@ namespace exageostat::transformer{ /** * @brief Performs the inverse spherical harmonics transform (SHT). - * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in] aLSize The size of tile size * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. */ - static void InverseSHT(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + static void InverseSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData); }; -} + + /** + * @brief Instantiates the DataTransformers class for float and double types. + * @tparam T Data Type: float or double + */ + EXAGEOSTAT_INSTANTIATE_CLASS(DataTransformer) +} // namespace exageostat + #endif // EXAGEOSTATCPP_DATATRANSFORMER_HPP diff --git a/inst/include/data-units/DescriptorData.hpp b/inst/include/data-units/DescriptorData.hpp index f5775187..fe771445 100644 --- a/inst/include/data-units/DescriptorData.hpp +++ b/inst/include/data-units/DescriptorData.hpp @@ -19,9 +19,6 @@ #include #include -#if DEFAULT_RUNTIME -#include -#endif namespace exageostat::dataunits { @@ -33,9 +30,11 @@ namespace exageostat::dataunits { union BaseDescriptor { #if DEFAULT_RUNTIME CHAM_desc_t *chameleon_desc; -#ifdef USE_HICMA + #ifdef USE_HICMA HICMA_desc_t *hicma_desc; -#endif + #endif +#else + parsec_matrix_block_cyclic_t *parsec_desc; #endif }; @@ -141,10 +140,11 @@ namespace exageostat::dataunits { * */ void SetDescriptor(const common::DescriptorType &aDescriptorType, const common::DescriptorName &aDescriptorName, - const bool &aIsOOC, void *apMatrix, const common::FloatPoint &aFloatPoint, const int &aMB, - const int &aNB, const int &aSize, const int &aLM, const int &aLN, const int &aI, - const int &aJ, const int &aM, const int &aN, const int &aP, const int &aQ, - const bool &aValidOOC = true, const bool &aConverted = false); + const bool &aIsOOC = false, void *apMatrix = nullptr, + const common::FloatPoint &aFloatPoint = common::EXAGEOSTAT_REAL_DOUBLE, const int &aMB = 0, + const int &aNB = 0, const int &aSize = 0, const int &aLM = 0, const int &aLN = 0, + const int &aI = 0, const int &aJ = 0, const int &aM = 0, const int &aN = 0, + const int &aP = 0, const int &aQ = 0, const bool &aValidOOC = true, const bool &aConverted = false); /** * @brief Getter for the Descriptor matrix. diff --git a/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp b/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp index d8a734fa..c10bb8a8 100644 --- a/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp +++ b/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp @@ -16,9 +16,14 @@ #define EXAGEOSTATCPP_EXAGEOSTATDESCRIPTOR_HPP #if DEFAULT_RUNTIME + #include -#endif #include + +#else +#include +#endif + #include /** diff --git a/inst/include/data-units/descriptor/concrete/ParsecDescriptor.hpp b/inst/include/data-units/descriptor/concrete/ParsecDescriptor.hpp new file mode 100644 index 00000000..074a4744 --- /dev/null +++ b/inst/include/data-units/descriptor/concrete/ParsecDescriptor.hpp @@ -0,0 +1,58 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecDescriptor.hpp + * @brief Defines the ParsecDescriptor class for creating matrix descriptors using the PaRSEC library. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-10-18 +**/ + +#ifndef EXAGEOSTATCPP_ParsecDESCRIPTOR_HPP +#define EXAGEOSTATCPP_ParsecDESCRIPTOR_HPP + +#include +#include + +namespace exageostat::dataunits::descriptor { + + /** + * @brief ParsecDescriptor is a class for creating matrix descriptors by Parsec library. + * @tparam T Data Type: float or double + * + */ + template + class ParsecDescriptor { + + public: + /** + * @brief Create a Parsec descriptor for a matrix with the given parameters. + * @param[in] apDescriptor A pointer to the existing parsec_matrix_block_cyclic_t descriptor. The new descriptor will be created based on this descriptor. + * @return A pointer to the newly created parsec_matrix_block_cyclic_t descriptor. + * + */ + static parsec_matrix_block_cyclic_t *CreateParsecDescriptor(void *apDescriptor); + + /** + * @brief destroys and finalize a descriptor + * @param[in] apDescriptor A pointer to the existing parsec_matrix_block_cyclic_t descriptor. + * @return An error code or success code. + * + */ + static int DestroyParsecDescriptor(void *apDescriptor); + }; + + /** + * @brief Instantiates the Parsec descriptor methods class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(ParsecDescriptor) + +}//namespace exageostat + +#endif //EXAGEOSTATCPP_ParsecDESCRIPTOR_HPP diff --git a/inst/include/hardware/ExaGeoStatHardware.hpp b/inst/include/hardware/ExaGeoStatHardware.hpp index 5d1ed2ea..8eb69570 100644 --- a/inst/include/hardware/ExaGeoStatHardware.hpp +++ b/inst/include/hardware/ExaGeoStatHardware.hpp @@ -18,6 +18,10 @@ #include #include +#if !DEFAULT_RUNTIME +#include +#endif + /** * @brief Class represents the hardware configuration for the ExaGeoStat solver. * @@ -113,6 +117,21 @@ class ExaGeoStatHardware { */ [[nodiscard]] static void *GetContext(exageostat::common::Computation aComputation); + /** + * @brief Sets the rank of MPI for PaRSEC. + * @param[in] aRank The new value for the rank. + * @return void + * + **/ + static void SetParsecMPIRank(int aRank); + + /** + * @brief Retrieves the rank of MPI for PaRSEC. + * @return The current rank of MPI PaRSEC. + * + **/ + static int GetParsecMPIRank(); + /** * @brief Retrieves the P dimension of the grid. * @details This function returns the current setting of the P dimension of the grid, which is part of the grid configuration used in various computational processes. @@ -147,6 +166,40 @@ class ExaGeoStatHardware { **/ static void SetQGrid(int aQ); +#if !DEFAULT_RUNTIME + /** + * @brief Retrieves the HiCMA parameters. + * @details This function returns a pointer to the current HiCMA parameters used in the computational process. + * @return A pointer to the current HiCMA parameters of type `hicma_parsec_params_t`. + * + */ + static hicma_parsec_params_t* GetHicmaParams(); + + /** + * @brief Retrieves the STARSH parameters. + * @details This function returns a pointer to the current STARSH parameters used in the computational process. + * @return A pointer to the current STARSH parameters of type `starsh_params_t`. + * + */ + static starsh_params_t* GetParamsKernel(); + + /** + * @brief Retrieves the HiCMA data. + * @details This function returns a pointer to the current HiCMA data used in the computational process. + * @return A pointer to the current HiCMA data of type `hicma_parsec_data_t`. + * + */ + static hicma_parsec_data_t* GetHicmaData(); + + /** + * @brief Retrieves the HiCMA matrix analysis. + * @details This function returns a pointer to the current HiCMA matrix analysis data used in the computational process. + * @return A pointer to the current HiCMA matrix analysis of type `hicma_parsec_matrix_analysis_t`. + * + */ + static hicma_parsec_matrix_analysis_t* GetAnalysis(); +#endif + private: //// Used Pointer to the Chameleon hardware context. static void *mpChameleonContext; @@ -155,11 +208,23 @@ class ExaGeoStatHardware { //// Used Pointer to the PaRSEC hardware context. static void *mpParsecContext; //// Used P-Grid + static int mParsecMPIRank; + //// Used P-Grid static int mPGrid; //// Used Q-Grid static int mQGrid; //// Used boolean to avoid re-init mpi static bool mIsMPIInit; +#if !DEFAULT_RUNTIME + //// HiCMA-specific variables - Himca_parsec_params + static std::unique_ptr mpHicmaParams; + //// HiCMA-specific variables - starsh_params_t + static std::unique_ptr mpParamsKernel; + //// HiCMA-specific variables - hicma_parsec_data_t + static std::unique_ptr mpHicmaData; + //// HiCMA-specific variables - hicma_parsec_matrix_analysis_t + static std::unique_ptr mpAnalysis; +#endif }; #endif // EXAGEOSTATCPP_EXAGEOSTATHARDWARE_HPP \ No newline at end of file diff --git a/inst/include/linear-algebra-solvers/LinearAlgebraMethods.hpp b/inst/include/linear-algebra-solvers/LinearAlgebraMethods.hpp index 2b35a6c9..251d6319 100644 --- a/inst/include/linear-algebra-solvers/LinearAlgebraMethods.hpp +++ b/inst/include/linear-algebra-solvers/LinearAlgebraMethods.hpp @@ -49,9 +49,6 @@ namespace exageostat::linearAlgebra { */ virtual ~LinearAlgebraMethods() = default; - // TODO: Since the common linear algebra fn between HiCMA andChameleon won't work with PaRSEC, - // consider move them and make this file as an Interface for virtual fns -#if DEFAULT_RUNTIME /** * @brief Initializes the descriptors necessary for the linear algebra solver. * @details This method initializes the descriptors necessary for the linear algebra solver. @@ -130,6 +127,84 @@ namespace exageostat::linearAlgebra { dataunits::Locations *apLocation3, const int &aDistanceMetric, const kernels::Kernel &aKernel); + /** + * @brief Calculates the log likelihood value of a given value theta. + * @param[in,out] aData DescriptorData object to be populated with descriptors and data. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in] apTheta Optimization parameter used by NLOPT. + * @param[in] apMeasurementsMatrix measurements matrix to be stored in DescZ. + * @param[in] aKernel Reference to the kernel object to use. + * @return log likelihood value + * + */ + virtual T ExaGeoStatMLETile(std::unique_ptr> &aData, + configurations::Configurations &aConfigurations, const double *apTheta, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) = 0; + + + /** + * @brief Copies a matrix in the tile layout from source to destination + * @param[in] aUpperLower Specifies the part of the matrix A to be copied to B. + * @param[in] apA Source matrix A. + * @param[in,out] apB Destination matrix B. On exit, B = A in the locations specified by Upper Lower. + * @return void + * + */ + virtual void ExaGeoStatLapackCopyTile(const common::UpperLower &aUpperLower, void *apA, void *apB) = 0; + + /** + * @brief Wait for the completion of a sequence. + * @param[in] apSequence apSequence A pointer to either CHAMELEON or HiCMA sequence. + * @return void + * + */ + virtual void ExaGeoStatSequenceWait(void *apSequence) = 0; + + /** + * @brief Create Sequence. + * @param[out] apSequence A pointer to either CHAMELEON or HiCMA sequence. + * @return void + * + */ + virtual void + ExaGeoStatCreateSequence(void *apSequence) = 0; + + /** + * @brief Computes the Cholesky factorization of a symmetric positive definite or Symmetric positive definite matrix. + * @param[in] aUpperLower Whether upper or lower part of the matrix A. + * @param[in, out] apA Symmetric matrix A. + * @param[in] aBand Diagonal thickness parameter. + * @param[in] apCD Additional matrix CD. + * @param[in] apCrk Additional matrix Crk. + * @param[in] aMaxRank Maximum rank parameter. + * @param[in] aAcc Accuracy parameter. + * @return void + * + */ + virtual void + ExaGeoStatPotrfTile(const common::UpperLower &aUpperLower, void *apA, int aBand, void *apCD, void *apCrk, + const int &aMaxRank, const int &aAcc) = 0; + + /** + * @brief Solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B. + * @param[in] aSide Specifies whether op(A) appears on the left or on the right of X. + * @param[in] aUpperLower Specifies whether the matrix A is upper triangular or lower triangular. + * @param[in] aTrans Specifies the form of op( A ) to be used in the matrix multiplication. + * @param[in] aDiag Specifies whether or not A is unit triangular. + * @param[in] aAlpha Specifies the scalar alpha. When alpha is zero, A is not referenced and B need not be set before entry. + * @param[in] apA The triangular matrix A. + * @param[in] apCD Additional matrix CD. + * @param[in] apCrk Additional matrix Crk. + * @param[in, out] apZ The matrix B of dimension, on exit is overwritten by the solution matrix X. + * @param[in] aMaxRank Maximum rank parameter. + * @return void + * + */ + virtual void ExaGeoStatTrsmTile(const common::Side &aSide, const common::UpperLower &aUpperLower, + const common::Trans &aTrans, const common::Diag &aDiag, const T &aAlpha, + void *apA, void *apCD, void *apCrk, void *apZ, const int &aMaxRank) = 0; + + /** * @brief Solve a positive definite linear system of equations AX = B using tiled algorithms. * @param[in] aUpperLower Specifies whether the matrix A is upper triangular or lower triangular. @@ -210,6 +285,22 @@ namespace exageostat::linearAlgebra { */ void ExaGeoStatDesc2Lap(T *apA, const int &aLDA, void *apDescA, const common::UpperLower &aUpperLower); +#ifdef USE_HICMA + + /** + * @brief Copy Descriptor Matrix to another Descriptor matrix. + * @param[out] apSourceDesc Descriptor matrix to be copied + * @param[out] apDestinationDesc Descriptor matrix to be copied to. + * @param[in] aSize Size of matrix to be copied. + * @param[in] aDirection Specifies the type of Descriptors to be copied. + * @return void + * + */ + void CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, + const common::CopyDirection &aDirection); + +#endif + /** * @brief Sets the values of all or part of a two-dimensional Tile. * @param[in] aUpperLower Specifies Specifies whether the upper or lower triangular part of the covariance matrix is stored. @@ -308,129 +399,6 @@ namespace exageostat::linearAlgebra { * */ bool Recover(char *apPath, const int &aIterationCount, T *apTheta, T *apLogLik, const int &aNumParams); -#endif - - /** - * @brief Copy Descriptor Matrix to another Descriptor matrix. - * @param[out] apSourceDesc Descriptor matrix to be copied - * @param[out] apDestinationDesc Descriptor matrix to be copied to. - * @param[in] aSize Size of matrix to be copied. - * @param[in] aDirection Specifies the type of Descriptors to be copied. - * @return void - * - */ - virtual void CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) = 0; - - /** - * @brief The Gateway for the Modeling Operation - * @param[in,out] aData DescriptorData object to be populated with descriptors and data. - * @param[in] aConfigurations Configurations object containing relevant settings. - * @param[in] apTheta Optimization parameter used by NLOPT. - * @param[in] apMeasurementsMatrix measurements matrix to be stored in DescZ. - * @param[in] aKernel Reference to the kernel object to use. - * @return log likelihood value - * - */ - virtual T ModelingOperations(std::unique_ptr> &aData, - configurations::Configurations &aConfigurations, const double *apTheta, - T *apMeasurementsMatrix, const kernels::Kernel &aKernel) = 0; - - /** - * @brief Copies a matrix in the tile layout from source to destination - * @param[in] aUpperLower Specifies the part of the matrix A to be copied to B. - * @param[in] apA Source matrix A. - * @param[in,out] apB Destination matrix B. On exit, B = A in the locations specified by Upper Lower. - * @return void - * - */ - virtual void ExaGeoStatLapackCopyTile(const common::UpperLower &aUpperLower, void *apA, void *apB) = 0; - - /** - * @brief Wait for the completion of a sequence. - * @param[in] apSequence apSequence A pointer to either CHAMELEON or HiCMA sequence. - * @return void - * - */ - virtual void ExaGeoStatSequenceWait(void *apSequence) = 0; - - /** - * @brief Create Sequence. - * @param[out] apSequence A pointer to either CHAMELEON or HiCMA sequence. - * @return void - * - */ - virtual void - ExaGeoStatCreateSequence(void *apSequence) = 0; - - /** - * @brief Computes the Cholesky factorization of a symmetric positive definite or Symmetric positive definite matrix. - * @param[in] aUpperLower Whether upper or lower part of the matrix A. - * @param[in, out] apA Symmetric matrix A. - * @param[in] aBand Diagonal thickness parameter. - * @param[in] apCD Additional matrix CD. - * @param[in] apCrk Additional matrix Crk. - * @param[in] aMaxRank Maximum rank parameter. - * @param[in] aAcc Accuracy parameter. - * @return void - * - */ - virtual void - ExaGeoStatPotrfTile(const common::UpperLower &aUpperLower, void *apA, int aBand, void *apCD, void *apCrk, - const int &aMaxRank, const int &aAcc) = 0; - - /** - * @brief Solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B. - * @param[in] aSide Specifies whether op(A) appears on the left or on the right of X. - * @param[in] aUpperLower Specifies whether the matrix A is upper triangular or lower triangular. - * @param[in] aTrans Specifies the form of op( A ) to be used in the matrix multiplication. - * @param[in] aDiag Specifies whether or not A is unit triangular. - * @param[in] aAlpha Specifies the scalar alpha. When alpha is zero, A is not referenced and B need not be set before entry. - * @param[in] apA The triangular matrix A. - * @param[in] apCD Additional matrix CD. - * @param[in] apCrk Additional matrix Crk. - * @param[in, out] apZ The matrix B of dimension, on exit is overwritten by the solution matrix X. - * @param[in] aMaxRank Maximum rank parameter. - * @return void - * - */ - virtual void ExaGeoStatTrsmTile(const common::Side &aSide, const common::UpperLower &aUpperLower, - const common::Trans &aTrans, const common::Diag &aDiag, const T &aAlpha, - void *apA, void *apCD, void *apCrk, void *apZ, const int &aMaxRank) = 0; - - /** - * @brief Performs a SYRK (symmetric rank-k update) operation on the matrix. - * @param[in] aConfigurations Configurations object containing relevant settings. - * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. - * - */ - virtual void - ExaGeoStatSYRK(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) = 0; - - /** - * @brief Performs TLR Cholesky operation on the matrix. - * @param[in] aConfigurations Configurations object containing relevant settings. - * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. - * - */ - virtual void ExaGeoStatTLRCholesky(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) = 0; - - /** - * @brief Calculates norm. - * @param[in] aConfigurations Configurations object containing relevant settings. - * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. - * - */ - virtual void ExaGeoStatNorm(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) =0; - - /** - * @brief Calculates the Mean Squared Error (MSE). - * @param[in] aConfigurations Reference to Configurations object containing needed parameters. - * @param[out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. - * @return the calculated MSE. - * - */ - virtual double CalculateMSE(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) =0; }; /** diff --git a/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp b/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp index 0b421af8..b42c46c8 100644 --- a/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp +++ b/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp @@ -17,7 +17,6 @@ #define EXAGEOSTATCPP_CHAMELEONIMPLEMENTATION_HPP #include -#include namespace exageostat::linearAlgebra { @@ -32,12 +31,12 @@ namespace exageostat::linearAlgebra { /** * @brief Calculates the log likelihood value of a given value theta. - * @copydoc LinearAlgebraMethods::ModelingOperations() + * @copydoc LinearAlgebraMethods::ExaGeoStatMLETile() * */ - T ModelingOperations(std::unique_ptr > &aData, - configurations::Configurations &aConfigurations, const double *theta, - T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; + T ExaGeoStatMLETile(std::unique_ptr> &aData, + configurations::Configurations &aConfigurations, const double *theta, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; /** * @brief Copies a matrix in the tile layout from source to destination @@ -55,14 +54,6 @@ namespace exageostat::linearAlgebra { const common::Trans &aTrans, const common::Diag &aDiag, const T &aAlpha, void *apA, void *apCD, void *apCrk, void *apZ, const int &aMaxRank) override; - /** - * @brief Copy Descriptor Matrix to another Descriptor matrix. - * @copydoc LinearAlgebraMethods::CopyDescriptors() - * - */ - void CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) override; - /** * @brief Wait for the completion of a sequence. * @copydoc LinearAlgebraMethods::ExaGeoStatSequenceWait() @@ -78,42 +69,13 @@ namespace exageostat::linearAlgebra { */ void ExaGeoStatCreateSequence(void *apSequence) override; - - /** - * @brief Performs a SYRK (symmetric rank-k update) operation on the matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatSYRK() - * - */ - void ExaGeoStatSYRK(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - /** - * @brief Performs TLR Cholesky operation on the matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatTLRCholesky() - * - */ - void ExaGeoStatTLRCholesky(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - /** - * @brief Calculates norm. - * @copydoc LinearAlgebraMethods::ExaGeoStatNorm() - * - */ - void ExaGeoStatNorm(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - /** - * @brief Calculates the Mean Squared Error (MSE). - * @copydoc LinearAlgebraMethods::CalculateMSE() - * - */ - double CalculateMSE(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - }; + /** + * @brief Instantiates the Chameleon Implementation class for float and double types. + * @tparam T Data Type: float or double + * + */ EXAGEOSTAT_INSTANTIATE_CLASS(ChameleonImplementation) }//namespace exageostat diff --git a/inst/include/linear-algebra-solvers/concrete/hicma/tlr/HicmaImplementation.hpp b/inst/include/linear-algebra-solvers/concrete/hicma/tlr/HicmaImplementation.hpp index b7667205..4c488a3d 100644 --- a/inst/include/linear-algebra-solvers/concrete/hicma/tlr/HicmaImplementation.hpp +++ b/inst/include/linear-algebra-solvers/concrete/hicma/tlr/HicmaImplementation.hpp @@ -50,11 +50,11 @@ namespace exageostat::linearAlgebra::tileLowRank { /** * @brief Calculates the log likelihood value of a given value theta. - * @copydoc LinearAlgebraMethods::ModelingOperations() + * @copydoc LinearAlgebraMethods::ExaGeoStatMLETile() */ - T ModelingOperations(std::unique_ptr > &aData, - configurations::Configurations &aConfigurations, const double *theta, - T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; + T ExaGeoStatMLETile(std::unique_ptr> &aData, + configurations::Configurations &aConfigurations, const double *theta, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; /** * @brief Copies a matrix in the tile layout from source to destination @@ -91,46 +91,6 @@ namespace exageostat::linearAlgebra::tileLowRank { const common::Trans &aTrans, const common::Diag &aDiag, const T &aAlpha, void *apA, void *apCD, void *apCrk, void *apZ, const int &aMaxRank) override; - /** - * @brief Copy Descriptor Matrix to another Descriptor matrix. - * @copydoc LinearAlgebraMethods::CopyDescriptors() - * - */ - void CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) override; - - /** - * @brief Performs a SYRK (symmetric rank-k update) operation on the matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatSYRK() - * - */ - void ExaGeoStatSYRK(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - /** - * @brief Performs TLR Cholesky operation on the matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatTLRCholesky() - * - */ - void ExaGeoStatTLRCholesky(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - /** - * @brief Calculates norm. - * @copydoc LinearAlgebraMethods::ExaGeoStatNorm() - * - */ - void ExaGeoStatNorm(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - - /** - * @brief Calculates the Mean Squared Error (MSE). - * @copydoc LinearAlgebraMethods::CalculateMSE() - * - */ - double CalculateMSE(configurations::Configurations &aConfigurations, - std::unique_ptr > &aData) override; - }; /** diff --git a/inst/include/linear-algebra-solvers/concrete/parsec/ParsecImplementation.hpp b/inst/include/linear-algebra-solvers/concrete/parsec/ParsecImplementation.hpp deleted file mode 100644 index 6d7df8cd..00000000 --- a/inst/include/linear-algebra-solvers/concrete/parsec/ParsecImplementation.hpp +++ /dev/null @@ -1,122 +0,0 @@ - -// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, -// All rights reserved. -// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). - -/** - * @file ParsecImplementation.hpp - * @brief This file contains the declaration of ParsecImplementation class. - * @details ParsecImplementation is a concrete implementation of the LinearAlgebraMethods class. - * @version 2.0.0 - * @author Mahmoud ElKarargy - * @author Sameh Abdulah - * @date 2024-10-15 -**/ - -#ifndef EXAGEOSTATCPP_PARSECIMPLEMENTATION_HPP -#define EXAGEOSTATCPP_PARSECIMPLEMENTATION_HPP - -#include - -namespace exageostat::linearAlgebra { - - /** - * @brief ParsecImplementation is a concrete implementation of LinearAlgebraMethods class. - * @tparam T Data Type: float or double - * - */ - template - class ParsecImplementation : public LinearAlgebraMethods { - - /** - * @brief Performs a SYRK (symmetric rank-k update) operation on the matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatSYRK() - * - */ - void ExaGeoStatSYRK(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) override; - - /** - * @brief Performs TLR Cholesky operation on the matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatTLRCholesky() - * - */ - void ExaGeoStatTLRCholesky(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) override; - - /** - * @brief Calculates norm. - * @copydoc LinearAlgebraMethods::ExaGeoStatNorm() - * - */ - void ExaGeoStatNorm(configurations::Configurations &aConfigurations, - std::unique_ptr> &aData) override; - - /** - * @brief Calculates the Mean Squared Error (MSE). - * @copydoc LinearAlgebraMethods::CalculateMSE() - * - */ - double CalculateMSE(configurations::Configurations &aConfigurations, - std::unique_ptr> &aData) override; - - /** - * @brief The Gateway for the Modeling Operation - * @copydoc LinearAlgebraMethods::ModelingOperations() - * - */ - virtual T ModelingOperations(std::unique_ptr> &aData, - configurations::Configurations &aConfigurations, const double *apTheta, - T *apMeasurementsMatrix, const kernels::Kernel &aKernel) = 0; - - /** - * @brief Copies a matrix in the tile layout from source to destination - * @copydoc LinearAlgebraMethods::ExaGeoStatLapackCopyTile() - * - */ - virtual void ExaGeoStatLapackCopyTile(const common::UpperLower &aUpperLower, void *apA, void *apB) = 0; - - /** - * @brief Wait for the completion of a sequence. - * @copydoc LinearAlgebraMethods::ExaGeoStatSequenceWait() - * - */ - virtual void ExaGeoStatSequenceWait(void *apSequence) = 0; - - /** - * @brief Create Sequence. - * @copydoc LinearAlgebraMethods::ExaGeoStatCreateSequence() - * - */ - virtual void - ExaGeoStatCreateSequence(void *apSequence) = 0; - - /** - * @brief Computes the Cholesky factorization of a symmetric positive definite or Symmetric positive definite matrix. - * @copydoc LinearAlgebraMethods::ExaGeoStatPotrfTile() - * - */ - virtual void - ExaGeoStatPotrfTile(const common::UpperLower &aUpperLower, void *apA, int aBand, void *apCD, void *apCrk, - const int &aMaxRank, const int &aAcc) = 0; - - /** - * @brief Solves one of the matrix equations op( A )*X = alpha*B, or X*op( A ) = alpha*B. - * @copydoc LinearAlgebraMethods::ExaGeoStatTrsmTile() - * - */ - virtual void ExaGeoStatTrsmTile(const common::Side &aSide, const common::UpperLower &aUpperLower, - const common::Trans &aTrans, const common::Diag &aDiag, const T &aAlpha, - void *apA, void *apCD, void *apCrk, void *apZ, const int &aMaxRank) = 0; - - /** - * @brief Copy Descriptor Matrix to another Descriptor matrix. - * @copydoc LinearAlgebraMethods::CopyDescriptors() - * - */ - virtual void CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) = 0; - - }; - EXAGEOSTAT_INSTANTIATE_CLASS(ParsecImplementation) -}//namespace exageostat - -#endif //EXAGEOSTATCPP_PARSECIMPLEMENTATION_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/RuntimeSolverFactory.hpp b/inst/include/runtime-solver/RuntimeSolverFactory.hpp new file mode 100644 index 00000000..f8034fb8 --- /dev/null +++ b/inst/include/runtime-solver/RuntimeSolverFactory.hpp @@ -0,0 +1,52 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file RuntimeSolverFactory.hpp + * @brief Header file for the RuntimeSolverFactory class, which creates runtime solvers based on the configured runtime. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_RUNTIMESOLVERFACTORY_HPP +#define EXAGEOSTATCPP_RUNTIMESOLVERFACTORY_HPP + +#include + +#include +#include +#include + +namespace exageostat::runtimesolver { + + /** + * @class RuntimeSolverFactory + * @brief A class that creates linear algebra solvers based on the input computation type. + * @tparam T Data Type: float or double. + * + */ + template + class RuntimeSolverFactory { + public: + + /** + * @brief Creates a linear algebra solver based on the input computation type. + * @return Pointer to the created linear algebra solver. + * + */ + static std::unique_ptr> CreateRuntimeSolver(); + }; + + /** + * @brief Instantiates the Runtime Solver Factory class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(RuntimeSolverFactory) + +}//namespace exageostat + +#endif //EXAGEOSTATCPP_RUNTIMESOLVERFACTORY_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/RuntimeSolverMethods.hpp b/inst/include/runtime-solver/RuntimeSolverMethods.hpp new file mode 100644 index 00000000..acd87cf4 --- /dev/null +++ b/inst/include/runtime-solver/RuntimeSolverMethods.hpp @@ -0,0 +1,54 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file RuntimeSolverMethods.hpp + * @brief Header file for the RuntimeSolverMethods class, which defines the interface for runtime solvers. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_RUNTIMESOLVERMETHODS_HPP +#define EXAGEOSTATCPP_RUNTIMESOLVERMETHODS_HPP + +#include +#include + +namespace exageostat::runtimesolver { + + /** + * @class RuntimeSolverMethods + * @brief A class that defines the interface for linear algebra solvers. + * @tparam T Data Type: float or double. + * + */ + template + class RuntimeSolverMethods { + public: + + /** + * @brief Virtual destructor to allow calls to the correct concrete destructor. + * + */ + virtual ~RuntimeSolverMethods() = default; + + /** + * @brief The Gateway for the Modeling Operation + * @param[in,out] aData DescriptorData object to be populated with descriptors and data. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in] apMeasurementsMatrix measurements matrix to be stored in DescZ. + * @param[in] aKernel Reference to the kernel object to use. + * @return log likelihood value + * + */ + virtual T ModelingOperations(std::unique_ptr> &aData, configurations::Configurations &aConfigurations, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) = 0; + + }; +}//namespace exageostat + +#endif //EXAGEOSTATCPP_RUNTIMESOLVERMETHODS_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/concrete/ParsecRuntimeSolver.hpp b/inst/include/runtime-solver/concrete/ParsecRuntimeSolver.hpp new file mode 100644 index 00000000..0f88bb2a --- /dev/null +++ b/inst/include/runtime-solver/concrete/ParsecRuntimeSolver.hpp @@ -0,0 +1,83 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecRuntimeSolver.hpp + * @brief This file contains the declaration of ParsecRuntimeSolver class. + * @details ParsecRuntimeSolver is a concrete implementation of the RuntimeSolverMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_PARSECRUNTIMESOLVER_HPP +#define EXAGEOSTATCPP_PARSECRUNTIMESOLVER_HPP + +#include + +namespace exageostat::runtimesolver { + + /** + * @brief ParsecRuntimeSolver is a concrete implementation of RuntimeSolverMethods class for dense or diagonal-super tile matrices. + * @tparam T Data Type: float or double + * + */ + template + class ParsecRuntimeSolver : public RuntimeSolverMethods { + public: + + /** + * @brief Calculates the log likelihood value of a given value theta. + * @copydoc RuntimeSolverMethods::ModelingOperations() + * + */ + T ModelingOperations(std::unique_ptr > &aData, configurations::Configurations &aConfigurations, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; + + /** + * @brief Performs a SYRK (symmetric rank-k update) operation on the matrix. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * + */ + void ExaGeoStatSYRK(std::unique_ptr> &aData); + + /** + * @brief Performs TLR Cholesky operation on the matrix. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * + */ + void ExaGeoStatTLRCholesky(std::unique_ptr> &aData); + + /** + * @brief Calculates norm. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * + */ + double ExaGeoStatNorm(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + + /** + * @brief Calculates the Mean Squared Error (MSE). + * @param[in] aConfigurations Reference to Configurations object containing needed parameters. + * @param[out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. + * @return the calculated MSE. + * + */ + double CalculateMSE(configurations::Configurations &aConfigurations, + std::unique_ptr> &aData); + + }; + + /** + * @brief Instantiates the Parsec Runtime Solver class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(ParsecRuntimeSolver) +}//namespace exageostat + +#endif //EXAGEOSTATCPP_PARSECRUNTIMESOLVER_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/concrete/StarpuRuntimeSolver.hpp b/inst/include/runtime-solver/concrete/StarpuRuntimeSolver.hpp new file mode 100644 index 00000000..09c32b04 --- /dev/null +++ b/inst/include/runtime-solver/concrete/StarpuRuntimeSolver.hpp @@ -0,0 +1,62 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file StarpuRuntimeSolver.hpp + * @brief This file contains the declaration of StarpuRuntimeSolver class. + * @details StarpuRuntimeSolver is a concrete implementation of the RuntimeSolverMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_STARPURUNTIMESOLVER_HPP +#define EXAGEOSTATCPP_STARPURUNTIMESOLVER_HPP + +#include +#include + +namespace exageostat::runtimesolver { + + /** + * @brief StarpuRuntimeSolver is a concrete implementation of RuntimeSolverMethods class for dense or diagonal-super tile matrices. + * @tparam T Data Type: float or double + * + */ + template + class StarpuRuntimeSolver : public RuntimeSolverMethods { + public: + + /** + * @brief Calculates the log likelihood value of a given value theta. + * @copydoc RuntimeSolverMethods::ModelingOperations() + * + */ + T ModelingOperations(std::unique_ptr > &aData, + configurations::Configurations &aConfigurations, T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; + + + /** + * @brief Objective function used in optimization, and following the NLOPT objective function format. + * @param[in] aTheta An array of length n containing the current point in the parameter space. + * @param[in] aGrad An array of length n where you can optionally return the gradient of the objective function. + * @param[in] apInfo pointer containing needed configurations and data. + * @return double MLE results. + * + */ + static double DataModelingAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo); + + }; + + /** + * @brief Instantiates the Starpu Runtime Solver class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(StarpuRuntimeSolver) +}//namespace exageostat + +#endif //EXAGEOSTATCPP_STARPURUNTIMESOLVER_HPP \ No newline at end of file diff --git a/inst/include/runtime/parsec/JDFHelperFunctions.h b/inst/include/runtime/parsec/JDFHelperFunctions.h new file mode 100644 index 00000000..bcba3377 --- /dev/null +++ b/inst/include/runtime/parsec/JDFHelperFunctions.h @@ -0,0 +1,76 @@ +/** + * @file JDFHelperFunctions.h + * @brief A header file for declarations of JDF helper functions. + * @details Contains function prototypes for JDF operations, including computations, + * transformations, file I/O, and MPI-related data handling for matrix and data structures. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-10-20 +**/ + +#include +#include + +/** + * @brief Calculates a unique single index from given dimensions. + * @param[in] aN The row index. + * @param[in] aM The column index. + * @return The calculated single index. + * + */ +int CalculateSingleIndex(int aN, int aM); + +/** + * @brief Sums the elements of a double-precision data matrix. + * @param[in] apData Pointer to the data matrix. + * @param[in] aColumn The number of columns in the matrix. + * @param[in] aRow The number of rows in the matrix. + * @return The sum of matrix elements as a double. + * + */ +double SumDoubleData(double *apData, int aColumn, int aRow); + +/** + * @brief Sums the elements of a complex double-precision data matrix. + * @param[in] apData Pointer to the complex data matrix. + * @param[in] aColumn The number of columns in the matrix. + * @param[in] aRow The number of rows in the matrix. + * @return The sum of matrix elements as a complex double. + * @return void + * + */ +complex double SumComplexData(complex double *apData, int aColumn, int aRow); + +/** + * @brief Performs forward Spherical Harmonic Transform (SHT) calculations. + * @param[in,out] apFlm Pointer to SHT coefficients. + * @param[in] apF_data Pointer to spatial data for transformation. + * @param[in] aFDataM Number of rows in spatial data. + * @param[in] aFDataN Number of columns in spatial data. + * @param[in] apEt1, apEt2, apEp, apSlmn, apIe, apIo, apP Arrays for intermediate calculations. + * @param[in] aEt1M, aEt2M, aEpM, aEpN, aSlmnN, aSlmnM, aIeM, aIeN, aIoM, aIoN Dimensions of respective arrays. + * @param[in] apD, apGmtheta_r, apFmnm, apTmp1, apTmp2 Additional intermediate matrices. + * @param[in] aL Maximum degree for the SHT. + * @return void + * + */ +void ForwardSHTHelper(double *apFlm, complex double *apF_data, int aFDataM, int aFDataN, + complex double *apEt1, int aEt1M, complex double *apEt2, int aEt2M, + complex double *apEp, int aEpM, int aEpN, complex double *apSlmn, + int aSlmnN, int aSlmnM, complex double *apIe, int aIeM, int aIeN, + complex double *apIo, int aIoM, int aIoN, complex double *apP, + int aPM, int aPN, complex double *apD, complex double *apGmtheta_r, + complex double *apFmnm, complex double *apTmp1, + complex double *apTmp2, int aL); + +/** + * @brief Performs inverse Spherical Harmonic Transform (SHT) calculations. + * @param[in] apFlm Pointer to coefficients from forward SHT. + * @param[out] apF_spatial Pointer to result spatial data. + * @param[in] apZlm, apSC, apSmt Arrays for intermediate calculations. + * @param[in] aL Maximum degree for the SHT. + * @return void + * + */ +void InverseSHTHelper(double *apFlm, double *apF_spatial, double *apZlm, double *apSC, + double *apSmt, int aL); diff --git a/inst/include/runtime/parsec/ParsecHeader.hpp b/inst/include/runtime/parsec/ParsecHeader.h similarity index 100% rename from inst/include/runtime/parsec/ParsecHeader.hpp rename to inst/include/runtime/parsec/ParsecHeader.h diff --git a/inst/include/runtime/parsec/jdf/JobDescriptionFormat.h b/inst/include/runtime/parsec/jdf/JobDescriptionFormat.h new file mode 100644 index 00000000..3312f713 --- /dev/null +++ b/inst/include/runtime/parsec/jdf/JobDescriptionFormat.h @@ -0,0 +1,266 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file JobDescriptionFormat.h + * @brief A header file for parsec generated functions from jdf files. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-10-08 +**/ + +#include + +/** + * @brief Reads a CSV file into a matrix description. + * @details This function reads data from a CSV file and populates the specified matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSV(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a specific time slot from a CSV file. + * @details This function extracts data from a specified time slot in a CSV file and updates the matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSVTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, + int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a complex CSV file for a specific time slot. + * @details This function reads complex data from a CSV file corresponding to a specific time slot. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSVToComplexTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, + int aNB, int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a complex CSV file. + * @details This function reads complex data from a CSV file and updates the matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSVComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, + int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a complex CSV file into a matrix description. + * @details This function reads complex data from a CSV file and populates the specified matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSVToComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, + int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Performs forward spherical harmonic transform. + * @details This function computes the forward spherical harmonic transform using the provided data descriptors. + * @param[in] apContext Pointer to the parsec context. + * @param[in] apFDataDesc Pointer to the data descriptor for the forward transform. + * @param[in] apFLMDesc Pointer to the descriptor for the spherical harmonic coefficients. + * @param[in] apFLMTDesc Pointer to the descriptor for the transformed spherical harmonic coefficients. + * @param[in] apET1Desc Pointer to the first auxiliary descriptor. + * @param[in] apET2Desc Pointer to the second auxiliary descriptor. + * @param[in] apEPDesc Pointer to the endpoint descriptor. + * @param[in] apSLMNDesc Pointer to the descriptor for Spherical Harmonic Matrix. + * @param[in] apIEDesc Pointer to the input descriptor. + * @param[in] apIODesc Pointer to the output descriptor. + * @param[in] apPDesc Pointer to the parameter descriptor. + * @param[in] apDDesc Pointer to the descriptor for intermediate data. + * @param[in] aFDataM Number of rows in the forward data matrix. + * @param[in] aEPN Number of endpoints. + * @param[in] aET1M Number of rows in the first auxiliary matrix. + * @param[in] aET2M Number of rows in the second auxiliary matrix. + * @param[in] aPN Number of processes. + * @param[in] aFlmM Number of rows in the spherical harmonic coefficients matrix. + * @param[in] aFlmN Number of columns in the spherical harmonic coefficients matrix. + * @param[in] aLSize Size of the spherical harmonic basis. + * @return 0 on success, negative value on error. + * + */ +int ForwardSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmM, int aFlmN, int aLSize); + +/** + * @brief Computes the element-wise difference between two matrices. + * @details This function calculates the difference between corresponding elements + * of two matrices, `apDescA` and `apDescB`, which are described by a block-cyclic distribution. + * @param[in] apContext Pointer to the PaRSEC context (`parsec_context_t`) in which the computation will be performed. + * @param[in] apDescA Pointer to the descriptor of the first matrix. + * @param[in] apDescB Pointer to the descriptor of the second matrix. + * @return Returns 0 on successful completion or an error otherwise. + * + */ +int DifferenceDouble(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDescA, parsec_matrix_block_cyclic_t *apDescB); + +/** + * @brief Performs forward spherical harmonic transform with reshaping. + * @details This function computes the forward spherical harmonic transform and reshapes the resulting data. + * @param[in] apContext Pointer to the parsec context. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] apFDataDesc Pointer to the data descriptor for the forward transform. + * @param[in] apFLMDesc Pointer to the descriptor for the spherical harmonic coefficients. + * @param[in] apFLMTDesc Pointer to the descriptor for the transformed spherical harmonic coefficients. + * @param[in] apET1Desc Pointer to the first auxiliary descriptor. + * @param[in] apET2Desc Pointer to the second auxiliary descriptor. + * @param[in] apEPDesc Pointer to the endpoint descriptor. + * @param[in] apSLMNDesc Pointer to the descriptor for Spherical Harmonic Matrix. + * @param[in] apIEDesc Pointer to the input descriptor. + * @param[in] apIODesc Pointer to the output descriptor. + * @param[in] apPDesc Pointer to the parameter descriptor. + * @param[in] apDDesc Pointer to the descriptor for intermediate data. + * @param[in] apADesc Pointer to the descriptor for A matrix. + * @param[in] aFDataM Number of rows in the forward data matrix. + * @param[in] aEPN Number of endpoints. + * @param[in] aET1M Number of rows in the first auxiliary matrix. + * @param[in] aET2M Number of rows in the second auxiliary matrix. + * @param[in] aPN Number of processes. + * @param[in] aFlmTNB Block size parameter specific to the reshaping of spherical harmonic coefficients. + * @param[in] aT Transform-specific parameter, often used to define temporal or frequency characteristics. + * @param[in] aLSize Size of the spherical harmonic basis, determining the resolution of the transformation. + * @param[out] apNormGlobal Pointer to an array storing global normalization values applied during the transform. + * @param[in] aNT Number of transformations applied, defining the iteration count for computation. + * @param[in] aUpperLower Specifies the range for the spherical harmonic transformation, either upper or lower spherical components. + * @return 0 on success, negative value on error. + * + */ +int ForwardSHTReshape(parsec_context_t *apContext, int aRank, int aVerbose, parsec_tiled_matrix_t *apFDataDesc, + parsec_tiled_matrix_t *apFLMDesc, parsec_tiled_matrix_t *apFLMTDesc, + parsec_tiled_matrix_t *apET1Desc, parsec_tiled_matrix_t *apET2Desc, + parsec_tiled_matrix_t *apEPDesc, parsec_tiled_matrix_t *apSLMNDesc, + parsec_tiled_matrix_t *apIEDesc, parsec_tiled_matrix_t *apIODesc, + parsec_tiled_matrix_t *apPDesc, parsec_tiled_matrix_t *apDDesc, + parsec_tiled_matrix_t *apADesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmTNB, int aT, int aLSize, double *apNormGlobal, + int aNT, int aUpperLower); + +/** + * @brief Computes the mean squared error between data and spatial descriptors. + * @details This function calculates the mean squared error (MSE) between two matrix descriptors, which represent + * the observed data and the spatial data for a given size of spherical harmonic basis. + * @param[in] apContext Pointer to the parsec context. + * @param[in] apFDataDesc Pointer to the matrix descriptor for the observed data. + * @param[in] apFSpatialDesc Pointer to the matrix descriptor for the spatial data. + * @param[in] aLSize Size of the spherical harmonic basis. + * @return 0 on success, negative value on error. + * + */ +int MeanSquaredError(parsec_context_t *apContext, parsec_matrix_block_cyclic_t* apFDataDesc, + parsec_matrix_block_cyclic_t* apFSpatialDesc, int aLSize); + +/** + * @brief Computes the norm of a tiled matrix. + * @details This function calculates the norm of a matrix represented by the given tiled matrix descriptor. + * The result can be computed for the entire matrix or for specific tiles based on the provided parameters. + * @param[in] apContext Pointer to the parsec context, which holds information about the execution environment. + * @param[in] apNormGlobal The global norm value to be updated. + * @param[in] apADesc Pointer to the descriptor of the tiled matrix for which the norm is being computed. + * @param[in] aNT The number of tiles in the tiled matrix. This indicates how many submatrices will be considered. + * @param[in] aIsSymmetric flag to specify if the data matrix is symmetric or not. + * @param[in] aUpperLower Specifies whether to calculate the norm for the upper or lower triangular part of the matrix. + * @return void + * + */ +void GetMatrixNorm(parsec_context_t *apContext, double *apNormGlobal, parsec_tiled_matrix_t *apADesc, + int aNT, int aUpperLower, int aIsSymmetric); + +/** + * @brief Performs inverse spherical harmonic transform. + * @details This function computes the inverse spherical harmonic transform using the provided data descriptors. + * @param[in] apContext Pointer to the parsec context. + * @param[in] apFSpatialDesc Pointer to the descriptor for the spherical harmonic coefficients. + * @param[in] apFLMTDesc Pointer to the descriptor for the transformed spherical harmonic coefficients. + * @param[in] apZLMDesc Pointer to the second auxiliary descriptor. + * @param[in] apSCDesc Pointer to the input descriptor. + * @param[in] aLSize Size of the spherical harmonic basis. + * @return 0 on success, negative value on error. + * + */ +int InverseSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFSpatialDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apZLMDesc, parsec_tiled_matrix_t *apSCDesc, int aLSize); + +/** + * @brief Compresses a matrix using a specified compression strategy and parameters. + * @details This function applies matrix compression based on the parameters provided. + * @param[in] apContex Pointer to the Parsec context, which manages task execution and dataflow. + * @param[in] apNormGlobal The global norm value to be updated. + * @param[in] aUpperLower Integer flag indicating which triangular part of the matrix to compress. + * @param[in] aBandSizeDense Band size for the dense region of the matrix in the compression process. + * @param[in] aNT Number of tiles in one dimension of the matrix. + * @param[in] aMaxRank Maximum allowable rank for the compressed matrix. Controls the degree of compression. + * @param[in] aN Total number of columns in the matrix, used to define the matrix dimensions. + * @param[in] aAdaptiveDecision Flag indicating whether to enable adaptive compression. + * @param[in] aTolerance Compression tolerance value, used to decide the accuracy of the compressed matrix. + * @param[in] aSendFullTile Flag indicating whether to transmit full tiles as part of the compression,. + * @param[in] aAutoBand Flag to enable automatic adjustment of the band size during compression. + * @param[in] aGpus Number of GPUs to utilize in the compression. + * @param[in] apHicmaData The HiCMA data struct of descriptors + * @param[in] apParamsKernel The Starsh struct of kernels + * @return void + * + */ +void MatrixCompress(parsec_context_t *apContext, double *apNormGlobal, int aUpperLower, int aBandSizeDense, int aNT, + int aMaxRank, int aN, int aAdaptiveDecision, int aTolerance, int aSendFullTile, int aAutoBand, + int aGpus, hicma_parsec_data_t *apHicmaData, starsh_params_t *apParamsKernel); \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 713d0b05..d7bf24f7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,18 +16,20 @@ add_subdirectory(kernels) add_subdirectory(helpers) add_subdirectory(data-units) add_subdirectory(runtime) -add_subdirectory(linear-algebra-solvers) add_subdirectory(api) add_subdirectory(results) add_subdirectory(hardware) add_subdirectory(data-generators) add_subdirectory(data-loader) add_subdirectory(prediction) +add_subdirectory(runtime-solver) # Check the value of RUNTIME_TYPE and configure chameleon/starpu accordingly if (RUNTIME_TYPE STREQUAL "PARSEC") add_subdirectory(data-analyzer) add_subdirectory(data-transformer) +else() + add_subdirectory(linear-algebra-solvers) endif () if (USE_R) @@ -37,7 +39,7 @@ endif () # Set the name of the library to be created. set(LIB_NAME ${PROJECT_NAME}) # Create the library with the specified source files and linking libraries. -add_library(${LIB_NAME} SHARED ${SOURCES}) +add_library(${LIB_NAME} SHARED ${SOURCES} ${JDF_GENERATED_SOURCES}) target_compile_definitions(${LIB_NAME} PUBLIC ${COMPILE_DEFINITIONS}) target_link_libraries(${LIB_NAME} ${LIBS}) diff --git a/src/api/ExaGeoStat.cpp b/src/api/ExaGeoStat.cpp index 1e2a371e..b9dc6a2d 100644 --- a/src/api/ExaGeoStat.cpp +++ b/src/api/ExaGeoStat.cpp @@ -13,11 +13,10 @@ #include #include -#include #include +#include using namespace std; -using namespace nlopt; using namespace exageostat::api; using namespace exageostat::generators; @@ -39,6 +38,7 @@ void ExaGeoStat::ExaGeoStatLoadData(Configurations &aConfigurations, std::uni aData = data_generator->CreateData(aConfigurations, *pKernel); delete pKernel; LOGGER("\t*Data generation/loading finished*") + } template @@ -53,57 +53,11 @@ T ExaGeoStat::ExaGeoStatDataModeling(Configurations &aConfigurations, std::un // Add the data modeling arguments. aConfigurations.InitializeDataModelingArguments(); - int parameters_number = pKernel->GetParametersNumbers(); - int max_number_of_iterations = aConfigurations.GetMaxMleIterations(); - // Setting struct of data to pass to the modeling. - auto modeling_data = new mModelingData(aData, aConfigurations, *apMeasurementsMatrix, *pKernel); - // Create nlopt - double opt_f; - opt optimizing_function(nlopt::LN_BOBYQA, parameters_number); - // Initialize problem's bound. - optimizing_function.set_lower_bounds(aConfigurations.GetLowerBounds()); - optimizing_function.set_upper_bounds(aConfigurations.GetUpperBounds()); - optimizing_function.set_ftol_abs(aConfigurations.GetTolerance()); - // Set max iterations value. - optimizing_function.set_maxeval(max_number_of_iterations); - // TODO: ON API level it should be consistent regardless of the runtime being implemented -#if DEFAULT_RUNTIME - optimizing_function.set_max_objective(ModelingAPI, (void *) modeling_data); -#endif - // Optimize mle using nlopt. - optimizing_function.optimize(aConfigurations.GetStartingTheta(), opt_f); - aConfigurations.SetEstimatedTheta(aConfigurations.GetStartingTheta()); - - auto theta = aConfigurations.GetStartingTheta(); - - LOGGER("--> Final Theta Values (", true) - for (int i = 0; i < parameters_number; i++) { - LOGGER_PRECISION(theta[i]) - if (i != parameters_number - 1) { - LOGGER_PRECISION(", ") - } - } - LOGGER_PRECISION(")") - LOGGER("") - - delete pKernel; - delete modeling_data; - return optimizing_function.last_optimum_value(); -} - -template -double ExaGeoStat::ModelingAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo) { - - auto config = ((mModelingData *) apInfo)->mpConfiguration; - auto data = ((mModelingData *) apInfo)->mpData; - auto measurements = ((mModelingData *) apInfo)->mpMeasurementsMatrix; - auto kernel = ((mModelingData *) apInfo)->mpKernel; - // We do Date Modeling with any computation. - auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver( - config->GetComputation()); - - return linear_algebra_solver->ModelingOperations(*data, *config, aTheta.data(), measurements, *kernel); + auto runtime_solver = runtimesolver::RuntimeSolverFactory::CreateRuntimeSolver(); + T result = runtime_solver->ModelingOperations(aData, aConfigurations, apMeasurementsMatrix, *pKernel); + delete pKernel; + return result; } @@ -124,12 +78,3 @@ void ExaGeoStat::ExaGeoStatPrediction(Configurations &aConfigurations, std::u delete pKernel; } -template -void ExaGeoStat::ExaGeoStatTransformData(Configurations &aConfigurations, std::unique_ptr> &aData){ - -} - -template -void ExaGeoStat::ExaGeoStatDataAnalyzer(Configurations &aConfigurations, std::unique_ptr> &aData){ - -} diff --git a/src/configurations/Configurations.cpp b/src/configurations/Configurations.cpp index 0e77e2c6..cc0d790d 100644 --- a/src/configurations/Configurations.cpp +++ b/src/configurations/Configurations.cpp @@ -36,7 +36,7 @@ Configurations::Configurations() { SetGPUsNumbers(0); SetPGrid(1); SetQGrid(1); - SetMaxRank(1); + SetMaxRank(-1); SetIsOOC(false); SetKernelName(""); SetDimension(Dimension2D); @@ -80,6 +80,9 @@ Configurations::Configurations() { SetFileNumber(1); SetEnableInverse(false); SetMPIIO(true); + SetTolerance(0); + // TODO: currently, we support real data only in parsec. In the future, we should support synthetic and real data for both runtimes + SetIsSynthetic(false); #endif } @@ -133,7 +136,7 @@ void Configurations::InitializeArguments(const int &aArgC, char **apArgV, const } else if (argument_name == "--gpus" || argument_name == "--GPUsNumbers" || argument_name == "--gpu_number" || argument_name == "--ngpus") { SetGPUsNumbers(CheckNumericalValue(argument_value)); - } else if (argument_name == "--DTS" || argument_name == "--dts" || argument_name == "--Dts" || argument_name == "--NB") { + } else if (argument_name == "--DTS" || argument_name == "--dts" || argument_name == "--Dts") { SetDenseTileSize(CheckNumericalValue(argument_value)); } else if (argument_name == "--LTS" || argument_name == "--lts" || argument_name == "--Lts") { SetLowTileSize(CheckNumericalValue(argument_value)); @@ -238,6 +241,16 @@ void Configurations::InitializeArguments(const int &aArgC, char **apArgV, const if (GetKernelName().empty()) { throw domain_error("You need to set the Kernel, before starting"); } + if(GetMaxRank() == -1){ + SetMaxRank(1); + } +#else + if(GetMaxRank() == -1){ + SetMaxRank(GetDenseTileSize() / 2); + } + if(GetTolerance() >= 0){ + SetTolerance(8); + } #endif size_t found = GetKernelName().find("NonGaussian"); @@ -308,11 +321,19 @@ void Configurations::InitializeDataGenerationArguments() { } if (GetDimension() != DimensionST) { if (GetTimeSlot() != 1) { +#if DEFAULT_RUNTIME throw std::runtime_error("Time Slot can only be greater than 1 if the dimensions are set to SpaceTime."); +#endif } } else if (GetTimeSlot() < 1) { throw std::runtime_error("Time Slot must be at least 1 if the dimensions are set to SpaceTime."); } + +#if !DEFAULT_RUNTIME + if (GetDataPath().empty()) { + throw domain_error("You need to set the data path, before starting"); + } +#endif } void Configurations::InitializeDataModelingArguments() { @@ -672,6 +693,7 @@ void Configurations::PrintSummary() { if (!mFirstInit) { LOGGER("********************SUMMARY**********************") +#if DEFAULT_RUNTIME if (this->GetIsSynthetic()) { LOGGER("#Synthetic Data generation") } else { @@ -716,6 +738,12 @@ void Configurations::PrintSummary() { if (this->GetIsOOC()) { LOGGER("#Out Of Core (OOC) technology is enabled") } +#else + LOGGER("\t#L: " << this->GetDenseTileSize() << "\t\t\t\t#T: " << this->GetTimeSlot()) + LOGGER("\t#NB: " << this->GetDenseTileSize() << "\t\t\t\t#gpus: " << this->GetGPUsNumbers()) + LOGGER("\t#Nodes: " << this->GetCoresNumber() << "\t\t\t#Time slot per file: " << GetTimeSlotPerFile()); + LOGGER("\t#Number of files: " << this->GetFileNumber() << "\t#File per node: " << ((this->GetFileNumber()%this->GetCoresNumber())? this->GetFileNumber()/this->GetCoresNumber()+1 : this->GetFileNumber()/this->GetCoresNumber())) +#endif LOGGER("*************************************************") mFirstInit = true; } diff --git a/src/data-analyzer/DataAnalyzer.cpp b/src/data-analyzer/DataAnalyzer.cpp index 942c9df8..f7e42149 100644 --- a/src/data-analyzer/DataAnalyzer.cpp +++ b/src/data-analyzer/DataAnalyzer.cpp @@ -9,19 +9,53 @@ * @version 2.0.0 * @author Mahmoud ElKarargy * @author Sameh Abdulah + * @author Qinglei Cao * @date 2024-02-04 **/ #include -using namespace exageostat::configurations; +extern "C" { +#include +} + +using namespace exageostat::analyzer; +using namespace exageostat::common; template -static void AnalyzeMatrix(Configurations &aConfigurations, std::unique_ptr> &aData){ +void +DataAnalyzer::PreAnalyzeMatrix(std::unique_ptr> &aData){ + + auto *pParams = ExaGeoStatHardware::GetHicmaParams(); + auto *pHicma_data = ExaGeoStatHardware::GetHicmaData(); + auto *pAnalysis = ExaGeoStatHardware::GetAnalysis(); + auto *pStarsh_kernel = ExaGeoStatHardware::GetParamsKernel(); + auto *pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + hicma_parsec_matrix_pre_analysis(pContext, pHicma_data, pParams, pStarsh_kernel, pAnalysis); } template -static double CompareMatDifference(Configurations &aConfigurations, std::unique_ptr> &aData){ +void +DataAnalyzer::PostAnalyzeMatrix(std::unique_ptr> &aData){ + + auto *pParams = ExaGeoStatHardware::GetHicmaParams(); + auto *pHicma_data = ExaGeoStatHardware::GetHicmaData(); + auto *pAnalysis = ExaGeoStatHardware::GetAnalysis(); + auto *pStarsh_kernel = ExaGeoStatHardware::GetParamsKernel(); + auto *pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + + hicma_parsec_matrix_post_analysis(pContext, pHicma_data, pParams, pStarsh_kernel, pAnalysis); +} + +template +double +DataAnalyzer::CompareMatDifference(std::unique_ptr> &aData){ + + // Get parsec descriptors + auto flm_desc = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR,DescriptorName::DESCRIPTOR_FLM).parsec_desc; + auto flm_era_desc = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR,DescriptorName::DESCRIPTOR_FLMERA).parsec_desc; + // Call jdf generated function + DifferenceDouble((parsec_context_t *)ExaGeoStatHardware::GetParsecContext(), flm_desc, flm_era_desc); } diff --git a/src/data-generators/DataGenerator.cpp b/src/data-generators/DataGenerator.cpp index dd4bec8b..7ef8405e 100644 --- a/src/data-generators/DataGenerator.cpp +++ b/src/data-generators/DataGenerator.cpp @@ -13,44 +13,31 @@ #include #include -#include +#include +using namespace exageostat::common; using namespace exageostat::generators; -using namespace exageostat::dataLoader::csv; +using namespace exageostat::dataLoader; using namespace exageostat::generators::synthetic; -using namespace exageostat::common; -using namespace exageostat::results; template std::unique_ptr> DataGenerator::CreateGenerator(configurations::Configurations &apConfigurations) { - - //// TODO: In case of other file support, Then we can create another layer for the factory creation depending on the file size. - // Check the used Data generation method, whether it's synthetic or real. - aDataSourceType = apConfigurations.GetIsSynthetic() ? SYNTHETIC : CSV_FILE; - - // Return DataGenerator unique pointer of Synthetic type - if (aDataSourceType == SYNTHETIC) { - Results::GetInstance()->SetIsSynthetic(true); + if (apConfigurations.GetIsSynthetic()){ + aIsSynthetic = true; return std::unique_ptr>(SyntheticGenerator::GetInstance()); - } else if (aDataSourceType == CSV_FILE) { - Results::GetInstance()->SetIsSynthetic(false); - return std::unique_ptr>(CSVLoader::GetInstance()); } else { - throw std::runtime_error("Data Loading for this file type is unsupported for now"); + aIsSynthetic = false; + return DataLoader::CreateDataLoader(apConfigurations); } } template DataGenerator::~DataGenerator() { - // Return DataGenerator unique pointer of Synthetic type - if (aDataSourceType == SYNTHETIC) { + if (aIsSynthetic) { SyntheticGenerator::GetInstance()->ReleaseInstance(); - } else if (aDataSourceType == CSV_FILE) { - CSVLoader::GetInstance()->ReleaseInstance(); } else { - std::cerr << "Data Loading for this file type is unsupported for now" << std::endl; - std::exit(1); + DataLoader::ReleaseDataLoader(); } } -template DataSourceType DataGenerator::aDataSourceType = SYNTHETIC; +template bool DataGenerator::aIsSynthetic = true; diff --git a/src/data-generators/concrete/SyntheticGenerator.cpp b/src/data-generators/concrete/SyntheticGenerator.cpp index dea29b27..ef5b191b 100644 --- a/src/data-generators/concrete/SyntheticGenerator.cpp +++ b/src/data-generators/concrete/SyntheticGenerator.cpp @@ -14,8 +14,14 @@ #include #include +#if !DEFAULT_RUNTIME +#include +#else #include +#endif +//TODO: we need to make WriteData a function outside the csv, So it can be used whatever the runtime is. +// currently, it has an implementation for the CSVLoader and an empty body for the parsec loader using namespace exageostat::generators::synthetic; using namespace exageostat::common; using namespace exageostat::configurations; diff --git a/src/data-loader/DataLoader.cpp b/src/data-loader/DataLoader.cpp index 65874148..de566c2d 100644 --- a/src/data-loader/DataLoader.cpp +++ b/src/data-loader/DataLoader.cpp @@ -12,60 +12,41 @@ * @date 2023-02-14 **/ -#include +#if !DEFAULT_RUNTIME +#include +#else +#include +#endif using namespace std; using namespace exageostat::dataLoader; -using namespace exageostat::common; -using namespace exageostat::results; template std::unique_ptr> DataLoader::CreateData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) { - // create vectors that will be populated with read data. - vector measurements_vector; - vector x_locations; - vector y_locations; - vector z_locations; - - aKernel.SetPValue(aConfigurations.GetTimeSlot()); - int p = aKernel.GetVariablesNumber(); - - //Read the data out of the CSV file. - this->ReadData(aConfigurations, measurements_vector, x_locations, y_locations, z_locations, p); - - //create data object - auto data = std::make_unique>(aConfigurations.GetProblemSize() / p, - aConfigurations.GetDimension()); + auto data = this->LoadData(aConfigurations, aKernel); + return data; +} - //Initialize the descriptors. - auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(EXACT_DENSE); +template +std::unique_ptr> +DataLoader::CreateDataLoader(exageostat::configurations::Configurations &apConfigurations){ - // TODO: May need to get refactored to avoid the if/else guards #if DEFAULT_RUNTIME - linear_algebra_solver->InitiateDescriptors(aConfigurations, *data->GetDescriptorData(), p); - linear_algebra_solver->ExaGeoStatLaSetTile(EXAGEOSTAT_UPPER_LOWER, 0, 0, - data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, - DESCRIPTOR_C).chameleon_desc); - //populate data object with read data - for (int i = 0; i < aConfigurations.GetProblemSize() / p; i++) { - data->GetLocations()->GetLocationX()[i] = x_locations[i]; - data->GetLocations()->GetLocationY()[i] = y_locations[i]; - if (aConfigurations.GetDimension() != Dimension2D) { - data->GetLocations()->GetLocationZ()[i] = z_locations[i]; - } - } - for (int i = 0; i < aConfigurations.GetProblemSize(); i++) { - ((T *) data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, - DESCRIPTOR_Z).chameleon_desc->mat)[i] = measurements_vector[i]; - } + return std::unique_ptr>(csv::CSVLoader::GetInstance()); +#else + return std::unique_ptr>(parsec::ParsecLoader::GetInstance()); #endif +} - Results::GetInstance()->SetGeneratedLocationsNumber(aConfigurations.GetProblemSize() / p); - Results::GetInstance()->SetIsLogger(aConfigurations.GetLogger()); - Results::GetInstance()->SetLoggerPath(aConfigurations.GetLoggerPath()); +template +void DataLoader::ReleaseDataLoader() { - return data; -} +#if DEFAULT_RUNTIME + csv::CSVLoader::GetInstance()->ReleaseInstance(); +#else + parsec::ParsecLoader::GetInstance()->ReleaseInstance(); +#endif +} \ No newline at end of file diff --git a/src/data-loader/concrete/CMakeLists.txt b/src/data-loader/concrete/CMakeLists.txt index 88180c80..7792dea3 100644 --- a/src/data-loader/concrete/CMakeLists.txt +++ b/src/data-loader/concrete/CMakeLists.txt @@ -10,8 +10,16 @@ # @date 2023-02-14 # Add source files to the parent scope -set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/CSVLoader.cpp - ${SOURCES} - PARENT_SCOPE - ) \ No newline at end of file +if (RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/CSVLoader.cpp + ${SOURCES} + PARENT_SCOPE + ) +elseif (RUNTIME_TYPE STREQUAL "PARSEC") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ParsecLoader.cpp + ${SOURCES} + PARENT_SCOPE + ) +endif() diff --git a/src/data-loader/concrete/CSVLoader.cpp b/src/data-loader/concrete/CSVLoader.cpp index bfd153cd..2c9266d8 100644 --- a/src/data-loader/concrete/CSVLoader.cpp +++ b/src/data-loader/concrete/CSVLoader.cpp @@ -4,8 +4,8 @@ // ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). /** - * @file CSVDataGenerator.cpp - * @brief Implementation of the CSVDataGenerator class + * @file CSVLoader.cpp + * @brief Implementation of the CSVLoader class * @version 1.1.0 * @author Mahmoud ElKarargy * @author Sameh Abdulah @@ -21,6 +21,8 @@ using namespace std; using namespace exageostat::configurations; using namespace exageostat::common; using namespace exageostat::dataLoader::csv; +using namespace exageostat::results; +using namespace exageostat::dataunits; template CSVLoader *CSVLoader::GetInstance() { @@ -231,6 +233,52 @@ void CSVLoader::WriteData(const T &aMatrixPointer, const int &aProblemSize, c p_file_synthetic.close(); } +template +std::unique_ptr> +CSVLoader::LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) { + // create vectors that will be populated with read data. + vector measurements_vector; + vector x_locations; + vector y_locations; + vector z_locations; + + aKernel.SetPValue(aConfigurations.GetTimeSlot()); + int p = aKernel.GetVariablesNumber(); + + //Read the data out of the CSV file. + this->ReadData(aConfigurations, measurements_vector, x_locations, y_locations, z_locations, p); + + //create data object + auto data = std::make_unique>(aConfigurations.GetProblemSize() / p, + aConfigurations.GetDimension()); + + //Initialize the descriptors. + auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(EXACT_DENSE); + + linear_algebra_solver->InitiateDescriptors(aConfigurations, *data->GetDescriptorData(), p); + linear_algebra_solver->ExaGeoStatLaSetTile(EXAGEOSTAT_UPPER_LOWER, 0, 0, + data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, + DESCRIPTOR_C).chameleon_desc); + //populate data object with read data + for (int i = 0; i < aConfigurations.GetProblemSize() / p; i++) { + data->GetLocations()->GetLocationX()[i] = x_locations[i]; + data->GetLocations()->GetLocationY()[i] = y_locations[i]; + if (aConfigurations.GetDimension() != Dimension2D) { + data->GetLocations()->GetLocationZ()[i] = z_locations[i]; + } + } + for (int i = 0; i < aConfigurations.GetProblemSize(); i++) { + ((T *) data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, + DESCRIPTOR_Z).chameleon_desc->mat)[i] = measurements_vector[i]; + } + + Results::GetInstance()->SetGeneratedLocationsNumber(aConfigurations.GetProblemSize() / p); + Results::GetInstance()->SetIsLogger(aConfigurations.GetLogger()); + Results::GetInstance()->SetLoggerPath(aConfigurations.GetLoggerPath()); + + return data; +} + template void CSVLoader::ReleaseInstance() { if (mpInstance != nullptr) { diff --git a/src/data-loader/concrete/ParsecLoader.cpp b/src/data-loader/concrete/ParsecLoader.cpp new file mode 100644 index 00000000..d3167b1e --- /dev/null +++ b/src/data-loader/concrete/ParsecLoader.cpp @@ -0,0 +1,284 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file SyntheticGenerator.cpp + * @brief Implementation of the SyntheticGenerator class + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2023-02-14 +**/ + +#include +#include +extern "C"{ +#include +} + +using namespace std; + +using namespace exageostat::dataLoader::parsec; +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::dataunits; +using namespace exageostat::transformers; + +template +ParsecLoader *ParsecLoader::GetInstance() { + + if (mpInstance == nullptr) { + mpInstance = new ParsecLoader(); + } + return mpInstance; +} + +template +std::unique_ptr> +ParsecLoader::LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) { + + SYNC_TIME_START(); + //create data object + auto data = std::make_unique>(aConfigurations.GetProblemSize() / 1, + aConfigurations.GetDimension()); + + // Initiate Descriptors + int L = aConfigurations.GetDenseTileSize(); + int MB; + int NB; + int t = aConfigurations.GetTimeSlot(); + int P = aConfigurations.GetPGrid(); + int nodes = aConfigurations.GetCoresNumber(); + int rank = ExaGeoStatHardware::GetParsecMPIRank(); + int verbose = configurations::Configurations::GetVerbosity() == DETAILED_MODE? 1: 0; + int gpus = aConfigurations.GetGPUsNumbers(); + string files_directory_path = aConfigurations.GetDataPath(); + int path_length = files_directory_path.length(); + char filename[path_length + 50]; + char directory_path[path_length]; + sprintf(directory_path, "%s", files_directory_path.c_str()); + + MB = L + 1; + NB = L * 2; + VERBOSE_PRINT(rank, verbose, ("Reading f_data\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA); + parsec_matrix_block_cyclic_t *pF_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA).parsec_desc; + ReadCSVToComplexTimeSlot((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pF_data_desc, MB, NB, nodes, t, directory_path, rank, verbose, gpus); + + MB = 2*L-1; + NB = L+1; + VERBOSE_PRINT(rank, verbose, ("Reading Et1\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1); + parsec_matrix_block_cyclic_t *pEt1_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Et1.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pEt1_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L-1; + NB = L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Et2\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2); + parsec_matrix_block_cyclic_t *pEt2_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Et2.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pEt2_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Ep\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP); + parsec_matrix_block_cyclic_t *pEp_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Ep.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pEp_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = (L*L+L)/2; + NB = L; + VERBOSE_PRINT(rank, verbose, ("Reading Slmn\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN); + parsec_matrix_block_cyclic_t *pSlum_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Slmn.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pSlum_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Ie\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE); + parsec_matrix_block_cyclic_t *pIe_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Ie.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pIe_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Io\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO); + parsec_matrix_block_cyclic_t *pIo_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Io.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pIo_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L-1; + NB = L+1; + VERBOSE_PRINT(rank, verbose, ("Reading P\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P); + parsec_matrix_block_cyclic_t *pP_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_P.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pP_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L-1; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading D\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D); + parsec_matrix_block_cyclic_t *pD_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_D.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pD_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L; + NB = L; + VERBOSE_PRINT(rank, verbose, ("Reading flm\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM); + parsec_matrix_block_cyclic_t *pFlm_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + ReadCSVTimeSlot((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFlm_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + VERBOSE_PRINT(rank, verbose, ("Reading flmERA\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMERA); + parsec_matrix_block_cyclic_t *pFlmera_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMERA).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_flmERA.csv"); + ReadCSVTimeSlot((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFlmera_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + // Backward + if(aConfigurations.GetEnableInverse()){ + + MB = L+1; + NB = (L*L+L)/2; + VERBOSE_PRINT(rank, verbose, ("Reading Zlm\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ZLM); + parsec_matrix_block_cyclic_t *PZlm_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ZLM).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_Zlm.csv"); + ReadCSV((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), PZlm_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L-1; + NB = 2*L; + VERBOSE_PRINT(rank, verbose, ("Reading SC\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SC); + parsec_matrix_block_cyclic_t *pSc_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SC).parsec_desc; + sprintf(filename, "%s/%s", files_directory_path.c_str(), "720_SC.csv"); + ReadCSV((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pSc_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L+1; + NB = 2*L; + VERBOSE_PRINT(rank, verbose, ("f_spatial\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_SPATIAL); + parsec_matrix_block_cyclic_t *pF_spatial_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_SPATIAL).parsec_desc; + ReadCSV((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pF_spatial_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + } + + // Init and allocate memory for desc_flmT + MB = L * L; + NB = t; + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT); + parsec_matrix_block_cyclic_t *pFlmt_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT).parsec_desc; + parsec_matrix_block_cyclic_init(pFlmt_data_desc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, L*((MB/nodes%L) ? MB/nodes/L+1 : MB/nodes/L), + NB, MB, NB, 0, 0, MB, NB, nodes, 1, 1, 1, 0, 0); + + pFlmt_data_desc->mat = parsec_data_allocate((size_t)pFlmt_data_desc->super.nb_local_tiles * + (size_t)pFlmt_data_desc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(pFlmt_data_desc->super.mtype)); + parsec_data_collection_set_key((parsec_data_collection_t*)&desc_flmT, "desc_flmT"); + + // Init and allocate memory for pA_data_desc + MB = L * L; + NB = t; + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_A); + parsec_matrix_block_cyclic_t *pA_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_A).parsec_desc; + parsec_matrix_block_cyclic_init(pA_data_desc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, L, L, MB, NB, 0, 0, + pFlmt_data_desc->super.mb, pFlmt_data_desc->super.nb, P, nodes/P, 1, 1, 0, 0); + pA_data_desc->mat = parsec_data_allocate((size_t)pA_data_desc->super.nb_local_tiles * + (size_t)pA_data_desc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(pA_data_desc->super.mtype)); + parsec_data_collection_set_key((parsec_data_collection_t*)&pA_data_desc, "desc_A"); + + if(aConfigurations.GetEnableInverse()){ + int ts_test_M = 2000; + int ts_test_N = 1; + // Allocate memory + double *pFileContent = (double *)malloc(ts_test_M * ts_test_N * sizeof(double)); + sprintf(filename, "%s/%s", files_directory_path.c_str(), "ts_test.csv"); + ReadCSVFileHelper(filename, pFileContent, ts_test_M, ts_test_N); + } + + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Load Data\n")); + // Forward SHT + DataTransformer::ForwardSphericalHarmonicsTransform(L, data); + // Forward SHT Reshape + DataTransformer::ForwardReshape(aConfigurations, data); + // Generate matrix + CompressMatrixHelper(aConfigurations, data); + + return data; +} + +template +int ParsecLoader::ReadCSVFileHelper(const char* apFilename, double *apFileContent, int aM, int aN) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); + return -1; + } + + int status = 0; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + status = fscanf(pFile, "%lf,", &apFileContent[i * aN + j]); + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } + } + } + fclose(pFile); + + return 0; +} + +template +void ParsecLoader::CompressMatrixHelper(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) { + + int max_rank = aConfigurations.GetMaxRank(); + int n = aConfigurations.GetProblemSize(); + int adaptive_decision = aConfigurations.GetAdaptiveDecision(); + int tol = aConfigurations.GetTolerance(); + int send_full_tile = 0; + int auto_band = 0; + int gpus = aConfigurations.GetGPUsNumbers(); + double upper_lower = EXAGEOSTAT_LOWER; + int L = aConfigurations.GetDenseTileSize(); + int N = aConfigurations.GetProblemSize(); + int NT = (N % L == 0) ? (N/L) : (N/L + 1); + + SYNC_TIME_START(); + MatrixCompress((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), &ExaGeoStatHardware::GetHicmaParams()->norm_global, upper_lower, L, NT, max_rank, n, + adaptive_decision, tol, send_full_tile, auto_band, gpus, ExaGeoStatHardware::GetHicmaData(), ExaGeoStatHardware::GetParamsKernel()); + + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Matrix genneration Matrix norm: norm_global= %le\n", ExaGeoStatHardware::GetHicmaParams()->norm_global)); +} + +template +void ParsecLoader::ReadData(Configurations &aConfigurations, vector &aMeasurementsMatrix, vector &aXLocations, + vector &aYLocations, vector &aZLocations, const int &aP) { +} + +template +void ParsecLoader::WriteData(const T &aMatrixPointer, const int &aProblemSize, const int &aP, std::string &aLoggerPath, Locations &aLocations) { + +} + +template +void ParsecLoader::ReleaseInstance() { + if (mpInstance != nullptr) { + mpInstance = nullptr; + } +} + +template ParsecLoader *ParsecLoader::mpInstance = nullptr; diff --git a/src/data-transformer/DataTransformer.cpp b/src/data-transformer/DataTransformer.cpp index 2145204e..bbca781b 100644 --- a/src/data-transformer/DataTransformer.cpp +++ b/src/data-transformer/DataTransformer.cpp @@ -9,24 +9,105 @@ * @version 2.0.0 * @author Mahmoud ElKarargy * @author Sameh Abdulah + * @author Qinglei Cao * @date 2024-02-04 **/ #include +extern "C"{ + #include +} +using namespace exageostat::common; using namespace exageostat::configurations; +using namespace exageostat::transformers; + +template void DataTransformer::ForwardSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData){ + + SYNC_TIME_START(); + parsec_tiled_matrix_t *pFDataDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA).parsec_desc; + parsec_tiled_matrix_t *pFLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + parsec_tiled_matrix_t *pFLMTDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT).parsec_desc; + parsec_tiled_matrix_t *pET1Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1).parsec_desc; + parsec_tiled_matrix_t *pET2Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2).parsec_desc; + parsec_tiled_matrix_t *pEPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP).parsec_desc; + parsec_tiled_matrix_t *pSLMNDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN).parsec_desc; + parsec_tiled_matrix_t *pIEDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE).parsec_desc; + parsec_tiled_matrix_t *pIODesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO).parsec_desc; + parsec_tiled_matrix_t *pPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P).parsec_desc; + parsec_tiled_matrix_t *pDDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D).parsec_desc; + + int f_desc_M = pFDataDesc->mb; + int ep_desc_N = pEPDesc->nb; + int et1_desc_M = pET1Desc->mb; + int et2_desc_M = pET2Desc->mb; + int p_desc_N = pPDesc->nb; + int flm_desc_M = pFLMDesc->mb; + int flm_desc_N = pFLMDesc->nb; + + ForwardSHT((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFDataDesc, pFLMDesc, + pFLMTDesc, pET1Desc, pET2Desc, pEPDesc, pSLMNDesc, pIEDesc, pIODesc, pPDesc, + pDDesc, f_desc_M, ep_desc_N, et1_desc_M, et2_desc_M, p_desc_N, flm_desc_M, flm_desc_N, aLSize); -template -static void ForwardSHT(Configurations &aConfigurations, std::unique_ptr> &aData){ + int flops_forward = 2.0*(aLSize+1)*(2*aLSize-1)*(2*aLSize) // Gmtheta_r = f_data*Ep + + 2.0*(2*aLSize-1)*(2*aLSize-1)*(aLSize+1) // Fmnm = Et1*Gmtheta_r + + 2.0*(2*aLSize-1)*(aLSize-1)*(aLSize+1) // tmp1 = Et2*P + + 2.0*(2*aLSize-1)*(2*aLSize-1)*(aLSize+1) // tmp2 = tmp1 * Gmtheta_r + + 2.0*(2*aLSize-1)*(2*aLSize-1)*(2*aLSize-1) // Fmnm += tmp2 * D + + 2.0*aLSize*aLSize/2*(aLSize*(2*aLSize-1)+aLSize); // flmn_matrix(ell+1,m+1) = Slmn(climate_emulator_getSingleIndex(ell, m),:)*Ie*Fmnm(:,L+m) + + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Forward SHT: %.2lf Gflop/s\n", flops_forward/sync_time_elapsed/1.0e9)); } -template -static void ForwardReshape(Configurations &aConfigurations, std::unique_ptr> &aData){ +template void DataTransformer::ForwardReshape(Configurations &aConfigurations, std::unique_ptr> &aData){ + + SYNC_TIME_START(); + int rank = ExaGeoStatHardware::GetParsecMPIRank(); + int verbose = configurations::Configurations::GetVerbosity() == DETAILED_MODE? 1: 0; + int L = aConfigurations.GetDenseTileSize(); + int t = aConfigurations.GetTimeSlot(); + + parsec_tiled_matrix_t *pFDataDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA).parsec_desc; + parsec_tiled_matrix_t *pFLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + parsec_tiled_matrix_t *pFLMTDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT).parsec_desc; + parsec_tiled_matrix_t *pET1Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1).parsec_desc; + parsec_tiled_matrix_t *pET2Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2).parsec_desc; + parsec_tiled_matrix_t *pEPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP).parsec_desc; + parsec_tiled_matrix_t *pSLMNDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN).parsec_desc; + parsec_tiled_matrix_t *pIEDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE).parsec_desc; + parsec_tiled_matrix_t *pIODesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO).parsec_desc; + parsec_tiled_matrix_t *pPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P).parsec_desc; + parsec_tiled_matrix_t *pDDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D).parsec_desc; + parsec_tiled_matrix_t *pADesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_A).parsec_desc; + int f_desc_M = pFDataDesc->mb; + int ep_desc_N = pEPDesc->nb; + int et1_desc_M = pET1Desc->mb; + int et2_desc_M = pET2Desc->mb; + int p_desc_N = pPDesc->nb; + int flmt_desc_M = pFLMTDesc->mb; + int nodes = aConfigurations.GetCoresNumber(); + int flmt_desc_nb = (flmt_desc_M / nodes % L) ? flmt_desc_M / nodes / L + 1 : flmt_desc_M / nodes / L; + + int N = aConfigurations.GetProblemSize(); + int NT = (N % L == 0) ? (N/L) : (N/L + 1); + double upper_lower = EXAGEOSTAT_LOWER; + ForwardSHTReshape((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), rank, verbose, pFDataDesc, pFLMDesc, + pFLMTDesc, pET1Desc, pET2Desc, pEPDesc, pSLMNDesc, pIEDesc, pIODesc, pPDesc, + pDDesc, pADesc, f_desc_M, ep_desc_N, et1_desc_M, et2_desc_M, p_desc_N, flmt_desc_nb, t, L, &ExaGeoStatHardware::GetHicmaParams()->norm_global, NT, upper_lower); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Forward SHT Reshape\n")); + } -template -static void InverseSHT(Configurations &aConfigurations, std::unique_ptr> &aData){ +template void DataTransformer::InverseSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData){ + + SYNC_TIME_START(); + parsec_tiled_matrix_t *pFSpatialDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_SPATIAL).parsec_desc; + parsec_tiled_matrix_t *pFLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + parsec_tiled_matrix_t *pZLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ZLM).parsec_desc; + parsec_tiled_matrix_t *pSCDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SC).parsec_desc; + InverseSHT((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFSpatialDesc, pFLMDesc, pZLMDesc, pSCDesc, aLSize); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Inverse SHT\n")); } diff --git a/src/data-units/CMakeLists.txt b/src/data-units/CMakeLists.txt index 43a185ae..d7de6d14 100644 --- a/src/data-units/CMakeLists.txt +++ b/src/data-units/CMakeLists.txt @@ -9,10 +9,8 @@ # @author Sameh Abdulah # @date 2023-02-27 -if (RUNTIME_TYPE STREQUAL "STARPU") - # Include the concrete implementations of the ExaGeoStat Descriptor class - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/descriptor) -endif() +# Include the concrete implementations of the ExaGeoStat Descriptor class +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/descriptor) # Initialize SOURCES variable if not already initialized set(SOURCES diff --git a/src/data-units/DescriptorData.cpp b/src/data-units/DescriptorData.cpp index 34efa842..70f79b0b 100644 --- a/src/data-units/DescriptorData.cpp +++ b/src/data-units/DescriptorData.cpp @@ -21,14 +21,14 @@ using namespace exageostat::dataunits::descriptor; template DescriptorData::~DescriptorData() { -#if DEFAULT_RUNTIME - ExaGeoStatDescriptor exaGeoStatDescriptor; + ExaGeoStatDescriptor exageostat_descriptor; // Destroy descriptors. const std::string &chameleon = "_CHAMELEON"; for (const auto &pair: this->mDictionary) { const std::string &key = pair.first; +#if DEFAULT_RUNTIME if (key.find("CHAMELEON") != std::string::npos && pair.second != nullptr) { - exaGeoStatDescriptor.DestroyDescriptor(CHAMELEON_DESCRIPTOR, pair.second); + exageostat_descriptor.DestroyDescriptor(CHAMELEON_DESCRIPTOR, pair.second); #ifdef USE_HICMA // Since there are converted descriptors from Chameleon to Hicma, which have the same memory address. // So, by deleting the owner which is Chameleon, no need to delete hicma. Therefore, we remove the row of that descriptor. @@ -41,10 +41,20 @@ DescriptorData::~DescriptorData() { } #endif } else if (key.find("HICMA") != std::string::npos && pair.second != nullptr) { - exaGeoStatDescriptor.DestroyDescriptor(HICMA_DESCRIPTOR, pair.second); + exageostat_descriptor.DestroyDescriptor(HICMA_DESCRIPTOR, pair.second); } +#else + if (key.find("PARSEC") != std::string::npos && pair.second != nullptr) { + if(key == "DESCRIPTOR_FLMT_PARSEC") { + continue; + } + exageostat_descriptor.DestroyDescriptor(PARSEC_DESCRIPTOR, pair.second); + } +#endif + } this->mDictionary.clear(); +#if DEFAULT_RUNTIME if (this->mpSequence) { CHAMELEON_Sequence_Destroy((RUNTIME_sequence_t *) this->mpSequence); } @@ -123,6 +133,14 @@ DescriptorData::GetDescriptor(const DescriptorType &aDescriptorType, const De throw std::runtime_error("To use HiCMA descriptor you need to enable USE_HICMA!"); #endif } +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + if (this->mDictionary.find(GetDescriptorName(aDescriptorName) + "_PARSEC") == this->mDictionary.end()) { + descriptor.parsec_desc = nullptr; + } + descriptor.parsec_desc = (parsec_matrix_block_cyclic_t *) this->mDictionary[GetDescriptorName(aDescriptorName) + + "_PARSEC"]; + } #endif return descriptor; } @@ -136,18 +154,18 @@ void DescriptorData::SetDescriptor(const DescriptorType &aDescriptorType, con void *descriptor; std::string type; - ExaGeoStatDescriptor exaGeoStatDescriptor; + ExaGeoStatDescriptor exageostat_descriptor; #if DEFAULT_RUNTIME if (aDescriptorType == CHAMELEON_DESCRIPTOR) { - descriptor = exaGeoStatDescriptor.CreateDescriptor((CHAM_desc_t *) descriptor, aDescriptorType, aIsOOC, + descriptor = exageostat_descriptor.CreateDescriptor((CHAM_desc_t *) descriptor, aDescriptorType, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); type = "_CHAMELEON"; } else { #ifdef USE_HICMA - descriptor = exaGeoStatDescriptor.CreateDescriptor((HICMA_desc_t *) descriptor, aDescriptorType, aIsOOC, + descriptor = exageostat_descriptor.CreateDescriptor((HICMA_desc_t *) descriptor, aDescriptorType, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); type = "_HICMA"; @@ -155,12 +173,21 @@ void DescriptorData::SetDescriptor(const DescriptorType &aDescriptorType, con throw std::runtime_error("To create HiCMA descriptor you need to enable USE_HICMA!"); #endif } - if (aConverted) { type = "_CHAM_HIC"; } - this->mDictionary[GetDescriptorName(aDescriptorName) + type] = descriptor; +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + descriptor = exageostat_descriptor.CreateDescriptor((parsec_matrix_block_cyclic_t *) descriptor, aDescriptorType, aIsOOC, + apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, + aN, aP, aQ, aValidOOC); + type = "_PARSEC"; + } + else { + throw std::runtime_error("While using PaRSEC as a runtime, only PaRSEC descriptors are enabled!"); + } #endif + this->mDictionary[GetDescriptorName(aDescriptorName) + type] = descriptor; } template @@ -293,6 +320,36 @@ std::string DescriptorData::GetDescriptorName(const DescriptorName &aDescript return "DESCRIPTOR_R"; case DESCRIPTOR_R_COPY : return "DESCRIPTOR_R_COPY"; + case DESCRIPTOR_F_DATA: + return "DESCRIPTOR_F_DATA"; + case DESCRIPTOR_ET1: + return "DESCRIPTOR_ET1"; + case DESCRIPTOR_ET2: + return "DESCRIPTOR_ET2"; + case DESCRIPTOR_EP: + return "DESCRIPTOR_EP"; + case DESCRIPTOR_SLMN: + return "DESCRIPTOR_SLMN"; + case DESCRIPTOR_IE: + return "DESCRIPTOR_IE"; + case DESCRIPTOR_IO: + return "DESCRIPTOR_IO"; + case DESCRIPTOR_P: + return "DESCRIPTOR_P"; + case DESCRIPTOR_D: + return "DESCRIPTOR_D"; + case DESCRIPTOR_FLMERA: + return "DESCRIPTOR_FLMERA"; + case DESCRIPTOR_ZLM: + return "DESCRIPTOR_ZLM"; + case DESCRIPTOR_SC: + return "DESCRIPTOR_SC"; + case DESCRIPTOR_F_SPATIAL: + return "DESCRIPTOR_F_SPATIAL"; + case DESCRIPTOR_FLM: + return "DESCRIPTOR_FLM"; + case DESCRIPTOR_FLMT: + return "DESCRIPTOR_FLMT"; default: throw std::invalid_argument( "The name of descriptor you provided is undefined, Please read the user manual to know the available descriptors"); diff --git a/src/data-units/descriptor/CMakeLists.txt b/src/data-units/descriptor/CMakeLists.txt index 608bbf07..352aec33 100644 --- a/src/data-units/descriptor/CMakeLists.txt +++ b/src/data-units/descriptor/CMakeLists.txt @@ -10,9 +10,7 @@ # @date 2023-08-15 # Include the concrete implementations of the ExaGeoStat Descriptor class -if (RUNTIME_TYPE STREQUAL "STARPU") - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/concrete) -endif () +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/concrete) set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ExaGeoStatDescriptor.cpp diff --git a/src/data-units/descriptor/ExaGeoStatDescriptor.cpp b/src/data-units/descriptor/ExaGeoStatDescriptor.cpp index 5d6cd730..829684b3 100644 --- a/src/data-units/descriptor/ExaGeoStatDescriptor.cpp +++ b/src/data-units/descriptor/ExaGeoStatDescriptor.cpp @@ -15,12 +15,16 @@ #include #include + +#if DEFAULT_RUNTIME + #include #ifdef USE_HICMA - #include - +#endif +#else +#include #endif using namespace exageostat::common; @@ -28,34 +32,49 @@ using namespace exageostat::dataunits::descriptor; template void * -ExaGeoStatDescriptor::CreateDescriptor(void *apDescriptor, const common::DescriptorType &aDescriptorType, - const bool &aIsOOC, void *apMatrix, const common::FloatPoint &aFloatPoint, +ExaGeoStatDescriptor::CreateDescriptor(void *apDescriptor, const DescriptorType &aDescriptorType, + const bool &aIsOOC, void *apMatrix, const FloatPoint &aFloatPoint, const int &aMB, const int &aNB, const int &aSize, const int &aLM, const int &aLN, const int &aI, const int &aJ, const int &aM, const int &aN, const int &aP, const int &aQ, const bool &aValidOOC) { +#if DEFAULT_RUNTIME if (aDescriptorType == CHAMELEON_DESCRIPTOR) { return ChameleonDescriptor::CreateChameleonDescriptor(apDescriptor, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); - } else if (aDescriptorType == HICMA_DESCRIPTOR) { + } #ifdef USE_HICMA + if (aDescriptorType == HICMA_DESCRIPTOR) { return HicmaDescriptor::CreateHicmaDescriptor(apDescriptor, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); + } #endif +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + return ParsecDescriptor::CreateParsecDescriptor(apDescriptor); } +#endif std::cerr << "Error, please select the correct descriptor type!" << std::endl; return nullptr; } template int ExaGeoStatDescriptor::DestroyDescriptor(const DescriptorType &aDescriptorType, void *apDesc) { + +#if DEFAULT_RUNTIME if (aDescriptorType == CHAMELEON_DESCRIPTOR) { return ChameleonDescriptor::DestroyChameleonDescriptor(apDesc); - } else if (aDescriptorType == HICMA_DESCRIPTOR) { + } #ifdef USE_HICMA + if (aDescriptorType == HICMA_DESCRIPTOR) { return HicmaDescriptor::DestroyHicmaDescriptor(apDesc); + } #endif +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + return ParsecDescriptor::DestroyParsecDescriptor(apDesc); } +#endif std::cerr << "Error, please select the correct descriptor type!" << std::endl; return -1; } \ No newline at end of file diff --git a/src/data-units/descriptor/concrete/CMakeLists.txt b/src/data-units/descriptor/concrete/CMakeLists.txt index 6bb197d4..961cd150 100644 --- a/src/data-units/descriptor/concrete/CMakeLists.txt +++ b/src/data-units/descriptor/concrete/CMakeLists.txt @@ -12,14 +12,20 @@ # @date 2023-03-20 # Include the concrete implementations of the Descriptor class based on the enabled libraries (HiCMA or Chameleon) -set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/ChameleonDescriptor.cpp - ${SOURCES} - ) - -if (USE_HICMA) - list(APPEND SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/HicmaDescriptor.cpp +if (RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ChameleonDescriptor.cpp + ${SOURCES} + ) + if (USE_HICMA) + list(APPEND SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/HicmaDescriptor.cpp + ) + endif () +else () + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ParsecDescriptor.cpp + ${SOURCES} ) endif () diff --git a/src/data-units/descriptor/concrete/ParsecDescriptor.cpp b/src/data-units/descriptor/concrete/ParsecDescriptor.cpp new file mode 100644 index 00000000..63ac2283 --- /dev/null +++ b/src/data-units/descriptor/concrete/ParsecDescriptor.cpp @@ -0,0 +1,34 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecDescriptor.cpp + * @brief Defines the ParsecDescriptor class for creating matrix descriptors using the PaRSEC library. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-10-18 +**/ + +#include + +using namespace exageostat::dataunits::descriptor; + +template +parsec_matrix_block_cyclic_t * +ParsecDescriptor::CreateParsecDescriptor(void *apDescriptor) { + + parsec_matrix_block_cyclic_t *parsec_desc = new parsec_matrix_block_cyclic_t(); + return parsec_desc; +} + +template +int ParsecDescriptor::DestroyParsecDescriptor(void *apDesc) { + auto Parsec_desc = (parsec_matrix_block_cyclic_t *) apDesc; + parsec_data_free(Parsec_desc->mat); + parsec_tiled_matrix_destroy((parsec_tiled_matrix_t *) Parsec_desc); + return 0; +} \ No newline at end of file diff --git a/src/hardware/ExaGeoStatHardware.cpp b/src/hardware/ExaGeoStatHardware.cpp index d17631aa..d09105ad 100644 --- a/src/hardware/ExaGeoStatHardware.cpp +++ b/src/hardware/ExaGeoStatHardware.cpp @@ -22,7 +22,7 @@ #include #include #else -#include +#include #endif #include @@ -83,16 +83,15 @@ ExaGeoStatHardware::ExaGeoStatHardware(exageostat::configurations::Configuration int iparam[IPARAM_SIZEOF] = {0}; double dparam[DPARAM_SIZEOF]; char *cparam[CPARAM_SIZEOF]; - hicma_parsec_params_t params; - starsh_params_t params_kernel; - hicma_parsec_data_t data; - hicma_parsec_matrix_analysis_t analysis; - this->mpParsecContext = hicma_parsec_init( new_argc, new_argv, iparam, dparam, cparam, ¶ms, ¶ms_kernel, &data ); -#endif + this->mpHicmaParams = make_unique(); + this->mpParamsKernel = make_unique(); + this->mpHicmaData = make_unique(); + this->mpAnalysis = make_unique(); - LOGGER("Climate Emulator Parameters") - LOGGER("L: " << t << "\tT: " << J << "\tNB: " << t << "\tgpus: " << g) - LOGGER("nodes: " << c << "\ttime_slot_per_file: " << time_slot_per_file << "\tnum_file: " << num_file << "\tfile_per_node: " << ((num_file%c)? num_file/c+1 : num_file/c)) + this->mpParsecContext = hicma_parsec_init(new_argc, new_argv, iparam, dparam, cparam, this->mpHicmaParams.get(), this->mpParamsKernel.get(), this->mpHicmaData.get()); + SetParsecMPIRank(this->mpHicmaParams->rank); +#endif + exageostat::helpers::CommunicatorMPI::GetInstance()->SetHardwareInitialization(); } ExaGeoStatHardware::ExaGeoStatHardware(const Computation &aComputation, const int &aCoreNumber, const int &aGpuNumber, @@ -156,27 +155,35 @@ void ExaGeoStatHardware::InitHardware(const Computation &aComputation, const int void ExaGeoStatHardware::FinalizeHardware() { - #if DEFAULT_RUNTIME +#if DEFAULT_RUNTIME // finalize hardware using HiCMA -#ifdef USE_HICMA + #ifdef USE_HICMA if (mpHicmaContext) { HICMA_Finalize(); mpHicmaContext = nullptr; } -#endif + #endif // finalize hardware using Chameleon if (mpChameleonContext) { -#if defined(USE_MPI) && defined(USE_HICMA) + #if defined(USE_MPI) && defined(USE_HICMA) // Since already HiCMA do so, then no need to remove empty cache. starpu_mpi_cache_set(0); -#endif + #endif CHAMELEON_Finalize() mpChameleonContext = nullptr; } +#else + if (mpParsecContext) { -#endif + int iparam[IPARAM_SIZEOF] = {0}; + double dparam[DPARAM_SIZEOF]; + char *cparam[CPARAM_SIZEOF]; + hicma_parsec_fini((parsec_context_t *) mpParsecContext, 0, NULL, iparam, dparam, cparam, this->mpHicmaParams.get(), this->mpParamsKernel.get(), this->mpHicmaData.get(), this->mpAnalysis.get()); + mpParsecContext = nullptr; + } +#endif exageostat::helpers::CommunicatorMPI::GetInstance()->RemoveHardwareInitialization(); } @@ -217,6 +224,14 @@ void *ExaGeoStatHardware::GetContext(Computation aComputation) { return nullptr; } +void ExaGeoStatHardware::SetParsecMPIRank(int aRank){ + mParsecMPIRank = aRank; +} + +int ExaGeoStatHardware::GetParsecMPIRank() { + return mParsecMPIRank; +} + int ExaGeoStatHardware::GetPGrid() { return mPGrid; } @@ -233,9 +248,34 @@ void ExaGeoStatHardware::SetQGrid(int aQ) { mQGrid = aQ; } +#if !DEFAULT_RUNTIME +hicma_parsec_params_t* ExaGeoStatHardware::GetHicmaParams() { + return mpHicmaParams.get(); +} + +starsh_params_t* ExaGeoStatHardware::GetParamsKernel() { + return mpParamsKernel.get(); +} + +hicma_parsec_data_t* ExaGeoStatHardware::GetHicmaData() { + return mpHicmaData.get(); +} + +hicma_parsec_matrix_analysis_t* ExaGeoStatHardware::GetAnalysis() { + return mpAnalysis.get(); +} +#endif + void *ExaGeoStatHardware::mpChameleonContext = nullptr; void *ExaGeoStatHardware::mpHicmaContext = nullptr; void *ExaGeoStatHardware::mpParsecContext = nullptr; +int ExaGeoStatHardware::mParsecMPIRank = 0; int ExaGeoStatHardware::mPGrid = 1; int ExaGeoStatHardware::mQGrid = 1; bool ExaGeoStatHardware::mIsMPIInit = false; +#if !DEFAULT_RUNTIME +unique_ptr ExaGeoStatHardware::mpHicmaParams = nullptr; +unique_ptr ExaGeoStatHardware::mpParamsKernel = nullptr; +unique_ptr ExaGeoStatHardware::mpHicmaData = nullptr; +unique_ptr ExaGeoStatHardware::mpAnalysis = nullptr; +#endif \ No newline at end of file diff --git a/src/helpers/CommunicatorMPI.cpp b/src/helpers/CommunicatorMPI.cpp index b4227e26..b2d6b891 100644 --- a/src/helpers/CommunicatorMPI.cpp +++ b/src/helpers/CommunicatorMPI.cpp @@ -10,11 +10,13 @@ * @author Sameh Abdulah * @date 2023-11-10 **/ - +#include #include #if DEFAULT_RUNTIME #include #endif +#include + using namespace exageostat::helpers; CommunicatorMPI *CommunicatorMPI::GetInstance() { @@ -33,6 +35,10 @@ int CommunicatorMPI::GetRank() const { else { return CHAMELEON_Comm_rank(); } + #else + else { + return ExaGeoStatHardware::GetParsecMPIRank(); + } #endif #endif return 0; diff --git a/src/linear-algebra-solvers/LinearAlgebraFactory.cpp b/src/linear-algebra-solvers/LinearAlgebraFactory.cpp index 5a1fef26..0d72dbad 100644 --- a/src/linear-algebra-solvers/LinearAlgebraFactory.cpp +++ b/src/linear-algebra-solvers/LinearAlgebraFactory.cpp @@ -15,14 +15,13 @@ #include -#if DEFAULT_RUNTIME #include #include #ifdef USE_HICMA + #include -#endif #endif @@ -32,7 +31,6 @@ using namespace exageostat::common; template std::unique_ptr> LinearAlgebraFactory::CreateLinearAlgebraSolver(Computation aComputation) { -#if DEFAULT_RUNTIME // Check the used Linear Algebra solver library, whether it's HiCMA or Chameleon. if (aComputation == EXACT_DENSE) { @@ -51,7 +49,6 @@ std::unique_ptr> LinearAlgebraFactory::CreateLinearAl return std::make_unique>(); } -#endif // Return nullptr if no computation is selected throw std::runtime_error("You need to enable whether HiCMA or Chameleon"); } diff --git a/src/linear-algebra-solvers/LinearAlgebraMethods.cpp b/src/linear-algebra-solvers/LinearAlgebraMethods.cpp index 8a083d72..31931325 100644 --- a/src/linear-algebra-solvers/LinearAlgebraMethods.cpp +++ b/src/linear-algebra-solvers/LinearAlgebraMethods.cpp @@ -22,7 +22,6 @@ #include #include -#include using namespace std; @@ -1340,6 +1339,37 @@ LinearAlgebraMethods::ExaGeoStatGetZObs(Configurations &aConfigurations, T *a this->ExaGeoStatDesc2Lap(apZ, aSize, z_desc, UpperLower::EXAGEOSTAT_UPPER_LOWER); } +#ifdef USE_HICMA + +template +void LinearAlgebraMethods::CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, + const common::CopyDirection &aDirection) { + auto *z = new T[aSize]; + int status; + if (aDirection == common::CHAMELEON_TO_HICMA) { + status = CHAMELEON_Desc2Lap((cham_uplo_t) EXAGEOSTAT_UPPER_LOWER, (CHAM_desc_t *) apSourceDesc, z, aSize); + if (status != CHAMELEON_SUCCESS) { + throw std::runtime_error("CHAMELEON_Desc2Lap Failed!"); + } + status = HICMA_Lapack_to_Tile(z, aSize, (HICMA_desc_t *) apDestinationDesc); + if (status != HICMA_SUCCESS) { + throw std::runtime_error("HICMA_Lapack_to_Tile Failed!"); + } + } else if (aDirection == common::HICMA_TO_CHAMELEON) { + status = HICMA_Tile_to_Lapack((HICMA_desc_t *) apSourceDesc, z, aSize); + if (status != HICMA_SUCCESS) { + throw std::runtime_error("HICMA_Tile_to_Lapack Failed!"); + } + status = CHAMELEON_Lap2Desc((cham_uplo_t) EXAGEOSTAT_UPPER_LOWER, z, aSize, (CHAM_desc_t *) apDestinationDesc); + if (status != CHAMELEON_SUCCESS) { + throw std::runtime_error("CHAMELEON_Lap2Desc Failed!"); + } + } + delete[] z; +} + +#endif + template void LinearAlgebraMethods::ExaGeoStatDesc2Lap(T *apA, const int &aLDA, void *apDescA, const UpperLower &aUpperLower) { diff --git a/src/linear-algebra-solvers/concrete/CMakeLists.txt b/src/linear-algebra-solvers/concrete/CMakeLists.txt index f1989e2b..e0017fb7 100644 --- a/src/linear-algebra-solvers/concrete/CMakeLists.txt +++ b/src/linear-algebra-solvers/concrete/CMakeLists.txt @@ -25,11 +25,6 @@ if (RUNTIME_TYPE STREQUAL "STARPU") ${CMAKE_CURRENT_SOURCE_DIR}/tlr/HicmaImplementation.cpp ) endif () -elseif (RUNTIME_TYPE STREQUAL "PARSEC") - set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/parsec/ParsecImplementation.cpp - ${SOURCES} - ) endif () set(SOURCES ${SOURCES} PARENT_SCOPE) diff --git a/src/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.cpp b/src/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.cpp index 5c3dd8ef..8ec6abe8 100644 --- a/src/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.cpp +++ b/src/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.cpp @@ -33,9 +33,9 @@ using namespace exageostat::configurations; using namespace exageostat::linearAlgebra; template -T ChameleonImplementation::ModelingOperations(std::unique_ptr > &aData, - configurations::Configurations &aConfigurations, const double *theta, - T *apMeasurementsMatrix, const kernels::Kernel &aKernel) { +T ChameleonImplementation::ExaGeoStatMLETile(std::unique_ptr> &aData, + configurations::Configurations &aConfigurations, const double *theta, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) { if (!aData->GetDescriptorData()->GetIsDescriptorInitiated()) { this->InitiateDescriptors(aConfigurations, *aData->GetDescriptorData(), aKernel.GetVariablesNumber(), @@ -361,34 +361,3 @@ void ChameleonImplementation::ExaGeoStatCreateSequence(void *apSequence) { throw std::runtime_error("CHAMELEON_Sequence_Create Failed!"); } } - -template -void ChameleonImplementation::CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) { - -} - -template -void -ChameleonImplementation::ExaGeoStatSYRK(Configurations &aConfigurations, unique_ptr > &aData) { - -} - -template -void ChameleonImplementation::ExaGeoStatTLRCholesky(Configurations &aConfigurations, - unique_ptr > &aData) { - -} - -template -void -ChameleonImplementation::ExaGeoStatNorm(Configurations &aConfigurations, unique_ptr > &aData) { - -} - -template -double -ChameleonImplementation::CalculateMSE(Configurations &aConfigurations, unique_ptr > &aData) { - - return 0; -} \ No newline at end of file diff --git a/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp b/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp index f5f843a7..f08c8c3b 100644 --- a/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp +++ b/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp @@ -15,7 +15,6 @@ #include #include -#include using namespace std; diff --git a/src/linear-algebra-solvers/concrete/parsec/ParsecImplementation.cpp b/src/linear-algebra-solvers/concrete/parsec/ParsecImplementation.cpp deleted file mode 100644 index 60a5f8fd..00000000 --- a/src/linear-algebra-solvers/concrete/parsec/ParsecImplementation.cpp +++ /dev/null @@ -1,90 +0,0 @@ - -// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, -// All rights reserved. -// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). - -/** - * @file ParsecImplementation.cpp - * @brief This file contains the implementation of ParsecImplementation class. - * @details ParsecImplementation is a concrete implementation of the LinearAlgebraMethods class. - * @version 2.0.0 - * @author Mahmoud ElKarargy - * @author Sameh Abdulah - * @date 2024-10-15 -**/ - -#include - -using namespace exageostat::linearAlgebra; -using namespace exageostat::configurations; - -template -void ParsecImplementation::ExaGeoStatSYRK(Configurations &aConfigurations, std::unique_ptr> &aData){ - -} - -template -void ParsecImplementation::ExaGeoStatTLRCholesky(Configurations &aConfigurations, std::unique_ptr> &aData){ - -} - -template -void -ParsecImplementation::ExaGeoStatNorm(Configurations &aConfigurations, std::unique_ptr> &aData) { - -} - -template -double -ParsecImplementation::CalculateMSE(Configurations &aConfigurations, std::unique_ptr> &aData) { - - return 0; -} - -template -T ParsecImplementation::ModelingOperations(std::unique_ptr> &aData, - configurations::Configurations &aConfigurations, const double *apTheta, - T *apMeasurementsMatrix, const kernels::Kernel &aKernel) { - -} - -template -void ParsecImplementation::ExaGeoStatLapackCopyTile(const common::UpperLower &aUpperLower, void *apA, void *apB) { - -} - - -template -void ParsecImplementation::ExaGeoStatSequenceWait(void *apSequence) { - -} - - -template -void ParsecImplementation::ExaGeoStatCreateSequence(void *apSequence) { - -} - - -template -void -ParsecImplementation::ExaGeoStatPotrfTile(const common::UpperLower &aUpperLower, void *apA, int aBand, void *apCD, - void *apCrk, const int &aMaxRank, const int &aAcc) { - -} - - -template -void ParsecImplementation::ExaGeoStatTrsmTile(const common::Side &aSide, const common::UpperLower &aUpperLower, - const common::Trans &aTrans, const common::Diag &aDiag, - const T &aAlpha, void *apA, void *apCD, void *apCrk, void *apZ, - const int &aMaxRank) { - -} - - -template -void ParsecImplementation::CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) { - -} \ No newline at end of file diff --git a/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp b/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp index a466ea4e..c297bb01 100644 --- a/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp +++ b/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp @@ -31,15 +31,14 @@ using namespace exageostat::dataunits; using namespace exageostat::kernels; using namespace exageostat::helpers; using namespace exageostat::results; -using namespace exageostat::configurations; int store_only_diagonal_tiles = 1; int use_scratch = 1; int global_check = 0; //used to create dense matrix for accuracy check template -void HicmaImplementation::SetModelingDescriptors(std::unique_ptr > &aData, - Configurations &aConfigurations, const int &aP) { +void HicmaImplementation::SetModelingDescriptors(std::unique_ptr> &aData, + configurations::Configurations &aConfigurations, const int &aP) { int full_problem_size = aConfigurations.GetProblemSize() * aP; int lts = aConfigurations.GetLowTileSize(); @@ -115,9 +114,9 @@ void HicmaImplementation::SetModelingDescriptors(std::unique_ptr -T HicmaImplementation::ModelingOperations(std::unique_ptr > &aData, - Configurations &aConfigurations, const double *theta, - T *apMeasurementsMatrix, const Kernel &aKernel) { +T HicmaImplementation::ExaGeoStatMLETile(std::unique_ptr> &aData, + configurations::Configurations &aConfigurations, const double *theta, + T *apMeasurementsMatrix, const Kernel &aKernel) { if (!aData->GetDescriptorData()->GetIsDescriptorInitiated()) { this->InitiateDescriptors(aConfigurations, *aData->GetDescriptorData(), aKernel.GetVariablesNumber(), @@ -403,55 +402,3 @@ void HicmaImplementation::ExaGeoStatTrsmTile(const common::Side &aSide, const throw std::runtime_error("HICMA_dtrsmd_Tile Failed!"); } } - - -template -void HicmaImplementation::CopyDescriptors(void *apSourceDesc, void *apDestinationDesc, const int &aSize, - const common::CopyDirection &aDirection) { - - auto *z = new T[aSize]; - int status; - if (aDirection == common::CHAMELEON_TO_HICMA) { - status = CHAMELEON_Desc2Lap((cham_uplo_t) EXAGEOSTAT_UPPER_LOWER, (CHAM_desc_t *) apSourceDesc, z, aSize); - if (status != CHAMELEON_SUCCESS) { - throw std::runtime_error("CHAMELEON_Desc2Lap Failed!"); - } - status = HICMA_Lapack_to_Tile(z, aSize, (HICMA_desc_t *) apDestinationDesc); - if (status != HICMA_SUCCESS) { - throw std::runtime_error("HICMA_Lapack_to_Tile Failed!"); - } - } else if (aDirection == common::HICMA_TO_CHAMELEON) { - status = HICMA_Tile_to_Lapack((HICMA_desc_t *) apSourceDesc, z, aSize); - if (status != HICMA_SUCCESS) { - throw std::runtime_error("HICMA_Tile_to_Lapack Failed!"); - } - status = CHAMELEON_Lap2Desc((cham_uplo_t) EXAGEOSTAT_UPPER_LOWER, z, aSize, (CHAM_desc_t *) apDestinationDesc); - if (status != CHAMELEON_SUCCESS) { - throw std::runtime_error("CHAMELEON_Lap2Desc Failed!"); - } - } - delete[] z; -} - - -template -void HicmaImplementation::ExaGeoStatSYRK(Configurations &aConfigurations, unique_ptr > &aData) { - -} - -template -void -HicmaImplementation::ExaGeoStatTLRCholesky(Configurations &aConfigurations, unique_ptr > &aData) { - -} - -template -void HicmaImplementation::ExaGeoStatNorm(Configurations &aConfigurations, unique_ptr > &aData) { - -} - -template -double HicmaImplementation::CalculateMSE(Configurations &aConfigurations, unique_ptr > &aData) { - - return 0; -} \ No newline at end of file diff --git a/src/runtime-solver/CMakeLists.txt b/src/runtime-solver/CMakeLists.txt new file mode 100644 index 00000000..17e296d0 --- /dev/null +++ b/src/runtime-solver/CMakeLists.txt @@ -0,0 +1,19 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @version 2.0.0 +# @brief CMake build script for the runtime-solvers library, which includes the RuntimeSolversMethods base class and the RuntimeSolversFactory +# @author Mahmoud ElKarargy +# @date 2024-11-04 + +# Include the concrete implementations of the RuntimeSolversMethods class +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/concrete) +# Define the sources for the library +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/RuntimeSolverFactory.cpp + ${SOURCES} + PARENT_SCOPE + ) diff --git a/src/runtime-solver/RuntimeSolverFactory.cpp b/src/runtime-solver/RuntimeSolverFactory.cpp new file mode 100644 index 00000000..1eed2ddc --- /dev/null +++ b/src/runtime-solver/RuntimeSolverFactory.cpp @@ -0,0 +1,36 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file RuntimeSolverFactory.cpp + * @brief Implementation of the RuntimeSolverFactory class for creating runtime solvers for different runtime systems using StarPU or PaRSEC libraries. + * The factory creates a unique pointer to a concrete implementation of the RuntimeSolverMethods class based on the runtime specified. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-11-04 +**/ + +#include + +#if DEFAULT_RUNTIME +#include +#else +#include +#endif + +using namespace exageostat::runtimesolver; +using namespace exageostat::common; + +template +std::unique_ptr> RuntimeSolverFactory::CreateRuntimeSolver() { + + // Check which Runtime is used +#if DEFAULT_RUNTIME + return std::make_unique>(); +#else + return std::make_unique>(); +#endif + +} diff --git a/src/runtime-solver/concrete/CMakeLists.txt b/src/runtime-solver/concrete/CMakeLists.txt new file mode 100644 index 00000000..d3d75c77 --- /dev/null +++ b/src/runtime-solver/concrete/CMakeLists.txt @@ -0,0 +1,27 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @version 2.0.0 +# @brief CMake build script for the runtime-solvers library, which includes the concrete implementations of the +# RuntimeSolversMethods class based on the enabled runtime (StarPU or PaRSEC). +# @author Mahmoud ElKarargy +# @author Sameh Abdulah +# @date 2024-11-04 + +# Include the concrete implementations of the RuntimeSolversMethods class based on the enabled runtime (StarPU or PaRSEC) +if (RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/StarpuRuntimeSolver.cpp + ${SOURCES} + ) +elseif (RUNTIME_TYPE STREQUAL "PARSEC") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ParsecRuntimeSolver.cpp; + ${SOURCES} + ) +endif () +set(SOURCES ${SOURCES} PARENT_SCOPE) + diff --git a/src/runtime-solver/concrete/ParsecRuntimeSolver.cpp b/src/runtime-solver/concrete/ParsecRuntimeSolver.cpp new file mode 100644 index 00000000..75410f18 --- /dev/null +++ b/src/runtime-solver/concrete/ParsecRuntimeSolver.cpp @@ -0,0 +1,105 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecRuntimeSolver.cpp + * @brief This file contains the implementation of ParsecRuntimeSolver class. + * @details ParsecRuntimeSolver is a concrete implementation of the RuntimeSolversMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-11-04 +**/ + +#include +#include +#include + +extern "C"{ +#include +} + +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::analyzer; +using namespace exageostat::runtimesolver; + +template +void ParsecRuntimeSolver::ExaGeoStatSYRK(std::unique_ptr> &aData){ + auto* pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + auto* pDesc_A = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR, DescriptorName::DESCRIPTOR_A).parsec_desc; + + SYNC_TIME_START(); + dplasma_dsyrk(pContext, dplasmaLower, dplasmaNoTrans, 1.0, pDesc_A, 0.0, (parsec_tiled_matrix_t *) &ExaGeoStatHardware::GetHicmaData()->dcA); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(),("SYRK\n")); +} + +template +void ParsecRuntimeSolver::ExaGeoStatTLRCholesky(std::unique_ptr> &aData){ + + auto *pParams = ExaGeoStatHardware::GetHicmaParams(); + auto *pHicma_data = ExaGeoStatHardware::GetHicmaData(); + auto *pAnalysis = ExaGeoStatHardware::GetAnalysis(); + auto *pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + for( int i= 0; i < pParams->nruns; i++ ) { + hicma_parsec_potrf(pContext, pHicma_data, pParams, pAnalysis); + } +} + +template +double ParsecRuntimeSolver::ExaGeoStatNorm(Configurations &aConfigurations, std::unique_ptr> &aData){ + + int L = aConfigurations.GetDenseTileSize(); + int N = aConfigurations.GetProblemSize(); + double aNT = (N % L == 0) ? (N/L) : (N/L + 1); + int aUpperLower = EXAGEOSTAT_LOWER; + auto* pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + auto* pDescA = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR, DescriptorName::DESCRIPTOR_A).parsec_desc; + + SYNC_TIME_START(); + GetMatrixNorm(pContext, &ExaGeoStatHardware::GetHicmaParams()->norm_global, (parsec_tiled_matrix_t *) &ExaGeoStatHardware::GetHicmaData()->dcA, aNT, aUpperLower, 1); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Matrix norm: norm_global= %le\n", ExaGeoStatHardware::GetHicmaParams()->norm_global)); + return ExaGeoStatHardware::GetHicmaParams()->norm_global; +} + +template +double ParsecRuntimeSolver::CalculateMSE(Configurations &aConfigurations, std::unique_ptr> &aData) { + + auto* pContext = (parsec_context_t * )ExaGeoStatHardware::GetParsecContext(); + auto* pDesc_f_data = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR, + DescriptorName::DESCRIPTOR_F_DATA).parsec_desc; + auto* pDesc_f_spatial = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR, + DescriptorName::DESCRIPTOR_F_SPATIAL).parsec_desc; + + SYNC_TIME_START(); + auto mse_result = MeanSquaredError(pContext, pDesc_f_data, pDesc_f_spatial, aConfigurations.GetDenseTileSize()); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(),("mse\n")); + return mse_result; +} + +template +T ParsecRuntimeSolver::ModelingOperations(std::unique_ptr> &aData, Configurations &aConfigurations, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) { + + // SYRK + ExaGeoStatSYRK(aData); + // Calculate norm + ExaGeoStatNorm(aConfigurations, aData); + // Analyze matrix before Cholesky + DataAnalyzer::PreAnalyzeMatrix(aData); + // HiCMA Cholesky + ExaGeoStatTLRCholesky(aData); + // Analyze matrix after Cholesky + DataAnalyzer::PostAnalyzeMatrix(aData); + // Diff to matlab result + DataAnalyzer::CompareMatDifference(aData); + + if(aConfigurations.GetEnableInverse()){ + transformers::DataTransformer::InverseSphericalHarmonicsTransform(aConfigurations.GetDenseTileSize(), aData); + // TODO: results in a seg fault in C + CalculateMSE(aConfigurations, aData); + } +} diff --git a/src/runtime-solver/concrete/StarpuRuntimeSolver.cpp b/src/runtime-solver/concrete/StarpuRuntimeSolver.cpp new file mode 100644 index 00000000..39dae64c --- /dev/null +++ b/src/runtime-solver/concrete/StarpuRuntimeSolver.cpp @@ -0,0 +1,79 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file StarpuRuntimeSolver.cpp + * @brief This file contains the implementation of StarpuRuntimeSolver class. + * @details StarpuRuntimeSolver is a concrete implementation of the RuntimeSolversMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-11-04 +**/ + +#include +#include +#include +#include + +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::runtimesolver; +using namespace exageostat::dataunits; +using namespace nlopt; + +template +T StarpuRuntimeSolver::ModelingOperations(std::unique_ptr> &aData, Configurations &aConfigurations, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) { + + int parameters_number = aKernel.GetParametersNumbers(); + int max_number_of_iterations = aConfigurations.GetMaxMleIterations(); + // Setting struct of data to pass to the modeling. + auto modeling_data = new mModelingData(aData, aConfigurations, *apMeasurementsMatrix, aKernel); + // Create nlopt + double opt_f; + opt optimizing_function(nlopt::LN_BOBYQA, parameters_number); + // Initialize problem's bound. + optimizing_function.set_lower_bounds(aConfigurations.GetLowerBounds()); + optimizing_function.set_upper_bounds(aConfigurations.GetUpperBounds()); + optimizing_function.set_ftol_abs(aConfigurations.GetTolerance()); + // Set max iterations value. + optimizing_function.set_maxeval(max_number_of_iterations); + optimizing_function.set_max_objective(DataModelingAPI, (void *) modeling_data); + + // Optimize mle using nlopt. + optimizing_function.optimize(aConfigurations.GetStartingTheta(), opt_f); + aConfigurations.SetEstimatedTheta(aConfigurations.GetStartingTheta()); + + auto theta = aConfigurations.GetStartingTheta(); + + LOGGER("--> Final Theta Values (", true) + for (int i = 0; i < parameters_number; i++) { + LOGGER_PRECISION(theta[i]) + if (i != parameters_number - 1) { + LOGGER_PRECISION(", ") + } + } + LOGGER_PRECISION(")") + LOGGER("") + + delete modeling_data; + return optimizing_function.last_optimum_value(); + +} + +template +double StarpuRuntimeSolver::DataModelingAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo) { + + auto config = ((mModelingData *) apInfo)->mpConfiguration; + auto data = ((mModelingData *) apInfo)->mpData; + auto measurements = ((mModelingData *) apInfo)->mpMeasurementsMatrix; + auto kernel = ((mModelingData *) apInfo)->mpKernel; + + // We do Date Modeling with any computation. + auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(config->GetComputation()); + return linear_algebra_solver->ExaGeoStatMLETile(*data, *config, aTheta.data(), measurements, *kernel); +} diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index e4d999e5..afeef659 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -12,6 +12,10 @@ # Include runtime directory,based on runtime flag. if ("${RUNTIME_TYPE}" STREQUAL "PARSEC") add_subdirectory(parsec) + set(JDF_GENERATED_SOURCES + ${JDF_GENERATED_SOURCES} + PARENT_SCOPE + ) else () #by default use StarPu runtime. add_subdirectory(starpu) @@ -22,4 +26,3 @@ set(SOURCES ${SOURCES} PARENT_SCOPE ) - diff --git a/src/runtime/parsec/CMakeLists.txt b/src/runtime/parsec/CMakeLists.txt index f1b427db..8c8e25a9 100644 --- a/src/runtime/parsec/CMakeLists.txt +++ b/src/runtime/parsec/CMakeLists.txt @@ -5,13 +5,24 @@ # @file CMakeLists.txt # @version 1.1.0 -# @brief CMake build script for Parsec runtime +# @brief CMake build script for Parsec directory. # @author Mahmoud ElKarargy # @date 2024-03-10 -# Define the sources for the library +# Add subdirectory for JDF files +add_subdirectory(jdf) + +# Set source files for ClimateEmulator set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ParsecFunctions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/JDFHelperFunctions.c ${SOURCES} PARENT_SCOPE - ) \ No newline at end of file +) + +# Set JDF generated source files for Climate Emulator +set(JDF_GENERATED_SOURCES + ${JDF_GENERATED_SOURCES} + PARENT_SCOPE +) + diff --git a/src/runtime/parsec/JDFHelperFunctions.c b/src/runtime/parsec/JDFHelperFunctions.c new file mode 100644 index 00000000..06b54135 --- /dev/null +++ b/src/runtime/parsec/JDFHelperFunctions.c @@ -0,0 +1,185 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file JDFHelperFunctions.c + * @brief Implementation of JDF helper functions. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-10-20 +**/ + +#include + +int CalculateSingleIndex(int aN, int aM) { + return aN * (aN + 1) / 2 + aM; +} + +double SumDoubleData(double *apData, int aColumn, int aRow) { + double sum = 0.0; + for (int j = 0; j < aRow; j++) { + for (int i = 0; i < aColumn; i++) { + sum += apData[j * aColumn + i]; + } + } + return sum; +} + +complex double SumComplexData(complex double *apData, int aColumn, int aRow) { + complex double sum = 0.0; + for (int j = 0; j < aRow; j++) { + for (int i = 0; i < aColumn; i++) { + sum += apData[j * aColumn + i]; + } + } + return sum; +} + +void ForwardSHTHelper(double *apFlm, complex double *apF_data, int aFDataM, int aFDataN, + complex double *apEt1, int aEt1M, complex double *apEt2, int aEt2M, + complex double *apEp, int aEpM, int aEpN, complex double *apSlmn, + int aSlmnM, int aSlmnN, complex double *apIe, int aIeM, int aIeN, + complex double *apIo, int aIoM, int aIoN, complex double *apP, + int aPM, int aPN, complex double *apD, complex double *apGmtheta_r, + complex double *apFmnm, complex double *apTmp1, + complex double *apTmp2, int aL){ + + complex double alpha_complex, beta_complex; + double alpha_double, beta_double; + + assert(aFDataN == aEpM); + alpha_complex = (complex double) 1.0; + beta_complex = (complex double) 0.0; + + int gmtheta_r_M = aFDataM; + int gmtheta_r_N = aEpN; + int gmtheta_r_K = aFDataN; + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + gmtheta_r_M, gmtheta_r_N, gmtheta_r_K, + &alpha_complex, apF_data, gmtheta_r_M, + apEp, gmtheta_r_K, &beta_complex, + apGmtheta_r, gmtheta_r_M); + + int fmnm_M = aEt1M; + int fmnm_N = gmtheta_r_N; + int fmnm_K = gmtheta_r_M; + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + fmnm_M, fmnm_N, fmnm_K, + &alpha_complex,apEt1, fmnm_M, + apGmtheta_r, fmnm_K, + &beta_complex, apFmnm, fmnm_M); + + int tmp1_M = aEt2M; + int tmp1_N = aPN; + int tmp1_K = aPM; + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + tmp1_M, tmp1_N, tmp1_K, + &alpha_complex, apEt2, tmp1_M, + apP, tmp1_K, + &beta_complex, apTmp1, tmp1_M); + + assert(tmp1_N == gmtheta_r_M); + int tmp2_M = tmp1_M; + int tmp2_N = gmtheta_r_N; + int tmp2_K = gmtheta_r_M; + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + tmp2_M, tmp2_N, tmp2_K, + &alpha_complex, apTmp1, tmp2_M, + apGmtheta_r, tmp2_K, + &beta_complex, apTmp2, tmp2_M); + + assert(fmnm_M == tmp2_M); + fmnm_K = tmp2_N; + beta_complex = (complex double) 1.0; + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + fmnm_M, fmnm_N, fmnm_K, + &alpha_complex, apTmp2, fmnm_M, + apD, fmnm_K, + &beta_complex, apFmnm, fmnm_M); + + assert(aSlmnN == aIeM); + assert(aIeN == fmnm_M); + + int flmn_matrix_M = aL; + + complex double *pFlmn_matrix = apTmp1; + complex double *pFmnm_tmp; + complex double *pSlmn_tmp; + complex double *multipy_tmp = apTmp2 + fmnm_M + aSlmnN; + + for (int m = 0; m < aL; m++) { + pFmnm_tmp = apFmnm + (aL + m - 1) * fmnm_M; + if (0 == m % 2) { + for (int n = m; n < aL; n++) { + pSlmn_tmp = apSlmn + CalculateSingleIndex(n, m); + + alpha_complex = (complex double) 1.0; + beta_complex = (complex double) 0.0; + cblas_zgemv(CblasColMajor, CblasNoTrans, + aIeM, aIeN, + &alpha_complex, apIe, aIeM, + pFmnm_tmp, 1, + &beta_complex, multipy_tmp, 1); + cblas_zdotu_sub(aSlmnN, pSlmn_tmp, aSlmnM, multipy_tmp, 1, &pFlmn_matrix[m * flmn_matrix_M + n]); + } + } else { + for (int n = m; n < aL; n++) { + pSlmn_tmp = apSlmn + CalculateSingleIndex(n, m); + + alpha_complex = (complex double) 1.0; + beta_complex = (complex double) 0.0; + cblas_zgemv(CblasColMajor, CblasNoTrans, + aIoM, aIoN, + &alpha_complex, apIo, aIoM, + pFmnm_tmp, 1, + &beta_complex, multipy_tmp, 1); + + cblas_zdotu_sub(aSlmnN, pSlmn_tmp, aSlmnM, multipy_tmp, 1, &pFlmn_matrix[m * flmn_matrix_M + n]); + } + } + + } + for (int n = 0; n < aL; n++) { + for (int m = 0; m <= n; m++) { + apFlm[n * n + n + m] = creal(pFlmn_matrix[m * flmn_matrix_M + n]); + if (m != 0) { + apFlm[n * n + n - m] = cimag(pFlmn_matrix[m * flmn_matrix_M + n]); + } + } + } +} + +void InverseSHTHelper(double *apFlm, double *apFspatial, double *apZlm, double *apSC, double *apSmt, int aL) { + + int index_Zlm, index_flm; + int Smt_M = aL + 1; + int Smt_N = 2 * aL - 1; + + memset(apSmt, 0, Smt_M * Smt_N * sizeof(double)); + + for (int m = -(aL - 1); m < aL; m++) { + for (int n = abs(m); n < aL; n++) { + index_Zlm = CalculateSingleIndex(n, abs(m)); + index_flm = n * n + n + m; + cblas_daxpy(Smt_M, apFlm[index_flm], apZlm + index_Zlm * aL + 1, 1, apSmt + (m + aL - 1) * Smt_M, 1); + } + } + + int f_spatial_M = Smt_M; + int f_spatial_N = 2 * aL; + int f_spatial_K = Smt_N; + + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + f_spatial_M, f_spatial_N, f_spatial_K, + (double) 1.0, apSmt, f_spatial_M, + apSC, f_spatial_K, + (double) 0.0, apFspatial, f_spatial_M); + +} diff --git a/src/runtime/parsec/jdf/CMakeLists.txt b/src/runtime/parsec/jdf/CMakeLists.txt new file mode 100644 index 00000000..1463fb93 --- /dev/null +++ b/src/runtime/parsec/jdf/CMakeLists.txt @@ -0,0 +1,50 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @version 1.1.0 +# @brief CMake build script for jdf directory +# @author Mahmoud ElKarargy +# @date 2024-10-17 + +# Add .jdf files +set(JDF_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVTimeSlot.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVComplex.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVToComplex.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVToComplexTimeSlot.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSV.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/ForwardSHT.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/MeanSquaredError.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/DifferenceDouble.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/ForwardSHTReshape.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/GetMatrixNorm.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/InverseSHT.jdf + ${CMAKE_CURRENT_SOURCE_DIR}/MatrixCompress.jdf +) + +# Output directory for generated sources +set(JDF_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated_jdf") + +# Make sure the output directory exists +file(MAKE_DIRECTORY ${JDF_OUTPUT_DIR}) + +foreach (jdf_file ${JDF_FILES}) + get_filename_component(jdf_name ${jdf_file} NAME_WE) + execute_process( + COMMAND ${HICMA_X_BIN_DIR}/parsec-ptgpp -i ${jdf_file} -o ${jdf_name} + ERROR_QUIET + ) + execute_process( + COMMAND mv ${jdf_name}.c ${CMAKE_CURRENT_BINARY_DIR}/generated_jdf/${jdf_name}.c + COMMAND mv ${jdf_name}.h ${CMAKE_CURRENT_BINARY_DIR}/generated_jdf/${jdf_name}.h + ) + list(APPEND JDF_GENERATED_SOURCES ${JDF_OUTPUT_DIR}/${jdf_name}.c) +endforeach () + +set(JDF_GENERATED_SOURCES + ${JDF_GENERATED_SOURCES} + PARENT_SCOPE +) diff --git a/src/runtime/parsec/jdf/DifferenceDouble.jdf b/src/runtime/parsec/jdf/DifferenceDouble.jdf new file mode 100644 index 00000000..76e49e79 --- /dev/null +++ b/src/runtime/parsec/jdf/DifferenceDouble.jdf @@ -0,0 +1,92 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static double DifferenceDouble_core(double *apDataA, double *apDataB, int aM, int aN) { + double result = 0.0; + for (int j = 0; j < aN; j++) { + for (int i = 0; i < aM; i++) { + result = fmax(result, fabs(apDataA[j*aM+i]-apDataB[j*aM+i])); + if( result > 1.0e-4 ) { + return result; + } + } + } + return result; +} + +%} + +apDescA [ type = "parsec_tiled_matrix_t*" ] +apDescB [ type = "parsec_tiled_matrix_t*" aligned = apDescA ] + +task(m, n) + +m = 0 .. apDescA->lmt-1 +n = 0 .. apDescA->lnt-1 + +: apDescA(m, n) + +READ apDataA <- apDescA(m, n) +READ apDataB <- apDescB(m, n) + +BODY +{ + double diff = DifferenceDouble_core(apDataA, apDataB, apDescA->mb, apDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +DifferenceDoubleConstructor(parsec_matrix_block_cyclic_t *apDescA, parsec_matrix_block_cyclic_t *apDescB) +{ + assert(apDescA->super.mb == apDescB->super.mb); + assert(apDescA->super.nb == apDescB->super.nb); + parsec_DifferenceDouble_taskpool_t *taskpool = parsec_DifferenceDouble_new(&apDescA->super, &apDescB->super); + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_DifferenceDouble_DEFAULT_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apDescA->super.mb, apDescA->super.nb, apDescA->super.mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void DifferenceDouble_destructor(parsec_taskpool_t *pTaskpool) +{ + parsec_DifferenceDouble_taskpool_t *difference_double_taskpool = (parsec_DifferenceDouble_taskpool_t *)pTaskpool; + parsec_del2arena(&difference_double_taskpool->arenas_datatypes[PARSEC_DifferenceDouble_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +/** + */ +int DifferenceDouble(parsec_context_t *apContext, + parsec_matrix_block_cyclic_t *apDescA, + parsec_matrix_block_cyclic_t *apDescB) +{ + parsec_taskpool_t *parsec_difference_double = NULL; + parsec_difference_double = DifferenceDoubleConstructor(apDescA, apDescB); + if(parsec_difference_double != NULL ){ + parsec_context_add_taskpool(apContext, parsec_difference_double); + parsec_context_start(apContext); + parsec_context_wait(apContext); + DifferenceDouble_destructor(parsec_difference_double); + } + + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ForwardSHT.jdf b/src/runtime/parsec/jdf/ForwardSHT.jdf new file mode 100644 index 00000000..af674f1b --- /dev/null +++ b/src/runtime/parsec/jdf/ForwardSHT.jdf @@ -0,0 +1,290 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static void FlmToFlmT(double *apFlmT, double *apFlm, parsec_matrix_block_cyclic_t *apFLMTDesc, int aFlmM, int aFlmN, int aM, int aN) { + int flm_offset = aM * apFLMTDesc->super.mb; + int flmT_offset = aN * apFLMTDesc->super.mb; + int size = (aM == apFLMTDesc->super.lmt - 1) ? (aFlmM * aFlmN) - flm_offset : apFLMTDesc->super.mb; + memcpy(apFlmT + flmT_offset, apFlm + flm_offset, size*sizeof(double)); +} + + +%} + +/* Globals + */ +apFDataDesc [ type = "parsec_tiled_matrix_t*" ] +apFLMDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc] +apFLMTDesc [ type = "parsec_tiled_matrix_t*" ] +apET1Desc [ type = "parsec_tiled_matrix_t*" ] +apET2Desc [ type = "parsec_tiled_matrix_t*" ] +apEPDesc [ type = "parsec_tiled_matrix_t*" ] +apSLMNDesc [ type = "parsec_tiled_matrix_t*" ] +apIEDesc [ type = "parsec_tiled_matrix_t*" ] +apIODesc [ type = "parsec_tiled_matrix_t*" ] +apPDesc [ type = "parsec_tiled_matrix_t*" ] +apDDesc [ type = "parsec_tiled_matrix_t*" ] +aFlmM [ type = "int" ] +aFlmN [ type = "int" ] +aLSize [ type = "int" ] + +/* Temporary buffer used for convert */ +apGmtheta_r [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apFmnm [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork1 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork2 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] + +/* GPU workspace */ +ws_gpu [ type = "void *" hidden = on default = NULL ] + +/* GPU number and index */ +nb_cuda_devices [ type = "int" hidden = on default = 0 ] +cuda_device_index [ type = "int *" hidden = on default = "NULL"] + +bind_gpu(n) + +n = 0 .. apFDataDesc->lnt-1 + +: apFDataDesc(0, n) + +READ apFlm <- apFLMDesc(0, n) + -> apFlm task(n) [ type_remote = apFlm ] + +READ apF_data <- apFDataDesc(0, n) + -> apF_data task(n) [ type_remote = apF_data ] + + +BODY +{ +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + if( nb_cuda_devices > 0 ) { + int g = climate_emualtor_gpu_load_balance( n, gb->nodes, nb_cuda_devices ); + parsec_advise_data_on_device( _f_apFlm->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + parsec_advise_data_on_device( _f_apF_data->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + } +#endif +} +END + + + +task(n) + +n = 0 .. apFDataDesc->lnt-1 + +my_rank = %{ return apFDataDesc->super.rank_of(&apFDataDesc->super, 0, n); %} + +: apFDataDesc(0, n) + +// TODO: check whether this will evict apF_data first on GPU +READ apF_data <- apF_data bind_gpu(n) [ type_remote = apF_data ] +RW apFlm <- apFlm bind_gpu(n) [ type_remote = apFlm ] + -> apFlm task_apFlmT(0..apFLMTDesc->lmt-1, n) [ type_remote = apFlm ] + -> apFLMDesc(0, n) + +READ apEt1 <- apET1Desc(0, my_rank) +READ apEt2 <- apET2Desc(0, my_rank) +READ apEp <- apEPDesc(0, my_rank) +READ apSlmn <- apSLMNDesc(0, my_rank) +READ apIe <- apIEDesc(0, my_rank) +READ apIo <- apIODesc(0, my_rank) +READ apP <- apPDesc(0, my_rank) +READ apD <- apDDesc(0, my_rank) + + +BODY[type=CUDA] +{ +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) + ForwardSHT_gpu_core(apFlm, apF_data, apEt1, apEt2, apEp, apSlmn, apIe, apIo, apP, apD, cuda_device, gpu_task, cuda_stream, gb); +#endif +} +END + +BODY[type=HIP] +{ +#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + ForwardSHT_gpu_core(apFlm, apF_data, apEt1, apEt2, apEp, apSlmn, apIe, apIo, apP, apD, cuda_device, gpu_task, cuda_stream, gb); +#endif +} +END + +BODY +{ + complex double *pGmtheta_r = (complex double *) parsec_private_memory_pop(apGmtheta_r); + complex double *pFmnm = (complex double *) parsec_private_memory_pop(apFmnm); + complex double *pTmp1 = (complex double *) parsec_private_memory_pop(apWork1); + complex double *pTmp2 = (complex double *) parsec_private_memory_pop(apWork2); + + ForwardSHTHelper(apFlm, apF_data, apFDataDesc->mb, apFDataDesc->nb, apEt1, apET1Desc->mb, + apEt2, apET2Desc->mb, apEp, apEPDesc->mb, apEPDesc->nb, apSlmn, + apSLMNDesc->mb, apSLMNDesc->nb, apIe, apIEDesc->mb, apIEDesc->nb, apIo, + apIODesc->mb, apIODesc->nb, apP, apPDesc->mb, apPDesc->nb, apD, + pGmtheta_r, pFmnm, pTmp1, pTmp2, aLSize); + + parsec_private_memory_push(apGmtheta_r, pGmtheta_r); + parsec_private_memory_push(apFmnm, pFmnm); + parsec_private_memory_push(apWork1, pTmp1); + parsec_private_memory_push(apWork2, pTmp2); + +} +END + + +task_apFlmT(m, n) + +m = 0 .. apFLMTDesc->lmt-1 +n = 0 .. apFLMDesc->lnt-1 + +: apFLMTDesc(m, 0) + +READ apFlm <- apFlm task(n) [ type_remote = apFlm ] + +READ apFlmT <- apFLMTDesc(m, 0) + + +BODY +{ + FlmToFlmT(apFlmT, apFlm, (parsec_matrix_block_cyclic_t *) apFLMTDesc, aFlmM, aFlmN, m, n); +} +END + + + +extern "C" %{ + + +#if 0 +void *gb_forward_create_workspace(void *obj, void *user) +{ + parsec_device_module_t *mod = (parsec_device_module_t *)obj; + zone_malloc_t *memory = ((parsec_device_cuda_module_t*)mod)->super.memory; + parsec_ForwardSHT_taskpool_t *tp = (parsec_ForwardSHT_taskpool_t*)user; + gb_forward_workspace_t *wp = NULL; + int nb = tp->_g_descA->nb; + int workspace_size = tp->_g_gb->apF_data_M * tp->_g_gb->apEp_N + + tp->_g_gb->apEt1_M * tp->_g_gb->apEp_N + + tp->_g_gb->apEt2_M * tp->_g_gb->apP_N + + tp->_g_gb->apEt2_M * tp->_g_gb->apEp_N; + size_t elt_size = sizeof(complex double); + + wp = (gb_forward_workspace_t*)malloc(sizeof(gb_forward_workspace_t)); + wp->tmpmem = zone_malloc(memory, workspace_size * elt_size + sizeof(int)); + assert(NULL != wp->tmpmem); + wp->lwork = workspace_size; + wp->memory = memory; + + return wp; +} + +static void destroy_workspace(void *apWorkSpace, void *aN) +{ + gb_forward_workspace_t *ws = (gb_forward_workspace_t*) apWorkSpace; + zone_free((zone_malloc_t*)ws->memory, ws->tmpmem); + free(ws); + (void)aN; +} +#endif + + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +ForwardSHTConstructor(parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmM, int aFlmN, int aLSize) +{ + + parsec_ForwardSHT_taskpool_t *pTaskpool = + parsec_ForwardSHT_new(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, apEPDesc, + apSLMNDesc, apIEDesc,apIODesc, apPDesc, apDDesc, aFlmM, aFlmN, aLSize); + + pTaskpool->_g_apGmtheta_r = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apGmtheta_r, aFDataM * aEPN * sizeof(complex double)); + + pTaskpool->_g_apFmnm = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apFmnm, aET1M * aEPN * sizeof(complex double)); + + pTaskpool->_g_apWork1 = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork1, aET2M * aPN * sizeof(complex double)); + + pTaskpool->_g_apWork2 = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init( pTaskpool->_g_apWork2, aET2M * aEPN * sizeof(complex double)); + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + int nb = 0, *dev_index; + + /** Find all CUDA devices */ + hicma_parsec_find_cuda_devices( parsec, &dev_index, &nb); + + pTaskpool->_g_ws_gpu = (void *)gb->ws; + pTaskpool->_g_nb_cuda_devices = nb; + pTaskpool->_g_cuda_device_index = dev_index; +#endif + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apF_data_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, apFDataDesc->mb, apFDataDesc->nb, apFDataDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apFlm_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFLMTDesc->mb, apFLMTDesc->nb, apFLMTDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ForwardSHTDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_ForwardSHT_taskpool_t *pTaskpool = (parsec_ForwardSHT_taskpool_t *)apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apF_data_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apFlm_ADT_IDX]); + parsec_private_memory_fini(pTaskpool->_g_apGmtheta_r); + parsec_private_memory_fini(pTaskpool->_g_apFmnm); + parsec_private_memory_fini(pTaskpool->_g_apWork1); + parsec_private_memory_fini(pTaskpool->_g_apWork2); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int ForwardSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmM, int aFlmN, int aLSize) { + + parsec_taskpool_t *pTaskpool = ForwardSHTConstructor(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, + apEPDesc, apSLMNDesc, apIEDesc, apIODesc, apPDesc, apDDesc, + aFDataM, aEPN, aET1M, aET2M, aPN, aFlmM, aFlmN, aLSize); + if( pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ForwardSHTDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ForwardSHTReshape.jdf b/src/runtime/parsec/jdf/ForwardSHTReshape.jdf new file mode 100644 index 00000000..7874ba14 --- /dev/null +++ b/src/runtime/parsec/jdf/ForwardSHTReshape.jdf @@ -0,0 +1,236 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static void FlmTReshape(double *apFlmT, int aM, double *apTemp, parsec_matrix_block_cyclic_t *apFlmTDesc, int aT) { + + int flmT_offset = aM * apFlmTDesc->super.mb; + int M = (aM == apFlmTDesc->super.lmt - 1)? apFlmTDesc->super.mb - flmT_offset : apFlmTDesc->super.mb; + if( aT-3 <= 0 ) return; + int N = aT-3; + int lda_flmT = apFlmTDesc->super.mb; + + // N * 1 + double *Y = apTemp; + // N * 3 + double *X = apTemp + N; + // 3 * 3 + double *XtX = apTemp + 4 * N; + // 3 * 1 + double *XtY = apTemp + 4 * N + 10; + double *phi = apTemp + 4 * N + 14; + + for(int i = 0; i < M; i++) { + // Get Y + for(int j = 0; j < N; j++) { + Y[j] = apFlmT[(j+3) * lda_flmT + i]; + } + + // Get X + for(int j = 0; j < N; j++) { + X[3*j+0] = apFlmT[(j+2) * lda_flmT + i]; + X[3*j+1] = apFlmT[(j+1) * lda_flmT + i]; + X[3*j+2] = apFlmT[(j+0) * lda_flmT + i]; + } + + // X transpose times X + double alpha = 1.0; + double beta = 0.0; + + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, + 3, 3, N, + alpha, X, 3, + X, 3, + beta, XtX, 3); + + // X transpose times y + cblas_dgemv(CblasRowMajor, CblasTrans, + N, 3, + alpha, X, 3, + Y, 1, + beta, XtY, 1); + + // Solve + LAPACKE_dposv(LAPACK_ROW_MAJOR, 'U', 3, 1, XtX, 3, XtY, 1); + + // Use phi to compute eps_out + cblas_dgemv(CblasRowMajor, CblasNoTrans, + N, 3, + -1.0, X, 3, + XtY, 1, + 1.0, &apFlmT[3*lda_flmT+i], lda_flmT); + } +} + + +%} + +/* Globals + */ +apFDataDesc [ type = "parsec_tiled_matrix_t*" ] +apFLMDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc] +apFLMTDesc [ type = "parsec_tiled_matrix_t*" ] +apET1Desc [ type = "parsec_tiled_matrix_t*" ] +apET2Desc [ type = "parsec_tiled_matrix_t*" ] +apEPDesc [ type = "parsec_tiled_matrix_t*" ] +apSLMNDesc [ type = "parsec_tiled_matrix_t*" ] +apIEDesc [ type = "parsec_tiled_matrix_t*" ] +apIODesc [ type = "parsec_tiled_matrix_t*" ] +apPDesc [ type = "parsec_tiled_matrix_t*" ] +apDDesc [ type = "parsec_tiled_matrix_t*" ] +apADesc [ type = "parsec_tiled_matrix_t*" ] + +aFlmTNB [ type = "int" ] +aLSize [ type = "int" ] +aT [ type = "int" ] + +/* Temporary buffer used for convert */ +apGmtheta_r [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apFmnm [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork1 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork2 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] + +/* GPU workspace */ +ws_gpu [ type = "void *" hidden = on default = NULL ] + +/* GPU number and index */ +nb_cuda_devices [ type = "int" hidden = on default = 0 ] +cuda_device_index [ type = "int *" hidden = on default = "NULL"] + + + +task(m) + +m = 0 .. apFLMTDesc->lmt-1 +m_s = %{ return m*aFlmTNB; %} +m_e = %{ return parsec_imin((m+1)*aFlmTNB-1, apADesc->lmt-1); %} + +: apFLMTDesc(m, 0) + +RW apFlmT <- apFLMTDesc(m, 0) + -> apFLMTDesc(m, 0) + + +BODY +{ + + double *pTemp = (double *) parsec_private_memory_pop(apWork1); + FlmTReshape(apFlmT, m, pTemp, (parsec_matrix_block_cyclic_t *) apFLMTDesc, aT); + parsec_private_memory_push(apWork1, pTemp); +} +END + +extern "C" %{ + + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +ForwardSHTReshape_constructor(parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, parsec_tiled_matrix_t *apADesc, + int aFDataM, int aEPN, int aET1M, int aET2M, int aPN, + int aFlmTNB, int aT, int aLSize) +{ + + parsec_ForwardSHTReshape_taskpool_t + *pTaskpool = parsec_ForwardSHTReshape_new(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, apEPDesc, + apSLMNDesc, apIEDesc, apIODesc, apPDesc, apDDesc, apADesc, aFlmTNB, aLSize, aT); + + pTaskpool->_g_apGmtheta_r = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apGmtheta_r, aFDataM * aEPN * sizeof(complex double) ); + + pTaskpool->_g_apFmnm = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apFmnm, aET1M * aEPN * sizeof(complex double) ); + + pTaskpool->_g_apWork1 = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork1, aET2M * aPN * sizeof(complex double) ); + + pTaskpool->_g_apWork2 = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork2, aET2M * aEPN * sizeof(complex double) ); + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + int nb = 0, *dev_index; + + /** Find all CUDA devices */ + hicma_parsec_find_cuda_devices( parsec, &dev_index, &nb); + + pTaskpool->_g_ws_gpu = (void *)gb->ws; + pTaskpool->_g_nb_cuda_devices = nb; + pTaskpool->_g_cuda_device_index = dev_index; +#endif + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHTReshape_DEFAULT_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFLMTDesc->mb, apFLMTDesc->nb, apFLMTDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1); + + return (parsec_taskpool_t*) pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ForwardSHTReshape_destructor(parsec_taskpool_t *apTaskpool) +{ + parsec_ForwardSHTReshape_taskpool_t *pTaskpool = (parsec_ForwardSHTReshape_taskpool_t *)apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHTReshape_DEFAULT_ADT_IDX]); + parsec_private_memory_fini(pTaskpool->_g_apGmtheta_r); + parsec_private_memory_fini(pTaskpool->_g_apFmnm); + parsec_private_memory_fini(pTaskpool->_g_apWork1); + parsec_private_memory_fini(pTaskpool->_g_apWork2); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int ForwardSHTReshape(parsec_context_t *apContext, int aRank, int aVerbose, parsec_tiled_matrix_t *apFDataDesc, + parsec_tiled_matrix_t *apFLMDesc, parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, parsec_tiled_matrix_t *apSLMNDesc, + parsec_tiled_matrix_t *apIEDesc, parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, parsec_tiled_matrix_t *apADesc, int aFDataM, int aEPN, int aET1M, int aET2M, + int aPN, int aFlmTNB, int aT, int aLSize, double *apNormGlobal, int aNT, int aUpperLower) +{ + parsec_taskpool_t *pTaskpool = ForwardSHTReshape_constructor(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, + apEPDesc, apSLMNDesc, apIEDesc, apIODesc, apPDesc, apDDesc, + apADesc, aFDataM, aEPN, aET1M, aET2M, aPN, aFlmTNB, aT, aLSize); + + if( pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ForwardSHTReshape_destructor(pTaskpool); + } + // Reshape + VERBOSE_PRINT(aRank, aVerbose, ("Redistribute apFLMTDesc -> desc_A\n")); + parsec_redistribute(apContext, apFLMTDesc, apADesc, apADesc->m, apADesc->n, 0, 0, 0, 0); + + double norm_flmT = 0.0; + GetMatrixNorm(apContext, apNormGlobal, apFLMTDesc, aNT, aUpperLower, 0); + norm_flmT = *apNormGlobal; + double norm_A = 0.0; + GetMatrixNorm(apContext, apNormGlobal, apADesc, aNT, aUpperLower, 0); + norm_A = *apNormGlobal; + + if( 0 == aRank ) { + fprintf(stderr, RED "norm_flmT %lf norm_A %lf\n" RESET, norm_flmT, norm_A); + } + + // Free memory + parsec_data_free(((parsec_matrix_block_cyclic_t *)apFLMTDesc)->mat); + parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)apFLMTDesc); + + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/GetMatrixNorm.jdf b/src/runtime/parsec/jdf/GetMatrixNorm.jdf new file mode 100644 index 00000000..705eaa40 --- /dev/null +++ b/src/runtime/parsec/jdf/GetMatrixNorm.jdf @@ -0,0 +1,139 @@ +extern "C" %{ +/** + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + **/ + +#include +#include + +%} + +/* Globals + */ +apADesc [ type = "parsec_tiled_matrix_t*" ] +aNT [ type = "int" ] +aUpperLower [ type = "int" ] +apNorm [ type = "double *" ] +apNormTile [ type = "double *" ] +aIsSymmetric [ type = "int" ] + +/************************************************** + * generate diagonal tiles * + **************************************************/ +task(m, n) + +// Execution space +m = 0 .. apADesc->mt-1 +n = 0 .. (aIsSymmetric ? m : apADesc->nt-1) // Adjust based on matrix symmetry + +// Parallel partitioning +:apADesc(m, n) + +// Parameters +READ D <- apADesc(m, n) + +BODY +{ + int ldd = BLKLDD(apADesc, m); + int tempmm = m == apADesc->mt-1 ? apADesc->m - m * apADesc->mb : apADesc->mb; + int tempnn = tempmm; + + /* Calcuate the global norm */ + int tid = es->th_id; + double current_value = 0.0; + apNormTile[n * aNT + m] = 0.0; + + // Diagonal modification for symmetric matrices + if (aIsSymmetric && m == n) { + for(int i = 0; i < apADesc->mb; i++) { + ((double *)D)[i * apADesc->mb + i] += 1.0e-6; + } + } + + // Norm calculation + for (int j = 0; j < apADesc->nb; j++) { + for (int i = 0; i < apADesc->mb; i++) { + current_value = ((double *) D)[j * apADesc->mb + i]; + apNormTile[n * aNT + m] += current_value * current_value; + } + } + + apNorm[tid] += apNormTile[n * aNT + m]; + apNormTile[n * aNT + m] = sqrt(apNormTile[n * aNT + m]); + + if(m - n >= aNT * PORTION_NORM ){ + apNorm[tid] = 0.0; + } +} +END + +extern "C" %{ + +/** + * Generate matrix + * @return the parsec object to schedule + */ +parsec_taskpool_t* +GetMatrixNormConstructor(parsec_tiled_matrix_t *apADesc, int aUpperLower, int aNT, + double *apNormTmp, double *apNormTile, int aIsSymmetric) +{ + + /* Check input arguments */ + if (aUpperLower != PlasmaLower) { + dplasma_error("STARSH_appr_New", "illegal value of uplo, should be PlasmaLower\n"); + return NULL; + } + + parsec_GetMatrixNorm_taskpool_t *pTaskpool = parsec_GetMatrixNorm_new(apADesc, aNT, aUpperLower, + apNormTmp, apNormTile, aIsSymmetric); + + return (parsec_taskpool_t*) pTaskpool; +} + +/* Destructor */ +void GetMatrixNormDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_taskpool_free(apTaskpool); +} + +/** + * Generate matrix + */ +void GetMatrixNorm(parsec_context_t *apContext, double *apNormGlobal, parsec_tiled_matrix_t *apADesc, + int aNT, int aUpperLower, int aIsSymmetric) +{ + + /* Only for 1 vp */ + assert(apContext->nb_vp == 1); + int nb_threads = apContext->virtual_processes[0]->nb_cores; + double *pNormTmp = (double *) calloc(nb_threads, sizeof(double)); + + /* Make sure norm_tile and norm_global is fresh */ + double* pNormTile = (double*) malloc(aNT * aNT * sizeof(double)); + memset(pNormTile, 0, aNT * aNT * sizeof(double)); + *apNormGlobal = 0.0; + parsec_taskpool_t *pTaskpool = GetMatrixNormConstructor(apADesc, aUpperLower, aNT, + pNormTmp, pNormTile, aIsSymmetric); + + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + GetMatrixNormDestructor(pTaskpool); + + /* Reduce to the global norm */ + double norm_process = 0.0; + for(int i = 0; i < nb_threads; i++) { + norm_process += pNormTmp[i]; + } + + MPI_Allreduce(MPI_IN_PLACE, pNormTile, aNT * aNT, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&norm_process, apNormGlobal, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + *apNormGlobal = sqrt(*apNormGlobal); + + free(pNormTile); + free(pNormTmp); +} + +%} diff --git a/src/runtime/parsec/jdf/InverseSHT.jdf b/src/runtime/parsec/jdf/InverseSHT.jdf new file mode 100644 index 00000000..5fc52e17 --- /dev/null +++ b/src/runtime/parsec/jdf/InverseSHT.jdf @@ -0,0 +1,162 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static void FlmToFlmT(double *apFlmT, double *apFlm, parsec_matrix_block_cyclic_t *apFLMTDesc, int aFlmM, int aFlmN, int aM, int aN) { + int flm_offset = aM * apFLMTDesc->super.mb; + int flmT_offset = aN * apFLMTDesc->super.mb; + int size = (aM == apFLMTDesc->super.lmt - 1) ? (aFlmM * aFlmN) - flm_offset : apFLMTDesc->super.mb; + memcpy(apFlmT + flmT_offset, apFlm + flm_offset, size*sizeof(double)); +} + + +%} + +/* Globals + */ +apFSpatialDesc [ type = "parsec_tiled_matrix_t*" ] +apFLMDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc] +apZLMDesc [ type = "parsec_tiled_matrix_t*" ] +apSCDesc [ type = "parsec_tiled_matrix_t*" ] +aLSize [ type = "int" ] + +/* Temporary buffer used for convert */ +apWork [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] + +/* GPU workspace */ +ws_gpu [ type = "void *" hidden = on default = NULL ] + +/* GPU number and index */ +nb_cuda_devices [ type = "int" hidden = on default = 0 ] +cuda_device_index [ type = "int *" hidden = on default = "NULL"] + +bind_gpu(n) + +n = 0 .. apFSpatialDesc->lnt-1 + +: apFSpatialDesc(0, n) + +READ flm <- apFLMDesc(0, n) + -> flm task(n) [ type_remote = flm ] + +READ f_spatial <- apFSpatialDesc(0, n) + -> f_spatial task(n) [ type_remote = f_spatial ] + + +BODY +{ +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + if( nb_cuda_devices > 0 ) { + int g = climate_emulator_gpu_load_balance( n, gb->nodes, nb_cuda_devices ); + parsec_advise_data_on_device( _f_flm->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + parsec_advise_data_on_device( _f_f_spatial->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + } +#endif +} +END + + +task(n) + +n = 0 .. apFSpatialDesc->lnt-1 + +: apFSpatialDesc(0, n) + +// TODO: check whether this will evict f_spatial first on GPU +RW f_spatial <- f_spatial bind_gpu(n) [ type_remote = f_spatial ] + -> apFSpatialDesc(0, n) + +READ flm <- flm bind_gpu(n) [ type_remote = flm ] +READ Zlm <- apZLMDesc(0, %{ return apFSpatialDesc->super.rank_of(&apFSpatialDesc->super, 0, n); %}) +READ SC <- apSCDesc(0, %{ return apFSpatialDesc->super.rank_of(&apFSpatialDesc->super, 0, n); %}) + +BODY +{ + double *pSmt = (double *) parsec_private_memory_pop(apWork); + InverseSHTHelper(flm, f_spatial, Zlm, SC, pSmt, aLSize); + parsec_private_memory_push(apWork, pSmt); + +} +END + + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +InverseSHTConstructor(parsec_tiled_matrix_t *apFSpatialDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apZLMDesc, parsec_tiled_matrix_t *apSCDesc, + int aLSize) +{ + + parsec_InverseSHT_taskpool_t *pTaskpool = parsec_InverseSHT_new(apFSpatialDesc, apFLMDesc, apZLMDesc, apSCDesc, aLSize); + + pTaskpool->_g_apWork = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork, (aLSize + 1) * (2 * aLSize - 1) * sizeof(double) ); + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + int nb = 0, *dev_index; + + /** Find all CUDA devices */ + + hicma_parsec_find_cuda_devices( parsec, &dev_index, &nb); + + pTaskpool->_g_ws_gpu = (void *)gb->ws; + pTaskpool->_g_nb_cuda_devices = nb; + pTaskpool->_g_cuda_device_index = dev_index; +#endif + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_f_spatial_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFSpatialDesc->mb, apFSpatialDesc->nb, apFSpatialDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_flm_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFLMDesc->mb, apFLMDesc->nb, apFLMDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void InverseSHTDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_InverseSHT_taskpool_t *pTaskpool = (parsec_InverseSHT_taskpool_t *) apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_f_spatial_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_flm_ADT_IDX]); + parsec_private_memory_fini(pTaskpool->_g_apWork); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int InverseSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFSpatialDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apZLMDesc, parsec_tiled_matrix_t *apSCDesc, int aLSize) { + + parsec_taskpool_t *pTaskpool = InverseSHTConstructor(apFSpatialDesc, apFLMDesc, apZLMDesc, apSCDesc, aLSize); + + if( pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + InverseSHTDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/MatrixCompress.jdf b/src/runtime/parsec/jdf/MatrixCompress.jdf new file mode 100644 index 00000000..c64a7c93 --- /dev/null +++ b/src/runtime/parsec/jdf/MatrixCompress.jdf @@ -0,0 +1,409 @@ +extern "C" %{ +/** + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + **/ + +#include +#include +#define GENERATE_RANDOM_DATA 0 + +%} + +/** Generate matrix + */ + +/* Globals + */ +apADesc [ type = "parsec_tiled_matrix_t*" ] +apArDesc [ type = "parsec_tiled_matrix_t*" ] +aBandSizeDense [ type = "int" ] +aNT [ type = "int" ] +aMaxRank [ type = "int" ] +aN [ type = "int" ] +apNorm [ type = "double *" ] +apNormTile [ type = "double *" ] +aAdaptiveDecision [ type = "int" ] +aTolerance [ type = "double" ] +aSendFullTile [ type = "int" ] +aAutoBand [ type = "int" ] +aGpus [ type = "int" ] +params_kernel [ type = "starsh_params_t *" ] + +aRsvd_oversample [ type = "int" hidden = on ] +aRsvd_lwork [ type = "size_t" hidden = on ] +aRsvd_liwork [ type = "size_t" hidden = on ] +aUv_work [ type = "parsec_memory_pool_t *" hidden = on default = NULL] +aD_work [ type = "parsec_memory_pool_t *" hidden = on default = NULL] +aRsvaD_work [ type = "parsec_memory_pool_t *" hidden = on default = NULL] +aRsvd_iwork [ type = "parsec_memory_pool_t *" hidden = on default = NULL] + +/************************************************** + * generate diagonal tiles * + **************************************************/ +generate_band(m, n) [high_priority = on] + +// Execution space +m = 0 .. apADesc->mt-1 +n = %{ return parsec_imax(m-aBandSizeDense+1, 0); %} .. m + +// Parallel partitioning +:apADesc(m, n) + +// Parameters +READ D <- apADesc(m, n) +READ D1 <- NULL [ type_remote = FULL ] + +BODY +{ + int ldd = BLKLDD(apADesc, m); + int tempmm = m == apADesc->mt-1 ? apADesc->m - m * apADesc->mb : apADesc->mb; + int tempnn = tempmm; + + /* New data_copy and allocate memory on band if not allocated */ +#if !BAND_MEMORY_CONTIGUOUS + if( aBandSizeDense < aNT || aAutoBand == 1 || !MEMORY_IN_CHOLEKSY_DP ) { + this_task->data._f_D.data_out = parsec_data_copy_new(data_of_apADesc(m, n), 0, PARSEC_MatrixCompress_FULL_ADT->opaque_dtt, PARSEC_DATA_FLAG_PARSEC_MANAGED); + if( aGpus > 0 ) { +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) + cudaMallocHost((void**)&this_task->data._f_D.data_out->device_private, apADesc->mb * apADesc->mb * sizeof(double)); +#endif + +#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + // TODO A better way + hipHostMalloc((void**)&this_task->data._f_D.data_out->device_private, apADesc->mb * apADesc->mb * sizeof(double), hipHostMallocDefault); +#endif + } else { + this_task->data._f_D.data_out->device_private = calloc(apADesc->mb * apADesc->mb, sizeof(double)); + } + } +#endif + + /* Calcuate the global norm */ + if( 1 || aAdaptiveDecision ) { + int tid = es->th_id; + double current_value = 0.0; + apNormTile[n*aNT+m] = 0.0; + + for(int j = 0; j < apADesc->nb; j++) { + for(int i = 0; i < apADesc->mb; i++) { + current_value = ((double *)this_task->data._f_D.data_out->device_private)[j*apADesc->mb+i]; + apNormTile[n * aNT + m] += current_value * current_value; + } + } + + apNorm[tid] += apNormTile[n * aNT + m]; + apNormTile[n * aNT + m] = sqrt(apNormTile[n * aNT + m]); + + if( m - n >= aNT * PORTION_NORM ) + apNorm[tid] = 0.0; + } +} +END + + +/************************************************** + **************************************************/ +READ_R(m, n) + +// Execution space +m = aBandSizeDense .. apADesc->mt-1 +n = 0 .. m-aBandSizeDense + +:apArDesc(m, n) + +READ R <- apArDesc(m, n) + -> R generate_approximate_L(m, n) [ type_remote = AR ] + +BODY +{ +} +END + + +/************************************************** + **************************************************/ +WRITE_R(m, n) + +// Execution space +m = aBandSizeDense .. apADesc->mt-1 +n = 0 .. m-aBandSizeDense + +:apArDesc(m, n) + +RW R <- R generate_approximate_L(m, n) [ type_remote = AR ] + -> apArDesc(m, n) + +BODY +{ +} +END + + +/************************************************** + * generate and approximate lower triangular part * + **************************************************/ +generate_approximate_L(m, n) [high_priority = on] + +// Execution space +m = aBandSizeDense .. apADesc->mt-1 +n = 0 .. m-aBandSizeDense + +// Parallel partitioning +:apADesc(m, n) + +// Parameters +RW R <- R READ_R(m, n) [ type_remote = AR ] + -> R WRITE_R(m, n) [ type_remote = AR ] + +READ A <- NULL [ type_remote = UV ] + +BODY +{ + int size = 0; + int ldU = BLKLDD(apADesc, m); + int ldV = BLKLDD(apADesc, m); + int tempmm = m == apADesc->mt-1 ? apADesc->m - m * apADesc->mb : apADesc->mb; + int tempnn = n == apADesc->mt-1 ? apADesc->m - m * apADesc->mb : apADesc->mb; + void *U = parsec_private_memory_pop(aUv_work); + void *tmp_D = parsec_private_memory_pop(aD_work); + void *work = parsec_private_memory_pop(aRsvaD_work); + void *iwork = parsec_private_memory_pop(aRsvd_iwork); + int rank = -1; + void *V = (void *)U + apADesc->mb * aMaxRank * sizeof(double); + +#if GENERATE_RANDOM_DATA + CORE_dplgsy( + aN, tempmm, tempnn, tmp_D, ldU, + apADesc->m, m*apADesc->mb, n*apADesc->nb, 3872 ); +#else + params_kernel->kernel(tempmm, tempnn, params_kernel->index + m*apADesc->mb, + params_kernel->index + n*apADesc->mb, params_kernel->data, params_kernel->data, tmp_D, + tempmm); +#endif + + /* Calcuate the global norm */ + if( 1 || aAdaptiveDecision ) { + int tid = es->th_id; + double current_value = 0.0; + apNormTile[n*aNT+m] = 0.0; + + for(int j = 0; j < apADesc->nb; j++) { + for(int i = 0; i < apADesc->mb; i++) { + current_value = ((double *)tmp_D)[j*apADesc->mb+i]; + //norm[tid] += current_value * current_value; + apNormTile[n*aNT+m] += current_value * current_value; + } + } + + apNorm[tid] += apNormTile[n*aNT+m]; + apNormTile[n*aNT+m] = sqrt(apNormTile[n*aNT+m]); + + if( m - n >= aNT*PORTION_NORM ) + apNorm[tid] = 0.0; + } + +#if 1 + starsh_dense_dlrrsdd(tempmm, tempnn, tmp_D, tempmm, U, ldU, V, ldV, &rank, + aMaxRank, aRsvd_oversample, aTolerance, work, aRsvd_lwork, iwork); +#else + int maxrank_used = hicma_parsec_min(100, aMaxRank); + while( 1 ) { + starsh_dense_dlrrsdd(tempmm, tempnn, tmp_D, tempmm, U, ldU, V, ldV, &rank, + maxrank_used, aRsvd_oversample, aTolerance, work, aRsvd_lwork, iwork); + maxrank_used *= 2; + maxrank_used = hicma_parsec_min( apADesc->nb / 2, maxrank_used ); + if( rank != -1 || maxrank_used > apADesc->nb / 2 ) break; + + params_kernel->kernel(tempmm, tempnn, params_kernel->index + m*apADesc->mb, + params_kernel->index + n*apADesc->mb, params_kernel->data, params_kernel->data, tmp_D, + tempmm); + } +#endif + + if(rank == -1) { + printf("Tile(%d, %d) is dense, try increasing NB or aMaxRank \n", m, n); + } else { + /* Update R and size */ + *(int *)R = rank; + + if(aSendFullTile == 1){ /* Storage of UV tiles is MB by maxrank by 2 */ + size = apADesc->mb * aMaxRank * 2; + } else { + size = apADesc->mb * parsec_imin(aMaxRank, rank) * 2; + } + + /* New data_copy and allocate memory for apADesc(m, n); + * For off band, if send_full_tile, allocate mb * maxrank * 2, + * else, size = mb * min(maxrank, rank) * 2 + */ + this_task->data._f_A.data_out = parsec_data_copy_new(data_of_apADesc(m, n), 0, PARSEC_MatrixCompress_UV_ADT->opaque_dtt, PARSEC_DATA_FLAG_PARSEC_MANAGED); + this_task->data._f_A.data_out->device_private = calloc(size, sizeof(double)); + + /* New nb_elts for data_of(m, n) */ + (data_of_apADesc(m, n))->nb_elts = size * sizeof(double); + + /* Copy U to A */ + memcpy((void *)this_task->data._f_A.data_out->device_private, + (void *)U, apADesc->mb * rank * sizeof(double)); + + /* Copy V to A */ + memcpy((void *)this_task->data._f_A.data_out->device_private + apADesc->mb * rank * sizeof(double), + (void *)V, apADesc->mb * rank * sizeof(double)); + } + + parsec_private_memory_push(aUv_work, U); + parsec_private_memory_push(aD_work, tmp_D); + parsec_private_memory_push(aRsvaD_work, work); + parsec_private_memory_push(aRsvd_iwork, iwork); +} +END + +extern "C" %{ + +/** + * Generate matrix + * @return the parsec object to schedule + */ +parsec_taskpool_t* +MatrixCompress_constructor(int aUpperLower, int aBandSizeDense, int aNT, int aMaxRank, int aN, double *apNormTmp, double *apNormTile, + int aAdaptiveDecision, int aTolerance, int aSendFullTile, int aAutoBand, int aGpus, + hicma_parsec_data_t *data, starsh_params_t *params_kernel) +{ + + parsec_tiled_matrix_t *apADesc = (parsec_tiled_matrix_t *)&data->dcA; + // TODO: get */params_tlr->auto_band == 0 &&*/ + if(aBandSizeDense >= aNT && MEMORY_IN_CHOLEKSY_DP ) { + apADesc = (parsec_tiled_matrix_t *)&data->dcAd; + } + parsec_tiled_matrix_t *apArDesc = (parsec_tiled_matrix_t *)&data->dcAr; + + /* Check input arguments */ + if (aUpperLower != PlasmaLower) { + dplasma_error("STARSH_appr_New", "illegal value of uplo, should be PlasmaLower\n"); + return NULL; + } + + /* Check aBandSizeDense */ + if(aBandSizeDense < 1 ) { + if(0 == apADesc->super.myrank ) + fprintf(stderr, "\nERROR: band_size_dense should be not less that 1 : %d\n\n", aBandSizeDense); + exit(1); + } + + /* Calculate workspace */ + int rsvd_oversample = 10; + int mn = rsvd_oversample + aMaxRank; + if(mn > apADesc->mb) { + mn = apADesc->mb; + } + size_t rsvd_lwork = (4*mn+7) * mn; + + if(rsvd_lwork < apADesc->mb){ + rsvd_lwork = apADesc->mb; + } + rsvd_lwork += mn*(3*apADesc->mb+mn+1); + size_t rsvd_liwork = 8*mn; + + parsec_MatrixCompress_taskpool_t *pTaskpool = + parsec_MatrixCompress_new(apADesc, apArDesc, aBandSizeDense, aNT, aMaxRank, aN, + apNormTmp, apNormTile, aAdaptiveDecision, aTolerance, aSendFullTile, + aAutoBand, aGpus, params_kernel); + + pTaskpool->_g_apNorm = apNormTmp; + pTaskpool->_g_aRsvd_oversample = rsvd_oversample; + pTaskpool->_g_aRsvd_lwork = rsvd_lwork; + pTaskpool->_g_aRsvd_liwork = rsvd_liwork; + + /* Memery pool */ + pTaskpool->_g_aUv_work = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aUv_work, (apADesc->mb*aMaxRank*2)*sizeof(double)); + + pTaskpool->_g_aD_work = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aD_work, (apADesc->mb*apADesc->mb)*sizeof(double)); + + pTaskpool->_g_aRsvaD_work = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aRsvaD_work, rsvd_lwork*sizeof(double)); + + pTaskpool->_g_aRsvd_iwork = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aRsvd_iwork, rsvd_liwork*sizeof(int)); + + /* Arena */ + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_FULL_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apADesc->mb, apADesc->mb, apADesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_UV_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apADesc->mb, aMaxRank*2, apADesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_AR_ADT_IDX], + parsec_datatype_int_t, PARSEC_MATRIX_FULL, + 1, 1, 1, 1, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/* Destructor */ +void MatrixCompress_destructor(parsec_taskpool_t *apTaskpool) +{ + parsec_MatrixCompress_taskpool_t *pTaskpool = (parsec_MatrixCompress_taskpool_t *)apTaskpool; + + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_FULL_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_UV_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_AR_ADT_IDX]); + + parsec_private_memory_fini(pTaskpool->_g_aUv_work ); + parsec_private_memory_fini(pTaskpool->_g_aD_work ); + parsec_private_memory_fini(pTaskpool->_g_aRsvaD_work ); + parsec_private_memory_fini(pTaskpool->_g_aRsvd_iwork ); + + parsec_taskpool_free(apTaskpool); +} + +/** + * Generate matrix + */ +void MatrixCompress(parsec_context_t *apContext, double *apNormGlobal, int aUpperLower, int aBandSizeDense, int aNT, + int aMaxRank, int aN, int aAdaptiveDecision, int aTolerance, int aSendFullTile, int aAutoBand, + int aGpus, hicma_parsec_data_t *data, starsh_params_t *params_kernel) +{ + + /* Only for 1 vp */ + assert(apContext->nb_vp == 1); + int nb_threads = apContext->virtual_processes[0]->nb_cores; + double *pNormTmp = (double *) calloc(sizeof(double), nb_threads); + + /* Make sure norm_tile and norm_global is fresh */ + double* pNormTile = (double*) malloc(aNT * aNT * sizeof(double)); + memset(pNormTile, 0, aNT * aNT * sizeof(double)); + // Make sure norm_tile and norm_global is fresh + *apNormGlobal = 0.0; + parsec_taskpool_t *pTaskpool = + MatrixCompress_constructor(aUpperLower, aBandSizeDense, aNT, aMaxRank, aN, pNormTmp, pNormTile, + aAdaptiveDecision, aTolerance, aSendFullTile, aAutoBand, aGpus, data, params_kernel); + + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + MatrixCompress_destructor(pTaskpool); + + /* Reduce to the global norm */ + double norm_process = 0.0; + for( int i = 0; i < nb_threads; i++ ) { + norm_process += pNormTmp[i]; + } + + MPI_Allreduce(MPI_IN_PLACE, pNormTile, aNT * aNT, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&norm_process, apNormGlobal, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + *apNormGlobal = sqrt(*apNormGlobal); + + free(pNormTmp); + free(pNormTile); +} + +%} diff --git a/src/runtime/parsec/jdf/MeanSquaredError.jdf b/src/runtime/parsec/jdf/MeanSquaredError.jdf new file mode 100644 index 00000000..c925aff7 --- /dev/null +++ b/src/runtime/parsec/jdf/MeanSquaredError.jdf @@ -0,0 +1,96 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static double NormCore(complex double *apDataA, double *apDataB, int aM, int aN, int aLSize) { + double result = 0.0, diff; + for(int j = 0; j < aN; j++) { + for(int i = 0; i < aM; i++) { + double diff = creal(apDataA[j*aM+i]) - apDataB[j*aM+i]; + result += diff * diff; + } + } + return sqrt(result) / (2*aLSize*(aLSize+1)); +} + +%} + +apFDataDesc [ type = "parsec_tiled_matrix_t*" ] +apFSpatialDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc ] +aLSize [type="int"] + +task(n) + +n = 0 .. apFDataDesc->lnt-1 + +: apFDataDesc(0, n) + +READ f_data <- apFDataDesc(0, n) [ type = f_data ] +READ f_spatial <- apFSpatialDesc(0, n) [ type = f_spatial ] + +BODY +{ + double norm = NormCore(f_data, f_spatial, apFDataDesc->mb, apFDataDesc->nb, aLSize); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +MeanSquaredErrorConstructor( parsec_matrix_block_cyclic_t * apDataDesc, parsec_matrix_block_cyclic_t * apSpatialDesc, int aLSize) +{ + int mb = apDataDesc->super.mb; + int nb = apDataDesc->super.nb; + parsec_MeanSquaredError_taskpool_t *pTaskpool = parsec_MeanSquaredError_new(&apDataDesc->super, &apSpatialDesc->super, aLSize); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_data_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, mb, nb, mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_spatial_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, mb, nb, mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void MeanSquaredErrorDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_MeanSquaredError_taskpool_t *pTaskpool = (parsec_MeanSquaredError_taskpool_t *)apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_data_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_spatial_ADT_IDX]); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int MeanSquaredError(parsec_context_t *apContext, parsec_matrix_block_cyclic_t* apDataDesc, + parsec_matrix_block_cyclic_t* apSpatialDesc, int aLSize) +{ + parsec_taskpool_t *pParsec_MeanSquaredError = NULL; + pParsec_MeanSquaredError = MeanSquaredErrorConstructor(apDataDesc, apSpatialDesc, aLSize); + if( pParsec_MeanSquaredError != NULL ){ + parsec_context_add_taskpool(apContext, pParsec_MeanSquaredError); + parsec_context_start(apContext); + parsec_context_wait(apContext); + MeanSquaredErrorDestructor(pParsec_MeanSquaredError); + } + + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ReadCSV.jdf b/src/runtime/parsec/jdf/ReadCSV.jdf new file mode 100644 index 00000000..475f27de --- /dev/null +++ b/src/runtime/parsec/jdf/ReadCSV.jdf @@ -0,0 +1,123 @@ + +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static int ReadCSVCore(const char* apFilename, double *apData, int aM, int aN, int aGpus) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); + return -1; + } + + int status = 0; + if( 0 == aGpus ) { + complex double *pData = (complex double *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf,", &apData[j*aM+i]); +#if DEBUG_INFO_GB24 + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } +#endif + } + } + } + fclose(pFile); + return 0; +} + +%} + +pDescA [ type = "parsec_tiled_matrix_t*" ] +pFilename [ type = "char *" ] +nb_gpus [ type = "int" ] + +task(m, n) + +m = 0 .. pDescA->lmt-1 +n = 0 .. pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + ReadCSVCore(pFilename, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumDoubleData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + int kq = (aTimeSlot%aNodes)? aTimeSlot/aNodes+1 : aTimeSlot/aNodes; + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aNodes, 0, 0, aMB, aNB*aNodes, 1, aNodes, 1, kq, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSV_taskpool_t *taskpool = parsec_ReadCSV_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSV_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSV_taskpool_t *ReadCSV_taskpool = (parsec_ReadCSV_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSV_taskpool->arenas_datatypes[PARSEC_ReadCSV_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSV(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); + parsec_taskpool_t *pTaskpool = ReadCSVConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ReadCSVDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ReadCSVComplex.jdf b/src/runtime/parsec/jdf/ReadCSVComplex.jdf new file mode 100644 index 00000000..5d4196c2 --- /dev/null +++ b/src/runtime/parsec/jdf/ReadCSVComplex.jdf @@ -0,0 +1,133 @@ + +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static int ReadCSVComplexCore(const char* apFilename, complex double *apData, int aM, int aN, int aGpus) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); + return -1; + } + + size_t len = 0, row = 0; + ssize_t read; + double real, imag; + int status; + + if( 0 == aGpus ) { + complex double *pData = (complex double *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + status = fscanf(pFile, "%lf%lfi,", &real, &imag); + pData[j*aM+i] = real + imag * I; + } + } + } else { + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + cuDoubleComplex *pData = (cuDoubleComplex *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf%lfi,", &real, &imag); + pData[j*aM+i] = make_cuDoubleComplex(real, imag); + } + } +#endif + } + + + fclose(pFile); + return 0; +} + +%} + +pDescA [ type = "parsec_tiled_matrix_t*" ] +pFilename [ type = "char *" ] +nb_gpus [ type = "int" ] + +task(m, n) + +m = 0 .. pDescA->lmt-1 +n = 0 .. pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + ReadCSVComplexCore(pFilename, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumComplexData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVComplexConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, + int aNB, int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_COMPLEX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aNodes, 0, 0, aMB, aNB*aNodes, 1, aNodes, 1, 1, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVComplex_taskpool_t *taskpool = parsec_ReadCSVComplex_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVComplex_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVComplexDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVComplex_taskpool_t *ReadCSVComplex_taskpool = (parsec_ReadCSVComplex_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVComplex_taskpool->arenas_datatypes[PARSEC_ReadCSVComplex_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); + parsec_taskpool_t *pTaskpool = ReadCSVComplexConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ReadCSVComplexDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ReadCSVTimeSlot.jdf b/src/runtime/parsec/jdf/ReadCSVTimeSlot.jdf new file mode 100644 index 00000000..eeba2c8c --- /dev/null +++ b/src/runtime/parsec/jdf/ReadCSVTimeSlot.jdf @@ -0,0 +1,123 @@ + +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static int ReadCSVTimeSlotCore(const char* apFilename, double *apData, int aM, int aN, int aGpus) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); + return -1; + } + + int status = 0; + if( 0 == aGpus ) { + complex double *pData = (complex double *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf,", &apData[j*aM+i]); +#if DEBUG_INFO_GB24 + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } +#endif + } + } + } + fclose(pFile); + return 0; +} + +%} + +pDescA [ type = "parsec_tiled_matrix_t*" ] +pFilename [ type = "char *" ] +nb_gpus [ type = "int" ] + +task(m, n) + +m = 0 .. pDescA->lmt-1 +n = 0 .. pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + ReadCSVTimeSlotCore(pFilename, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumDoubleData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVTimeSlotConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + int kq = (aTimeSlot%aNodes)? aTimeSlot/aNodes+1 : aTimeSlot/aNodes; + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aTimeSlot, 0, 0, aMB, aNB*aTimeSlot, 1, aNodes, 1, kq, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVTimeSlot_taskpool_t *taskpool = parsec_ReadCSVTimeSlot_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVTimeSlot_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVTimeSlotDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVTimeSlot_taskpool_t *ReadCSVTimeSlot_taskpool = (parsec_ReadCSVTimeSlot_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVTimeSlot_taskpool->arenas_datatypes[PARSEC_ReadCSVTimeSlot_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); + parsec_taskpool_t *pTaskpool = ReadCSVTimeSlotConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ReadCSVTimeSlotDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ReadCSVToComplex.jdf b/src/runtime/parsec/jdf/ReadCSVToComplex.jdf new file mode 100644 index 00000000..2aa68c3d --- /dev/null +++ b/src/runtime/parsec/jdf/ReadCSVToComplex.jdf @@ -0,0 +1,130 @@ + +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static int ReadCSVToComplexCore(const char* apFilename, void *apData, int aM, int aN, int aGpus) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); + return -1; + } + + double real; + int status; + if( 0 == aGpus ) { + complex double *pData = (complex double *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + status = fscanf(pFile, "%lf,", &real); + pData[j*aM+i] = (complex double)real; + } + } + } else { + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + cuDoubleComplex *pData = (cuDoubleComplex *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf,", &real); + pData[j*aM+i] = make_cuDoubleComplex(real, 0); + } + } +#endif + } + + + fclose(pFile); + return 0; +} + +%} + +pDescA [ type = "parsec_tiled_matrix_t*" ] +pFilename [ type = "char *" ] +nb_gpus [ type = "int" ] + +task(m, n) + +m = 0 .. pDescA->lmt-1 +n = 0 .. pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + ReadCSVToComplexCore(pFilename, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumComplexData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVToComplexConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, + int aNB, int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_COMPLEX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aNodes, 0, 0, aMB, aNB*aNodes, 1, aNodes, 1, 1, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVToComplex_taskpool_t *taskpool = parsec_ReadCSVToComplex_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVToComplex_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVToComplexDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVToComplex_taskpool_t *ReadCSVToComplex_taskpool = (parsec_ReadCSVToComplex_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVToComplex_taskpool->arenas_datatypes[PARSEC_ReadCSVToComplex_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVToComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); + parsec_taskpool_t *pTaskpool = ReadCSVToComplexConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ReadCSVToComplexDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ReadCSVToComplexTimeSlot.jdf b/src/runtime/parsec/jdf/ReadCSVToComplexTimeSlot.jdf new file mode 100644 index 00000000..dc3cb4a3 --- /dev/null +++ b/src/runtime/parsec/jdf/ReadCSVToComplexTimeSlot.jdf @@ -0,0 +1,154 @@ + +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static int ReadCSVToComplexTimeSlotCore(const char* apFilename, void *apData, int aM, int aN, int aGpus) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); + return -1; + } + + int status = 0; + double real; + if( 0 == aGpus ) { + complex double *pData = (complex double *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf\n", &real); + pData[j*aM+i] = (complex double)real; +#if DEBUG_INFO_GB24 + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } +#endif + } + } + +#if DEBUG_INFO_GB24 + climate_emulator_print_matrix_col_complex(pData, 10, 10, aM); +#endif + + + } else { + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + cuDoubleComplex *pData = (cuDoubleComplex *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf,", &real); + pData[j*aM+i] = make_cuDoubleComplex(real, 0); +#if DEBUG_INFO_GB24 + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } +#endif + } + } +#endif + + } + fclose(pFile); + return 0; +} + +%} + +pDescA [ type = "parsec_tiled_matrix_t*" ] +pFilename [ type = "char *" ] +nb_gpus [ type = "int" ] + +task(m, n) + +m = 0 .. pDescA->lmt-1 +n = 0 .. pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + char *pFileZ_data = (char *) malloc(200 * sizeof(char)); + snprintf(pFileZ_data, 200, "%s%s%d%s", pFilename,"/z_", n, ".csv"); + ReadCSVToComplexTimeSlotCore(pFileZ_data, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumComplexData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVToComplexTimeSlotConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + int kq = (aTimeSlot%aNodes)? aTimeSlot/aNodes+1 : aTimeSlot/aNodes; + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_COMPLEX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aTimeSlot, 0, 0, aMB, aNB*aTimeSlot, 1, aNodes, 1, kq, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVToComplexTimeSlot_taskpool_t *taskpool = parsec_ReadCSVToComplexTimeSlot_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVToComplexTimeSlot_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVToComplexTimeSlotDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVToComplexTimeSlot_taskpool_t *ReadCSVToComplexTimeSlot_taskpool = (parsec_ReadCSVToComplexTimeSlot_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVToComplexTimeSlot_taskpool->arenas_datatypes[PARSEC_ReadCSVToComplexTimeSlot_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVToComplexTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); + parsec_taskpool_t *pTaskpool = ReadCSVToComplexTimeSlotConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ReadCSVToComplexTimeSlotDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/tests/cpp-tests/configurations/TestConfigurations.cpp b/tests/cpp-tests/configurations/TestConfigurations.cpp index cfe0afa8..2ce15865 100644 --- a/tests/cpp-tests/configurations/TestConfigurations.cpp +++ b/tests/cpp-tests/configurations/TestConfigurations.cpp @@ -63,7 +63,6 @@ void TEST_ARGUMENT_INITIALIZATION() { // No data modeling arguments initialized REQUIRE_THROWS(configurations.GetMaxMleIterations()); - REQUIRE_THROWS(configurations.GetTolerance()); // No data prediction arguments initialized REQUIRE(configurations.GetIsMSPE() == false); @@ -133,6 +132,10 @@ void TEST_ARGUMENT_INITIALIZATION_PARSEC() { // No data generation arguments initialized REQUIRE(configurations.GetDataPath() == string("")); + + // Passing an empty data path throws an exception + REQUIRE_THROWS(configurations.InitializeDataGenerationArguments()); + } @@ -208,7 +211,7 @@ void TEST_COPY_CONSTRUCTOR() { Configurations synthetic_data_configurations; synthetic_data_configurations.SetProblemSize(10); synthetic_data_configurations.SetKernelName("BivariateSpacetimeMaternStationary"); - synthetic_data_configurations.SetPrecision(exageostat::common::MIXED); + synthetic_data_configurations.SetPrecision(MIXED); synthetic_data_configurations.SetLoggerPath("any/path"); vector lb{0.1, 0.1, 0.1}; synthetic_data_configurations.SetLowerBounds(lb);