From fb2adf18ef0b88287c00f05b0862c06424e0d59c Mon Sep 17 00:00:00 2001 From: Adam Lugowski Date: Fri, 7 Jul 2023 21:07:59 -0700 Subject: [PATCH] Add generator --- README.generator.md | 68 +++++++++++++ README.md | 32 ++++++ benchmark/CMakeLists.txt | 1 + benchmark/bench_generator.cpp | 42 ++++++++ include/fast_matrix_market/app/generator.hpp | 101 +++++++++++++++++++ tests/basic_test.cpp | 57 +++++++++++ 6 files changed, 301 insertions(+) create mode 100644 README.generator.md create mode 100644 benchmark/bench_generator.cpp create mode 100644 include/fast_matrix_market/app/generator.hpp diff --git a/README.generator.md b/README.generator.md new file mode 100644 index 0000000..46688b2 --- /dev/null +++ b/README.generator.md @@ -0,0 +1,68 @@ +# Procedurally generate Matrix Market + +The `fast_matrix_market` write mechanism can write procedurally generated data. + +To make this process simpler, the `generator.hpp` header includes a function that can generate a coordinate Matrix Market file +where each `row`, `column`, `value` triplet is individually generated using a *Callable*. + +# Usage + +```c++ +#include <fast_matrix_market/app/generator.hpp> +``` + +Create a *Callable* with the signature: +```c++ +void generate_tuple(int64_t coo_index, IT &row, IT &col, VT &value); +``` +where: +* `coo_index` is an input parameter with the index of the tuple to be generated. +* `row`, `col`, `value` are the output parameters defining the generated tuple. +* `IT` is the integral type of the row and column indices, e.g. `int64_t` or `int`. +* `VT` is the value type, e.g. `double` or `float`. + + +Then call `fast_matrix_market::write_matrix_market_generated_triplet<IT, VT>` which takes the output stream, +the header, number of nonzeros `nnz`, and the callable. + +The callable is called when a value of a tuple is needed, so eventually it will be called for every index in the half-open range [0, `nnz`). +The calls may be out of order and in parallel. The callable must be thread safe. 
+ +The Matrix Market `field` type is deduced from `VT`, or can be set to `pattern` in the header. + + +### Example: Generate an identity matrix + +```c++ +// #rows, #cols, and nnz +const int64_t eye_rank = 10; + +fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>( + output_stream, {eye_rank, eye_rank}, eye_rank, + [](auto coo_index, auto& row, auto& col, auto& value) { + row = coo_index; + col = coo_index; + value = 1; + }); +``` + +### Example: Generate a random matrix + +Generate a 100-by-100 matrix with 1000 randomized elements. +```c++ +void generate_random_tuple([[maybe_unused]] int64_t coo_index, int64_t &row, int64_t &col, double& value) { + // The RNG is cheap to use but expensive to create and not thread safe. + // Use thread_local to create one instance per thread. + static thread_local std::mt19937 generator; + // distribution objects are effectively optimized away + std::uniform_int_distribution<int64_t> index_distribution(0, 99); + std::uniform_real_distribution<double> value_distribution(0, 1); + + row = index_distribution(generator); + col = index_distribution(generator); + value = value_distribution(generator); +} + +fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>( + output_stream, {100, 100}, 1000, generate_random_tuple); +``` diff --git a/README.md b/README.md index f09b1fd..2b144e0 100644 --- a/README.md +++ b/README.md @@ -93,6 +93,8 @@ The methods also accept an optional `header` argument that can be used to read a Matrix composed of row and column index vectors and a value vector. Any vector class that can be resized and iterated like `std::vector` will work. ```c++ +#include <fast_matrix_market/app/triplet.hpp> + struct triplet_matrix { int64_t nrows = 0, ncols = 0; std::vector<int64_t> rows, cols; @@ -116,6 +118,8 @@ Any vector class that can be resized and iterated like `std::vector` will work. Be mindful of whether your code expects row or column major ordering. 
```c++ +#include <fast_matrix_market/app/array.hpp> + struct array_matrix { int64_t nrows = 0, ncols = 0; std::vector<double> vals; // or int64_t, float, std::complex<double>, etc. @@ -133,6 +137,8 @@ fast_matrix_market::read_matrix_market_array( ## GraphBLAS `GrB_Matrix` and `GrB_Vector`s are supported, with zero-copy where possible. See [GraphBLAS README](README.GraphBLAS.md). ```c++ +#include <fast_matrix_market/app/GraphBLAS.hpp> + GrB_Matrix A; fast_matrix_market::read_matrix_market_graphblas(input_stream, &A); ``` @@ -141,6 +147,8 @@ fast_matrix_market::read_matrix_market_graphblas(input_stream, &A); ## Eigen Sparse and dense matrices and vectors are supported. See [Eigen README](README.Eigen.md). ```c++ +#include <fast_matrix_market/app/Eigen.hpp> + Eigen::SparseMatrix<double> mat; fast_matrix_market::read_matrix_market_eigen(input_stream, mat); ``` @@ -148,6 +156,8 @@ fast_matrix_market::read_matrix_market_eigen(input_stream, mat); ## SuiteSparse CXSparse `cs_xx` structures (in both COO and CSC modes) are supported. See [CXSparse README](README.CXSparse.md). ```c++ +#include <fast_matrix_market/app/CXSparse.hpp> + cs_dl *A; fast_matrix_market::read_matrix_market_cxsparse(input_stream, &A, cs_dl_spalloc); ``` @@ -155,6 +165,8 @@ fast_matrix_market::read_matrix_market_cxsparse(input_stream, &A, cs_dl_spalloc) ## Blaze [Blaze](https://bitbucket.org/blaze-lib/blaze) sparse and dense matrices and vectors are supported. See [Blaze README](README.Blaze.md). ```c++ +#include <fast_matrix_market/app/Blaze.hpp> + blaze::CompressedMatrix<double> A; fast_matrix_market::read_matrix_market_blaze(input_stream, A); ``` @@ -162,6 +174,8 @@ fast_matrix_market::read_matrix_market_blaze(input_stream, A); ## Armadillo [Armadillo](https://arma.sourceforge.net/) sparse and dense matrices are supported. See [Armadillo README](README.Armadillo.md). ```c++ +#include <fast_matrix_market/app/Armadillo.hpp> + arma::SpMat<double> A; fast_matrix_market::read_matrix_market_arma(input_stream, A); ``` @@ -174,6 +188,24 @@ Next read or write the body. You'll mostly just need to provide `parse_handler` Follow the example of the triplet and array implementations in [include/fast_matrix_market/app/](include/fast_matrix_market/app). 
+## Generator + +The `fast_matrix_market` write mechanism can write procedurally generated data as well as materialized data structures. +See [generator README](README.generator.md). + +For example, write a 10-by-10 identity matrix to `output_stream`: +```c++ +#include <fast_matrix_market/app/generator.hpp> + +fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>( + output_stream, {10, 10}, 10, + [](auto coo_index, auto& row, auto& col, auto& value) { + row = coo_index; + col = coo_index; + value = 1; + }); +``` + # Installation `fast_matrix_market` is written in C++17. Parallelism uses C++11 threads. Header-only if optional dependencies are disabled. diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 6774171..318f1e7 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -19,6 +19,7 @@ add_executable(fmm_bench bench_iostream.cpp bench_triplet.cpp bench_csc.cpp + bench_generator.cpp main.cpp fmm_bench.hpp) target_link_libraries(fmm_bench benchmark::benchmark fast_matrix_market::fast_matrix_market) diff --git a/benchmark/bench_generator.cpp b/benchmark/bench_generator.cpp new file mode 100644 index 0000000..4868b87 --- /dev/null +++ b/benchmark/bench_generator.cpp @@ -0,0 +1,42 @@ +// Copyright (C) 2023 Adam Lugowski. All rights reserved. +// Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file. +// SPDX-License-Identifier: BSD-2-Clause + +#include <sstream> + +#include "fmm_bench.hpp" +#include <fast_matrix_market/app/generator.hpp> + +using VT = double; +static int num_iterations = 3; + +/** + * Write a generated identity matrix. 
+ */ +static void generate_eye(benchmark::State& state) { + const int64_t eye_rank = 1 << 22; + + std::size_t num_bytes = 0; + + fast_matrix_market::write_options options; + options.parallel_ok = true; + options.num_threads = (int)state.range(0); + + for ([[maybe_unused]] auto _ : state) { + std::ostringstream oss; + fast_matrix_market::write_matrix_market_generated_triplet<int64_t, VT>( + oss, {eye_rank, eye_rank}, eye_rank, + [](auto coo_index, auto& row, auto& col, auto& value) { + row = coo_index; + col = coo_index; + value = 1; + }, options); + + num_bytes += oss.str().size(); + benchmark::ClobberMemory(); + } + + state.SetBytesProcessed((int64_t)num_bytes); +} + +BENCHMARK(generate_eye)->Name("op:write/matrix:generated_eye/impl:FMM/lang:C++")->UseRealTime()->Iterations(num_iterations)->Apply(NumThreadsArgument); diff --git a/include/fast_matrix_market/app/generator.hpp b/include/fast_matrix_market/app/generator.hpp new file mode 100644 index 0000000..8fdd8d9 --- /dev/null +++ b/include/fast_matrix_market/app/generator.hpp @@ -0,0 +1,101 @@ +// Copyright (C) 2023 Adam Lugowski. All rights reserved. +// Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file. 
+// SPDX-License-Identifier: BSD-2-Clause + +#pragma once + +#include "../fast_matrix_market.hpp" + +namespace fast_matrix_market { + /** + * Format (row, col, value) triplets generated by a generator callable like this one: + * [](auto coo_index, auto& row, auto& col, auto& value) { row = coo_index; col = coo_index; value = 1; } + */ + template<typename LF, typename IT, typename VT, typename GEN_CALLABLE> + class coo_independent_generator_formatter { + public: + explicit coo_independent_generator_formatter(LF lf, int64_t nnz, GEN_CALLABLE gen_callable) : + line_formatter(lf), nnz(nnz), gen_callable(gen_callable) { + if (nnz < 0) { + throw invalid_argument("nnz cannot be negative."); + } + } + + [[nodiscard]] bool has_next() const { + return next_chunk_offset < nnz; + } + + class chunk { + public: + explicit chunk(LF lf, int64_t chunk_offset, int64_t chunk_nnz, GEN_CALLABLE gen_callable) : + line_formatter(lf), chunk_offset(chunk_offset), chunk_nnz(chunk_nnz), gen_callable(gen_callable) { + } + + std::string operator()() { + std::string chunk; + chunk.reserve(chunk_nnz*25); + + for (int64_t i = 0; i < chunk_nnz; ++i) { + IT row, col; + VT value; + gen_callable(chunk_offset + i, row, col, value); + chunk += line_formatter.coord_matrix(row, col, value); + } + + return chunk; + } + + LF line_formatter; + int64_t chunk_offset; + int64_t chunk_nnz; + GEN_CALLABLE gen_callable; + }; + + chunk next_chunk(const write_options& options) { + auto chunk_size = std::min(options.chunk_size_values, (nnz - next_chunk_offset)); + chunk c(line_formatter, next_chunk_offset, chunk_size, gen_callable); + next_chunk_offset += chunk_size; + return c; + } + + protected: + LF line_formatter; + int64_t nnz; + GEN_CALLABLE gen_callable; + int64_t next_chunk_offset = 0; + }; + + /** + * Write generated triplets to a Matrix Market file. 
+ * + * @tparam IT index type of generated row and column indices + * @tparam VT value type of generated values + * @tparam GEN_CALLABLE type of the generator Callable + * @param os stream to write to + * @param header header, use {row, col} syntax for just dimensions + * @param nnz number of nonzeros in the generated MatrixMarket file + * @param gen_callable a Callable that accepts the triplet index as an in parameter and row, column, value as out parameters + * @param options write options + */ + template <typename IT, typename VT, typename GEN_CALLABLE> + void write_matrix_market_generated_triplet(std::ostream &os, + matrix_market_header header, + int64_t nnz, + GEN_CALLABLE gen_callable, + const write_options& options = {}) { + header.nnz = nnz; + + header.object = matrix; + if (header.field != pattern) { + header.field = get_field_type((const VT *) nullptr); + } + header.format = coordinate; + + write_header(os, header, options); + + line_formatter<IT, VT> lf(header, options); + auto formatter = coo_independent_generator_formatter<decltype(lf), IT, VT, GEN_CALLABLE>(lf, nnz, gen_callable); + write_body(os, formatter, options); + } + +} \ No newline at end of file diff --git a/tests/basic_test.cpp b/tests/basic_test.cpp index 1d71a68..1f3b4c1 100644 --- a/tests/basic_test.cpp +++ b/tests/basic_test.cpp @@ -12,6 +12,7 @@ #endif #include "fmm_tests.hpp" +#include <fast_matrix_market/app/generator.hpp> #if defined(__clang__) // for TYPED_TEST_SUITE @@ -35,6 +36,14 @@ void read_triplet_file(const std::string& matrix_filename, TRIPLET& triplet, fas fast_matrix_market::read_matrix_market_triplet(f, triplet.nrows, triplet.ncols, triplet.rows, triplet.cols, triplet.vals, options); } +template <typename TRIPLET> +void read_triplet_string(const std::string& s, TRIPLET& triplet, fast_matrix_market::read_options options = {}) { + std::istringstream f(s); + options.chunk_size_bytes = 1; + + fast_matrix_market::read_matrix_market_triplet(f, triplet.nrows, triplet.ncols, triplet.rows, triplet.cols, triplet.vals, options); +} + template <typename ARRAY> void read_array_file(const std::string& matrix_filename, ARRAY& array, fast_matrix_market::read_options options = {}) { std::ifstream 
f(kTestMatrixDir + "/" + matrix_filename); @@ -691,3 +700,51 @@ TEST(Whitespace, Whitespace) { } } } + +TEST(Generator, Generator) { + { + // Generate a 3x3 identity matrix + std::string gen_mtx; + { + std::ostringstream f; + fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>( + f, {3, 3}, 3, + [](auto coo_index, auto& row, auto& col, auto& value) { + row = coo_index; + col = coo_index; + value = 1; + }); + + gen_mtx = f.str(); + } + + triplet_matrix<int64_t, double> triplet, triplet2; + read_triplet_file("eye3.mtx", triplet); + read_triplet_string(gen_mtx, triplet2); + EXPECT_EQ(triplet, triplet2); + } + { + // Generate a 3x3 pattern matrix + std::string gen_mtx; + { + std::ostringstream f; + fast_matrix_market::matrix_market_header pattern_header{3, 3}; + pattern_header.field = fast_matrix_market::pattern; + fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>( + f, pattern_header, 3, + [](auto coo_index, auto& row, auto& col, auto& value) { + row = coo_index; + col = coo_index; + value = 1; + }); + + gen_mtx = f.str(); + } + + EXPECT_NE(gen_mtx.find("pattern"), std::string::npos); + triplet_matrix<int64_t, double> triplet, triplet2; + read_triplet_file("eye3_pattern.mtx", triplet); + read_triplet_string(gen_mtx, triplet2); + EXPECT_EQ(triplet, triplet2); + } +}