From fb2adf18ef0b88287c00f05b0862c06424e0d59c Mon Sep 17 00:00:00 2001
From: Adam Lugowski <alugowski@gmail.com>
Date: Fri, 7 Jul 2023 21:07:59 -0700
Subject: [PATCH] Add generator

---
 README.generator.md                          |  68 +++++++++++++
 README.md                                    |  32 ++++++
 benchmark/CMakeLists.txt                     |   1 +
 benchmark/bench_generator.cpp                |  42 ++++++++
 include/fast_matrix_market/app/generator.hpp | 101 +++++++++++++++++++
 tests/basic_test.cpp                         |  57 +++++++++++
 6 files changed, 301 insertions(+)
 create mode 100644 README.generator.md
 create mode 100644 benchmark/bench_generator.cpp
 create mode 100644 include/fast_matrix_market/app/generator.hpp
diff --git a/README.generator.md b/README.generator.md
new file mode 100644
index 0000000..46688b2
--- /dev/null
+++ b/README.generator.md
@@ -0,0 +1,68 @@
+# Procedurally generate Matrix Market
+
+The `fast_matrix_market` write mechanism can write procedurally generated data.
+
+To make this process simpler, the `generator.hpp` header provides a function that generates a coordinate Matrix Market file
+where each `row`, `column`, `value` triplet is individually generated using a *Callable*.
+
+# Usage
+
+```c++
+#include <fast_matrix_market/app/generator.hpp>
+```
+
+Create a *Callable* with the signature:
+```c++
+void generate_tuple(int64_t coo_index, IT &row, IT &col, VT &value);
+```
+where:
+* `coo_index` is an input parameter with the index of the tuple to be generated.
+* `row`, `col`, `value` are the output parameters defining the generated tuple.
+* `IT` is the integral type of the row and column indices, e.g. `int64_t` or `int`.
+* `VT` is the value type, e.g. `double` or `float`.
+
+
+Then call `fast_matrix_market::write_matrix_market_generated_triplet<IT, VT>` which takes the output stream,
+the header, number of nonzeros `nnz`, and the callable.
+
+The callable is invoked whenever a tuple is needed, so it will eventually be called for every index in the half-open range [0, `nnz`).
+The calls may be out of order and in parallel. The callable must be thread safe.
+
+The Matrix Market `field` type is deduced from `VT`, or can be set to `pattern` in the header.
+
+
+### Example: Generate an identity matrix
+
+```c++
+// #rows, #cols, and nnz
+const int64_t eye_rank = 10;
+
+fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>(
+    output_stream, {eye_rank, eye_rank}, eye_rank,
+    [](auto coo_index, auto& row, auto& col, auto& value) {
+        row = coo_index;
+        col = coo_index;
+        value = 1;
+    });
+```
+
+### Example: Generate a random matrix
+
+Generate a 100-by-100 matrix with 1000 randomized elements.
+```c++
+void generate_random_tuple([[maybe_unused]] int64_t coo_index, int64_t &row, int64_t &col, double& value) {
+    // The RNG is cheap to use but expensive to create and not thread safe, so use thread_local to create one
+    // instance per thread. Each instance is default-seeded; seed it per thread if threads must not repeat values.
+    static thread_local std::mt19937 generator;
+    // distribution objects are effectively optimized away
+    std::uniform_int_distribution<int64_t> index_distribution(0, 99);
+    std::uniform_real_distribution<double> value_distribution(0, 1);
+
+    row = index_distribution(generator);
+    col = index_distribution(generator);
+    value = value_distribution(generator);
+}
+
+fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>(
+    output_stream, {100, 100}, 1000, generate_random_tuple);
+```
diff --git a/README.md b/README.md
index f09b1fd..2b144e0 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,8 @@ The methods also accept an optional `header` argument that can be used to read a
 Matrix composed of row and column index vectors and a value vector. Any vector class that can be resized and iterated like `std::vector` will work. 
 
 ```c++
+#include <fast_matrix_market/fast_matrix_market.hpp>
+
 struct triplet_matrix {
     int64_t nrows = 0, ncols = 0;
     std::vector<int64_t> rows, cols;
@@ -116,6 +118,8 @@ Any vector class that can be resized and iterated like `std::vector` will work.
 Be mindful of whether your code expects row or column major ordering.
 
 ```c++
+#include <fast_matrix_market/fast_matrix_market.hpp>
+
 struct array_matrix {
     int64_t nrows = 0, ncols = 0;
     std::vector<double> vals;       // or int64_t, float, std::complex<double>, etc.
@@ -133,6 +137,8 @@ fast_matrix_market::read_matrix_market_array(
 ## GraphBLAS
 `GrB_Matrix` and `GrB_Vector`s are supported, with zero-copy where possible. See [GraphBLAS README](README.GraphBLAS.md).
 ```c++
+#include <fast_matrix_market/app/GraphBLAS.hpp>
+
 GrB_Matrix A;
 fast_matrix_market::read_matrix_market_graphblas(input_stream, &A);
 ```
@@ -141,6 +147,8 @@ fast_matrix_market::read_matrix_market_graphblas(input_stream, &A);
 ## Eigen
 Sparse and dense matrices and vectors are supported. See [Eigen README](README.Eigen.md).
 ```c++
+#include <fast_matrix_market/app/Eigen.hpp>
+
 Eigen::SparseMatrix<double> mat;
 fast_matrix_market::read_matrix_market_eigen(input_stream, mat);
 ```
@@ -148,6 +156,8 @@ fast_matrix_market::read_matrix_market_eigen(input_stream, mat);
 ## SuiteSparse CXSparse
 `cs_xx` structures (in both COO and CSC modes) are supported. See [CXSparse README](README.CXSparse.md).
 ```c++
+#include <fast_matrix_market/app/CXSparse.hpp>
+
 cs_dl *A;
 fast_matrix_market::read_matrix_market_cxsparse(input_stream, &A, cs_dl_spalloc);
 ```
@@ -155,6 +165,8 @@ fast_matrix_market::read_matrix_market_cxsparse(input_stream, &A, cs_dl_spalloc)
 ## Blaze
 [Blaze](https://bitbucket.org/blaze-lib/blaze) sparse and dense matrices and vectors are supported. See [Blaze README](README.Blaze.md).
 ```c++
+#include <fast_matrix_market/app/Blaze.hpp>
+
 blaze::CompressedMatrix<double> A;
 fast_matrix_market::read_matrix_market_blaze(input_stream, A);
 ```
@@ -162,6 +174,8 @@ fast_matrix_market::read_matrix_market_blaze(input_stream, A);
 ## Armadillo
 [Armadillo](https://arma.sourceforge.net/) sparse and dense matrices are supported. See [Armadillo README](README.Armadillo.md).
 ```c++
+#include <fast_matrix_market/app/Armadillo.hpp>
+
 arma::SpMat<double> A;
 fast_matrix_market::read_matrix_market_arma(input_stream, A);
 ```
@@ -174,6 +188,24 @@ Next read or write the body. You'll mostly just need to provide `parse_handler`
 
 Follow the example of the triplet and array implementations in [include/fast_matrix_market/app/](include/fast_matrix_market/app).
 
+## Generator
+
+The `fast_matrix_market` write mechanism can write procedurally generated data as well as materialized data structures.
+See [generator README](README.generator.md).
+
+For example, write a 10-by-10 identity matrix to `output_stream`:
+```c++
+#include <fast_matrix_market/app/generator.hpp>
+
+fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>(
+    output_stream, {10, 10}, 10,
+    [](auto coo_index, auto& row, auto& col, auto& value) {
+        row = coo_index;
+        col = coo_index;
+        value = 1;
+    });
+```
+
 # Installation
 
 `fast_matrix_market` is written in C++17. Parallelism uses C++11 threads. Header-only if optional dependencies are disabled.
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 6774171..318f1e7 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -19,6 +19,7 @@ add_executable(fmm_bench
         bench_iostream.cpp
         bench_triplet.cpp
         bench_csc.cpp
+        bench_generator.cpp
         main.cpp
         fmm_bench.hpp)
 target_link_libraries(fmm_bench benchmark::benchmark fast_matrix_market::fast_matrix_market)
diff --git a/benchmark/bench_generator.cpp b/benchmark/bench_generator.cpp
new file mode 100644
index 0000000..4868b87
--- /dev/null
+++ b/benchmark/bench_generator.cpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2023 Adam Lugowski. All rights reserved.
+// Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file.
+// SPDX-License-Identifier: BSD-2-Clause
+
+#include <sstream>
+
+#include "fmm_bench.hpp"
+#include <fast_matrix_market/app/generator.hpp>
+
+using VT = double;
+static int num_iterations = 3;
+
+/**
+ * Benchmark: format a procedurally generated identity matrix into an in-memory stream.
+ */
+static void generate_eye(benchmark::State& state) {
+    // An identity matrix has as many rows, columns and nonzeros as its rank.
+    const int64_t rank = int64_t{1} << 22;
+
+    // The thread count is taken from the benchmark argument.
+    fast_matrix_market::write_options write_opts;
+    write_opts.parallel_ok = true;
+    write_opts.num_threads = static_cast<int>(state.range(0));
+
+    const auto diagonal = [](auto coo_index, auto& row, auto& col, auto& value) {
+        row = coo_index;
+        col = coo_index;
+        value = 1;
+    };
+
+    std::size_t total_bytes = 0;
+    for ([[maybe_unused]] auto _ : state) {
+        std::ostringstream sink;
+        fast_matrix_market::write_matrix_market_generated_triplet<int64_t, VT>(
+            sink, {rank, rank}, rank, diagonal, write_opts);
+        total_bytes += sink.str().size();
+        benchmark::ClobberMemory();
+    }
+    state.SetBytesProcessed(static_cast<int64_t>(total_bytes));
+}
+
+BENCHMARK(generate_eye)->Name("op:write/matrix:generated_eye/impl:FMM/lang:C++")->UseRealTime()->Iterations(num_iterations)->Apply(NumThreadsArgument);
diff --git a/include/fast_matrix_market/app/generator.hpp b/include/fast_matrix_market/app/generator.hpp
new file mode 100644
index 0000000..8fdd8d9
--- /dev/null
+++ b/include/fast_matrix_market/app/generator.hpp
@@ -0,0 +1,101 @@
+// Copyright (C) 2023 Adam Lugowski. All rights reserved.
+// Use of this source code is governed by the BSD 2-clause license found in the LICENSE.txt file.
+// SPDX-License-Identifier: BSD-2-Clause
+
+#pragma once
+
+#include "../fast_matrix_market.hpp"
+
+namespace fast_matrix_market {
+    /**
+     * Chunked formatter for (row, col, value) triplets produced on demand by a generator callable like this one:
+     * [](auto coo_index, auto& row, auto& col, auto& value) { row = coo_index; col = coo_index; value = 1; }
+     */
+    template<typename IT, typename VT, typename LF, typename GEN_CALLABLE>
+    class coo_independent_generator_formatter {
+    public:
+        explicit coo_independent_generator_formatter(LF lf, int64_t nnz, GEN_CALLABLE gen_callable) :
+            line_formatter(lf), nnz(nnz), gen_callable(gen_callable) {
+            if (nnz < 0) {
+                throw invalid_argument("nnz cannot be negative.");
+            }
+        }
+
+        /** True while some triplet index in [0, nnz) has not been handed out by next_chunk(). */
+        [[nodiscard]] bool has_next() const { return next_chunk_offset < nnz; }
+
+        /** Formats the triplets with indices [chunk_offset, chunk_offset + chunk_nnz) when invoked. */
+        class chunk {
+        public:
+            explicit chunk(LF lf, int64_t chunk_offset, int64_t chunk_nnz, GEN_CALLABLE gen_callable) :
+                line_formatter(lf), chunk_offset(chunk_offset), chunk_nnz(chunk_nnz), gen_callable(gen_callable) {}
+
+            /** Generate each triplet of this chunk and return their formatted lines, concatenated. */
+            std::string operator()() {
+                std::string text;  // deliberately not named `chunk`, which would shadow the class name
+                text.reserve(chunk_nnz*25);
+                for (int64_t i = 0; i < chunk_nnz; ++i) {
+                    IT row{}, col{};  // value-initialized so an output the callable skips is not indeterminate
+                    VT value{};
+                    gen_callable(chunk_offset + i, row, col, value);
+                    text += line_formatter.coord_matrix(row, col, value);
+                }
+                return text;
+            }
+
+            LF line_formatter;
+            int64_t chunk_offset;
+            int64_t chunk_nnz;
+            GEN_CALLABLE gen_callable;
+        };
+
+        /** Hand out the next chunk: at most options.chunk_size_values triplets, but at least one while any remain. */
+        chunk next_chunk(const write_options& options) {
+            // Clamp to >= 1: a non-positive chunk_size_values would leave next_chunk_offset stuck and has_next() true forever.
+            const int64_t chunk_size = std::min(std::max<int64_t>(options.chunk_size_values, 1), nnz - next_chunk_offset);
+            chunk c(line_formatter, next_chunk_offset, chunk_size, gen_callable);
+            next_chunk_offset += chunk_size;
+            return c;
+        }
+
+    protected:
+        LF line_formatter;
+        int64_t nnz;
+        GEN_CALLABLE gen_callable;
+        int64_t next_chunk_offset = 0;
+    };
+
+    /**
+     * Write generated triplets to a Matrix Market file as a coordinate matrix.
+     *
+     * @tparam IT index type of generated row and column indices
+     * @tparam VT value type of generated values; selects the header field unless the header says `pattern`
+     * @tparam GEN_CALLABLE type of gen_callable
+     * @param os stream to write to
+     * @param header header, use {row, col} syntax for just dimensions; its nnz, object and format are overwritten
+     * @param nnz number of nonzeros in the generated MatrixMarket file; if negative, invalid_argument is thrown before anything is written
+     * @param gen_callable a Callable that accepts the triplet index as an in parameter and row, column, value as out parameters
+     * @param options write options passed to the header writer, the line formatter and the body writer
+     */
+    template <typename IT, typename VT, typename GEN_CALLABLE>
+    void write_matrix_market_generated_triplet(std::ostream &os,
+                                               matrix_market_header header,
+                                               int64_t nnz,
+                                               GEN_CALLABLE gen_callable,
+                                               const write_options& options = {}) {
+        header.nnz = nnz;
+        header.object = matrix;
+        header.format = coordinate;
+        if (header.field != pattern) {
+            // A caller-requested pattern field is kept; any other field is deduced from VT.
+            header.field = get_field_type(static_cast<const VT *>(nullptr));
+        }
+        // Build the formatter before writing: its constructor rejects a negative nnz, and that
+        // must happen before any header bytes reach the stream.
+        line_formatter<IT, VT> lf(header, options);
+        coo_independent_generator_formatter<IT, VT, decltype(lf), GEN_CALLABLE> formatter(lf, nnz, gen_callable);
+        write_header(os, header, options);
+        write_body(os, formatter, options);
+    }
+
+}
\ No newline at end of file
diff --git a/tests/basic_test.cpp b/tests/basic_test.cpp
index 1d71a68..1f3b4c1 100644
--- a/tests/basic_test.cpp
+++ b/tests/basic_test.cpp
@@ -12,6 +12,7 @@
 #endif
 
 #include "fmm_tests.hpp"
+#include <fast_matrix_market/app/generator.hpp>
 
 #if defined(__clang__)
 // for TYPED_TEST_SUITE
@@ -35,6 +36,14 @@ void read_triplet_file(const std::string& matrix_filename, TRIPLET& triplet, fas
     fast_matrix_market::read_matrix_market_triplet(f, triplet.nrows, triplet.ncols, triplet.rows, triplet.cols, triplet.vals, options);
 }
 
+template <typename TRIPLET>
+void read_triplet_string(const std::string& s, TRIPLET& triplet, fast_matrix_market::read_options options = {}) {
+    // Parse the Matrix Market text held in `s` into `triplet`, always overriding the read chunk size to 1 byte.
+    options.chunk_size_bytes = 1;
+    std::istringstream mtx_text(s);
+    fast_matrix_market::read_matrix_market_triplet(mtx_text, triplet.nrows, triplet.ncols, triplet.rows, triplet.cols, triplet.vals, options);
+}
+
 template <typename ARRAY>
 void read_array_file(const std::string& matrix_filename, ARRAY& array, fast_matrix_market::read_options options = {}) {
     std::ifstream f(kTestMatrixDir + "/" + matrix_filename);
@@ -691,3 +700,51 @@ TEST(Whitespace, Whitespace) {
         }
     }
 }
+
+TEST(Generator, Generator) {
+    // Both cases share one generator: tuple i is the diagonal entry (i, i) with value 1.
+    const auto eye_tuple = [](auto coo_index, auto& row, auto& col, auto& value) {
+        row = coo_index;
+        col = coo_index;
+        value = 1;
+    };
+
+    // Case 1: default header field.
+    // A generated 3x3 identity matrix must read back equal to the eye3.mtx fixture.
+    {
+        std::string gen_mtx;
+        {
+            std::ostringstream generated;
+            fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>(
+                generated, {3, 3}, 3, eye_tuple);
+            gen_mtx = generated.str();
+        }
+
+        triplet_matrix<int64_t, double> from_fixture, from_generator;
+        read_triplet_file("eye3.mtx", from_fixture);
+        read_triplet_string(gen_mtx, from_generator);
+        EXPECT_EQ(from_fixture, from_generator);
+    }
+
+    // Case 2: header field preset to pattern.
+    // The word "pattern" must appear in the output, and the result must read back equal
+    // to the eye3_pattern.mtx fixture.
+    {
+        fast_matrix_market::matrix_market_header pattern_header{3, 3};
+        pattern_header.field = fast_matrix_market::pattern;
+
+        std::string gen_mtx;
+        {
+            std::ostringstream generated;
+            fast_matrix_market::write_matrix_market_generated_triplet<int64_t, double>(
+                generated, pattern_header, 3, eye_tuple);
+            gen_mtx = generated.str();
+        }
+        EXPECT_NE(gen_mtx.find("pattern"), std::string::npos);
+
+        triplet_matrix<int64_t, double> from_fixture, from_generator;
+        read_triplet_file("eye3_pattern.mtx", from_fixture);
+        read_triplet_string(gen_mtx, from_generator);
+        EXPECT_EQ(from_fixture, from_generator);
+    }
+}