diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index c83ab69..b8d4b67 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,4 +3,5 @@ add_subdirectory(range) add_subdirectory(saxpy) add_subdirectory(spmv) add_subdirectory(spmm) +add_subdirectory(spgemm) # end /* Add examples' subdirectories */ \ No newline at end of file diff --git a/examples/spgemm/CMakeLists.txt b/examples/spgemm/CMakeLists.txt new file mode 100644 index 0000000..16f6a03 --- /dev/null +++ b/examples/spgemm/CMakeLists.txt @@ -0,0 +1,16 @@ +# begin /* Add application */ +set(SOURCES + thread_mapped.cu +) + +foreach(SOURCE IN LISTS SOURCES) + get_filename_component(TEST_NAME ${SOURCE} NAME_WLE) + add_executable(loops.spgemm.${TEST_NAME} ${SOURCE}) + target_link_libraries(loops.spgemm.${TEST_NAME} PRIVATE loops) + set_target_properties(loops.spgemm.${TEST_NAME} + PROPERTIES + CUDA_ARCHITECTURES ${CMAKE_CUDA_ARCHITECTURES} + ) + message(STATUS "Example Added: loops.spgemm.${TEST_NAME}") +endforeach() +# end /* Add application */ \ No newline at end of file diff --git a/examples/spgemm/filter_zeros.ipynb b/examples/spgemm/filter_zeros.ipynb new file mode 100644 index 0000000..160b252 --- /dev/null +++ b/examples/spgemm/filter_zeros.ipynb @@ -0,0 +1,109 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.io\n", + "import scipy.sparse\n", + "\n", + "import os\n", + "from pathlib import Path\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def remove_zeros_and_save(input_file, output_file):\n", + " matrix = scipy.io.mmread(input_file)\n", + "\n", + " rows, cols, data = zip(*[(r, c, d) for r, c, d in zip(matrix.row, matrix.col, matrix.data) if d != 0])\n", + "\n", + " filtered_matrix = scipy.sparse.coo_matrix((data, (rows, cols)), shape=matrix.shape)\n", + "\n", + " scipy.io.mmwrite(output_file, filtered_matrix)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processed matrix saved to /home/ychenfei/research/libs/loops/datasets/filtered_zeros/rma10.mtx\n" + ] + } + ], + "source": [ + "remove_zeros_and_save('/data/toodemuy/datasets/floridaMatrices/rma10.mtx','/home/ychenfei/research/libs/loops/datasets/filtered_zeros/rma10.mtx')\n", + "print(f\"Processed matrix saved to {'/home/ychenfei/research/libs/loops/datasets/filtered_zeros/rma10.mtx'}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remove_zeros_and_save(input_file, output_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "def filter_zero_in_bulk(dir):\n", + " path = Path(str(dir))\n", + " for file in path.iterdir():\n", + " # print(str(file))\n", + " file = str(file)\n", + " if file.endswith(\".mtx\"):\n", + " new_file = '/home/ychenfei/research/libs/loops/datasets/filtered_zeros/'+os.path.basename(file)\n", + " # print(new_file)\n", + " remove_zeros_and_save(file,new_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "filter_zero_in_bulk('/data/toodemuy/datasets/floridaMatrices/')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/spgemm/helpers.hxx b/examples/spgemm/helpers.hxx new file mode 100644 index 0000000..6f03e01 --- /dev/null +++ b/examples/spgemm/helpers.hxx @@ -0,0 +1,78 @@ +/** + * @file helpers.hxx + * @author Muhammad Osama (mosama@ucdavis.edu) + * @brief Header file for SpGEMM. + * @version 0.1 + * @copyright Copyright (c) 2022 + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct parameters_t { + std::string filename; + bool validate; + bool verbose; + cxxopts::Options options; + + /** + * @brief Construct a new parameters object and parse command line arguments. + * + * @param argc Number of command line arguments. + * @param argv Command line arguments. + */ + parameters_t(int argc, char** argv) + : options(argv[0], "Sparse Matrix-Matrix Multiplication") { + // Add command line options + options.add_options()("h,help", "Print help") // help + ("m,market", "Matrix file", cxxopts::value()) // mtx + ("validate", "CPU validation") // validate + ("v,verbose", "Verbose output"); // verbose + + // Parse command line arguments + auto result = options.parse(argc, argv); + + if (result.count("help") || (result.count("market") == 0)) { + std::cout << options.help({""}) << std::endl; + std::exit(0); + } + + if (result.count("market") == 1) { + filename = result["market"].as(); + if (loops::is_market(filename)) { + } else { + std::cout << options.help({""}) << std::endl; + std::exit(0); + } + } else { + std::cout << options.help({""}) << std::endl; + std::exit(0); + } + + if (result.count("validate") == 1) { + validate = true; + } else { + validate = false; + } + + if (result.count("verbose") == 1) { + verbose = true; + } else { + verbose = false; + } + } +}; diff --git a/examples/spgemm/run.sh b/examples/spgemm/run.sh new file mode 100644 index 0000000..3f364dc --- /dev/null +++ b/examples/spgemm/run.sh @@ -0,0 +1,70 @@ +# /home/ychenfei/research/libs/loops/build/bin/loops.spgemm.thread_mapped -m /home/ychenfei/research/sparse_matrix_perf_analysis/spgemm_dataflow_analysis/test_mtx/s100/ck104.mtx + +# filepath="/home/ychenfei/research/sparse_matrix_perf_analysis/spgemm_dataflow_analysis/test_mtx2/bcsstk17.mtx" +# filename=$(basename "$filepath") + +# echo "$filename" >> /home/ychenfei/research/libs/loops/examples/spgemm/running_time.txt +# /home/ychenfei/research/libs/loops/build/bin/loops.spgemm.thread_mapped -m /home/ychenfei/research/sparse_matrix_perf_analysis/spgemm_dataflow_analysis/test_mtx2/bcsstk17.mtx >> /home/ychenfei/research/libs/loops/examples/spgemm/running_time.txt + +############## Output matrix C in dense format ############## +# export_file="/home/ychenfei/research/libs/loops/examples/spgemm/running_time/dense_C/dense_C_running_time_$(date +%Y-%m-%d).txt" + +# export_file="/home/ychenfei/research/libs/loops/examples/spgemm/running_time/dense_C/testing.txt" + +# exe="/home/ychenfei/research/libs/loops/build/bin/loops.spgemm.thread_mapped" + +# > $export_file + +# for f in /data/toodemuy/datasets/floridaMatrices/*.mtx +# do +# filename=$(basename "$f") +# echo "$filename" >> $export_file +# $exe -m $f >> $export_file +# echo >> $export_file +# done + + +############## Count the nnz of input matrices without explicit zeros 
############## +# export_file="/home/ychenfei/research/libs/loops/examples/spgemm/running_time/nnz_C/non_explicit_zeros.txt" + +# exe="/home/ychenfei/research/libs/loops/build/bin/loops.spgemm.thread_mapped" + +# > $export_file + +# for f in /home/ychenfei/research/libs/loops/datasets/non_explicit_zeros/*.mtx +# do +# filename=$(basename "$f") +# echo "$filename" >> $export_file +# $exe -m $f >> $export_file +# echo >> $export_file +# done + +############## Count the explicit zeros from the input matrices when applying SpGEMM ############## +export_file="/home/ychenfei/research/libs/loops/examples/spgemm/export_mtx/nnz_C/explicit_zeros.txt" + +exe="/home/ychenfei/research/libs/loops/build/bin/loops.spgemm.thread_mapped" + +> $export_file + +for f in /data/toodemuy/datasets/floridaMatrices/*.mtx +do + filename=$(basename "$f") + echo "$filename" >> $export_file + $exe -m $f >> $export_file + echo >> $export_file +done + +############## Count the NNZ of C with input matrices have explicit zeros ############## +# export_file="/home/ychenfei/research/libs/loops/examples/spgemm/export_mtx/nnz_C/nnz_C_explicit_zeros_$(date +%Y-%m-%d).txt" + +# exe="/home/ychenfei/research/libs/loops/build/bin/loops.spgemm.thread_mapped" + +# > $export_file + +# for f in /data/toodemuy/datasets/floridaMatrices/*.mtx +# do +# filename=$(basename "$f") +# echo "$filename" >> $export_file +# $exe -m $f >> $export_file +# echo >> $export_file +# done \ No newline at end of file diff --git a/examples/spgemm/running_time.txt b/examples/spgemm/running_time.txt new file mode 100644 index 0000000..c78bb27 --- /dev/null +++ b/examples/spgemm/running_time.txt @@ -0,0 +1,7 @@ +bcsstk17.mtx +Elapsed (ms): 2458.75 + +cant.mtx +Elapsed (ms): 27056 + +consph.mtx diff --git a/examples/spgemm/running_time/dense_C/running_time_2023-11-06.txt b/examples/spgemm/running_time/dense_C/running_time_2023-11-06.txt new file mode 100644 index 0000000..0a83895 --- /dev/null +++ b/examples/spgemm/running_time/dense_C/running_time_2023-11-06.txt @@ -0,0 +1,20 @@ +bcsstk17.mtx +Elapsed (ms): 2130.66 + +cant.mtx +Elapsed (ms): 23811.8 + +consph.mtx +Elapsed (ms): 48775.6 + +mac_econ_fwd500.mtx + +pwtk.mtx + +rma10.mtx +Elapsed (ms): 14880.4 + +scircuit.mtx + +shipsec1.mtx + diff --git a/examples/spgemm/running_time/dense_C/testing.txt b/examples/spgemm/running_time/dense_C/testing.txt new file mode 100644 index 0000000..695fb3d --- /dev/null +++ b/examples/spgemm/running_time/dense_C/testing.txt @@ -0,0 +1,20 @@ +bcsstk17.mtx +Elapsed (ms): 0.012288 + +cant.mtx +Elapsed (ms): 0.01536 + +consph.mtx +Elapsed (ms): 0.015328 + +mac_econ_fwd500.mtx + +pwtk.mtx + +rma10.mtx +Elapsed (ms): 0.018432 + +scircuit.mtx + +shipsec1.mtx + diff --git a/examples/spgemm/test_spgemm.cpp b/examples/spgemm/test_spgemm.cpp new file mode 100644 index 0000000..67cb44d --- /dev/null +++ b/examples/spgemm/test_spgemm.cpp @@ -0,0 +1,66 @@ +#include +#include +#include + +using type_t = float; + +void copyDeviceMtxToHost(const loops::matrix_t& d_C, loops::matrix_t& h_C){ + // Ensure the host matrix has the correct dimensions + h_C.rows = d_C.rows; + h_C.cols = d_C.cols; + + // Allocate memory for the host matrix data + h_C.m_data.resize(d_C.rows * d_C.cols); + + // Copy matrix data from device to host + cudaMemcpy(h_C.m_data.data(), d_C.m_data_ptr, sizeof(type_t) * d_C.rows * d_C.cols, cudaMemcpyDeviceToHost); + + // Update m_data_ptr on the host-side matrix_t object + h_C.m_data_ptr = h_C.m_data.data(); +} + +void writeMtxToFile(loops::matrix_t& C_host, int rows, int 
cols, const std::string& filename) { + std::cout<<"filename: "< +#include +#include +#include + +#include "helpers/test_spgemm.cpp" + +using namespace loops; + +int main(int argc, char** argv) { + util::timer_t timer; + + using index_t = int; + using offset_t = int; + using type_t = float; + + // ... I/O parameters, mtx, etc. + parameters_t parameters(argc, argv); + + matrix_market_t mtx; + csr_t csr(mtx.load(parameters.filename)); + csc_t csc(mtx.load(parameters.filename)); + + // Timer for benchmarking starts here + timer.start(); + + int* d_c_nnz_by_row; + cudaMalloc(&d_c_nnz_by_row, csr.rows * sizeof(int)); + cudaMemset(d_c_nnz_by_row, 0, csr.rows * sizeof(int)); + int* h_c_nnz_by_row = new int[csr.rows](); + + algorithms::spgemm::estimate_nnz_test_v3(csr, csc, d_c_nnz_by_row); + cudaMemcpy(h_c_nnz_by_row, d_c_nnz_by_row, csr.rows * sizeof(int), cudaMemcpyDeviceToHost); + + timer.stop(); + + float estimate_nnz_elapsed = timer.milliseconds(); + std::cout << "estimate_nnz_elapsed (ms):\t" << estimate_nnz_elapsed << std::endl; + + // timer.start(); + csr_t c(csr.rows, 0, 0); + + // prefix sum d_c_nnz_by_row to get the row offset of C + algorithms::spgemm::scanNnzC(d_c_nnz_by_row, c.offsets.data().get(), csr.rows); + c.nnzs = c.offsets.back(); + + // allocate indices array and values array in device + index_t* d_c_indices; + cudaMalloc(&d_c_indices, c.nnzs * sizeof(index_t)); + cudaMemset(d_c_indices, 0, c.nnzs * sizeof(index_t)); + + type_t* d_c_values; + cudaMalloc(&d_c_values, c.nnzs * sizeof(type_t)); + cudaMemset(d_c_values, 0, c.nnzs * sizeof(type_t)); + + // Test estimate_nnz + // printDeviceArr(d_c_nnz_by_row, csr.rows); + // printDeviceArr(c.offsets.data().get(), csr.rows+1); + // printDeviceArr(d_c_indices, c.nnzs); + + // copyAndSumEstimateNnzToHost(d_c_nnz_by_row, csr.rows); + + + // Apply SpGEMM + /* + // algorithms::spgemm::thread_mapped(csr, csc, c, d_c_indices, d_c_values); + ////////// TODO: can I use c.indices.data().get() instead of d_c_indices? 
////////// + + // Copy back to C + // c.indices.resize(c.nnzs); + // c.values.resize(c.nnzs); + // thrust::copy(d_c_indices, d_c_indices + c.nnzs, c.indices.begin()); + // thrust::copy(d_c_values, d_c_values + c.nnzs, c.values.begin()); + // Timer for benchmarking stops here + */ + + c.indices.resize(c.nnzs); + c.values.resize(c.nnzs); + + algorithms::spgemm::thread_mapped_v2(csr, csc, c); + timer.stop(); + + float spgemm_elapsed = timer.milliseconds(); + std::cout << "spgemm_elapsed (ms):\t" << spgemm_elapsed << std::endl; + + std::cout << "Total Elapsed (ms):\t" << estimate_nnz_elapsed+spgemm_elapsed << std::endl; + + + + // Sanity check thrust::copy + /* + std::vector h_c_indices(c.nnzs); + std::vector h_c_values(c.nnzs); + try{ + thrust::copy(c.indices.begin(), c.indices.end(), h_c_indices.begin()); + } catch(thrust::system_error &e) { + std::cerr << "Error accessing vector element: " << e.what() << std::endl; + exit(-1); + } + + try{ + thrust::copy(c.values.begin(), c.values.end(), h_c_values.begin()); + } catch(thrust::system_error &e) { + std::cerr << "Error accessing vector element: " << e.what() << std::endl; + exit(-1); + } + + cudaMemcpy(h_c_indices, d_c_indices, c.nnzs * sizeof(index_t), cudaMemcpyDeviceToHost); + cudaMemcpy(h_c_values, d_c_values, c.nnzs * sizeof(type_t), cudaMemcpyDeviceToHost); + + // for(int i = 0; i < c.nnzs; ++i) { + // std::cout << h_c_indices[i] << ","; + // } + // std::cout << std::endl; + // for(int i = 0; i < c.nnzs; ++i) { + // std::cout << h_c_indices[i] << ","; + // } + + // std::cout << std::endl; + // for(int i = 0; i < c.nnzs; ++i) { + // std::cout << h_c_values[i] << ","; + // } + // std::cout << std::endl; + // for(int i = 0; i < c.nnzs; ++i) { + // std::cout << h_c_values[i] << ","; + // } + + for(int i = 0; i < c.nnzs; ++i) { + if(h_c_indices[i] != h_c_indices[i]) { + std::cout << "index not equal" << std::endl; + std::cout << "h_c_indices[" << i << "]: " << h_c_indices[i] << std::endl; + std::cout << "h_c_indices[" << i << "]: " << h_c_indices[i] << std::endl; + } + } + + for(int i = 0; i < c.nnzs; ++i) { + if(h_c_values[i] != h_c_values[i]) { + std::cout << "value not equal" << std::endl; + std::cout << "h_c_values[" << i << "]: " << h_c_values[i] << std::endl; + std::cout << "h_c_values[" << i << "]: " << h_c_values[i] << std::endl; + } + } + */ + + // Run the benchmark. 
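  // A minimal sketch of the "prefix-sum the per-row counts into C's row
  // offsets" step that algorithms::spgemm::scanNnzC performs above (the code
  // then reads the total back as c.offsets.back()). Illustration only, not the
  // library's implementation: the helper name and signature below are
  // hypothetical, and it assumes d_counts holds `rows` entries while d_offsets
  // has room for `rows + 1`.
  /*
  #include <thrust/device_ptr.h>
  #include <thrust/scan.h>

  void counts_to_offsets_sketch(const int* d_counts, int* d_offsets, int rows) {
    thrust::device_ptr<const int> counts(d_counts);
    thrust::device_ptr<int> offsets(d_offsets);
    // offsets[i] = counts[0] + ... + counts[i-1], with offsets[0] = 0.
    thrust::exclusive_scan(counts, counts + rows, offsets);
    // Sentinel entry: offsets[rows] is the total number of nonzeros in C.
    int total = offsets[rows - 1];   // device reads copied back via device_ptr
    total += counts[rows - 1];
    offsets[rows] = total;
  }
  */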
+ /* + timer.start(); + // algorithms::spgemm::thread_mapped(csr, csc, C); + algorithms::spgemm::thread_mapped(csr, csc, coo); + timer.stop(); +*/ + // std::cout << "Elapsed (ms):\t" << timer.milliseconds() << std::endl; + + // writeMtxToFile(h_coo, csr.rows, csc.cols, "/home/ychenfei/research/libs/loops/examples/spgemm/export_mtx/test.txt"); + + cudaFree(d_c_nnz_by_row); + cudaFree(d_c_indices); + // cudaFree(d_c_values); +} \ No newline at end of file diff --git a/examples/spgemm/validate_spgemm.ipynb b/examples/spgemm/validate_spgemm.ipynb new file mode 100644 index 0000000..b133908 --- /dev/null +++ b/examples/spgemm/validate_spgemm.ipynb @@ -0,0 +1,514 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.io\n", + "import scipy.sparse\n", + "import numpy as np\n", + "\n", + "def compute_spgemm(fileA, fileB):\n", + " # Load matrices from .mtx files\n", + " A = scipy.io.mmread(fileA).tocsr() # Convert to Compressed Sparse Column format\n", + " B = scipy.io.mmread(fileB).tocsc() # Convert to Compressed Sparse Column format\n", + " # B = np.loadtxt(fileB, delimiter=',')\n", + "\n", + " # Perform SpGEMM\n", + " C = A.dot(B)\n", + " C = C.toarray().astype(int)\n", + " return C\n", + " # return B" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11 9 1 2 3 3 6 5 4 3 3 3 3 1 1 1 1 1 3 2 2 3 3 2 1 2 2 2 1 3 2 3 3 1 8 6 2 5 7\n", + "9 11 2 2 2 3 7 6 5 5 4 4 4 2 2 2 2 2 4 3 3 4 4 3 2 3 3 3 2 4 3 4 4 0 9 7 1 5 8\n", + "1 2 7 3 1 1 1 1 2 2 2 2 2 2 2 2 2 2 5 2 2 2 2 2 5 5 6 4 5 2 2 3 2 0 1 5 0 0 5\n", + "2 2 3 4 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 0 0 1\n", + "3 2 1 1 4 2 3 2 2 2 2 2 2 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 0 1 1 2 0 1\n", + "3 3 1 1 2 4 2 3 3 2 3 3 3 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 2 1 1 0 2\n", + "6 7 1 1 3 2 10 5 4 4 4 4 4 1 1 1 1 1 3 2 2 5 5 2 1 2 1 1 1 1 1 1 1 1 8 6 2 3 8\n", + "5 6 1 1 2 3 5 13 5 3 5 6 6 1 1 1 1 1 2 2 2 5 5 2 2 2 3 3 2 4 3 4 5 1 7 9 2 5 10\n", + "4 5 2 2 2 3 4 5 7 4 5 5 5 2 2 2 2 2 2 3 3 5 5 3 2 2 2 2 2 2 2 2 2 0 5 3 1 1 4\n", + "3 5 2 2 2 2 4 3 4 5 3 3 3 2 2 2 2 2 2 3 3 4 4 3 2 2 2 2 2 2 2 2 2 0 3 2 1 1 2\n", + "3 4 2 2 2 3 4 5 5 3 7 7 6 2 3 3 2 3 2 2 2 5 5 2 2 2 2 2 2 2 2 2 3 1 4 3 2 1 4\n", + "3 4 2 2 2 3 4 6 5 3 7 9 7 3 4 3 2 4 3 3 3 6 6 3 3 3 3 3 3 3 3 3 4 1 4 4 2 2 5\n", + "3 4 2 2 2 3 4 6 5 3 6 7 7 2 2 2 2 2 3 3 3 6 6 3 3 3 3 3 3 3 3 3 3 1 4 2 2 0 3\n", + "1 2 2 2 1 1 1 1 2 2 2 3 2 8 8 4 3 7 2 2 2 4 2 2 2 2 2 2 2 3 2 2 2 0 1 6 0 5 6\n", + "1 2 2 2 1 1 1 1 2 2 3 4 2 8 9 5 3 8 2 2 2 4 2 2 2 2 2 2 2 3 2 2 3 0 1 7 0 6 7\n", + "1 2 2 2 1 1 1 1 2 2 3 3 2 4 5 5 3 5 2 2 2 3 2 2 2 2 2 2 2 3 2 2 3 0 1 3 0 2 3\n", + "1 2 2 2 1 1 1 1 2 2 2 2 2 3 3 3 4 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 0 1 2 1 0 2\n", + "1 2 2 2 1 1 1 1 2 2 3 4 2 7 8 5 3 9 2 2 2 4 3 2 2 2 2 2 2 3 3 2 3 0 1 7 0 6 7\n", + "3 4 5 2 1 1 3 2 2 2 2 3 3 2 2 2 2 2 9 3 4 4 4 3 5 6 6 4 5 3 3 4 3 0 3 6 0 2 6\n", + "2 3 2 2 1 1 2 2 3 3 2 3 3 2 2 2 2 2 3 4 4 4 4 4 3 3 3 3 3 3 3 3 3 0 2 0 0 1 1\n", + "2 3 2 2 1 1 2 2 3 3 2 3 3 2 2 2 2 2 4 4 5 5 5 4 3 3 3 3 3 3 3 3 3 0 2 1 0 2 2\n", + "3 4 2 2 2 2 5 5 5 4 5 6 6 4 4 3 2 4 4 4 5 13 10 4 3 3 4 3 3 4 4 4 3 1 4 8 2 7 9\n", + "3 4 2 2 2 2 5 5 5 4 5 6 6 2 2 2 2 3 4 4 5 10 10 4 3 3 4 3 3 3 4 3 3 1 4 5 2 4 6\n", + "2 3 2 2 1 1 2 2 3 3 2 3 3 2 2 2 2 2 3 4 4 4 4 4 3 3 3 3 3 3 3 3 3 0 2 0 0 1 1\n", + "1 2 5 2 1 1 1 2 2 2 2 3 3 2 2 2 2 2 5 3 3 3 3 3 
6 6 6 5 6 3 3 4 3 0 1 3 0 0 3\n", + "2 3 5 2 1 1 2 2 2 2 2 3 3 2 2 2 2 2 6 3 3 3 3 3 6 7 6 5 6 3 3 4 3 0 2 4 0 1 4\n", + "2 3 6 2 1 1 1 3 2 2 2 3 3 2 2 2 2 2 6 3 3 4 4 3 6 6 9 6 6 4 4 5 4 0 2 6 0 2 6\n", + "2 3 4 2 1 1 1 3 2 2 2 3 3 2 2 2 2 2 4 3 3 3 3 3 5 5 6 6 5 4 4 4 4 0 2 3 0 1 3\n", + "1 2 5 2 1 1 1 2 2 2 2 3 3 2 2 2 2 2 5 3 3 3 3 3 6 6 6 5 6 3 3 4 3 0 1 3 0 0 3\n", + "3 4 2 2 1 1 1 4 2 2 2 3 3 3 3 3 2 3 3 3 3 4 3 3 3 3 4 4 3 6 4 5 5 0 3 3 0 3 3\n", + "2 3 2 2 1 1 1 3 2 2 2 3 3 2 2 2 2 3 3 3 3 4 4 3 3 3 4 4 3 4 5 4 4 0 2 2 0 2 2\n", + "3 4 3 2 1 1 1 4 2 2 2 3 3 2 2 2 2 2 4 3 3 4 3 3 4 4 5 4 4 5 4 7 5 0 3 4 0 3 4\n", + "3 4 2 2 1 1 1 5 2 2 3 4 3 2 3 3 2 3 3 3 3 3 3 3 3 3 4 4 3 5 4 5 7 0 3 4 0 4 4\n", + "1 0 0 1 0 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 3 3 0 1 0 2\n", + "8 9 1 2 1 2 8 7 5 3 4 4 4 1 1 1 1 1 3 2 2 4 4 2 1 2 2 2 1 3 2 3 3 3 15 8 1 5 13\n", + "6 7 5 1 1 1 6 9 3 2 3 4 2 6 7 3 2 7 6 0 1 8 5 0 3 4 6 3 3 3 2 4 4 0 8 29 2 17 28\n", + "2 1 0 0 2 1 2 2 1 1 2 2 2 0 0 0 1 0 0 0 0 2 2 0 0 0 0 0 0 0 0 0 0 1 1 2 4 0 2\n", + "5 5 0 0 0 0 3 5 1 1 1 2 0 5 6 2 0 6 2 1 2 7 4 1 0 1 2 1 0 3 2 3 4 0 5 17 0 18 18\n", + "7 8 5 1 1 2 8 10 4 2 4 5 3 6 7 3 2 7 6 1 2 9 6 1 3 4 6 3 3 3 2 4 4 2 13 28 2 18 33\n" + ] + } + ], + "source": [ + "# Example of usage\n", + "# C_python = compute_spgemm('/home/ychenfei/research/libs/loops/datasets/chesapeake/chesapeake.mtx','/home/ychenfei/research/libs/loops/examples/spmm/mtx_B.txt')\n", + "C_python = compute_spgemm('/home/ychenfei/research/libs/loops/datasets/chesapeake/chesapeake.mtx','/home/ychenfei/research/libs/loops/datasets/chesapeake/chesapeake.mtx')\n", + "# C_python = compute_spgemm('/home/ychenfei/research/libs/loops/examples/spgemm/test_A.mtx','/home/ychenfei/research/libs/loops/examples/spgemm/test_A.mtx')\n", + "np.set_printoptions(threshold=np.inf)\n", + "# print(C_python)\n", + "for row in C_python:\n", + " print(' '.join(map(str, row)))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "def compare_matrices(C_python, txt_file):\n", + " \"\"\"Compare matrix C computed in Python with matrix from txt/csv file.\"\"\"\n", + " # Load matrix from txt/csv (from C++)\n", + " C_cpp = np.loadtxt(txt_file, delimiter=',')\n", + " \n", + " # print(C_cpp)\n", + "\n", + " # Check if the matrices have the same shape\n", + " if C_python.shape != C_cpp.shape:\n", + " print(\"The matrices have different shapes!\")\n", + " return False\n", + " else:\n", + " print(\"The matrices have same shapes!\")\n", + "\n", + " # # Determine where the matrices differ\n", + " tolerance = 1e-6\n", + " differing_elements = np.where(np.abs(C_python - C_cpp) > tolerance)\n", + "\n", + " if differing_elements[0].size > 0:\n", + " print(\"Differences found at the following indices:\")\n", + " for i, j in zip(differing_elements[0], differing_elements[1]):\n", + " print(f\"Row: {i}, Column: {j}, C_python: {C_python[i,j]}, C_cpp: {C_cpp[i,j]}\")\n", + " return False\n", + " else:\n", + " print(\"The matrices are approximately equal!\")\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The matrices have same shapes!\n", + "The matrices are approximately equal!\n" + ] + } + ], + "source": [ + "result = compare_matrices(C_python, '/home/ychenfei/research/libs/loops/examples/spgemm/new_spgemm_result_cuda.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Example data for Operational Intensity and Performance\n", + "operational_intensity = np.logspace(0, 3, 100) # Operational Intensity (x-axis)\n", + "performance = np.minimum(10, 2*operational_intensity) # Performance (y-axis)\n", + "\n", + "# Create the Roofline plot\n", + "plt.figure(figsize=(10, 6))\n", + "plt.loglog(operational_intensity, performance, label='Roofline')\n", + "plt.axhline(y=10, color='r', linestyle='--', label='Peak Performance')\n", + "plt.axvline(x=5, color='b', linestyle='--', label='Peak Memory Bandwidth')\n", + "plt.legend()\n", + "plt.xlabel('Operational Intensity (FLOPs/Byte)')\n", + "plt.ylabel('Performance (GFLOPs/s)')\n", + "plt.title('Mock Roofline Plot')\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1EAAAIoCAYAAACI32EXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAADEP0lEQVR4nOzdd3hTZf/H8XeSpnuwy957I0v2BnEzVESfR0VEFEVBKHuLLEVUUETE9VMfBVkOlgqUIYiy994gLaPQ3Sbn90ekgFBsoeWctp/XdeXiTnLnnA9JTppvzn3uYzMMw0BERERERETSxG52ABERERERkaxERZSIiIiIiEg6qIgSERERERFJBxVRIiIiIiIi6aAiSkREREREJB1URImIiIiIiKSDiigREREREZF0UBElIiIiIiKSDiqiRERERERE0kFFlIjkeCVLluT+++83O8Yt+eKLL6hYsSJOp5NcuXIB0Lx5c5o3b57S5/Dhw9hsNj799FNTMqbFp59+is1m4/Dhw2ZHuaEVK1Zgs9lYsWJFmvvOmTPnjmTK7PXkVFlhuxER86iIEhHTXP7ibLPZWL169XX3G4ZBsWLFsNlsli1ymjdvnvJ/sNls+Pn5Ub16daZMmYLb7c7Ude/evZunn36aMmXK8NFHHzFjxoxMXd+tGDly5DXPj7+/P5UrV2bo0KFcvHgxQ9bx1VdfMWXKlAxZltnrvfq5utklLcWclVy9rf/zMnDgQFOzmfX+EZGszcvsACIivr6+fPXVVzRu3Pia21euXMnx48fx8fExKVnaFC1alHHjxgEQGRnJV199RZ8+fYiIiGDs2LGZtt4VK1bgdrt55513KFu2bKr9SpQoQVxcHE6nM9Oy/JsPPviAwMBAoqOjWbp0KWPHjuXXX39lzZo12Gy221r2V199xfbt23n11VczJuwNNG3alLi4OLy9vTN1vV988cU11z///HOWLVt23e2VKlVi165dGbbeO2X06NGUKlXqmtuqVq1qUhqP1F5HK2w3ImJdKqJExHT33nsvs2fP5t1338XL68rH0ldffUXt2rWJjIw0Md2/CwkJ4cknn0y53rNnTypWrMh7773H6NGjcTgcmbLeM2fOAKQM40uNzWbD19c3UzKkVefOncmXLx/geX46derE3LlzWbduHQ0aNDA1W1rY7fY78hxe/T4CWLduHcuWLbvuduC2i6jY2Fj8/f1vaxnp1b59e+rUqXNH13mrrLDdiIh1aTifiJju8ccf5+zZsyxbtizltsTERObMmUPXrl1v+JiYmBhee+01ihUrho+PDxUqVODNN9/EMIzr+v7f//0f9erVw9/fn9y5c9O0aVOWLl1600yfffYZXl5e9O/fP93/H19fX+rWrculS5dSCh2A5ORkxowZQ5kyZfDx8aFkyZIMHjyYhISE65bx/vvvU6VKFXx8fChcuDC9evXiwoULKfeXLFmSESNGAJA/f35sNhsjR468YZ4bHdvx9NNPExgYyIkTJ3j44YcJDAwkf/789OvXD5fLdc3j3W43U6ZMoUqVKvj6+hIaGsrzzz/P+fPn0/3cXNayZUsADh06dNN+//Y8NG/enB9//JEjR46kDA8rWbJkqsvr2LEjd9111zW3PfDAA9hsNhYuXJhy2/r167HZbCxatAi4/piotKzX7XYzduxYihYtiq+vL61atWL//v3/8sykX1rW07x5c6pWrcqff/5J06ZN8ff3Z/DgwQAkJCQwYsQIypYti4+PD8WKFSMsLOyG78v/+7//o3bt2vj5+ZEnTx66dOnCsWPHMuT/kdp7uGTJkjz99NMp1y8PDVyzZg19+/Ylf/78BAQE0KFDByIiIq57/KJFi2jWrBlBQUEEBwdTt25dvvrqq5TnJbXXMbVjon799VeaNGlCQEAAuXLl4qGHHrquoL08jHX//v08/fTT5MqVi5CQEJ555hliY2Ov6bts2TIaN25Mrly5CAwMpEKFCimvjYhYl/ZEiYjpSpYsSYMGDfj6669p37494PniExUVRZcuXXj33Xev6W8YBg8++CDLly/n2WefpWbNmixZsoT+/ftz4sQJ3n777ZS+o0aNYuTIkTRs2JDRo0fj7e3N+vXr+fXXX2nbtu0N88yYMYOePXsy
ePBgXn/99Vv6P13+Anb1XqLu3bvz2Wef0blzZ1577TXWr1/PuHHj2LVrF/PmzUvpN3LkSEaNGkXr1q154YUX2LNnDx988AEbNmxgzZo1OJ1OpkyZwueff868efNShspVr149XRldLhft2rWjfv36vPnmm/z888+89dZblClThhdeeCGl3/PPP8+nn37KM888Q+/evTl06BBTp05l06ZNKXnS68CBAwDkzZs31T5peR6GDBlCVFQUx48fT3ndAwMDU11mkyZNWLBgARcvXiQ4OBjDMFizZg12u51Vq1bx4IMPArBq1SrsdjuNGjW64XLSst7x48djt9vp168fUVFRTJw4kSeeeIL169en/YlKg7Su5+zZs7Rv354uXbrw5JNPEhoaitvt5sEHH2T16tX06NGDSpUqsW3bNt5++2327t3L/PnzUx4/duxYhg0bxqOPPkr37t2JiIjgvffeo2nTpmzatOlf94gCREVFXbdn+fIeyvR6+eWXyZ07NyNGjODw4cNMmTKFl156iW+++Salz6effkq3bt2oUqUKgwYNIleuXGzatInFixfTtWvXdL9/fv75Z9q3b0/p0qUZOXIkcXFxvPfeezRq1IiNGzdeV0g/+uijlCpVinHjxrFx40ZmzpxJgQIFmDBhAgA7duzg/vvvp3r16owePRofHx/279/PmjVrbuk5EZE7yBARMcknn3xiAMaGDRuMqVOnGkFBQUZsbKxhGIbxyCOPGC1atDAMwzBKlChh3HfffSmPmz9/vgEYr7/++jXL69y5s2Gz2Yz9+/cbhmEY+/btM+x2u9GhQwfD5XJd09ftdqe0r17+O++8Y9hsNmPMmDFp+j80a9bMqFixohEREWFEREQYu3fvNvr3728A12TevHmzARjdu3e/5vH9+vUzAOPXX381DMMwzpw5Y3h7extt27a9JvPUqVMNwJg1a1bKbSNGjDAAIyIi4rpMzZo1S7l+6NAhAzA++eSTlNueeuopAzBGjx59zWNr1apl1K5dO+X6qlWrDMD48ssvr+m3ePHiG97+T5cz7tmzx4iIiDAOHTpkfPjhh4aPj48RGhpqxMTEGIZx5b1w6NChdD8P9913n1GiRImb5rhsw4YNBmD89NNPhmEYxtatWw3AeOSRR4z69eun9HvwwQeNWrVqpVxfvny5ARjLly//1/Ve7lupUiUjISEh5fZ33nnHAIxt27alKathGEavXr2M1P5Up2c9zZo1MwBj+vTp1yzjiy++MOx2u7Fq1aprbp8+fboBGGvWrDEMwzAOHz5sOBwOY+zYsdf027Ztm+Hl5XXd7f90+fW90eUywBgxYsR1jy1RooTx1FNPXbes1q1bX7Md9+nTx3A4HMaFCxcMwzCMCxcuGEFBQUb9+vWNuLi4a5Z59eNSex1vtN3UrFnTKFCggHH27NmU27Zs2WLY7Xbjv//9b8ptl9/33bp1u2aZHTp0MPLmzZty/e23377hNiwi1qfhfCJiCY8++ihxcXH88MMPXLp0iR9++CHVoXw//fQTDoeD3r17X3P7a6+9hmEYKUOw5s+fj9vtZvjw4djt137c3Wgyg4kTJ/LKK68wYcIEhg4dmubsu3fvJn/+/OTPn5+KFSsyadIkHnzwwWuGAf30008A9O3b97rMAD/++CPg+aU7MTGRV1999ZrMzz33HMHBwSn9MkrPnj2vud6kSRMOHjyYcn327NmEhITQpk0bIiMjUy61a9cmMDCQ5cuXp2k9FSpUIH/+/JQqVYrnn3+esmXL8uOPP6Z6TE5mPQ+1atUiMDCQ8PBwwLPHqWjRovz3v/9l48aNxMbGYhgGq1evpkmTJre0jsueeeaZayaiuLy8q5/fjJDW9fj4+PDMM89cc9vs2bOpVKkSFStWvOb1vTzc8vLrO3fuXNxuN48++ug1/QoWLEi5cuXS/D6YNm0ay5Ytu+Zyq3r06HHNdtykSRNcLhdHjhwBPMPkLl26xMCBA687tulWJjM5deoUmzdv5umnnyZPnjwpt1evXp02bdqkbONXu9H2dfbs2ZSZKS/vvVuwYEGmz+YpIhlLw/lExBLy589P69at+eqrr4iNjcXlctG5c+cb9j1y5AiFCxcmKCjomtsrVaqUcj94hozZ7XYqV678r+tfuXIlP/74IwMGDEj3cVAlS5bko48+wu12c+DAAcaOHUtERMQ1X9yOHDmC3W6/bha9ggULkitXrpTMl/+tUKHCNf28vb0pXbp0yv0ZwdfXl/z5819zW+7cua851mnfvn1ERUVRoECBGy7j6mO+bua7774jODgYp9NJ0aJFKVOmzE37Z9bz4HA4aNCgAatWrQI8RVSTJk1o3LgxLpeLdevWERoayrlz5267iCpevPg113Pnzg1wW8eS3c56ihQpck2xBZ7Xd9euXde9Dy67/Pru27cPwzAoV67cDfuldUhnvXr1MmxiiX/7f18eMppRs/+l9p4Ez2fPkiVLiImJISAgIE0Zg4ODeeyxx5g5cybdu3dn4MCBtGrVio4dO9K5c+frfvgREWtRESUiltG1a1eee+45Tp8+Tfv27dN0jEVGqVKlChcuXOCLL77g+eefv24a5psJCAigdevWKdcbNWrEXXfdxeDBg687nut2p/POSGmZNdDtdlOgQAG+/PLLG96f2pfvf2ratOktH/uS0Ro3bszYsWOJj49n1apVDBkyhFy5clG1alVWrVpFaGgowG0XUak9v8YNJj+5E+vx8/O7ro/b7aZatWpMnjz5hssoVqxYSr/LE23caH03O47odv1zopPL7tTzezv+LaOfnx/h4eEsX76cH3/8kcWLF/PNN9/QsmVLli5dmmkze4rI7VMRJSKW0aFDB55//nnWrVt3zcHh/1SiRAl+/vlnLl26dM3eqN27d6fcD1CmTBncbjc7d+6kZs2aN113vnz5mDNnDo0bN6ZVq1asXr2awoUL39L/o3r16jz55JN8+OGH9OvXj+LFi1OiRAncbjf79u1L2WMG8Ndff3HhwoWUzJf/3bNnD6VLl07pl5iYyKFDh64p1u6EMmXK8PPPP9OoUaMbfgnPLOl5HtJbmDZp0oTExES+/vprTpw4kVIsNW3aNKWIKl++fEoxlRorFcS3qkyZMmzZsoVWrVrd9P9TpkwZDMOgVKlSlC9fPlOy5M6d+5qZF8Hzep86deqWlnd5b+f27dtveh61tL6OV78n/2n37t3ky5fvmr1QaWW322nVqhWtWrVi8uTJvPHGGwwZMoTly5ff8e1dRNJO+4pFxDICAwP54IMPGDlyJA888ECq/e69915cLhdTp0695va3334bm82WMsPfww8/jN1uZ/To0dcdb3CjX6uLFi3Kzz//TFxcHG3atOHs2bO3/H8JCwsjKSkp5Rf+e++9F4ApU6Zc0+/y/ffddx8ArVu3xtvbm3ffffeajB9//DFRUVEp/e6URx99FJfLxZgxY667Lzk5+bovvRklPc9DQEAAUVFRaV52/fr1cTqdTJgwgTx58lClShXAU1ytW7eOlStXpmkvVHrXa0WPPvooJ06c4KOPPrruvri4OGJ
iYgDP1PAOh4NRo0Zdt+0YhnFb28plZcqUSTlW7bIZM2akuifq37Rt25agoCDGjRtHfHz8Nfdd/X9I6+tYqFAhatasyWeffXbN+3779u0sXbo0ZRtPj3Pnzl132+UffG40xbyIWIf2RImIpTz11FP/2ueBBx6gRYsWDBkyhMOHD1OjRg2WLl3KggULePXVV1N+gS5btixDhgxhzJgxNGnShI4dO+Lj48OGDRsoXLgw48aNu27ZZcuWZenSpTRv3px27drx66+/EhwcnO7/R+XKlbn33nuZOXMmw4YNo0aNGjz11FPMmDGDCxcu0KxZM37//Xc+++wzHn74YVq0aAF4hscNGjSIUaNGcc899/Dggw+yZ88e3n//ferWrXvDk65mpmbNmvH8888zbtw4Nm/eTNu2bXE6nezbt4/Zs2fzzjvvpHrs2u1Iz/NQu3ZtvvnmG/r27UvdunUJDAy8aRHu7+9P7dq1WbduXco5osCzJyomJoaYmJg0FVHpXa8V/ec//+Hbb7+lZ8+eLF++nEaNGuFyudi9ezfffvstS5YsoU6dOpQpU4bXX3+dQYMGcfjwYR5++GGCgoI4dOgQ8+bNo0ePHvTr1++2snTv3j3lRMxt2rRhy5YtLFmy5JaHgQYHB/P222/TvXt36tatS9euXcmdOzdbtmwhNjaWzz77DEjf6zhp0iTat29PgwYNePbZZ1OmOA8JCUn1PG03M3r0aMLDw7nvvvsoUaIEZ86c4f3336do0aI0btz4lv7fInKHmDEloIiIYVw7xfnN/HOKc8MwjEuXLhl9+vQxChcubDidTqNcuXLGpEmTrpm6+LJZs2YZtWrVMnx8fIzcuXMbzZo1M5YtW3bT5a9fv94ICgoymjZtmjLt+o00a9bMqFKlyg3vW7FixTXTNiclJRmjRo0ySpUqZTidTqNYsWLGoEGDjPj4+OseO3XqVKNixYqG0+k0QkNDjRdeeME4f/78NX1ud4rzgICA69Z7eZn/NGPGDKN27dqGn5+fERQUZFSrVs0ICwszTp48mcozc/OM//TPKc4vS8vzEB0dbXTt2tXIlSuXAaRpuvPL09BPmDDhmtvLli1rAMaBAweuuf1GU5yntt7LfWfPnn3NMm70OvybtExxnpb13Ox9mpiYaEyYMMGoUqVKyjZSu3ZtY9SoUUZUVNQ1fb/77jujcePGRkBAgBEQEGBUrFjR6NWrl7Fnz56b/j/Ssq27XC5jwIABRr58+Qx/f3+jXbt2xv79+1Od4vyfy7rRa2QYhrFw4UKjYcOGhp+fnxEcHGzUq1fP+Prrr1PuT+11TO31+vnnn41GjRqlLO+BBx4wdu7ceU2f1N73/3yf//LLL8ZDDz1kFC5c2PD29jYKFy5sPP7448bevXtv8myKiBXYDMNCR2CKiIiIiIhYnI6JEhERERERSQcVUSIiIiIiIumgIkpERERERCQdVESJiIiIiIikg4ooERERERGRdFARJSIiIiIikg45/mS7brebkydPEhQUlHLCRRERERERyXkMw+DSpUsULlwYuz31/U05vog6efIkxYoVMzuGiIiIiIhYxLFjxyhatGiq9+f4IiooKAjwPFHBwcGmZklKSmLp0qW0bdsWp9NpahbJ2ZKS4JNPPO1nngG9Ha+lbVXE+rSdimQNVttWL168SLFixVJqhNTk+CLq8hC+4OBgSxRR/v7+BAcHW+JNJDlXTAz07+9pv/ACBASYm8dqtK2KWJ+2U5Gswarb6r8d5qOJJURERERERNJBRZSIiIiIiEg6qIgSERERERFJBxVRIiIiIiIi6aAiSkREREREJB1URImIiIiIiKRDjp/iXESu5+MDP/xwpS0iIiIiV6iIEpHreHnBffeZnUJERETEmjScT0REREREJB20J0pErpOUBF9+6Wk/8QRY6ATiIiIiIqZTESUi10lMhGee8bQfeURFlIiIiMjVNJxPREREREQkHbJFEdWhQwdy585N586dzY4iIiIiIiLZXLYool555RU+//xzs2OIiIiIiEgOkC2KqObNmxMUFGR2DBERERERyQFML6LCw8N54IEHKFy4MDabjfnz51/XZ9q0aZQsWRJfX1/q16/P77//fueDioiIiIiIYIHZ+WJiYqhRowbdunWjY8eO193/zTff0LdvX6ZPn079+vWZMmUK7dq1Y8+ePRQoUCAjg4DDcf3tDgf4+l7bLzV2O/j53Vrf2FhITMQRH+953NXTodls4O9/bV/DuPFy/9k3Lg7c7tRzBATcWt/4eHC5Mqavv78nN0BCAiQnZ0xfPz/P8wye6eaSkjKmr6/vlfdKevomJXn6p8bHx3OW2/T2TU72PBep8fa+8n5KT9/LUnsfO52e/uB5fePjU1/u1X3dbs97LSP6enl5ngvwbBOxsRnT99+2+6SkK9uqr++d+4xI63avz4i09dVnhMetfkakZ7s34zMiKQn71c/9nfyMuNW++oy4tb76jPDI6p8R//z+m1rfzP4ecbPt7uqHpqlXJmrfvj3t27dP9f7Jkyfz3HPP8czf8y1Pnz6dH3/8kVmzZjFw4MB0ry8hIYGEq178ixcvehqFC9+wv7t9e1wLFqRc9ypQAFsqH6zupk1x/fzzlb4lS2KLjLxx39q1cf3225W+lSvjPHKE+2/Q16hUieQtW670rVMH265dN1yuUaIEyfv2pVx3NGmC/c8/b9w3Xz6ST5680veee7CHh9+4r78/yRcuXOnbsSP2RYtu2Bcg6aoN1/HEE9jnzk297/nzKR+Wjueew/7FF6n3PXEC8ucHwP7qqzimT0+97969ULKkp++gQTgmT06976ZNUKWKp++YMThefz3Vvslr12LUqePpO3kyjkGDUu+7bBlGs2aevh98gOOVV1LvO38+xr33AmD7/HO8undPve9XX2H8PZGKbc4cvLp2Tb3vzJkY//2vp+9PP+H18MOp9nW98w7uF17AZjN486VNFJ/6Jj6h84Dr/3i93aY7nzd+BIDKJ/bw5YzeqS53evMn+bDFfwAofeYw3017PtW+nzXszJR2zwFQ6PxpfpryVKp9v6n7AOPvfwmA3DEX+HXiY6n2XVizDSM69APANzGe38Y+lGrfZZWbEPbY0JTrm0a0u+Z+J6Rsq0nlfIjumi/lvlxvnMSWdOMvJ0klvIl+On/K9ZBJp7DH3vgLR3JhJ5eeu/JDUfCU0ziibvwlwpXfi4svhl7p+/5fOCJu/MXAFeLg4qsFU64HfXQGr5M3/gPu9rcT1b9QyvXATyNwHrnxH2XDaWPPsCKMKRDAEaeDiVOO0nBb9A37AjT+uHJKe8z7x2jx56VU+7Z+vyLxPp4vJ4M/PsG9a6NS7Xv/lPJcCPL8aev7f6fouPx8qn07TyjL6XyeP7QvfvsXXZecTbXvf0aX5lARzxfhbgvO0G3hjT/bAboPLcXuUp4vt48vjqTX7DOp9n25fwk2VfR8/nX89Rx9vzydat/+vYvxWw3P0PX2qy8w5JOTqfYd1rMoy+sGA9Biw0XGTD+eat+xzxRmUeNcADTYcolJ7x5Lte/kJwoyt2UeAG
rtjuG9SUdS7TvtkQJ8fY9n26h4KI6Zrx9Kte+sB/Mx6yHP+73UiXi+GH4w1b5ftcvL+4963u8FIxOZM2D/Dfs5gaQWuWlxcQQAuS4l88Ore1Nd7k8NQ3jj2SIA+Ca4+fnF3an2XV47iGEvFku5vvrZnan2XVstkLBXi6dcX/bCLvwSb/wZsamCPy+HlUy5/v0re8gdfePtfldJX54bVjrl+uywfRQ6e+Nt+VBhH/4zpkzK9S+GHaDUyRt/ET6V18kjE8ulXP9ozEEqHb7xF+HzgQ4eeKdCyvX3Jh6m1p4bf0eK87bR5oNKKdf1GaHPiH9+RjzUKfXPCNfQobiHD/dc2bEDZ61aqfft2xf3+PGeK4cP4yxfPvW+PXvifvddz5WICJxFiqTa959ML6JuJjExkT///JNBV31JtdvttG7dmt+uKkDSY9y4cYwaNSrN/c+cOcP6n35KuX6fy5Xqk3bu7FnWXNX3nsREfFLpGxUVRfhVfdvExuKfSt9L0dEsv6pvi+hoglPpGxcby7Kr+jaNiiJ3Kn0TExNZfFXfRmfPki+Vvi6Xi5+u6lv/zBkKptIXuKZvndOnudlbcsmSJbj+/pWu1vHjFL9J359//pnEkBAAqh85Qqmb9F2+fDlxoZ4/tJUPHqTcTfquWrWKS0c8G3qFffuoeJO+a9as4cIZzwde2d27qXKTvuvWrePs379olNqxg+o36fvHH3/w19/tYlu2cNdN+m7atImTf/9SWHjTJurepO/WLVs49vfrEfrHH9x9k747duzg0E8/8b8Ddoyz23mNOan2jUtycT7W8wf7UvxNfskD4q/qezHuX/omu1P6BvzLchOu6mv7l+UmXtXXL/Emv/oBia4rff+NExe5ufqPeyq/7qazr9d1fVP/ddeB+zb6pv7rrh0jzX3BYEwBP7b6en5FTLbdpCsQ5bgymjzJdvPOFx124v7u/6997faUZSf+S99LV/VN+Le+jit94/+lb7Tddkt94/6lb8xVeePsN+8be9VyY/+lb9xVfWPs9pv3tV3pG/0vy42/xb6XHDfPkHBV34B/yZt4VV/7v/RNuqpv4g0GpqTW998k2659v6e+1UMy/+h7k6fN9Y8MN9n/g+sfy73Zluz+Z9+bvC+Nf/zfbvYpbPxjufqM0GdEej4j9u3bx56/v8sEHT1Ky5v0PXjwIDv/7uv311+0vUnfo0eOsPXvvt5RUaS+W+d6NsNIbX/unWez2Zg3bx4P//1L+cmTJylSpAhr166lQYMGKf3CwsJYuXIl69evB6B169Zs2bKFmJgY8uTJw+zZs6/pf7Ub7YkqVqwYkUeOEBx8g9LkDu6GT0pM5Ndff6Vly5Y4NZzv9vtqN7xHOnfD/7Arkj6zt+EwXDQvHkS+AG/yBHqTN8BzyRPgQ54AJw4f72t2w9sSUt9lb3hdu2vdFp/6rvX09MXhhXHVbnhb3M2G36Sjr92BcdV2b4uNAcOgcHgYIQd/JDGwMD/meZYqNe/C4fS+slzAFnuTvHbbP5abjr5x8Tfd7g2/W+wbHw/u1P8MGP5+aer78fH/8f2l1QQ4/Blctjchyc6bfp64r8pgT0i8eV9fn5Tt3paYhO0mnyfp6uvjnbLd25KSsCVnUF9vZ8p2n66+ycnYklL/TDOcTgyvW+nrwnaTzynD6YXx9+dJevricmG/yQ8ShpcD46phPWnu63Z73hO32dflTmb3vv2Ur14Vh90LDAN7fOqff4bDgeH993L/pS92u+c9cflq3E2GLKWnr83meQ/fSt/4hJtu95nSl39sy+np+2/bvT4jru+bTT8jXO5k9uzYSbu721OicCo/ed/B4XwXL14kX4kSREVF3bg2uPzQ1Jeadfx81RC6f+Pj44OPz/X7h5y5cuG8yROVIleutAdLT9+QEEhKwuXr68mS2pjQy33T6mbLUd8739c/tf2Nt9n36oL8NvoeOxfL8IW7MNw2GnvX5p4CRejQ4Uq9Zn15Mqlvbtj8FRz/CXy8sD8+C/vWCErVbnrzbTWH+P7A93y/azUAE5tPomnRpiYnEoGkpCQuRf1Es7vu1XYqYmFJSUnEnHZSolzNtG+rN/gunyF9vb1x/svetstMn53vZvLly4fD4eCvv/665va//vqLggVvNphMRNIryeWm9/82cSkhmZqF8/D52CI8+ujNd1zlGJH74UfP8VS0GIxR9GYDKHOWPef2MPq30QD0rNFTBZSIiOQIli6ivL29qV27Nr/88kvKbW63m19++SXV4XoicmveXraXTUcvEOzrxaTONzt6K4dJToA5z0BSDJRsAo37mJ3IMi4mXqTPij7Eu+JpVLgRPav3NDuSiIjIHWH6IJ3o6Gj2778ys86hQ4fYvHkzefLkoXjx4vTt25ennnqKOnXqUK9ePaZMmUJMTEzKbH0icvvW7o/kg5UHABjfqTpFcqdxKGFO8MtoOL0V/PJAxxlgd4DrZodw5wxuw82QVUM4dukYhQMKM77JeBz2fzkaX0REJJswvYj6448/aNGiRcr1vn37AvDUU0/x6aef8thjjxEREcHw4cM5ffo0NWvWZPHixYSGhqa2SBFJh7PRCbz6zWYMAx6vV5x7qxVK6ykSsr99y+C3qZ72w+9D8I1PhZATfbztY1YcX4G33ZvJLSaTyzeX2ZFERETuGNOLqObNm/NvEwS+9NJLvPTSS3cokUjOYRgG/eds5cylBMoWCGT4/ZX//UE5xaXTMO/v4Wn1e0KF9Ex8mr39dvI3pm72FJeD6w+mSt6bTfQvIiKS/Vj6mCgRyVyfrDnMr7vP4O1l573Ha+HnreFYgGdK1HnPQ2wkhFaD1mk/t1x2dzrmNAPCB+A23HQs15FO5TuZHUlEROSOUxElkkNtPxHF+EW7ARh6XyUqFUrDFP85xdp34eAKcPpD51ng9P3Xh+QELreLAeEDOJ9wnkp5KjG4/mCzI4mIiJjC9OF8InLnxSQk0/vrTSS63LSpHMp/7i5xzf3e3vDJJ1faOcrxP+HXMZ52+wmQv7y5eSxkxrYZbDyzkQBnAG81ewsfRzrOvSEiIpKNqIgSyYFGLtzBwcgYCgb7MrFTdWx/n739MqcTnn7anGymir8I33UDdzJU6Qi1/mN2Isv4868/mb5lOgBD7x5KseBiJicSERExj4bzieQwCzafYPafx7HZYEqXmuQOyGm7mlJhGPBDHzh/GHIVh/vfhn8UlzlVVEIUA1cNxG24ebDMg9xf+n6zI4mIiJhKe6JEcpCjZ2MZOm87AC+3KMvdpfPesF9yMixZ4mm3awdeOeGTYsvXsH0O2BzQ6WPwy2V2IkswDIORa0dyOuY0xYOK6zgoERERVESJ5BhJLje9/7eJSwnJ1CmRm96tyqXaNyEB7v97Z0N0dA4ooiL3w4/9PO0Wg6FYPXPzWMjsvbP5+ejPeNm9mNhsIgHOALMjiYiImC7HDuebNm0alStXpm7dumZHEbkjJi/by+ZjFwj29WJKl5p4OXLs5n+t5ASY8wwkxUCpptC4j9mJLGPf+X1M3DARgFfvelXngxIREflbjv0W1atXL3bu3
MmGDRvMjiKS6Vbvi2T6ygMAjO9UnaK5/U1OZCE/j4LTW8EvD3SYAXadKwsgPjmesPAwElwJNCrSiP9U1iQbIiIil+XYIkokpzgbnUCfbzdjGPB4veLcW62Q2ZGsY+9SWDfN0374AwjWc3PZm3+8yf4L+8nrm5fXG72O3aY/FyIiIpfpr6JINuZ2G/SbvYWISwmULRDI8Psrmx3JOi6dhvkveNr1e0KFe8zNYyG/HPmFb/Z8A8AbTd4gn18+kxOJiIhYi4ookWzsk7WHWb4nAm8vO1O71sLPW0PVAHC7Yd7zEBsJodWg9SizE1nGqehTDFs7DIBnqjxDw8INTU4kIiJiPSqiRLKp7SeiGL9oFwBD76tExYLBJieykLXvwMEV4PSHzrPA6Wt2IktIdiczcNVALiVeomreqrxc62WzI4mIiFhSdp+4WCRHiklIpvfXm0hyGbSpHMp/7i6Rrsd7e8PUqVfa2crxP+DX1z3t9hMhf3lz81jIjK0z2HhmIwHOACY2nYjT4TQ7koiIiCWpiBLJhkYu3MHByBgKBvsysVN1bDZbuh7vdEKvXpkUzkzxUTCnG7iToUpHqPWk2Yks44/Tf/Dh1g8BGHb3MIoFFzM5kYiIiHVpOJ9INrNg8wlm/3kcuw2mdKlJ7oDstivpFhkG/NAHLhyBXMXhgSmQzuIyu4pKiGLgqoG4DTcPlnmQ+0rfZ3YkERERS9OeKJFs5OjZWIbM2w7ASy3LcXfpvLe0HJcLVq3ytJs0AUd2mI9i81ew/TuwOaDTLPANMTuRJRiGwYi1I/gr9i9KBJdgcP3BZkcSERGxPBVRItlEksvNy//bRHRCMnVK5KZ3y7K3vKz4eGjRwtOOjoaAgAwKaZbIffBTP0+75RAoVtfcPBby7Z5v+eXoL3jZvZjQdAIBzqz+YouIiGQ+DecTySYmL9vLlmMXCPb1YkqXmng5tHkDkJwAc56BpFgo1RQavWp2IsvYe34vEzdMBODVu16lSt4qJicSERHJGvQtSyQbWL0vkukrDwAwoVN1iub2NzmRhfw8Ek5vA/+80GEG2LPD2MTbF5ccx4DwASS6E2lcpDH/qfwfsyOJiIhkGSqiRLK4yOgE+ny7GcOArvWL075aIbMjWcfeJbDufU/74Q8gWM/NZW9ueJP9F/aT1zcvrzd6HbtNfw5ERETSSn81RbIwwzDoP3sLEZcSKFcgkGH3VTY7knVcPAXzX/C0678A5duZm8dClh1Zxrd7vwXgjSZvkNfv1iYgERERyalURIlkYbPWHGb5ngi8vey817UWft4aqgaA2wXzekDsWShYDVqPNDuRZZyKPsWItSMAeKbqMzQs3NDkRCIiIllPji2ipk2bRuXKlalbV7N0Sda0/UQU4xftAmDYfZWoWDDY5EQWsuYdOBQOTn/o/Ak4fc1OZAnJ7mQGrhrIpcRLVMtXjZdrvWx2JBERkSwpx05x3qtXL3r16sXFixcJCdH5YiRriUlIpvfXm0hyGbStHMqTd5fI0OU7nTBx4pV2lnJsA/z6uqd97yTIV87cPBYyY+sMNp7ZSIAzgAlNJuC0Z7UXV0RExBpybBElkpWNWLiDg5ExFArxZWLn6thstgxdvrc39O+foYu8M+Kj4LtnwXBB1U5Q8wmzE1nGH6f/4MOtHwIw7O5hFAsuZnIiERGRrCvHDucTyaoWbD7BnD+PY7fB24/VJJe/t9mRrMEw4Ic+cOEI5CoB978NGVxcZlVRCVEMXDUQt+HmwTIPcl/p+8yOJCIikqVpT5RIFnL0bCxD5m0H4KWW5bi7dObMquZywcaNnvZdd4EjK8xXsflL2P4d2L2g8yzw1TBd8MzgOHzNcP6K/YsSwSUYUn+I2ZFERESyPBVRIllEksvNy//bRHRCMnVL5qZ3y7KZtq74eKhXz9OOjoaAgExbVcaI3Ac//T3+sMUQKFrH3DwW8u2eb/n12K942b2Y2HQi/k6diFlEROR2aTifSBYxedlethy7QLCvF1O61MLLoc0XgOQEmPMMJMVCqWbQ6FWzE1nGvvP7mPTHJAD63NWHynl1HjEREZGMoG9hIlnA6n2RTF95AIAJnapTJJefyYks5OeRcHob+OeFDh+CXR9rAHHJcYSFh5HgSqBxkcY8WflJsyOJiIhkG/q2IWJxkdEJ9Pl2M4YBXesXp321QmZHso69S2Dd+572wx9AsJ6by97c8Cb7L+wnn18+Xm/0OnabPu5FREQyiv6qiliY223Qb/YWIi4lUK5AIMPu03CsFBdPwfwXPO27X4Ty7czNYyHLjizj273fYsPGG43fIK9f5kxAIiIiklOpiBKxsE/WHmbFngh8vOy817UWft5ZYZq8O8Dtgnk9IPYsFKwGrUeancgyTkWfYsTaEQA8U/UZGhRuYHIiERGR7EdFlIhFbT8RxfhFuwAYen9lKhYMNjmRhayZAofCwRkAnT8BLx+zE1lCsjuZgasGcinxEtXyVeOlWi+ZHUlERCRb0hTnIhYUk5DMy19vIsll0LZyKE/WL35H1+90wogRV9qWcmwD/DrW0753EuQrZ24eC5mxdQYbz2wkwBnAhKYTcNqt9uKJiIhkDyqiRCxoxMIdHIqMoVCILxM7V8dms93R9Xt7w8iRd3SVaRMfBd91A8MFVTtDza5mJ7KMP07/wYdbPwRg+N3DKRZUzOREIiIi2ZeG84lYzILNJ5jz53HsNpjyWE1y+XubHckaDAO+fxUuHIVcJeD+yXCHi0uruhB/gYGrBuI23DxU5iHuLX2v2ZFERESyNe2JErGQI2djGDJvOwAvtyxH/dLmzKrmdsMuz+FYVKpkkVMvbfo/2DEX7F7QeRb4hpidyBIMw2DE2hH8FfsXJYNLMrj+YLMjiYiIZHsqokQsIjHZTe+vNxGdkEzdkrl5uWVZ07LExUHVqp52dDQEBJgWxSNiLywK87RbDIGidczNYyHf7vmWX4/9itPuZGLTifg7/c2OJCIiku1Z4fdlEQEmL9vLluNRhPg5mdKlFl4ObZ4AJCd4joNKioXSzaHRq2Ynsoy95/cyccNEAPrU7kOlvJVMTiQiIpIz6FuaiAWs2hfB9JUHAJjQqRpFcvmZnMhClo2A09vAPy90+NAiYwvNF5ccR9jKMBLdiTQp0oQnKz1pdiQREZEcI8d+G5k2bRqVK1embt26ZkeRHC4yOoG+324BoGv94txTtZDJiSxkz2JY/4Gn/fB0CCpobh4LmbRhEgeiDpDPLx+vN379js/gKCIikpPl2CKqV69e7Ny5kw0bNpgdRXIwt9vgtW+3EHEpgfKhgQy/v7LZkazj4imY/4KnffeLUL6tuXksZNmRZczeOxsbNsY1GUce3zxmRxIREclRcmwRJWIFs9YcYuXeCHy87Lz3+F34Oh1mR7IGtwvmPgdx56BgNWg90uxElnEq+hQj1nrOhNytajfuLnS3yYlERERyHhVRIibZfiKKCYt3AzDs/spUKBhkciILWf02HF4FzgDo/Al4+ZidyBKS3ckMWDWAS4mXqJ6vOr1q9TI7koiISI6kKc5FTBCTkMzLX28iyWXQ
rkooT9Qvbnakazid0K/flfYddex3WP6Gp33vJMhX7g4HsK4Pt37IpjObCHQGMqHpBJz2O/3iiIiICKiIEjHF8AU7OBQZQ+EQXyZ0qm65SQG8vWHSJBNWHHcB5jwLhguqdoaaXU0IYU0bTm9gxtYZAAy7exhFg4qanEhERCTn0nA+kTts/qYTfLfxOHYbTOlSi1z+3mZHsgbDgB/6QNRRyFUC7p8MFisuzXIh/gKDVg3Cbbh5uOzD3Fv6XrMjiYiI5GjaEyVyBx05G8PQ+dsB6N2qHPVKWXNWNbcbjh71tIsXv0OnZtr0BeyYC3Yv6DwLfEPuwEqtzzAMRqwdwV+xf1EyuCSD6g0yO5KIiEiOpyJK5A5Jcrnp/b/NRCckU69kHl5qUdbsSKmKi4NSpTzt6GgICMjkFUbsgUUDPO0WQ6BonUxeYdbxzZ5v+PXYrzjtTiY2nYi/09/sSCIiIjmehvOJ3CFvLd3LlmMXCPFzMqVLTbwc2vwASIr3HAeVFAulmkGjV81OZBl7z+9l0gbPwWl9a/elUt5KJicSERERUBElckes2hfB9JUHAJjQqTqFc/mZnMhCfh4Bf20D/7zQccYdGjtofXHJcfRf2Z9EdyJNizbliUpPmB1JRERE/qZvKyKZLDI6gT7fbAHgybuLc0/VgiYnspA9i2D9dE/74ekQpOfmsokbJnIw6iD5/PIxptEYy83gKCIikpOpiBLJRG63wWvfbiEyOoHyoYEMva+y2ZGs4+IpmP+ip333i1C+rbl5LGTp4aXM2TsHGzbGNRlHHl9rTkAiIiKSU6mIEslEs9YcYuXeCHy87Lz3+F34Oh1mR7IGtwvmPgdx56BgdWg90uxElnEy+iQjfxsJwLPVnuXuQnebG0hERESuoyJKJJNsOx7FhMW7ARh2f2UqFAwyOZGFrJkCh1eBMwA6fwJePmYnsoRkdzIDVw3kUuIlquerzos1XzQ7koiIiNyApjgXyQTRCcm8/PVGklwG91QpyBP1i5sdKV28vODFF6+0M9SxDfDrWE/73kmQz7pTvd9p07dMZ9OZTQQ6A5nQdAJOu9PsSCIiInIDKqJEMsGIBTs4fDaWwiG+jO9ULctNCuDjA9OmZcKC4y7Ad93AcEHVzlCzayasJGvacHoDM7bOAGB4g+EUDSpqciIRERFJjYbziWSw+ZtO8N3G49htMKVLLXL5e5sdyRoMA37oAxeOQq4ScP/bkMWKy8xyIf4CA1cNxMCgQ9kOtC/V3uxIIiIichPaEyWSgY6cjWHIvG0A9G5VjnqlsuasaoYBkZGedr58GVTrbPo/2DEX7F7QeRb4BmfAQrM+wzAYtnYYZ2LPUDK4JAPrDTQ7koiIiPwLFVEiGSQx2U3vrzcRk+iiXsk8vNQi6x7rExsLBQp42tHREBBwmwuM2AuLwjztFkOgaJ3bXGD28b89/2PFsRU47U4mNZuEv9Pf7EgiIiLyLzScTySDvLVsD1uORxHi52RKl5p4ObR5AZAUD3O6QVIslG4OjV41O5Fl7Dm3hzc3vAnAa3Veo2KeiiYnEhERkbTQtzyRDBC+N4IPVx4EYEKn6hTO5WdyIgv5eQT8tQ3880GHD8Gujx2AuOQ4wsLDSHQn0rRoU7pW1CQbIiIiWUWO/TYzbdo0KleuTN26dc2OIllcxKUE+n67BYAn7y7OPVULmpzIQvYsgvXTPe2HP4AgPTeXTdwwkYNRB8nvl58xjcZkuRkcRUREcrIcW0T16tWLnTt3smHDBrOjSBbmdhv0m72FyOgEKoQGMfS+ymZHso6LJ2H+3yebursXlG9rbh4LWXJ4CXP2zsGGjXFNxpHHN2tOQCIiIpJT5dgiSiQjfLz6ECv3RuDjZee9rrXwdTrMjmQNbhfM7QFx56BgdWg9wuxElnEy+iSj1o4CoHu17tQvVN/kRCIiIpJeKqJEbtHW4xeYuGQ3AMMfqEz50CCTE1nI6rfh8CpwBkDnT8DLx+xElpDsTmZA+AAuJV2iev7qvFDzBbMjiYiIyC3QFOcityA6IZneX28iyWXQvmpButYrbnakDOXlBU89daWdLsd+h+VveNr3vQn5su5U7xlt+pbpbI7YTKAzkAlNJuC0O82OJCIiIrdARZTILRi+YDuHz8ZSOMSX8R2rZ7tJAXx84NNPb+GBcRdgzrNguKDaI1Dj8QxOlnVtOL2BGVtnADCiwQiKBhU1OZGIiIjcKg3nE0mneZuOM3fjCew2eOfxWoT4a28CAIYB378CUUchd0m4bzJks+LyVl2Iv8DAVQMxMOhQtgP3lLrH7EgiIiJyG7QnSiQdDkfGMHTedgB6typH3ZLZc1Y1w4DYWE/b3z+NtdCmL2DnfLB7QadZ4BucmRGzDMMwGLZ2GGdiz1AyuCQD6w00O5KIiIjcJu2JEkmjxGQ3vf+3iZhEF/VK5eHlluXMjpRpYmMhMNBzuVxM3VTEHlg0wNNuORSK1s7UfFnJ//b8jxXHVuC0O5nUbBL+Tn+zI4mIiMhtUhElkkZvLd3D1uNR5PJ38k6XmjjsGqoGQFI8zOkGSbFQujk0fMXsRJax59we3tzwJgCv1XmNinkqmpxIREREMoKKKJE0CN8bwYfhBwGY0Kk6hUL8TE5kIcuGw1/bwT8fdPgQ7PpYAYhNiqV/eH8S3Yk0LdqUrhW7mh1JREREMoi+7Yj8i4hLCfT9dgsA/7m7BO2qFDQ5kYXsWQS/f+hpd5gOQXpuLpu4YSKHog6R3y8/YxqNyXYzOIqIiORkKqJEbsLtNnht9hYioxOoEBrEkPsqmR3JOi6ehPkvetoNXoJybczNYyFLDi/hu33fYcPGG03eII9v9pyAREREJKdSESVyEx+vPkT43gh8vOy817UWvk6H2ZGswe2CuT0g7hwUqgGthpudyDJORJ9g1NpRADxb7VnuLnS3yYlEREQko6mIEknF1uMXmLhkNwDDH6hM+dAgkxNZyOrJcHgVOAM805l7+ZidyBKS3ckMCB/ApaRLVM9fnRdrvmh2JBEREckEOk+UyA1EJyTT++tNJLkM2lctSNd6xc2OdEc5HNC585X2NY6uh+XjPO373oR8Ze9oNiv7YMsHbInYQqAzkAlNJuC060TMIiIi2ZGKKJEbGD5/O4fPxlI4xJfxHavnuEkBfH1h9uwb3BF3Ab7rDoYLqj0KNR6/09Esa8PpDXy09SMAhjcYTtGgoiYnEhERkcyi4Xwi/zB343HmbjqB3QbvPF6LEH/tTQDAMOD7VyDqKOQuCfe9BTmsuEzN+fjzDAwfiIFBh7IdaF+qvdmRREREJBOpiBK5yuHIGIbN3w7AK63KU7ekZlVLsfFz2Dkf7F6e46B8g81OZAmGYTB8zXDOxJ2hZHBJBtYbaHYkERERyWQqokT+lpjspvf/NhGT6KJeqTy81DLnHusTE+PZyWSzedpE7IFFAzx3thwGRWubms9Kvt79NSuOr8BpdzKp2ST8nf5mRxIREZFMpiJK5G9vLt3D1uNR5PJ38k6XmjjsGqoGQFI8zOkGyXFQugU07G12IsvYe34vb/3xFgCv1XmNinkqmpx
IRERE7gQVUSLAyr0RzAg/CMCETtUpFOJnciIL+XUM/LUd/PNBhw/Bro8NgEQjkYFrBpLoTqRp0aZ0rdjV7EgiIiJyh2h2PsnxIi4l8Nq3mwH4z90laFeloLmBrOaPWeCNp4AKCjU7jWX8GPcjhxMPk98vP2MajclxMziKiIjkZPpJWXI0t9ug77ebiYxOpEJoEEPuq2R2JGtq8BKUa212CstYdnQZfyb+iQ0b45qMI4+vJiARERHJSVRESY42c/VBVu2LxNdp572utfB1/vPMsjmU23WlXbA6tBphXhaLORF9gjHrxwDwTJVnqF+ovsmJRERE5E7LsUXUtGnTqFy5MnXr1jU7iphk6/ELTFy8B4Dh91ehfGiQyYksZO17V9oPfwBe3uZlsZBkdzIDwgcQnRRNMUcxnq/2vNmRRERExAQ59pioXr160atXLy5evEhISIjZceQOi05I5uWvN5HsNmhftSCP1ytmdiTrOLoex6oJ3FuuEoRWxVGgtNmJLOP9ze+zJWILgc5AHvF9BKddJ2IWERHJiXJsESU52/D52zlyNpbCIb6M71hdkwJcFncBvuuOr1csP47/Djq2MzuRZfx+6ndmbpsJwJB6Q3DtcP3LI0RERCS7yrHD+STnmrvxOHM3ncBug3cer0WIv/YmAGAY8P0rEHUUcpeCe980O5FlnI8/z6BVgzAw6FiuI+1KqLgUERHJyVRESY5yKDKGYfO3A/BKq/LULalZ1VJs/Bx2zge7F3T+GHyDzU5kCYZhMHzNcM7EnaFUSCkG1B1gdiQRERExmYooyTESk930/noTMYku6pXKw0sty5odyTrO7IZFfxcHrYYTk6s2AQEQEAAxMeZGM9tXu79ixfEVOO1OJjWdhL/T3+xIIiIiYjIdEyU5xptL97DtRBS5/J2806UmDruOgwIgKR7mdIPkOCjTEhq8DHEQG2t2MPPtObeHt/54C4DX6rxGhTwVTE4kIiIiVqA9UZIjrNwbwYzwgwBM7FSdQiF+JieykGXD4MwOCMgPD08Huz4WAGKTYukf3p8kdxLNijaja8WuZkcSERERi9C3Jcn2zlyK57VvNwPw3wYlaFuloLmBrGT3T/D7DE/74ekQFGpuHguZuGEih6IOkd8vP2MajdEMjiIiIpJCRZRka263wWvfbiEyOpGKBYMYfG8lsyNZR9QJWPCip93gJSjX2tw8FrL40GK+2/cdNmyMbzKe3L65zY4kIiIiFqIiSrK1masPsmpfJL5OO+89Xgtfp8PsSNbgdsHcHhB3HgrVhFYjzE5kGccvHWfUb6MA6F6tO/UK1TM5kYiIiFiNiijJtrYcu8DExXsAGH5/FcqFBpmcyEJWTYYjq8E7EDrPAi9vsxNZQpI7iQGrBhCdFE2N/DV4oeYLZkcSERERC9LsfJItRSck0/t/m0h2G9xbrSCP1ytmdiTrOLoOVozztO97C/KWua6L3Q7Nml1p5xQfbP6ArRFbCXQGMqHpBJx2nYhZRERErqciSrKlYfO3c+RsLEVy+TGuQ3VNCnBZ3Hn4rjsYLqj+GNTocsNufn6wYsWdjWa230/9zsxtMwEY0XAERQKLmJxIRERErCoH/cYsOcXcjceZt+kEDruNd7rUJMRfexMAMAxY2BuijkHuUnDvm2Ynsozz8ecZtGoQBgadynXinpL3mB1JRERELExFlGQrhyJjGDZ/OwCvtipHnZJ5TE5kIRs/g10Lwe4FnT8G32CzE1mCYRgMXzOcM3FnKBVSirC6YWZHEhEREYtTESXZRmKym95fbyIm0UX9Unl4sUVZsyNZx5ndsGigp91qOBSpfdPuMTGQP7/nEhNzB/KZ6KvdX7Hi+AqcdieTmk7C3+lvdiQRERGxOB0TJdnGpCW72XYiilz+TqZ0qYnDruOgAEiKgzndIDkOyrSEBi+n6WGRkZmcywL2nNvDW3+8BcBrdV6jQp4KJicSERGRrEB7oiRbWLHnDB+tOgTAxE7VKRTiZ3IiC1k6DM7sgID88PD0nDXd3k3EJsXSP7w/Se4kmhdtTteKXc2OJCIiIlmEvk1JlnfmUjz9Zm8B4KkGJWhbpaDJiSxk94+w4SNP++HpEBRqbh4LmbhhIoeiDlHArwCjG43WDI4iIiKSZiqiJEtzuw1e+3YLkdGJVCwYxKB7K5kdyTqiTsCCXp52g5egXGtz81jI4kOL+W7fd9iwMa7JOHL75jY7koiIiGQhKqIkS/to1UFW7YvE12nnvcdr4et0mB3JGtwumPuc57xQhWpCqxFmJ7KM45eOM+q3UQB0r9adeoXqmZxIREREshoVUZJlbTl2gUlL9gAw4oEqlAsNMjmRhax6C46sAe9A6DwLvLzNTmQJSe4kBqwaQHRSNDXy1+DFmi+aHUlERESyIM3OJ1nSpfgkev9vE8lug/uqFaJL3WJmR7KOI7/BinGe9n1vQd4y6V6E3Q516lxpZxcfbP6ArRFbCXIGMaHpBLzs+ggUERGR9NM3CMmShi/YwZGzsRTJ5ccbHatpUoDL4s7Dd93BcEP1x6BGl1tajJ8fbNiQwdlMtv7UemZumwnAiIYjKBJYxOREIiIiklVlo9+YJaf47s/jzNt0AofdxruP1yTEz2l2JGswDFjYGy4eh9ylPHuhBIDz8ecZtGoQBgadynWiXcl2ZkcSERGRLExFlGQpByOiGbZgOwB9Wpejdok8JieykD8/hV0Lwe7lOQ7KR8eIARiGwbA1w4iIi6B0SGkG1BtgdiQRERHJ4lRESZaRmOym9/82EZvo4u7SeXiheVmzI1nHmV2weKCn3WoEFLnrthYXGwslS3ousbG3nc5UX+3+ipXHV+Jt92Zi04n4eelEzCIiInJ7dEyUZBmTluxm+4mL5PJ3MuWxWjjsOg4KgKQ4mNMNkuOhTEvPOaFuk2HAkSNX2lnV7nO7eesPz7DG1+q8RoU8FUxOJCIiItmB9kRJlrBizxk+WnUIgEmda1AwxNfkRBaydCic2QkB+eHh6dlrOr3bEJsUS/+V/UlyJ9G8WHMer/i42ZFEREQkm9C3LbG8M5fi6Td7CwBPNyxJm8qhJieykF0/wAbPjHN0mA5Bem4uG//7eA5fPEwBvwKMbjhaMziKiIhIhrmtIiohISGjcojckNtt8Nq3W4iMTqRiwSAGtq9odiTriDoOC3p52g1fhrKtzc1jIYsOLWLe/nnYsDG+6Xhy++Y2O5KIiIhkI+kqohYtWsRTTz1F6dKlcTqd+Pv7ExwcTLNmzRg7diwnT57MrJwZbtq0aVSuXJm6deuaHUVu4qNVB1m1LxJfp52pXWvh63SYHcka3C6Y2wPiL0DhWtByuNmJLOP4peOM/m00AM9Vf466BbWNi4iISMZKUxE1b948ypcvT7du3fDy8mLAgAHMnTuXJUuWMHPmTJo1a8bPP/9M6dKl6dmzJxEREZmd+7b16tWLnTt3siG7nVE0G9ly7AKTluwBYOQDVShbQFN2pwh/E46sAe9A6PQxeHmbncgSktxJDFg1gOikaGrmr8kLNV4wO5KIiIhkQ2manW/ixIm8/fbbtG/fHv
sNDlp/9NFHAThx4gTvvfce//d//0efPn0yNqnkKJfik3j5600kuw3uq16Ix+oWMzuSdRz5DVaO97Tvmwx5y2T4Kmw2qFz5SjureH/z+2yN2EqQM4gJTSfgZdcEpCIiIpLx0vQN47fffkvTwooUKcL48eNvK5CIYRgMnb+do+diKZLLjzc6VNOkAJfFnYfvuoPhhupdoMZjmbIaf3/YsSNTFp1p1p1ax8fbPgZgRMMRFA4sbHIiERERya5ue3Y+l8vF5s2bOX/+fEbkEWHuxhMs2HwSh93Gu4/XJMTPaXYkazAMWNgbLh6HPKXhvjfNTmQZ5+LPMXjVYAwMOpXrRLuS7cyOJCIiItlYuouoV199lY8/9vza63K5aNasGXfddRfFihVjxYoVGZ1PcpiDEdEMW7AdgD6ty1G7RB6TE1nIn5/CroVgd0LnWeCjY8TAs+dy2JphRMRFUDqkNAPqDTA7koiIiGRz6S6i5syZQ40aNQD4/vvvOXToELt376ZPnz4MGTIkwwNKzpGQ7OLlrzcRm+ji7tJ5eKF5WbMjWceZXbB4oKfdeoRnRr5MFBsLVap4LrGxmbqq2/blri8JPx6Ot92biU0n4uflZ3YkERERyebSXURFRkZSsGBBAH766SceeeSRlJn7tm3bluEBJeeYtHgPO05eJLe/kymP1cJh13FQACTFwZxnITkeyrSCu3tl+ioNA3bu9FwMI9NXd8t2nd3F5D8nA9Cvbj8q5KlgciIRERHJCdJdRIWGhrJz505cLheLFy+mTZs2AMTGxuJw6Bw+cmuW7znDzNWHAJjUuQYFQ3xNTmQhS4fBmR0QkB86TIcbzJCZE8UmxRIWHkaSO4kWxVrQpUIXsyOJiIhIDpHu+X+feeYZHn30UQoVKoTNZqN169YArF+/nooVK2Z4QMn+zlyMp9+3WwB4qkEJWlcONTmRhez6ATZ85Gl3+BACC5ibx0LG/z6ewxcPU8C/AKMbjtYMjiIiInLHpLuIGjlyJFWrVuXYsWM88sgj+Pj4AOBwOBg4cGCGB5Tsze026PvtFs7GJFKxYBCD7q1kdiTriDoOC/4eutewN5RtZW4eC1l0aBHz9s/Dho3xTcaTyzeX2ZFEREQkB0lzEfXf//6Xhx56iHbt2tG5c+fr7n/qqacyNJjkDDNWHWT1/kj8nA6mdq2Fr1NDQgFwu2BuD4i/4JlEouUwsxNZxvFLxxn922gAelTvQd2CdU1OJCIiIjlNmg+uKFu2LG+88Qb58+enffv2fPDBB5w4cSIzs0k2t/nYBd5csgeAkQ9WpmwBTdmdIvxNOLIGvAOh08fg5W12IktIcicxIHwA0UnR1CpQi541epodSURERHKgNBdRw4cP588//2Tfvn088MADzJ8/nzJlylC7dm1Gjx7N5s2bMzGmZDeX4pPo/fUmkt0G91UvxKN1ipkdyTqO/AYrx3va902GvGXueASbDUqU8FysdKjR+5vfZ2vkVoK8gxjfZDxe9nSPSBYRERG5beme5qto0aK8+OKLLFmyhIiICAYMGMCePXto2bIlJUqU4KWXXmLHjh2ZkVWyCcMwGDJvO0fPxVIklx9vdKimSQEuizsP33UHww01Hocaj5kSw98fDh/2XPz9TYlwnXWn1vHxNs+Jvkc2GEnhwMImJxIREZGc6rbmSg4KCuLRRx/lyy+/JCIiglmzZuFwOPjtt98yKp9kQ99tPMHCLSdx2G28+3gtQvycZkeyBsOAhb3h4nHIUxrunWR2Iss4F3+OwasGY2DQuXxn2pZsa3YkERERycHSPRYmLi4OwzDw//vn6SNHjjBv3jwqV65M27ZtadVKM4hJ6g5GRDN8wXYA+rYpT+0SuU1OZCF/fgK7FoLdCZ1ngY+OEQPPnstha4YRERdBmZAyhNUNMzuSiIiI5HDp3hP10EMP8fnnnwNw4cIF6tWrx1tvvcVDDz3EBx98kOEBJftISHbx8tebiE100aB0Xno2u/PH+ljWmV2weJCn3XqEZ0Y+E8XFQd26nktcnKlR+HLXl4QfD8fb7s3EZhPx8/IzN5CIiIjkeOkuojZu3EiTJk0AmDNnDgULFuTIkSN8/vnnvPvuuxkeULKPiYv3sOPkRXL7O3n7sZo47DoOCoCkOJjTDZLjoUwruLuX2Ylwu+GPPzwXt9u8HLvO7mLyn5MB6F+3P+VzlzcvjIiIiMjf0l1ExcbGEhTkGWa0dOlSOnbsiN1u5+677+bIkSMZHlCyh+W7z/Dx6kMATOpcg4IhviYnspClQ+HMTggoAB2mg/22DlXMNmKTYgkLDyPJnUSLYi14rII5k2yIiIiI/FO6v62VLVuW+fPnc+zYMZYsWULbtp4DvM+cOUNwcHCGB5Ss78zFePrN3gLA0w1L0rpyqMmJLGTX97BhpqfdYToEFjA3j4WM+30chy8eJtQ/lNENR2sGRxEREbGMdBdRw4cPp1+/fpQsWZJ69erRoEEDwLNXqlYtc4/jEOtxuw36fruFszGJVCoUzMD2Fc2OZB1Rx2HBS552w95QVpOyXPbTwZ+Yv38+dpudcU3Gkcs3l9mRRERERFKke3a+zp0707hxY06dOkWNGjVSbm/VqhUdOnTI0HCS9X0YfpDV+yPxczp47/Fa+DodZkeyBrcLvnsO4i9A4bug5TCzE1nGsUvHGLNuDAA9qvegbsG6JicSERERuVa6iqhvvvmGhQsXkpiYSKtWra7Z81SvXr0MDydZ26aj53lr6R4ARj5YmbIFAk1OZCHhk+DoWvAOgs4fg5e32YksIcmdxMDwgUQnRVOrQC2er/682ZFERERErpPmIuqDDz6gV69elCtXDj8/P+bOncuBAweYNEknBJXrXYxPovf/NpHsNri/eiEerVPM7EjWcWQtrJzgad8/2XNiXQvKl+/Or3PapmlsjdxKkHcQ45uMx8ue7p3lIiIiIpkuzcdETZ06lREjRrBnzx42b97MZ599xvvvv5+Z2SSLMgyDofO2c+xcHEVz+zG2QzVNCnBZ7DnPMD7DDTUeh+qPmp3ohgICICLCcwkIuDPr/O3kb8zaPguAkQ1GUjiw8J1ZsYiIiEg6pbmIOnjwIE899VTK9a5du5KcnMypU6cyJZhkXXP+PM7CLSdx2G2806UWIX5OsyNZg2HA973h4nHIUwbu1V7cy87Fn2Pw6sEYGHQu35m2JduaHUlEREQkVWkuohISEgi46idpu92Ot7c3cXFxmRJMsqaDEdGMWLgDgL5tylO7RG6TE1nIH7M8U5rbnZ7joHyCzE5kCW7DzdDVQ4mMi6R0SGnC6oaZHUlERETkptJ1wMGwYcPw9/dPuZ6YmMjYsWMJCQlJuW3y5MkZl06ylIRkFy9/vYnYRBcNSuelZ7MyZkeyjr92wpLBnnbrEVDY2qcDiIuD9u097UWLwM8v89b15a4vWXViFd52byY1m4SfVyauTERERCQDpLmIatq0KXv27LnmtoYNG3Lw4MGU6zruJWebuHgPO05eJLe/k7cfq4nDrvcDAElxMKcbJMdD2dZwdy+zE/0rtxtWrrzSziw7z+5k8p+eH
1761+1P+dzlM29lIiIiIhkkzUXUihUrMjGGZHXLd5/h49WHAHjzkRoUDPE1OZGFLBkCEbsgoAA8PB3s6T7HdbYUmxTLgPABJLuTaVGsBY9VeMzsSCIiIiJpcsvf5iIjI4mMjMzILJJFnbkYz2uztwDwdMOStKoUanIiC9n1Pfzxsafd8UMIzG9uHgsZ9/s4Dl88TAH/AoxuOFp7skVERCTLSFcRdeHCBXr16kW+fPkIDQ0lNDSUfPny8dJLL3HhwoVMiihW5nYb9Pl2M+diEqlUKJiB7SuaHck6LhyDBS952o1egTItzc1jIT8d/In5++djt9kZ32Q8uXxzmR1JREREJM3SPJzv3LlzNGjQgBMnTvDEE09QqVIlAHbu3Mmnn37KL7/8wtq1a8mdW7Ox5STTww+wZv9Z/JwO3nu8Fr5Oh9mRrMGVDHN7QPwFKFIbWg4zO5FlHLt0jNHrRgPQo3oP6hasa3IiERERkfRJcxE1evRovL29OXDgAKGhodfd17ZtW0aPHs3bb7+d4SHFmjYePc9bS/cCMPLBypQtEGhyIgtZ9SYcXQveQdBpJjh0riyAJHcSA8IHEJMUQ60CtXi++vNmRxIRERFJtzQP55s/fz5vvvnmdQUUQMGCBZk4cSLz5s3L0HBiXRfjk3jlf5twuQ3ur16IR+sUMzuSdRxZCysneNr3vw15Spub5xb5+3suGWnapmlsi9xGkHcQ45uMx8uerrMsiIiIiFhCmr/BnDp1iipVqqR6f9WqVTl9+nSGhBJrMwyDIfO2c+xcHEVz+zG2QzVNCnBZ7Dn47jkw3FCjK1R/xOxEtyQgAGJiMnaZ606tY9b2WQCMajiKwoGFM3YFIiIiIndImvdE5cuXj8OHD6d6/6FDh8iTJ09GZBKLm/3ncb7fchKH3cY7XWoR4qehagAYBix8GS4ehzxl4N5JZieyjHPx5xi0ahAGBp3Ld6ZNiTZmRxIRERG5ZWkuotq1a8eQIUNITEy87r6EhASGDRvGPffck6HhxHoOREQzYsEOAPq0LkftEppIJMUfs2D3D2B3QudZ4KNjxADchpuhq4cSGRdJmZAyhNUNMzuSiIiIyG1J18QSderUoVy5cvTq1YuKFStiGAa7du3i/fffJyEhgS+++CIzs4rJEpJdvPzVJuKSXDQonZcXmpc1O5J1/LUTlgz2tNuMgsI1TY1zu+LjoVMnT/u778D3Ns6d/OWuL1l1YhXedm8mNpuIn5dfxoQUERERMUmai6iiRYvy22+/8eKLLzJo0CAMwwDAZrPRpk0bpk6dSrFiWWdygWnTpjFt2jRcLpfZUbKMCYv2sPPURXL7O5nSpSYOu46DAiAxFuZ0g+R4KNsG6r9gdqLb5nLBTz9dad+qnWd3MvnPyQD0r9uf8rnLZ0A6EREREXOl62S7pUqVYtGiRURGRrJu3TrWrVtHREQEixcvJl++fHz11VeZlTPD9erVi507d7Jhwwazo2QJv+7+i1lrDgHw5iM1CA2+jV0T2c3SIRCxCwJD4eEPwJ6uzSrbik2KZUD4AJLdybQs1pLHKjxmdiQRERGRDHFL3/Zy585NvXr1qFevXspkEkeOHOE///lPhoYTazhzMZ5+s7cC8HTDkrSqdP009znWzoWeY6GwQYcPITC/2YksY9zv4zh88TCh/qGMbjRaMziKiIhItqGfzOWm3G6DPt9u5lxMIpULBTPo3opmR7KOC8dg4UuedqNXoEwLc/NYyE8Hf2L+/vnYbXbGNRlHiE+I2ZFEREREMoyKKLmp6eEHWLP/LH5OB+8+XgsfL4fZkazBlQxzn4P4KChSG1oONTuRZRy7dIzR60YD0KN6D+oWrGtyIhEREZGMpSJKUrXx6HneWroXgFEPVqFsAU3ZnSJ8Ehz9DbyDoNPH4NC5sgCS3EkMCB9ATFIMdxW4i+erP292JBEREZEMl+bZ+d59992b3n/ixInbDiPWcTE+iVf+twmX2+D+6oV4pE5RsyNZx+E1ED7R035gCuQpZWocK5m2aRrbIrcR5B3E+Cbj8bKn+SNGREREJMtI8zect99++1/7FC9e/LbCiDUYhsGQeds5di6Oorn9eKNjNU0KcFnsOc8wPsMNNbpCtc5mJ8oUAQHw91kM0uy3k78xa/ssAEY1HEWhwEKZkExERETEfGkuog4dOpSZOcRCZv95nO+3nMRht/Hu47UI9tVQNcBTVSx8GS6egDxl4N5JZieyjLNxZxm8ejAGBp3Ld6ZNiTZmRxIRERHJNDomSq5xICKaEQt2ANC3TXnuKp7b5EQW8sfHsPsHsDuh8yzw0TFiAG7DzdA1Q4mMi6RsrrKE1Q0zO5KIiIhIpkpzEVW8eHHOnj2bcn3q1KlcvHgxU0KJORKSXbz81Sbiklw0LJOXns3KmB3JOv7aAYsHe9ptRkHhmqbGyWzx8fDII55LfPzN+36560tWn1iNj8OHiU0n4ufld2dCioiIiJgkzUXU8ePHcblcKdcHDx5MZGRkpoQSc0xYtIedpy6SJ8Cbtx+ricOu46AASIyFOd3AlQBl20D9F8xOlOlcLpgzx3O5arO/zs6zO5n852QA+tfpT7nc5e5QQhERERHz3PJwPiO9R52Lpf26+y9mrfEc9/bmI9UJDfY1OZGFLBkMEbshMBQe/gDsGgULEJsUS1h4GMnuZFoWa8mjFR41O5KIiIjIHaFvg8JfF+PpN3srAM80KknLiqEmJ7KQnQvgz08AG3T4EALzm53IMt5Y/wZHLh4h1D+U0Y1GawZHERERyTHSdRKXmTNnEhjoOZg+OTmZTz/9lHz58l3Tp3fv3hmXTjKdy23Q55vNnItJpHKhYAa2r2h2JOu4cMwzGx9Ao1egTAtz81jITwd/YsGBBdhtdsY3GU+IT4jZkURERETumDQXUcWLF+ejjz5KuV6wYEG++OKLa/rYbDYVUVnMh+EHWHvgLH5OB+91rYWPl8PsSNbgSobvukN8FBSpDS2Hmp3IMo5dPMbodaMBeL7689QpWMfkRCIiIiJ3VpqLqMOHD2diDDHDxqPneWvpXgBGPVSFMvk1ZXeK8IlwbB14B0Gnj8Ghc2UBJLmSCAsPIyYphrsK3EWP6j3MjiQiIiJyx+mYqBzqYnwSr/xvEy63wQM1CvNI7aJmR7KOw6sh/O8T6d7/NuQpZW4eC5m6eSrbz24n2DuY8U3G42VP14hgERERkWwhzd+A4uLi+OWXX7j//vsBGDRoEAkJCSn3OxwOxowZg6+vZnWzOsMwGDJvO8fOxVEsjx9jO1TVpACXxZ6DuT3AcEPNJ6D6I2YnMoW/P0RHX2kDrD25llnbZwEwquEoCgUWMimdiIiIiLnSXER99tln/PjjjylF1NSpU6lSpQp+fp4Ta+7evZvChQvTp0+fzEkqGWb2n8f5fstJvOw23u1Si2BfDVUDwDA8E0lcPAF5ykD7iWYnMo3NBgEBV66fjTvLkNVDAHi0/KO0LtHapGQiIiIi5kvzcL4vv/ySHj2uPf7hq6++Yvny5SxfvpxJkybx7bffZnhAyVgHIqIZsWAHAH3blqdW8dwm
[base64-encoded PNG output of the mock roofline plot cell omitted]", + "text/plain": [
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Example data for Operational Intensity\n", + "operational_intensity = np.logspace(0, 3, 100) # Operational Intensity (x-axis)\n", + "\n", + "# Performance data for three different functions\n", + "performance1 = np.minimum(10, 2*operational_intensity) # Performance for Function 1\n", + "performance2 = np.minimum(10, 1*operational_intensity) # Performance for Function 2\n", + "performance3 = np.minimum(10, 0.5*operational_intensity) # Performance for Function 3\n", + "\n", + "# Create the Roofline plot\n", + "plt.figure(figsize=(10, 6))\n", + "plt.loglog(operational_intensity, performance1, label='MKN')\n", + "plt.loglog(operational_intensity, performance2, label='MNK')\n", + "plt.loglog(operational_intensity, performance3, label='NMK')\n", + "plt.axhline(y=10, color='r', linestyle='--', label='Peak Performance')\n", + "plt.axvline(x=5, color='b', linestyle='--', label='Peak Memory Bandwidth')\n", + "plt.legend()\n", + "plt.xlabel('Operational Intensity (FLOPs/Byte)')\n", + "plt.ylabel('Performance (GFLOPs/s)')\n", + "plt.title('Mock Roofline Plot with Three Functions')\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Define the operational intensity range (x-axis)\n", + "operational_intensity = np.logspace(0, 3, 100) # Operational Intensity (x-axis)\n", + "\n", + "# Define parameters for three different functions\n", + "params = [(10, 5), (20, 10), (30, 15)] # Each tuple contains (Peak Performance, Peak Memory Bandwidth)\n", + "\n", + "# Create the Roofline plot\n", + "plt.figure(figsize=(10, 6))\n", + "\n", + "# Plot each function on the Roofline plot\n", + "for i, (peak_performance, peak_bandwidth) in enumerate(params, 1):\n", + " performance = np.minimum(peak_performance, peak_bandwidth*operational_intensity)\n", + " plt.loglog(operational_intensity, performance, label=f'Function {i}')\n", + "\n", + " # Plot horizontal and vertical lines representing the peak performance and peak memory bandwidth\n", + " plt.axhline(y=peak_performance, color=f'C{i-1}', linestyle='--')\n", + " plt.axvline(x=peak_performance/peak_bandwidth, color=f'C{i-1}', linestyle='--')\n", + "\n", + "# Add labels, legend, and grid\n", + "plt.legend()\n", + "plt.xlabel('Operational Intensity (FLOPs/Byte)')\n", + "plt.ylabel('Performance (GFLOPs/s)')\n", + "plt.title('Mock Roofline Plot with 3 Functions')\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Define the data\n", + "matrix_labels = [\n", + " 'matrix1(A)', 'matrix1(B)', 'matrix1(C)',\n", + " 'matrix2(A)', 'matrix2(B)', 'matrix2(C)'\n", + "]\n", + "reuse_distance_labels = [\n", + " \"M_N_K_Reuse_Dist\", \"M_K_N_Reuse_Dist\", \"K_M_N_Reuse_Dist\",\n", + " \"K_N_M_Reuse_Dist\", \"N_M_K_Reuse_Dist\", \"N_K_M_Reuse_Dist\"\n", + "]\n", + "# Example data (reuse distances for each label for each matrix)\n", + "data = np.random.rand(6, 6) # Replace with your actual data\n", + "\n", + "# Set up the figure and axis\n", + "fig, ax = plt.subplots(figsize=(15, 8))\n", + "\n", + "# Define the width of the bars and the positions of the bar groups\n", + 
"bar_width = 0.1\n", + "bar_positions = np.arange(len(matrix_labels))\n", + "\n", + "# Create the bar groups for each matrix label\n", + "for i, label in enumerate(reuse_distance_labels):\n", + " ax.bar(bar_positions + i * bar_width, data[:, i], width=bar_width, label=label)\n", + "\n", + "# Configure the x-axis and y-axis labels, title, and legend\n", + "ax.set_xlabel('Coefficient of Row Variation')\n", + "ax.set_ylabel('Reuse Distance')\n", + "ax.set_title('Reuse Distance by Coefficient of Row Variation')\n", + "ax.set_xticks(bar_positions + bar_width * 2.5)\n", + "ax.set_xticklabels(matrix_labels)\n", + "ax.legend()\n", + "\n", + "# Display the chart\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Define the data\n", + "matrix_labels = [\n", + " 'matrix1(A)', 'matrix1(B)', 'matrix1(C)',\n", + " 'matrix2(A)', 'matrix2(B)', 'matrix2(C)'\n", + "]\n", + "reuse_distance_labels = [\n", + " \"M_N_K\", \"M_K_N\", \"K_M_N\",\n", + " \"K_N_M\", \"N_M_K\", \"N_K_M\"\n", + "]\n", + "# Example data (reuse distances for each label for each matrix)\n", + "data = np.random.rand(6, 6) # Replace with your actual data\n", + "\n", + "# Set up the figure and axis\n", + "fig, ax = plt.subplots(figsize=(15, 8))\n", + "\n", + "# Define the width of the bars and the positions of the bar groups\n", + "bar_width = 0.1\n", + "bar_positions = np.arange(len(matrix_labels))\n", + "\n", + "# Create the bar groups for each matrix label\n", + "for i, label in enumerate(reuse_distance_labels):\n", + " ax.bar(bar_positions + i * bar_width, data[:, i], width=bar_width, label=label)\n", + "\n", + "# Configure the x-axis and y-axis labels, title, and legend\n", + "ax.set_xlabel(' Matrices ordered by coefficient of row variation')\n", + "ax.set_ylabel('Reuse Distance')\n", + "ax.set_title('Reuse Distance by Matrices')\n", + "ax.set_xticks(bar_positions + bar_width * 2.5)\n", + "ax.set_xticklabels(matrix_labels)\n", + "ax.legend()\n", + "\n", + "# Display the chart\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Create more randomness in the function shapes\n", + "def random_curve(x):\n", + " curve_values = np.sin(x + np.random.rand()) * np.random.rand(len(x)) + np.random.rand(len(x))\n", + " min_value = np.min(curve_values)\n", + " return curve_values + (1 - min_value + 0.1) # 0.1 added to ensure it's strictly greater than 1\n", + "\n", + "# Generate data\n", + "x = np.linspace(1, 4, 6)\n", + "curves = [random_curve(x) for _ in range(6)]\n", + "\n", + "# Create the figure and axis objects\n", + "fig, ax = plt.subplots(figsize=(8, 5))\n", + "\n", + "# Plot the random curves with markers\n", + "marker_shapes = ['o', 'x', 's', '^', '*', '+']\n", + "labels = ['MNK', 'MKN', 'KMN', 'KNM', 'NMK', 'NKM']\n", + "\n", + "for i, (curve, marker, label) in enumerate(zip(curves, marker_shapes, labels)):\n", + " ax.plot(x, curve, label=label, marker=marker, linestyle='--')\n", + "\n", + "# Annotate the graph\n", + "ax.set_xlabel('Matrices ordered by nnz')\n", + "ax.set_ylabel('Compression ratio')\n", + "ax.set_title('SpGEMM Performance Analysis')\n", + "ax.set_xticks(x)\n", + "ax.set_xticklabels(['mtx1', 'mtx2', 'mtx3', 'mtx4', 'mtx5', 'mtx6'])\n", + "ax.legend()\n", + "ax.set_ylim(bottom=1)\n", + "\n", + "# Display the 
graph\n", + "plt.tight_layout()\n", + "plt.grid(True, which='both', linestyle='--', linewidth=0.5)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Generate random data points for each label\n", + "num_points = 20\n", + "labels = ['MKN', 'MNK', 'NMK', 'NKM', 'KMN', 'KNM']\n", + "markers = ['o', 'x', 's', '^', '*', '+']\n", + "data = {label: (np.random.rand(num_points) + i*0.2, np.random.rand(num_points) + i*0.2) \n", + " for i, label in enumerate(labels)}\n", + "\n", + "# Create the figure and axis objects\n", + "fig, ax = plt.subplots(figsize=(8, 5))\n", + "\n", + "# Plot the data points with their respective markers\n", + "for label, marker in zip(labels, markers):\n", + " x_data, y_data = data[label]\n", + " ax.scatter(x_data, y_data, label=label, marker=marker, s=50)\n", + "\n", + "# Annotate the graph\n", + "ax.set_xlabel('Coefficient of row variation')\n", + "ax.set_ylabel('Running time')\n", + "ax.set_title('Clustering Analysis')\n", + "ax.legend()\n", + "\n", + "# Display the graph\n", + "plt.tight_layout()\n", + "plt.grid(True, which='both', linestyle='--', linewidth=0.5)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# Define the data\n", + "matrix_labels = [\n", + " 'matrix1(A) #of reads', 'matrix1(B)#of reads', 'matrix1(C)#of writes',\n", + " 'matrix2(A)#of reads', 'matrix2(B)#of reads', 'matrix2(C)#of writes'\n", + "]\n", + "reuse_distance_labels = [\n", + " \"M_N_K\", \"M_K_N\", \"K_M_N\",\n", + " \"K_N_M\", \"N_M_K\", \"N_K_M\"\n", + "]\n", + "# Example data (reuse distances for each label for each matrix)\n", + "data = np.random.rand(6, 6) # Replace with your actual data\n", + "\n", + "# Set up the figure and axis\n", + "fig, ax = plt.subplots(figsize=(15, 8))\n", + "\n", + "# Define the width of the bars and the positions of the bar groups\n", + "bar_width = 0.1\n", + "bar_positions = np.arange(len(matrix_labels))\n", + "\n", + "# Create the bar groups for each matrix label\n", + "for i, label in enumerate(reuse_distance_labels):\n", + " ax.bar(bar_positions + i * bar_width, data[:, i], width=bar_width, label=label)\n", + "\n", + "# Configure the x-axis and y-axis labels, title, and legend\n", + "ax.set_xlabel('Matrices ordered by coefficient of row variation')\n", + "ax.set_ylabel('The number of memory access')\n", + "ax.set_title('The number of memory access by Matrices')\n", + "ax.set_xticks(bar_positions + bar_width * 2.5)\n", + "ax.set_xticklabels(matrix_labels)\n", + "ax.legend()\n", + "\n", + "# Display the chart\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/spmm/mtx_B.txt b/examples/spmm/mtx_B.txt new file mode 100644 index 0000000..b210f94 --- /dev/null +++ b/examples/spmm/mtx_B.txt @@ -0,0 +1,3 @@ +7,8,2,6,4,5,8,4,10,3 
+9,1,1,5,3,5,9,10,10,3 +1,7,9,1,10,1,6,10,8,6 diff --git a/examples/spmm/mxt_C.txt b/examples/spmm/mxt_C.txt new file mode 100644 index 0000000..110e4e3 --- /dev/null +++ b/examples/spmm/mxt_C.txt @@ -0,0 +1,3 @@ +11,36,38,10,44,9,32,44,42,27 +18,2,2,10,6,10,18,20,20,6 +3,21,27,3,30,3,18,30,24,18 diff --git a/examples/spmm/thread_mapped.cu b/examples/spmm/thread_mapped.cu index 5944a0e..efb3512 100644 --- a/examples/spmm/thread_mapped.cu +++ b/examples/spmm/thread_mapped.cu @@ -12,6 +12,8 @@ #include "helpers.hxx" #include +#include "/home/ychenfei/research/libs/loops/examples/spgemm/test_spgemm.cpp" + using namespace loops; int main(int argc, char** argv) { @@ -28,12 +30,15 @@ int main(int argc, char** argv) { // Input and output matrices. std::size_t n = 10; matrix_t B(csr.cols, n); + // matrix_t B(mtx.load(parameters.filename)); matrix_t C(csr.rows, n); // Generate random numbers between [0, 10]. generate::random::uniform_distribution(B.m_data.begin(), B.m_data.end(), 1, 10); + + // Run the benchmark. util::timer_t timer; timer.start(); @@ -41,4 +46,27 @@ int main(int argc, char** argv) { timer.stop(); std::cout << "Elapsed (ms):\t" << timer.milliseconds() << std::endl; -} \ No newline at end of file + + // loops::matrix_t h_B; + // copyDeviceMtxToHost(B, h_B); + // writeMtxToFile(h_B, csr.cols, n, "/home/ychenfei/research/libs/loops/examples/spmm/mtx_B.txt"); + + // loops::matrix_t h_C; + // copyDeviceMtxToHost(C, h_C); + // writeMtxToFile(h_C, csr.rows, n, "/home/ychenfei/research/libs/loops/examples/spmm/mxt_C.txt"); + + + // Copy C from device to host +/* + type_t *C_host = new type_t[csr.rows * n]; + cudaError_t err = cudaMemcpy(C_host, C.m_data_ptr, csr.rows * n * sizeof(type_t), cudaMemcpyDeviceToHost); + if (err != cudaSuccess) { + std::cerr << "Error copying data from device to host: " << cudaGetErrorString(err) << std::endl; + exit(EXIT_FAILURE); + }else{ + std::cout << "Succeeded copying data from device to host!" << std::endl; + } + + writeMatrixToFile(C_host, csr.rows, n, "/home/ychenfei/research/libs/loops/examples/spmm/spmm_result_cuda.txt"); +*/ +} diff --git a/include/loops/algorithms/spgemm/estimate_nnz.cuh b/include/loops/algorithms/spgemm/estimate_nnz.cuh new file mode 100644 index 0000000..f8e72ba --- /dev/null +++ b/include/loops/algorithms/spgemm/estimate_nnz.cuh @@ -0,0 +1,104 @@ +/** + * @file estimate_nnz.cuh + * @author + * @brief SpGEMM kernels. 
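+ * (Summary added for clarity; it only restates what the code below does.)
+ * Declares __estimate_nnz_C, a kernel that counts, for each row of A, how many
+ * distinct column indices appear in the corresponding row of C = A * B, and a
+ * host-side launcher, estimate_nnz, that writes those per-row counts into
+ * nnz_C_per_row.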
+ * @version 0.1 + * @date 2023-11-08 + * + * @copyright Copyright (c) 2023 + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace loops { +namespace algorithms { +namespace spgemm { + +template +__global__ void __estimate_nnz_C(const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* nnz_C_per_row) { + + int row = blockIdx.x * blockDim.x + threadIdx.x; + + if (row >= a_rows) return; + + extern __shared__ int shared_marker[]; + + for (int i = threadIdx.x; i < b_cols; i += blockDim.x) { + shared_marker[i] = -1; + } + __syncthreads(); + + int nnz_count = 0; + int start_mm_a = a_offsets[row]; + int end_mm_a = a_offsets[row + 1]; + + for (int mm = start_mm_a; mm < end_mm_a; ++mm) { + int kk_a = a_indices[mm]; + int start_nn_b = b_offsets[kk_a]; + int end_nn_b = b_offsets[kk_a + 1]; + for (int nn = start_nn_b; nn < end_nn_b; ++nn) { + int kk_b = b_indices[nn]; + if (atomicCAS(&shared_marker[kk_b], -1, row) == -1) { + nnz_count++; + } + } + } + + nnz_C_per_row[row] = nnz_count; + __syncthreads(); +} + + +/** + * @brief Estimate the nnz of output matrix C. + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param nnz of C (GPU). + * @param stream CUDA stream. + */ +template +void estimate_nnz(csr_t& csr, + csc_t& csc, + int* nnz_C_per_row) { + + std::size_t block_size = 128; + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + + __estimate_nnz_C<<>>( + csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + nnz_C_per_row); +} + +} // namespace spgemm +} // namespace algorithms +} // namespace loops \ No newline at end of file diff --git a/include/loops/algorithms/spgemm/estimate_nnz_test.cuh b/include/loops/algorithms/spgemm/estimate_nnz_test.cuh new file mode 100644 index 0000000..a57ede4 --- /dev/null +++ b/include/loops/algorithms/spgemm/estimate_nnz_test.cuh @@ -0,0 +1,1403 @@ +/** + * @file estimate_nnz_test.cuh + * @author + * @brief SpGEMM kernels. 
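+ * (Summary added for clarity; it only restates what the code below does.)
+ * Collects experimental variants (__estimate_nnz_test through the
+ * __estimate_nnz_row_col_pairs kernels) that estimate the number of nonzeros
+ * in each row of C = A * B; the variants differ in how they stage tiles of A
+ * and B in shared memory and in the ordering of their counting loops.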
+ * @version 0.1 + * @date 2023-11-08 + * + * @copyright Copyright (c) 2023 + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +#define TILE_SIZE 32 + + +namespace loops { +namespace algorithms { +namespace spgemm { + +template +__global__ void __estimate_nnz_test(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + for (auto mm : config.tiles()) { + bool found = false; + for (auto nn : + custom_stride_range(std::size_t(0), b_cols, std::size_t(1))) { + type_t sum = 0; + for (auto nz : config.atoms(mm)) { + auto kk_a = a_indices[nz]; + for (auto nz_b = b_offsets[nn]; nz_b < b_offsets[nn + 1]; ++nz_b) { + if(kk_a == b_indices[nz_b]&&!found){ + ++c_nnz_per_row[mm]; + found = true; + } + } + } + found = false; + } + } +} + +template +__global__ void __estimate_nnz_test_v2(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + for (auto mm : config.tiles()) { + bool found = false; + for (auto nz : config.atoms(mm)) { + auto kk_a = a_indices[nz]; + for (auto nn : custom_stride_range(std::size_t(0), b_cols, std::size_t(1))) { + for (auto nz_b = b_offsets[nn]; nz_b < b_offsets[nn + 1]; ++nz_b) { + if(kk_a == b_indices[nz_b]&&!found){ + ++c_nnz_per_row[mm]; + found = true; + } + } + } + found = false; + } + } +} + +template +__global__ void __estimate_nnz_test_v3(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + + int tx = threadIdx.x, ty = threadIdx.y; + + for (auto mm : config.tiles()) { // stride through rows of A + bool found = false; + for (auto tile_itr : custom_stride_range(std::size_t(0), std::size_t((b_cols + TILE_SIZE -1) / TILE_SIZE), std::size_t(1))){ + // Load a tile of A into shared memory + for (auto i : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + shared_A_cols[tx + i * TILE_SIZE] = a_indices[a_offsets[mm] + tx + i * TILE_SIZE]; + } + __syncthreads(); + + + // Load a tile of B into shared memory + for (auto i : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + shared_B_rows[ty + i * TILE_SIZE] = b_indices[b_offsets[tile_itr * TILE_SIZE + ty + i * TILE_SIZE]]; + } + + __syncthreads(); + + for (auto ak : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + for (auto bk : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + if(shared_A_cols[tx + ak * TILE_SIZE] == shared_B_rows[ty + bk * TILE_SIZE]&&!found){ + atomicAdd(&c_nnz_per_row[mm], 1); + found = true; + } + } + } + // __syncthreads(); + found = false; + + } + } +} + +// Tiling +template 
+__global__ void __estimate_nnz_test_v4(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + + int tx = threadIdx.x, ty = threadIdx.y; + int bx = blockIdx.x, by = blockIdx.y; + + auto m_global_idx = by * TILE_SIZE + ty; + + + + for(auto m : custom_stride_range(std::size_t(m_global_idx), std::size_t(a_rows), std::size_t(TILE_SIZE))){ // Stride over the rows of A with the stride width of M0 = TILE_SIZE + bool found = false; + + // Load a tile of A into shared memory + auto ka_start = a_offsets[m] + tx; + auto ka_end = a_offsets[m + 1]; + for(auto col_arr_idx : custom_stride_range(std::size_t(ka_start), std::size_t(ka_end), std::size_t(TILE_SIZE))){ + shared_A_cols[ty * TILE_SIZE + tx] = a_indices[col_arr_idx]; + } + __syncthreads(); + + + for (auto n1 : custom_stride_range(std::size_t(0), std::size_t((b_cols + TILE_SIZE -1) / TILE_SIZE), std::size_t(1))){ + // Load a tile of B into shared memory + for (auto i : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + shared_B_rows[ty + i * TILE_SIZE] = b_indices[b_offsets[n1 * TILE_SIZE + ty + i * TILE_SIZE]]; + } + } + __syncthreads(); + + for (auto ak : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + for (auto bk : custom_stride_range(std::size_t(0), std::size_t(TILE_SIZE), std::size_t(1))){ + if(shared_A_cols[tx + ak * TILE_SIZE] == shared_B_rows[ty + bk * TILE_SIZE]&&!found){ + atomicAdd(&c_nnz_per_row[m], 1); + found = true; + } + } + } + // __syncthreads(); + found = false; + } +} + +// Tile by row, col pair of the input matrices +// For input matrices with number of columns and rows <= TILE_SIZE && B_nnz < TILE_SIZE * TILE_SIZE +template +__global__ void __estimate_nnz_row_col_pairs_v1(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + // __shared__ int found; // allocate found in shared memory so that all the threads can read and write to it + __shared__ int C_n_nnz_per_block[TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + // For every block: load ONE row of A into shared memory, load as much of B as possible into shared memory + // For every thread: load (k - ty + 1)/TILE_SIZE elements of row m of A into shared memory, load ONE column of B into shared memory + + auto m = bx; + + bool found = false; + C_n_nnz_per_block[tx] = 0; + __syncthreads(); + + if(m < a_rows){ + auto col_arr_start = a_offsets[m]; + auto col_arr_end = a_offsets[m + 1]; + auto range = col_arr_end - col_arr_start; + + // Every thread loads one element of the mth row of A into shared memory + shared_A_cols[tx] = a_indices[col_arr_start + tx]; + __syncthreads(); + } + + for(int i = 0; i < gridDim.x; ++i){ + if(bx == i && tx == 0){ + auto start = a_offsets[i]; + auto end = a_offsets[i + 1]; + auto range_i = end - start; + for(int k0 = 0; 
k0 < range_i; ++k0){ + // if(shared_A_cols[k0] != a_indices[start + k0]){ + printf("m%d: shared_A_cols[%d] = %d a_indices[%d] = %d\n", m, k0, shared_A_cols[k0], k0 + start, a_indices[k0 + start]); + // } + } + } + } + + int n = tx; + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + } + __syncthreads(); + + // for(int i = 0; i < gridDim.x; ++i){ + // if(bx == 0 && tx == 0){ + // auto start = b_offsets[0]; + // for(int k0 = 0; k0 < b_nnz; ++k0){ + // // if(shared_B_rows[k0] != b_indices[start + k0]){ + // printf("shared_B_rows[%d] = %d b_indices[%d] = %d\n", k0, shared_B_rows[k0], start + k0, b_indices[start + k0]); + // // } + // } + // } + // } + + std::array helperArray; + if(m < a_rows){ + int n = tx; + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + auto col_arr_start = a_offsets[m]; + auto col_arr_end = a_offsets[m + 1]; + auto range = col_arr_end - col_arr_start; + for(auto col_arr_itr_a = 0; col_arr_itr_a < range; ++col_arr_itr_a){ + if((shared_A_cols[col_arr_itr_a] == shared_B_rows[row_arr_itr_b])){ + found = true; + C_n_nnz_per_block[n] += 1; + + if(bx == 1){ + helperArray[0] = m; + helperArray[1] = n; + helperArray[2] = col_arr_itr_a; + helperArray[3] = shared_A_cols[col_arr_itr_a]; + helperArray[4] = row_arr_itr_b - row_arr_start; + helperArray[5] = shared_B_rows[row_arr_itr_b]; + helperArray[6] = C_n_nnz_per_block[n]; + + printf("m(bx): %d, n(tx): %d, col_arr_itr_a: %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_block[%d]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[6]); + } + + break; + } + } + if(found) break; + } + } + __syncthreads(); + + int C_n_nnz = C_n_nnz_per_block[tx]; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + + // for(int i = 0; i < gridDim.x; ++i){ + // if(bx == i && tx == 0){ + // printf("bx: %d, C_nnz_per_row: %d\n", bx, C_nnz_per_row); + // } + // } + + c_nnz_per_row[m] = C_nnz_per_row; +} + + + +// For input matrices with number of columns and rows > TILE_SIZE && B_nnz < TILE_SIZE * TILE_SIZE +template +__global__ void __estimate_nnz_row_col_pairs_v2(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + __shared__ int C_n_nnz_per_block[TILE_SIZE * TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + + auto m = bx; + + bool found = false; + + C_n_nnz_per_block[tx] = 0; + __syncthreads(); + + if(m < a_rows){ + auto col_arr_start = a_offsets[m]; + auto col_arr_end = a_offsets[m + 1]; + auto range = col_arr_end - col_arr_start; + + for(int col_arr_itr = tx; col_arr_itr < range; col_arr_itr += TILE_SIZE){ + shared_A_cols[col_arr_itr] = a_indices[col_arr_start + col_arr_itr]; + } + __syncthreads(); + } + + // for(int i = 0; i < gridDim.x; ++i){ + // int i = 54; + 
// if(bx == i && tx == 0){ + // auto start = a_offsets[i]; + // auto end = a_offsets[i + 1]; + // auto range_i = end - start; + // for(int k0 = 0; k0 < range_i; ++k0){ + // // if(shared_A_cols[k0] != a_indices[start + k0]){ + // printf("m%d: shared_A_cols[%d] = %d a_indices[%d] = %d\n", m, k0, shared_A_cols[k0], k0 + start, a_indices[k0 + start]); + // // } + // } + // } + // } + + + for(int n0 = tx; n0 < b_cols; n0 += TILE_SIZE){ // Each tx load n0 and n0 + (b_cols / TILE_SIZE) columns of B into shared memory + auto row_arr_start = b_offsets[n0]; + auto row_arr_end = b_offsets[n0 + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + } + } + __syncthreads(); + + if(b_nnz < TILE_SIZE * TILE_SIZE){ // If the number of non-zero elements in B is less than TILE_SIZE * TILE_SIZE, pad the shared memory with -1 + int diff = TILE_SIZE * TILE_SIZE - b_nnz; + for(int i = tx; i < diff; i += TILE_SIZE){ + shared_B_rows[b_nnz + i] = -1; + } + } + __syncthreads(); + + + // for(int i = 0; i < gridDim.x; ++i){ + // if(bx == i && tx == 0){ + // printf("block: %d\n", bx); + // auto start = b_offsets[0]; + // for(int k0 = 0; k0 < TILE_SIZE * TILE_SIZE; ++k0){ + // if(shared_B_rows[k0] != b_indices[start + k0]){ + // printf("shared_B_rows[%d] = %d b_indices[%d] = %d\n", k0, shared_B_rows[k0], start + k0, b_indices[start + k0]); + // } + // } + // } + // } + + + std::array helperArray; + if(m < a_rows){ + for(int n = tx; n < b_cols; n += TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + auto col_arr_start = a_offsets[m]; + auto col_arr_end = a_offsets[m + 1]; + auto range = col_arr_end - col_arr_start; + + // for(auto col_arr_itr_a = col_arr_start; col_arr_itr_a < col_arr_end; ++col_arr_itr_a){ + for(auto col_arr_itr_a = 0; col_arr_itr_a < range; ++col_arr_itr_a){ + + // if(bx == 10 && n == 44){ + // helperArray[2] = col_arr_itr_a; + // helperArray[3] = shared_A_cols[col_arr_itr_a]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = shared_B_rows[row_arr_itr_b]; + // printf("bx: 10, tx: 44\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_block[%d]: %d\n", helperArray[2], helperArray[3], helperArray[4], helperArray[5], n, C_n_nnz_per_block[n]); + + // } + if((shared_A_cols[col_arr_itr_a] == shared_B_rows[row_arr_itr_b])){ + found = true; + + C_n_nnz_per_block[n % TILE_SIZE] += 1; + + // if(bx == 10){ + // helperArray[0] = m; + // helperArray[1] = n; + // helperArray[2] = col_arr_itr_a; + // helperArray[3] = shared_A_cols[col_arr_itr_a]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = shared_B_rows[row_arr_itr_b]; + // helperArray[6] = C_n_nnz_per_block[n]; + + // printf("m(bx): %d, n(tx): %d, col_arr_itr_a: %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_block[%d]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[6]); + // } + + break; + } + } + if(found) break; + } + } + + } + __syncthreads(); + + + // if(bx == 10 && tx == 0){ + // for(int i = 0; i < TILE_SIZE; ++i){ + // printf("C_n_nnz_per_block[%d]: %d\n", i, C_n_nnz_per_block[i]); + // } + // } + + int C_n_nnz = C_n_nnz_per_block[tx]; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int 
C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + + // for(int i = 0; i < gridDim.x; ++i){ + // if(bx == 10 && tx == 0){ + // printf("bx: %d, C_nnz_per_row: %d\n", bx, C_nnz_per_row); + // } + // } + + c_nnz_per_row[m] = C_nnz_per_row; + +} + + +// For input matrices with number of columns and rows > TILE_SIZE && B_nnz <= TILE_SIZE * TILE_SIZE +// Add striding to A rows +template +__global__ void __estimate_nnz_row_col_pairs_v3(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + __shared__ int C_n_nnz_per_m0[TILE_SIZE * TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + + auto m = bx; + + bool found = false; + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + std::array test; + + int shared_mem_prev_col_arr_range = 0; + for(int m0 = bx; m0 < a_rows; m0 += gridDim.x){ // Stride over the rows of A with the stride width of gridDim.x + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto shared_mem_curr_col_arr_range = col_arr_end - col_arr_start; + + for(int col_arr_itr = tx; col_arr_itr < shared_mem_curr_col_arr_range; col_arr_itr += TILE_SIZE){ + shared_A_cols[col_arr_itr + shared_mem_prev_col_arr_range] = a_indices[col_arr_itr + col_arr_start]; + } + shared_mem_prev_col_arr_range += shared_mem_curr_col_arr_range; + } + __syncthreads(); + + for(int n0 = tx; n0 < b_cols; n0 += TILE_SIZE){ // Each tx load n0 and n0 + (b_cols / TILE_SIZE) columns of B into shared memory + auto row_arr_start = b_offsets[n0]; + auto row_arr_end = b_offsets[n0 + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + } + } + __syncthreads(); + + if(b_nnz < TILE_SIZE * TILE_SIZE){ // If the number of non-zero elements in B is less than TILE_SIZE * TILE_SIZE, pad the shared memory with -1 + int diff = TILE_SIZE * TILE_SIZE - b_nnz; + for(int i = tx; i < diff; i += TILE_SIZE){ + shared_B_rows[b_nnz + i] = -1; + } + } + __syncthreads(); + + int prev_col_arr_range = 0; + for(int m0 = bx; m0 < a_rows; m0 += gridDim.x){ //TODO: which loop order will be faster? m0->n0->kb->ka or n0->kb->m0->ka? 
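+    // Per (m0, n) pair in this loop: thread tx owns columns n = tx, tx + TILE_SIZE, ... of B
+    // and checks whether any column index of A's row m0 (cached in shared_A_cols at offset
+    // prev_col_arr_range) matches a row index of B's column n. The first match contributes one
+    // non-zero to row m0 of C, so the per-row count is structural; numerical cancellation is
+    // not detected at this stage.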
+ auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + for(int n = tx; n < b_cols; n += TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == shared_B_rows[row_arr_itr_b])){ + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + break; + } + } + if(found) break; + } + } + __syncthreads(); + + int C_n_nnz = C_n_nnz_per_m0[tx]; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + c_nnz_per_row[m0] = C_nnz_per_row; + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + } + __syncthreads(); +} + + +// For input matrices with number of columns and rows > TILE_SIZE && B_nnz > TILE_SIZE * TILE_SIZE +// Add striding to A rows +template +__global__ void __estimate_nnz_row_col_pairs_v4(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + int* c_nnz_per_row) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + __shared__ int C_n_nnz_per_m0[TILE_SIZE * TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + + bool found = false; + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + std::array test; + + int shared_mem_prev_col_arr_range = 0; + // int m0 = bx; + // while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) < (TILE_SIZE * TILE_SIZE - shared_mem_prev_col_arr_range)) + for(int m0 = bx; m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) <= (TILE_SIZE * TILE_SIZE - shared_mem_prev_col_arr_range); m0 += gridDim.x) //can't exploit the shared memory b/c the shared memory isn't large enough to take an entire row of A + // for(int m0 = bx; m0 < a_rows; m0 += gridDim.x) + { // Stride over the rows of A with the stride width of gridDim.x + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto shared_mem_curr_col_arr_range = col_arr_end - col_arr_start; + + // if(bx == 0 && tx == 0) + // if(tx == 0) + // { + // printf("m0: %d\na_offsets[%d]: %d, a_offsets[%d + 1]: %d\nshared_mem_prev_col_arr_range: %d, TILE_SIZE * TILE_SIZE - shared_mem_prev_col_arr_range(%d - %d) = %d, shared_mem_curr_col_arr_range: %d\n", m0, m0, col_arr_start, m0, col_arr_end, shared_mem_prev_col_arr_range, TILE_SIZE * TILE_SIZE, shared_mem_prev_col_arr_range, TILE_SIZE * TILE_SIZE - shared_mem_prev_col_arr_range, shared_mem_curr_col_arr_range); + // } + + for(int col_arr_itr = tx; col_arr_itr < shared_mem_curr_col_arr_range; col_arr_itr += TILE_SIZE){ + shared_A_cols[col_arr_itr + shared_mem_prev_col_arr_range] = a_indices[col_arr_itr + col_arr_start]; + + // if(bx == 0 && (tx == 0 || tx == 1)){ + // if(m0 == 0){ + // test[0] = m0; + // test[1] = col_arr_itr; + // test[2] = shared_mem_prev_col_arr_range; + // test[3] = col_arr_itr + shared_mem_prev_col_arr_range; + // test[4] = shared_A_cols[col_arr_itr + 
shared_mem_prev_col_arr_range]; + // test[9] = col_arr_start; + // test[5] = col_arr_itr + col_arr_start; + // test[6] = a_indices[col_arr_itr + col_arr_start]; + // test[7] = bx; + // test[8] = shared_mem_curr_col_arr_range; + + // printf("m0: %d, bx: %d\nshared_A_cols[%d + %d = %d]: %d, a_indices[%d + %d = %d]: %d\nshared_mem_prev_col_arr_range: %d, shared_mem_curr_col_arr_range: %d\n", test[0], test[7], test[1], test[2], test[3], test[4], test[1], test[9], test[5], test[6], test[2], test[8]); + // } + + } + shared_mem_prev_col_arr_range += shared_mem_curr_col_arr_range; + } + __syncthreads(); + + // while(m0 < a_rows){ + // m0 += gridDim.x + // } + + // for(int i = 0; i < gridDim.x; ++i){ + // int i = 0; + // if(bx == i && tx == 0){ + // for(int k = 0; k < TILE_SIZE * TILE_SIZE; ++k){ + // printf("shared_A_cols[%d]: %d\n", k, shared_A_cols[k]); + // } + // } + // } + + for(int n0 = tx; n0 < b_cols && b_offsets[n0 + 1] <= TILE_SIZE * TILE_SIZE; n0 += TILE_SIZE) + { + auto row_arr_start = b_offsets[n0]; + auto row_arr_end = b_offsets[n0 + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + } + } + __syncthreads(); + + if(b_nnz < TILE_SIZE * TILE_SIZE){ // If the number of non-zero elements in B is less than TILE_SIZE * TILE_SIZE, pad the shared memory with -1 + int diff = TILE_SIZE * TILE_SIZE - b_nnz; + for(int i = tx; i < diff; i += TILE_SIZE){ + shared_B_rows[b_nnz + i] = -1; + } + } + __syncthreads(); + + // if(bx == 0 && tx == 0){ + // auto start = b_offsets[0]; + // for(int k0 = 0; k0 < TILE_SIZE * TILE_SIZE; ++k0){ + // // if(shared_B_rows[k0] != b_indices[start + k0]){ + // printf("shared_B_rows[%d] = %d b_indices[%d] = %d\n", k0, shared_B_rows[k0], start + k0, b_indices[start + k0]); + // // } + // } + // } + + std::array helperArray; + + // SHARED_A: + int prev_col_arr_range = 0; + int m0 = bx; + while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) <= (TILE_SIZE * TILE_SIZE - prev_col_arr_range)) + // for(int m0 = bx; m0 < a_rows; m0 += gridDim.x) + { //TODO: which loop order will be faster? m0->n0->kb->ka or n0->kb->m0->ka? 
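+    // The work splits four ways from here: rows of A whose non-zeros still fit in shared_A_cols
+    // are handled in this while loop, first against B columns staged in shared_B_rows, then
+    // against longer B columns read from b_indices in global memory. Rows of A that did not fit
+    // in shared memory fall through to the GLOBAL_A while loop further down, which reads
+    // a_indices directly from global memory before doing the same two-way split over B.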
+ + // /* + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + // if(bx == 56 && tx == 0){ + // helperArray[0] = m0; + // helperArray[1] = col_arr_start; + // helperArray[2] = col_arr_end; + // helperArray[3] = curr_col_arr_range; + // helperArray[4] = prev_col_arr_range; + // helperArray[5] = shared_A_cols[prev_col_arr_range+1]; + // printf("SHARED_A: m0: %d\ncol_arr_start: %d, col_arr_end: %d\ncurr_col_arr_range: %d, prev_col_arr_range: %d\nshared_A_cols[%d + 1] = %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[4], helperArray[5]); + // } + + // for(int n = tx; n < b_cols; n += TILE_SIZE){ + // Using SHARED B + int n = tx; + while(n < b_cols && b_offsets[n + 1] < TILE_SIZE * TILE_SIZE){ + + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + + // if(bx == 0 && tx == 0){ + // helperArray[0] = m0; + // helperArray[1] = n; + // helperArray[2] = col_arr_itr_a; + // helperArray[3] = shared_A_cols[col_arr_itr_a + prev_col_arr_range]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = shared_B_rows[row_arr_itr_b]; + // printf("bx: 0, m0: %d, n: %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], C_n_nnz_per_m0[n]); + // } + + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == shared_B_rows[row_arr_itr_b])){ + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + + // if(bx == 56){ + // helperArray[0] = m0; + // helperArray[1] = n; + // helperArray[2] = col_arr_itr_a + prev_col_arr_range; + // helperArray[3] = shared_A_cols[col_arr_itr_a + prev_col_arr_range]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = shared_B_rows[row_arr_itr_b]; + // helperArray[6] = C_n_nnz_per_m0[n % TILE_SIZE]; + + // // printf("SHARED_A && SHARED_B: m0: %d, n(tx): %d, col_arr_itr_a: %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + // printf("SHARED_A && SHARED_B:\nm0: %d, n(tx): %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + // } + + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + // int n = tx; + // Using GLOBAL B + while(n < b_cols && b_offsets[n + 1] >= TILE_SIZE * TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + // if(bx == 0 && tx == 0){ + // helperArray[0] = m0; + // helperArray[1] = n; + // helperArray[2] = col_arr_itr_a; + // helperArray[3] = shared_A_cols[col_arr_itr_a + prev_col_arr_range]; + // helperArray[4] = 
row_arr_itr_b - row_arr_start; + // helperArray[5] = b_indices[row_arr_itr_b]; + // printf("bx: 0, m0: %d, n: %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], C_n_nnz_per_m0[n]); + // } + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == b_indices[row_arr_itr_b])){ + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + + // if(bx == 56){ + // helperArray[0] = m0; + // helperArray[1] = n; + // helperArray[2] = col_arr_itr_a + prev_col_arr_range; + // helperArray[3] = shared_A_cols[col_arr_itr_a + prev_col_arr_range]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = b_indices[row_arr_itr_b]; + // helperArray[6] = C_n_nnz_per_m0[n % TILE_SIZE]; + + // // printf("SHARED_A && GLOBAL_B: m0: %d, n(tx): %d, col_arr_itr_a: %d\nshared_A_cols[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + // printf("SHARED_A && GLOBAL_B:\nm0: %d, n(tx): %d\nshared_A_cols[%d]: %d, b_indices[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + // } + + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + // if(bx == 56 && tx == 0){ + // for(int i = 0; i < TILE_SIZE; ++i){ + // printf("C_n_nnz_per_m0[%d]: %d\n", i, C_n_nnz_per_m0[i % TILE_SIZE]); + // } + // } + + int C_n_nnz = C_n_nnz_per_m0[tx]; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + c_nnz_per_row[m0] = C_nnz_per_row; + + // if(bx == 56 && tx == 0){ + // printf("SHARED_A: bx: %d, m0: %d, C_nnz_per_row: %d\n", bx, m0, C_nnz_per_row); + // } + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + // */ + m0 += gridDim.x; + } + __syncthreads(); + + // /* + // GLOBAL A + while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) > (TILE_SIZE * TILE_SIZE - prev_col_arr_range)) + { + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + // if(bx == 56 && tx == 0){ + // helperArray[0] = m0; + // helperArray[1] = col_arr_start; + // helperArray[2] = col_arr_end; + // helperArray[3] = curr_col_arr_range; + // helperArray[4] = prev_col_arr_range; + // helperArray[5] = shared_A_cols[prev_col_arr_range+1]; + // printf("GLOBAL_A: m0: %d\ncol_arr_start: %d, col_arr_end: %d\ncurr_col_arr_range: %d, prev_col_arr_range: %d\nshared_A_cols[%d + 1] = %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[4], helperArray[5]); + // } + + // for(int n = tx; n < b_cols; n += TILE_SIZE){ + int n = tx; + while(n < b_cols && b_offsets[n + 1] < TILE_SIZE * TILE_SIZE){ + + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + + // if(bx == 0 && tx == 0){ + // helperArray[0] = m0; + // helperArray[1] = n; + // helperArray[2] = 
col_arr_itr_a; + // helperArray[3] = a_indices[col_arr_itr_a + prev_col_arr_range]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = shared_B_rows[row_arr_itr_b]; + // printf("bx: 0, m0: %d, n: %d\a_indices[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], C_n_nnz_per_m0[n]); + // } + + // if((a_indices[col_arr_itr_a + prev_col_arr_range] == shared_B_rows[row_arr_itr_b])) + if((a_indices[col_arr_itr_a + col_arr_start] == shared_B_rows[row_arr_itr_b])) + { + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + + // if(bx == 56){ + // helperArray[0] = m0; + // helperArray[1] = n; + // // helperArray[2] = col_arr_itr_a + prev_col_arr_range; + // // helperArray[3] = a_indices[col_arr_itr_a + prev_col_arr_range]; + // helperArray[2] = col_arr_itr_a + col_arr_start; + // helperArray[3] = a_indices[col_arr_itr_a + col_arr_start]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = shared_B_rows[row_arr_itr_b]; + // helperArray[6] = C_n_nnz_per_m0[n % TILE_SIZE]; + + // // printf("GLOBAL_A && SHARED_B: m0: %d, n(tx): %d, col_arr_itr_a: %d\a_indices[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + + // printf("GLOBAL_A && SHARED_B:\nm0: %d, n(tx): %d\na_indices[%d]: %d, shared_B_rows[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + // } + + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + while(n < b_cols && b_offsets[n + 1] >= TILE_SIZE * TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + + // if(bx == 0 && tx == 0){ + // helperArray[0] = m0; + // helperArray[1] = n; + // helperArray[2] = col_arr_itr_a; + // helperArray[3] = a_indices[col_arr_itr_a + prev_col_arr_range]; + // helperArray[4] = row_arr_itr_b - row_arr_start; + // helperArray[5] = b_indices[row_arr_itr_b]; + // printf("bx: 0, m0: %d, n: %d\a_indices[%d]: %d, b_indices[%d]: %d\nC_n_nnz_per_m0[%d]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], C_n_nnz_per_m0[n]); + // } + // if((a_indices[col_arr_itr_a + prev_col_arr_range] == b_indices[row_arr_itr_b])) + if((a_indices[col_arr_itr_a + col_arr_start] == b_indices[row_arr_itr_b])) + { + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + + /* + if(bx == 56){ + helperArray[0] = m0; + helperArray[1] = n; + // helperArray[2] = col_arr_itr_a + prev_col_arr_range; + // helperArray[3] = a_indices[col_arr_itr_a + prev_col_arr_range]; + helperArray[2] = col_arr_itr_a + col_arr_start; + helperArray[3] = a_indices[col_arr_itr_a + col_arr_start]; + helperArray[4] = row_arr_itr_b - row_arr_start; + helperArray[5] = b_indices[row_arr_itr_b]; + helperArray[6] = C_n_nnz_per_m0[n % TILE_SIZE]; + printf("GLOBAL_A && GLOBAL_B: m0: %d, n(tx): %d, col_arr_itr_a: %d\a_indices[%d]: %d, b_indices[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d 
]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + printf("GLOBAL_A && GLOBAL_B:\nm0: %d, n(tx): %d\na_indices[%d]: %d, b_indices[%d]: %d\nC_n_nnz_per_m0[%d % 32 = %d ]: %d\n", helperArray[0], helperArray[1], helperArray[2], helperArray[3], helperArray[4], helperArray[5], helperArray[1], helperArray[1]%32, helperArray[6]); + } + */ + + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + // if(bx == 56 && tx == 0){ + // for(int i = 0; i < TILE_SIZE; ++i){ + // printf("GLOBAL_A: C_n_nnz_per_m0[%d]: %d\n", i, C_n_nnz_per_m0[i % TILE_SIZE]); + // } + // } + + int C_n_nnz = C_n_nnz_per_m0[tx]; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + c_nnz_per_row[m0] = C_nnz_per_row; + + // if(bx == 56 && tx == 0){ + // printf("GLOBAL_A: bx: %d, m0: %d, C_nnz_per_row: %d\n", bx, m0, C_nnz_per_row); + // } + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + m0 += gridDim.x; + } + __syncthreads(); +// */ +} + + +// Precalculate the column indices of C +/* +template +__global__ void __precalculate_c_col_indices(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const std::size_t c_rows, + const std::size_t c_cols, + const std::size_t c_nnz, + const offset_t* c_offsets, + const index_t* c_indices) { + + __shared__ index_t shared_A_cols[TILE_SIZE * TILE_SIZE]; + __shared__ index_t shared_B_rows[TILE_SIZE * TILE_SIZE]; + __shared__ int C_n_nnz_per_m0[TILE_SIZE * TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + + bool found = false; + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + std::array test; + + int shared_mem_prev_col_arr_range = 0; + for(int m0 = bx; m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) <= (TILE_SIZE * TILE_SIZE - shared_mem_prev_col_arr_range); m0 += gridDim.x) //can't exploit the shared memory b/c the shared memory isn't large enough to take an entire row of A + { // Stride over the rows of A with the stride width of gridDim.x + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto shared_mem_curr_col_arr_range = col_arr_end - col_arr_start; + + + for(int col_arr_itr = tx; col_arr_itr < shared_mem_curr_col_arr_range; col_arr_itr += TILE_SIZE){ + shared_A_cols[col_arr_itr + shared_mem_prev_col_arr_range] = a_indices[col_arr_itr + col_arr_start]; + + } + shared_mem_prev_col_arr_range += shared_mem_curr_col_arr_range; + } + __syncthreads(); + + for(int n0 = tx; n0 < b_cols && b_offsets[n0 + 1] <= TILE_SIZE * TILE_SIZE; n0 += TILE_SIZE) + { + auto row_arr_start = b_offsets[n0]; + auto row_arr_end = b_offsets[n0 + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + } + } + __syncthreads(); + + if(b_nnz < TILE_SIZE * TILE_SIZE){ // If the number of non-zero elements in B is less than TILE_SIZE * TILE_SIZE, pad the shared memory with -1 + int diff = TILE_SIZE * TILE_SIZE - b_nnz; + for(int i = tx; i < diff; i += TILE_SIZE){ + shared_B_rows[b_nnz + i] = -1; + } + } + __syncthreads(); + + std::array helperArray; + + // SHARED_A: + int prev_col_arr_range = 0; + 
int m0 = bx; + while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) <= (TILE_SIZE * TILE_SIZE - prev_col_arr_range)) + { //TODO: which loop order will be faster? m0->n0->kb->ka or n0->kb->m0->ka? + + // /* + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + // Using SHARED B + int n = tx; + while(n < b_cols && b_offsets[n + 1] < TILE_SIZE * TILE_SIZE){ + + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == shared_B_rows[row_arr_itr_b])){ + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + // int n = tx; + // Using GLOBAL B + while(n < b_cols && b_offsets[n + 1] >= TILE_SIZE * TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == b_indices[row_arr_itr_b])){ + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + int C_n_nnz = C_n_nnz_per_m0[tx]; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + c_nnz_per_row[m0] = C_nnz_per_row; + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + m0 += gridDim.x; + } + __syncthreads(); + + // GLOBAL A + while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) > (TILE_SIZE * TILE_SIZE - prev_col_arr_range)) + { + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + int n = tx; + while(n < b_cols && b_offsets[n + 1] < TILE_SIZE * TILE_SIZE){ + + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + if((a_indices[col_arr_itr_a + col_arr_start] == shared_B_rows[row_arr_itr_b])) + { + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + while(n < b_cols && b_offsets[n + 1] >= TILE_SIZE * TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a){ + if((a_indices[col_arr_itr_a + col_arr_start] == b_indices[row_arr_itr_b])) + { + found = true; + C_n_nnz_per_m0[n % TILE_SIZE] += 1; + break; + } + } + if(found) break; + } + n += TILE_SIZE; + } + __syncthreads(); + + int C_n_nnz = C_n_nnz_per_m0[tx]; + typedef 
cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + int C_nnz_per_row = BlockReduce(temp_storage).Sum(C_n_nnz); + c_nnz_per_row[m0] = C_nnz_per_row; + + C_n_nnz_per_m0[tx] = 0; + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + m0 += gridDim.x; + } + __syncthreads(); +} +*/ + +/** + * @brief Estimate the nnz of output matrix C. + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. + */ +template +void estimate_nnz_test(csr_t& csr, + csc_t& csc, + int* c_nnz_per_tile, + cudaStream_t stream = 0) { + // Create a schedule. + constexpr std::size_t block_size = 128; + + // Create a schedule. + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + + launch::non_cooperative( + stream, __estimate_nnz_test, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + c_nnz_per_tile); + + cudaStreamSynchronize(stream); +} + +/** + * @brief Estimate the nnz of output matrix C using tiling + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. + */ +template +void estimate_nnz_test_v2(csr_t& csr, + csc_t& csc, + int* c_nnz_per_tile, + cudaStream_t stream = 0) { + + + // Create a schedule. + constexpr std::size_t block_size = 32; + // constexpr dim3 block_size(TILE_SIZE, TILE_SIZE, 1); + + // Create a schedule. + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + // dim3 grid_size((csc.cols + block_size.x - 1) / block_size.x, (csr.rows + block_size.y - 1) / block_size.y, 1); + // dim3 grid_size((csc.cols + block_size.x - 1) / block_size.x, csr.rows, 1); + std::size_t grid_size = csr.rows; // Assigning the number of rows in A to the grid size + + + launch::non_cooperative( + stream, __estimate_nnz_row_col_pairs_v2, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + c_nnz_per_tile); + + cudaStreamSynchronize(stream); +} + +/** + * @brief Estimate the nnz of output matrix C using tiling + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. + */ +template +void estimate_nnz_test_v3(csr_t& csr, + csc_t& csc, + int* c_nnz_per_tile, + cudaStream_t stream = 0) { + + constexpr std::size_t block_size = TILE_SIZE; + // constexpr dim3 block_size(TILE_SIZE, TILE_SIZE, 1); + + // Create a schedule. 
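+  // Note: this wrapper launches the v4 row/column-pair kernel with a small fixed grid
+  // (grid_size = 32 below); that kernel strides over the rows of A with a stride of
+  // gridDim.x, so the grid does not have to cover every row of A.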
+ using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + // dim3 grid_size((csc.cols + block_size.x - 1) / block_size.x, (csr.rows + block_size.y - 1) / block_size.y, 1); + // dim3 grid_size((csc.cols + block_size.x - 1) / block_size.x, csr.rows, 1); + + + + // std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + std::size_t grid_size = 32; + printf("grid_size: %ld\n", grid_size); + + + launch::non_cooperative( + stream, __estimate_nnz_row_col_pairs_v4, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + c_nnz_per_tile); + + cudaStreamSynchronize(stream); +} + + +/** + * @brief Precalculate the column indices array of C + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. + */ +/* +template +void precalculate_c_col_indices(csr_t& csr, + csc_t& csc, + csr_t& c, + cudaStream_t stream = 0) { + + + // Create a schedule. + constexpr std::size_t block_size = TILE_SIZE; + // constexpr dim3 block_size(TILE_SIZE, TILE_SIZE, 1); + + // Create a schedule. + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + printf("grid_size: %ld\n", grid_size); + + launch::non_cooperative( + stream, __estimate_nnz_row_col_pairs_v4, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + c_nnz_per_tile); + + cudaStreamSynchronize(stream); +} +*/ + +// template +void scanNnzC(int* c_nnz_per_tile, int* c_offsets, std::size_t c_rows){ + thrust::device_ptr ptr_begin = thrust::device_pointer_cast(c_nnz_per_tile); + thrust::device_ptr ptr_end = thrust::device_pointer_cast(c_nnz_per_tile + c_rows + 1); + thrust::exclusive_scan(ptr_begin, ptr_end, c_offsets); +} + +// template +int sumEstimateNnzC(int* c_nnz_per_tile, std::size_t c_rows){ + thrust::device_ptr ptr_begin = thrust::device_pointer_cast(c_nnz_per_tile); + thrust::device_ptr ptr_end = thrust::device_pointer_cast(c_nnz_per_tile + c_rows); + + int sum = thrust::reduce(ptr_begin, ptr_end, 0); + return sum; +} + +} // namespace spgemm +} // namespace algorithms +} // namespace loops \ No newline at end of file diff --git a/include/loops/algorithms/spgemm/find_explicit_zeros.cuh b/include/loops/algorithms/spgemm/find_explicit_zeros.cuh new file mode 100644 index 0000000..732f00f --- /dev/null +++ b/include/loops/algorithms/spgemm/find_explicit_zeros.cuh @@ -0,0 +1,107 @@ +/** + * @file estimate_nnz_test.cuh + * @author + * @brief SpGEMM kernels. 
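+ * Counts, per row of A, the columns of B that have at least one index match where both
+ * stored values are non-zero.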
+ * @version 0.1 + * @date 2023-11-08 + * + * @copyright Copyright (c) 2023 + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace loops { +namespace algorithms { +namespace spgemm { + +template +__global__ void __find_explicit_zeros(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const type_t* a_values, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const type_t* b_values, + int* explicit_zeros_per_row) { + + + for (auto mm : config.tiles()) { + bool found = false; + for (auto nn : + custom_stride_range(std::size_t(0), b_cols, std::size_t(1))) { + type_t sum = 0; + for (auto nz : config.atoms(mm)) { + auto kk_a = a_indices[nz]; + for (auto nz_b = b_offsets[nn]; nz_b < b_offsets[nn + 1]; ++nz_b) { + if(kk_a == b_indices[nz_b]&&(a_values[nz] != 0 && b_values[nz_b] != 0)&&!found){ + ++explicit_zeros_per_row[mm]; + found = true; + } + } + } + found = false; + } + } +} + +/** + * @brief Find out the explicit zeros in the input matrices when applying SpGEMM + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. + */ +template +void find_explicit_zeros(csr_t& csr, + csc_t& csc, + int* explicit_zeros_per_row, + cudaStream_t stream = 0) { + // Create a schedule. + constexpr std::size_t block_size = 128; + + // Create a schedule. + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + + launch::non_cooperative( + stream, __find_explicit_zeros, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csr.values.data().get(), csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + csc.values.data().get(), explicit_zeros_per_row); + + cudaStreamSynchronize(stream); +} + +} // namespace spgemm +} // namespace algorithms +} // namespace loops \ No newline at end of file diff --git a/include/loops/algorithms/spgemm/thread_mapped.cuh b/include/loops/algorithms/spgemm/thread_mapped.cuh new file mode 100644 index 0000000..f8ac488 --- /dev/null +++ b/include/loops/algorithms/spgemm/thread_mapped.cuh @@ -0,0 +1,629 @@ +/** + * @file thread_mapped.cuh + * @author + * @brief SpGEMM kernels. 
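+ * Thread-mapped SpGEMM kernels plus experimental row/column-pair variants that stage rows of
+ * A (CSR) and columns of B (CSC) in shared memory.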
+ * @version 0.1 + * @date 2023-10-17 + * + * @copyright Copyright (c) 2023 + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// #define tests 1 +#define SPGEMM_TILE_SIZE 32 + + +namespace loops { +namespace algorithms { +namespace spgemm { + +template +__global__ void __thread_mapped(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const type_t* a_values, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const type_t* b_values, + const offset_t* c_offsets, + index_t* tmp_c_indices, + type_t* tmp_c_values) { + + + for (auto mm : config.tiles()) { //translate tileId to rowId and colId - the grid stride grid_stride_range(T begin, T end) + int c_row_nnz = 0; + for (auto nn : + custom_stride_range(std::size_t(0), b_cols, std::size_t(1))) { + type_t sum = 0; + for (auto nz : config.atoms(mm)) { + auto kk_a = a_indices[nz]; + for (auto nz_b = b_offsets[nn]; nz_b < b_offsets[nn + 1]; ++nz_b) { + if (kk_a == b_indices[nz_b]) { + sum += a_values[nz] * b_values[nz_b]; + } + } + } + + if(sum != 0){ + tmp_c_indices[c_offsets[mm] + c_row_nnz] = nn; + tmp_c_values[c_offsets[mm] + c_row_nnz] = sum; + ++c_row_nnz; + // c_row_nnz = atomicAdd(&c_row_nnz, 1); + } + } + } +} + + +template +__global__ void __thread_mapped_v2(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const type_t* a_values, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const type_t* b_values, + const offset_t* c_offsets, + index_t* tmp_c_indices, + type_t* tmp_c_values) { + + for (auto mm : config.tiles()) { + int c_row_nnz = 0; + type_t sum = 0; + for (auto nz : config.atoms(mm)) { + auto kk_a = a_indices[nz]; + for (auto nn : + custom_stride_range(std::size_t(0), b_cols, std::size_t(1))) { + for (auto nz_b = b_offsets[nn]; nz_b < b_offsets[nn + 1]; ++nz_b) { + if (kk_a == b_indices[nz_b]) { + sum += a_values[nz] * b_values[nz_b]; + } + } + if(sum != 0){ + tmp_c_indices[c_offsets[mm] + c_row_nnz] = nn; + tmp_c_values[c_offsets[mm] + c_row_nnz] = sum; + ++c_row_nnz; + // c_row_nnz = atomicAdd(&c_row_nnz, 1); + } + } + } + } +} + +// Tiling A and B +template +__global__ void __thread_mapped_v3(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const type_t* a_values, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const type_t* b_values, + const offset_t* c_offsets, + index_t* tmp_c_indices, + type_t* tmp_c_values) { + + for (auto mm : config.tiles()) { + int c_row_nnz = 0; + type_t sum = 0; + for (auto nz : config.atoms(mm)) { + auto kk_a = a_indices[nz]; + for (auto nn : + custom_stride_range(std::size_t(0), b_cols, std::size_t(1))) { + for (auto nz_b = b_offsets[nn]; nz_b < b_offsets[nn + 1]; ++nz_b) { + if (kk_a == b_indices[nz_b]) { + sum += a_values[nz] * b_values[nz_b]; + } + } + if(sum != 0){ + tmp_c_indices[c_offsets[mm] + c_row_nnz] = nn; + tmp_c_values[c_offsets[mm] + c_row_nnz] = sum; + ++c_row_nnz; + // c_row_nnz = 
atomicAdd(&c_row_nnz, 1); + } + } + } + } +} + +// Tile by row, col pair of the input matrices +// For input matrices with number of columns and rows <= SPGEMM_TILE_SIZE && B_nnz < SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE +template +__global__ void __thrad_mapped_row_col_pairs_v1(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const type_t* a_values, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const type_t* b_values, + const offset_t* c_offsets, + index_t* c_indices, + type_t* c_values) { + + __shared__ index_t shared_A_cols[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + __shared__ index_t shared_B_rows[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + + __shared__ type_t shared_A_values[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + __shared__ type_t shared_B_values[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + + // Keep track of the column indices of the non-zeros in the m0th row of C, if the colmun as a non-zero element, set the flag to 1, else 0 + __shared__ int C_m0_flag[SPGEMM_TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + // For every block: load ONE row of A into shared memory, load as much of B as possible into shared memory + auto m = bx; + + C_m0_flag[tx] = 0; + __syncthreads(); + + // Load the mth row of A into shared memory + if(m < a_rows){ + auto col_arr_start = a_offsets[m]; + auto col_arr_end = a_offsets[m + 1]; + auto range = col_arr_end - col_arr_start; + + // Every thread loads one element of the mth row of A into shared memory + shared_A_cols[tx] = a_indices[col_arr_start + tx]; + shared_A_values[tx] = a_values[col_arr_start + tx]; + __syncthreads(); + } + + // Load the entire B into shared memory + int n = tx; + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + shared_B_values[k0] = b_values[k0]; + } + __syncthreads(); + + /* + for(int i = 0; i < gridDim.x; ++i){ + if(bx == 0 && tx == 0){ + auto start = b_offsets[0]; + for(int k0 = 0; k0 < b_nnz; ++k0){ + // if(shared_B_rows[k0] != b_indices[start + k0]){ + printf("shared_B_values[%d] = %f b_values[%d] = %f\n", k0, shared_B_values[k0], start + k0, b_values[start + k0]); + // } + } + } + } + */ + + std::array helperArray; + std::array valueArray; + if(m < a_rows){ + int n = tx; + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + + float sum = 0; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b){ // Iterate over all the elements in nth column of B + + auto col_arr_start = a_offsets[m]; + auto col_arr_end = a_offsets[m + 1]; + auto range = col_arr_end - col_arr_start; + + for(auto col_arr_itr_a = 0; col_arr_itr_a < range; ++col_arr_itr_a){ + if((shared_A_cols[col_arr_itr_a] == shared_B_rows[row_arr_itr_b])){ + sum += shared_A_values[col_arr_itr_a] * shared_B_values[row_arr_itr_b]; + + C_m0_flag[tx] = 1; + + } + } + } + + /* + if(bx == 30 && sum != 0){ + helperArray[0] = m; + helperArray[1] = n; + valueArray[0] = sum; + + printf("(m, n): (%d, %d)\nsum: %f\n", helperArray[0], helperArray[1], valueArray[0]); + } + */ + + // C_m0_flag[tx] = (sum != 0); + __syncthreads(); + + /* + if(bx == 30 && tx == 0){ + for(int i = 0; i < SPGEMM_TILE_SIZE; ++i){ + if(C_m0_flag[i] == 1){ + printf("C_m0_flag[%d]: %d\n", i, C_m0_flag[i]); + } + } + } + */ + + typedef cub::BlockScan 
BlockScan; + __shared__ typename BlockScan::TempStorage temp_storage; + int m0_col_idx_c; + BlockScan(temp_storage).InclusiveSum(C_m0_flag[tx], m0_col_idx_c); + + /* + if(bx == 30){ + printf("tx: %d, m0_col_idx_c: %d\n", tx, m0_col_idx_c); + } + */ + + int col_arr_idx_c = m0_col_idx_c - 1 + c_offsets[m]; + + if(C_m0_flag[tx]){ + c_indices[col_arr_idx_c] = tx; + c_values[col_arr_idx_c] = sum; + } + __syncthreads(); + + /* + if(bx == 30 && tx == 0){ + for(int i = 0; i < c_offsets[m + 1] - c_offsets[m]; ++i){ + printf("c_indices[%d]: %d, c_values[%d]: %f\n", i, c_indices[i], i, c_values[i]); + } + } + */ + + } + __syncthreads(); +} + + + + + +/* +template +__global__ void __thread_mapped_row_col_pairs(setup_t config, + const std::size_t a_rows, + const std::size_t a_cols, + const std::size_t a_nnz, + const offset_t* a_offsets, + const index_t* a_indices, + const type_t* a_values, + const std::size_t b_rows, + const std::size_t b_cols, + const std::size_t b_nnz, + const offset_t* b_offsets, + const index_t* b_indices, + const type_t* b_values, + const offset_t* c_offsets, + index_t* tmp_c_indices, + type_t* tmp_c_values) { + + __shared__ index_t shared_A_cols[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + __shared__ type_t shared_A_values[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + + __shared__ index_t shared_B_rows[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + __shared__ type_t shared_B_values[SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE]; + + int tx = threadIdx.x, bx = blockIdx.x; + + bool found = false; + + int shared_mem_prev_col_arr_range = 0; + // Load entire rows of A into shared memory with stride length = SPGEMM_TILE_SIZE.x, if the row size is larger than the empty space in shared memory, then skip the current row + for(int m0 = bx; m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) <= (SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE - shared_mem_prev_col_arr_range); m0 += gridDim.x) //can't exploit the shared memory b/c the shared memory isn't large enough to take an entire row of A + { // Stride over the rows of A with the stride width of gridDim.x + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto shared_mem_curr_col_arr_range = col_arr_end - col_arr_start; + + for(int col_arr_itr = tx; col_arr_itr < shared_mem_curr_col_arr_range; col_arr_itr += SPGEMM_TILE_SIZE){ + shared_A_cols[col_arr_itr + shared_mem_prev_col_arr_range] = a_indices[col_arr_itr + col_arr_start]; + shared_A_values[col_arr_itr + shared_mem_prev_col_arr_range] = a_values[col_arr_itr + col_arr_start]; + } + shared_mem_prev_col_arr_range += shared_mem_curr_col_arr_range; + } + __syncthreads(); + + // Load entire columns of B into shared memory with stride length = SPGEMM_TILE_SIZE.x, if the column size is larger than the empty space in shared memory, then skip the current column + for(int n0 = tx; n0 < b_cols && b_offsets[n0 + 1] <= SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE; n0 += SPGEMM_TILE_SIZE) + { + auto row_arr_start = b_offsets[n0]; + auto row_arr_end = b_offsets[n0 + 1]; + for(int k0 = row_arr_start; k0 < row_arr_end; ++k0){ + shared_B_rows[k0] = b_indices[k0]; + shared_B_values[k0] = b_values[k0]; + } + } + __syncthreads(); + + if(b_nnz < SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE){ // If the number of non-zero elements in B is less than SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE, pad the shared memory with -1 + int diff = SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE - b_nnz; + for(int i = tx; i < diff; i += SPGEMM_TILE_SIZE){ + shared_B_rows[b_nnz + i] = -1; + } + } + __syncthreads(); + + // SHARED_A: + int prev_col_arr_range = 0; + int m0 
= bx; + while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) <= (SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE - prev_col_arr_range)) + { + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + // Using SHARED B + int n = tx; + while(n < b_cols && b_offsets[n + 1] < SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE){ + + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b) + { // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a) + { + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == shared_B_rows[row_arr_itr_b])) + { + // Perform the multiplication + // Add to C_values + } + } + } + n += SPGEMM_TILE_SIZE; + } + __syncthreads(); + + while(n < b_cols && b_offsets[n + 1] >= SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b) + { // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a) + { + if((shared_A_cols[col_arr_itr_a + prev_col_arr_range] == b_indices[row_arr_itr_b])) + { + // Perform the multiplication + // Add to C_values + } + } + } + n += SPGEMM_TILE_SIZE; + } + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + m0 += gridDim.x; + } + __syncthreads(); + + while(m0 < a_rows && (a_offsets[m0 + 1] - a_offsets[m0]) > (SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE - prev_col_arr_range)) + { + auto col_arr_start = a_offsets[m0]; + auto col_arr_end = a_offsets[m0 + 1]; + auto curr_col_arr_range = col_arr_end - col_arr_start; + + int n = tx; + while(n < b_cols && b_offsets[n + 1] < SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b) + { // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a) + { + if((a_indices[col_arr_itr_a + col_arr_start] == shared_B_rows[row_arr_itr_b])) + { + + } + } + } + n += SPGEMM_TILE_SIZE; + } + __syncthreads(); + + while(n < b_cols && b_offsets[n + 1] >= SPGEMM_TILE_SIZE * SPGEMM_TILE_SIZE){ + auto row_arr_start = b_offsets[n]; + auto row_arr_end = b_offsets[n + 1]; + found = false; + for(int row_arr_itr_b = row_arr_start; row_arr_itr_b < row_arr_end; ++row_arr_itr_b) + { // Iterate over all the elements in nth column of B + for(auto col_arr_itr_a = 0; col_arr_itr_a < curr_col_arr_range; ++col_arr_itr_a) + { + if((a_indices[col_arr_itr_a + col_arr_start] == b_indices[row_arr_itr_b])) + { + + } + } + } + n += SPGEMM_TILE_SIZE; + } + __syncthreads(); + + prev_col_arr_range += curr_col_arr_range; + m0 += gridDim.x; + } + __syncthreads(); + +} +*/ + +/** + * @brief Sparse-Matrix Matrix Multiplication API. + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. 
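+ * @note B is supplied in CSC form so its columns can be intersected with CSR rows of A.
+ *       tmp_c_indices and tmp_c_values are assumed to be device buffers sized from the
+ *       per-row nnz estimate, since each row's results are written starting at
+ *       c_offsets[row].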
+ */ +template +void thread_mapped(csr_t& csr, + csc_t& csc, + csr_t& C, + // int* c_nnz_by_row, + index_t* tmp_c_indices, + type_t* tmp_c_values, + cudaStream_t stream = 0) { + // Create a schedule. + constexpr std::size_t block_size = 128; + + + /* + /// Set-up kernel launch parameters and run the kernel. + + // Create a schedule. + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + + launch::non_cooperative( + stream, __thread_mapped, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csr.values.data().get(), csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + csc.values.data().get(), C.offsets.data().get(), + // c_nnz_by_row, + tmp_c_indices, tmp_c_values); + */ + + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; // fix grid size to a constant if the matrix is VERY big + + launch::non_cooperative( + stream, __thread_mapped_v2, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csr.values.data().get(), csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + csc.values.data().get(), C.offsets.data().get(), + // c_nnz_by_row, + tmp_c_indices, tmp_c_values); + cudaStreamSynchronize(stream); +} + +/** + * @brief Sparse-Matrix Matrix Multiplication API. + * + * @tparam index_t Type of column indices. + * @tparam offset_t Type of row offsets. + * @tparam type_t Type of values. + * @param csr CSR matrix (GPU). + * @param n Number of columns in the B-matrix. + * @param B Input matrix B (GPU). + * @param C Output matrix C (GPU). + * @param stream CUDA stream. + */ +template +void thread_mapped_v2(csr_t& csr, + csc_t& csc, + csr_t& C, + cudaStream_t stream = 0) { + + /* + /// Set-up kernel launch parameters and run the kernel. + + // Create a schedule. 
+ using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + + launch::non_cooperative( + stream, __thread_mapped, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csr.values.data().get(), csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + csc.values.data().get(), C.offsets.data().get(), + // c_nnz_by_row, + tmp_c_indices, tmp_c_values); + */ + + constexpr std::size_t block_size = SPGEMM_TILE_SIZE; + using setup_t = schedule::setup; + setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); + + // std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + std::size_t grid_size = 32; + + printf("grid_size: %ld\n", grid_size); + + launch::non_cooperative( + stream, __thrad_mapped_row_col_pairs_v1, grid_size, + block_size, config, csr.rows, csr.cols, csr.nnzs, + csr.offsets.data().get(), csr.indices.data().get(), + csr.values.data().get(), csc.rows, csc.cols, csc.nnzs, + csc.offsets.data().get(), csc.indices.data().get(), + csc.values.data().get(), C.offsets.data().get(), + C.indices.data().get(), C.values.data().get()); + cudaStreamSynchronize(stream); +} + +} // namespace spgemm +} // namespace algorithms +} // namespace loops \ No newline at end of file diff --git a/include/loops/algorithms/spmm/thread_mapped.cuh b/include/loops/algorithms/spmm/thread_mapped.cuh index a64a477..f12eb19 100644 --- a/include/loops/algorithms/spmm/thread_mapped.cuh +++ b/include/loops/algorithms/spmm/thread_mapped.cuh @@ -48,6 +48,7 @@ __global__ void __thread_mapped(setup_t config, // Output C(row, col) = sum; + } } } @@ -80,6 +81,7 @@ void thread_mapped(csr_t& csr, setup_t config(csr.offsets.data().get(), csr.rows, csr.nnzs); std::size_t grid_size = (csr.rows + block_size - 1) / block_size; + launch::non_cooperative( stream, __thread_mapped, grid_size, block_size, config, csr.rows, csr.cols, csr.nnzs, diff --git a/include/loops/range.hxx b/include/loops/range.hxx index a515e7d..b4a05f5 100644 --- a/include/loops/range.hxx +++ b/include/loops/range.hxx @@ -6,7 +6,7 @@ * @date 2022-02-02 * * @copyright Copyright (c) 2022 - * + *3 */ #pragma once