From c9a61689e92db115e2afadb7addd0a6855b6227d Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 17 Mar 2024 03:14:11 +0000 Subject: [PATCH 01/15] [0, Nc) indexing and silhouette score --- dtwc/Problem.cpp | 42 ++++++++++++++----------------- dtwc/Problem.hpp | 4 ++- dtwc/Problem_IO.cpp | 18 +++++++------- dtwc/dtwc_cl.cpp | 28 +++++++++++---------- dtwc/initialisation.cpp | 2 ++ dtwc/initialisation.hpp | 17 +++++++------ dtwc/mip/mip_Gurobi.cpp | 27 ++++++++++---------- dtwc/mip/mip_Highs.cpp | 18 +++++++------- dtwc/scores.cpp | 55 +++++++++++++++++++++++------------------ 9 files changed, 110 insertions(+), 101 deletions(-) diff --git a/dtwc/Problem.cpp b/dtwc/Problem.cpp index 61efeec..a00a8f8 100644 --- a/dtwc/Problem.cpp +++ b/dtwc/Problem.cpp @@ -40,7 +40,7 @@ namespace dtwc { void Problem::resize() { clusters_ind.resize(size()); - centroids_ind.resize(Nc); + centroids_ind.resize(cluster_size()); } /** @@ -195,12 +195,12 @@ void Problem::cluster_by_MIP() */ void Problem::assignClusters() { - auto assignClustersTask = [this](int i_p) // i_p and i_c in [0, Np] + auto assignClustersTask = [this](int i_p) //!< i_p and i_c in [0, Np) { - clusters_ind[i_p] = *std::min_element( - centroids_ind.begin(), - centroids_ind.end(), - [this, i_p](int i_c1, int ic2) { return distByInd(i_p, i_c1) < distByInd(i_p, ic2); }); + auto minIt = std::min_element(centroids_ind.begin(), centroids_ind.end(), [this, i_p](int ic_1, int ic_2) { + return distByInd(i_p, ic_1) < distByInd(i_p, ic_2); + }); + clusters_ind[i_p] = std::distance(centroids_ind.begin(), minIt); }; clusters_ind.resize(data.size()); // Resize before assigning. @@ -231,15 +231,14 @@ void Problem::distanceInClusters() */ void Problem::calculateMedoids() { - std::vector pointCosts(size()); + static std::vector pointCosts(size()), clusterCosts(cluster_size()); + pointCosts.resize(size()); auto findBetterMedoidTask = [&](int i_p) // i_p is point index. 
{ - const auto clusterNo = clusters_ind[i_p]; double sum{ 0 }; - for (const auto i : Range(size())) - if (clusters_ind[i] == clusterNo) + if (clusters_ind[i] == clusters_ind[i_p]) // If they are in the same cluster sum += distByInd(i_p, i); pointCosts[i_p] = sum; @@ -247,17 +246,12 @@ void Problem::calculateMedoids() run(findBetterMedoidTask, size()); - for (const auto i : Range(Nc)) { - const auto centroidNow = centroids_ind[i]; - double minCost{ std::numeric_limits::max() }; - int minInd{}; - - for (const auto i_p : Range(size())) - if ((clusters_ind[i_p] == centroidNow) && (pointCosts[i_p] < minCost)) - std::tie(minCost, minInd) = std::tie(pointCosts[i_p], i_p); - - centroids_ind[i] = minInd; - } + clusterCosts.assign(cluster_size(), std::numeric_limits::max()); + for (const auto i : Range(size())) + if (pointCosts[i] < clusterCosts[clusters_ind[i]]) { + clusterCosts[clusters_ind[i]] = pointCosts[i]; + centroids_ind[clusters_ind[i]] = i; + } } /** @@ -351,12 +345,12 @@ std::pair Problem::cluster_by_kMedoidsPAM_single(int rep) double Problem::findTotalCost() { double sum = 0; - for (int i = 0; i < data.size(); i++) { + for (int i : Range(size())) { if constexpr (settings::isDebug) std::cout << "Distance between " << i << " and closest cluster " << clusters_ind[i] - << " which is: " << distByInd(i, clusters_ind[i]) << "\n"; + << " which is: " << distByInd(i, centroid_of(i)) << "\n"; - sum += distByInd(i, clusters_ind[i]); // #TODO should cost be square or like this? + sum += distByInd(i, centroid_of(i)); // #TODO should cost be square or like this? } return sum; diff --git a/dtwc/Problem.hpp b/dtwc/Problem.hpp index 897b48a..16590d6 100644 --- a/dtwc/Problem.hpp +++ b/dtwc/Problem.hpp @@ -73,7 +73,7 @@ class Problem std::string name{}; /*!< Problem name. */ Data data; /*!< Data associated with the problem. */ - std::vector clusters_ind; //!< Indices of which point belongs to which cluster. 
[0,Np] + std::vector clusters_ind; //!< Indices of which point belongs to which cluster. [0,Nc) std::vector centroids_ind; //!< indices of cluster centroids. [0, Np) // Constructors: @@ -97,6 +97,8 @@ class Problem void resize(); // Getters and setters: + int centroid_of(int i_p) const { return centroids_ind[clusters_ind[i_p]]; } // [0, Np) Get the centroid of the cluster of i_p + void readDistanceMatrix(const fs::path &distMat_path); void set_numberOfClusters(int Nc_); void set_clusters(std::vector &candidate_centroids); diff --git a/dtwc/Problem_IO.cpp b/dtwc/Problem_IO.cpp index afadee8..9369ac1 100644 --- a/dtwc/Problem_IO.cpp +++ b/dtwc/Problem_IO.cpp @@ -58,7 +58,7 @@ void Problem::writeMedoids(std::vector> &centroids_all, int rep */ void Problem::printClusters() const { - std::cout << "Clusters: "; + std::cout << "Clusters centroids: "; for (auto ind : centroids_ind) std::cout << get_name(ind) << ' '; @@ -68,7 +68,7 @@ void Problem::printClusters() const std::cout << get_name(centroids_ind[i_c]) << " has: "; for (const auto i_p : Range(size())) - if (clusters_ind[i_p] == centroids_ind[i_c]) + if (clusters_ind[i_p] == i_c) std::cout << get_name(i_p) << " "; std::cout << '\n'; @@ -85,7 +85,7 @@ void Problem::writeClusters() std::ofstream myFile(output_folder / file_name, std::ios_base::out); - myFile << "Clusters:\n"; + myFile << "Cluster centroids:\n"; for (int i{ 0 }; i < Nc; i++) { if (i != 0) myFile << ','; @@ -96,8 +96,8 @@ void Problem::writeClusters() myFile << "\n\n" << "Data" << ',' << "its cluster\n"; - for (int i{ 0 }; i < data.size(); i++) - myFile << get_name(i) << ',' << get_name(clusters_ind[i]) << '\n'; + for (int i : Range(size())) + myFile << get_name(i) << ',' << get_name(centroid_of(i)) << '\n'; myFile << "Procedure is completed with cost: " << findTotalCost() << '\n'; @@ -114,12 +114,12 @@ void Problem::writeSilhouettes() std::string silhouette_name{ name + "_silhouettes_Nc_" }; - silhouette_name += std::to_string(Nc) + ".csv"; + 
silhouette_name += std::to_string(cluster_size()) + ".csv"; std::ofstream myFile(output_folder / silhouette_name, std::ios_base::out); myFile << "Silhouettes:\n"; - for (int i{ 0 }; i < data.size(); i++) + for (auto i : Range(size())) myFile << get_name(i) << ',' << silhouettes[i] << '\n'; myFile.close(); @@ -136,9 +136,9 @@ void Problem::writeMedoidMembers(int iter, int rep) const + std::to_string(rep) + "_iter_" + std::to_string(iter) + ".csv"; std::ofstream medoidMembers(output_folder / medoid_name, std::ios_base::out); - for (const auto i_c : Range(Nc)) { + for (const auto i_c : Range(cluster_size())) { for (const auto i_p : Range(size())) - if (clusters_ind[i_p] == centroids_ind[i_c]) + if (clusters_ind[i_p] == i_c) medoidMembers << get_name(i_p) << ','; medoidMembers << '\n'; diff --git a/dtwc/dtwc_cl.cpp b/dtwc/dtwc_cl.cpp index e50f9d9..3c981f4 100644 --- a/dtwc/dtwc_cl.cpp +++ b/dtwc/dtwc_cl.cpp @@ -1,10 +1,12 @@ -/* - * dtwc_cl.cpp +/** + * @file dtwc_cl.cpp + * @brief Command line interface for DTWC++ * - * Command line interface for DTWC++ + * This file contains the implementation of the command line interface for DTWC++. + * It provides a command line interface for users to interact with the DTWC++ library. * - * Created on: 11 Dec 2023 - * Author(s): Volkan Kumtepeli, Becky Perriment + * @date 11 Dec 2023 + * @authors Volkan Kumtepeli, Becky Perriment */ #include "dtwc.hpp" @@ -31,9 +33,9 @@ int main(int argc, char **argv) std::string distMatPath{ "" }; int maxIter{ dtwc::settings::DEFAULT_MAX_ITER }; - int skipRows{0}, skipCols{0}; + int skipRows{ 0 }, skipCols{ 0 }; int N_repetition{ 1 }; - int bandWidth{-1}; + int bandWidth{ -1 }; CLI::App app{ app_description }; @@ -60,8 +62,8 @@ int main(int argc, char **argv) std::cout << "Arguments are parsed." 
<< std::endl; - auto Nc = str_to_range(Nc_str); // dtwc::Range(3,5); - dtwc::Clock clk; // Create a clock object + auto Nc = str_to_range(Nc_str); //!< dtwc::Range(3,5); + dtwc::Clock clk; //!< Create a clock object std::cout << "Nc_str : " << Nc_str << '\n'; std::cout << "name : " << probName << '\n'; @@ -72,9 +74,9 @@ int main(int argc, char **argv) std::cout << "Max iteration : " << maxIter << std::endl; dtwc::DataLoader dl{ inputPath }; - dl.startColumn(skipCols).startRow(skipRows); // Since dummy files are in Pandas format skip first row/column. + dl.startColumn(skipCols).startRow(skipRows); //!< Since dummy files are in Pandas format skip first row/column. - dtwc::Problem prob{ probName, dl }; // Create a problem. + dtwc::Problem prob{ probName, dl }; //!< Create a problem. std::cout << "Data loading finished at " << clk << "\n"; prob.maxIter = maxIter; @@ -82,7 +84,7 @@ int main(int argc, char **argv) prob.output_folder = outPath; prob.band = bandWidth; try { - if(distMatPath != "") + if (distMatPath != "") prob.readDistanceMatrix(distMatPath); } catch (const std::exception &e) { std::cout << "Distance matrix could not be read! Continuing without matrix!" << std::endl; @@ -106,7 +108,7 @@ int main(int argc, char **argv) for (auto nc : Nc) { std::cout << "\n\nClustering by " << method << " for Number of clusters : " << nc << std::endl; - prob.set_numberOfClusters(nc); // Nc = number of clusters. + prob.set_numberOfClusters(nc); //!< Nc = number of clusters. 
prob.cluster_and_process(); } diff --git a/dtwc/initialisation.cpp b/dtwc/initialisation.cpp index f9ab225..d763581 100644 --- a/dtwc/initialisation.cpp +++ b/dtwc/initialisation.cpp @@ -1,3 +1,5 @@ +#include // Add missing include directive for the header file + /** * @file initialisation.cpp * diff --git a/dtwc/initialisation.hpp b/dtwc/initialisation.hpp index e433cd2..ba6850c 100644 --- a/dtwc/initialisation.hpp +++ b/dtwc/initialisation.hpp @@ -1,10 +1,11 @@ -/* - * initialisation.hpp +/** + * @file initialisation.hpp + * @brief Header file for initialisation functions. * - * Header file for initialisation functions. - - * Created on: 19 Jan 2021 - * Author(s): Volkan Kumtepeli, Becky Perriment + * This file contains the declarations of initialisation functions for the dtwc namespace. + * + * @date 19 Jan 2021 + * @authors Volkan Kumtepeli, Becky Perriment */ #pragma once @@ -12,7 +13,7 @@ namespace dtwc { class Problem; namespace init { - void random(Problem &prob); // Random centroids initialisation - void Kmeanspp(Problem &prob); // Kmeanspp centroids initialisation + void random(Problem &prob); //!< This function initializes the centroids randomly. + void Kmeanspp(Problem &prob); //!< This function initializes the centroids using the K-means++ algorithm. 
} // namespace init } // namespace dtwc \ No newline at end of file diff --git a/dtwc/mip/mip_Gurobi.cpp b/dtwc/mip/mip_Gurobi.cpp index 270d052..d88e348 100644 --- a/dtwc/mip/mip_Gurobi.cpp +++ b/dtwc/mip/mip_Gurobi.cpp @@ -11,6 +11,8 @@ #include "mip.hpp" #include "../Problem.hpp" #include "../settings.hpp" +#include "../types/types.hpp" // for Range + #include #include @@ -30,8 +32,7 @@ void MIP_clustering_byGurobi(Problem &prob) { #ifdef DTWC_ENABLE_GUROBI - const auto Nb = prob.data.size(); - const auto Nc = prob.cluster_size(); + const auto Nb(prob.size()), Nc(prob.cluster_size()); prob.centroids_ind.clear(); try { @@ -41,22 +42,22 @@ void MIP_clustering_byGurobi(Problem &prob) // Create variables std::unique_ptr w{ model.addVars(Nb * Nb, GRB_BINARY) }; - for (int i{ 0 }; i < Nb; i++) { + for (auto i : Range(Nb)) { GRBLinExpr lhs = 0; - for (int j{ 0 }; j < Nb; j++) + for (auto j : Range(Nb)) lhs += w[j + i * Nb]; model.addConstr(lhs, '=', 1.0); } - for (int j{ 0 }; j < Nb; j++) - for (int i{ 0 }; i < Nb; i++) + for (auto j : Range(Nb)) + for (auto i : Range(Nb)) model.addConstr(w[i + j * Nb] <= w[i * (Nb + 1)]); { GRBLinExpr lhs = 0; - for (int i{ 0 }; i < Nb; i++) + for (auto i : Range(Nb)) lhs += w[i * (Nb + 1)]; model.addConstr(lhs == Nc); // There should be Nc clusters. 
@@ -66,8 +67,8 @@ void MIP_clustering_byGurobi(Problem &prob) const auto scaling_factor = std::max(prob.maxDistance() / 2.0, 1.0); // Set objective GRBLinExpr obj = 0; - for (int j{ 0 }; j < Nb; j++) - for (int i{ 0 }; i < Nb; i++) + for (auto j : Range(Nb)) + for (auto i : Range(Nb)) obj += w[i + j * Nb] * prob.distByInd(i, j) / scaling_factor; model.setObjective(obj, GRB_MINIMIZE); @@ -79,15 +80,15 @@ void MIP_clustering_byGurobi(Problem &prob) model.optimize(); - for (int i{ 0 }; i < Nb; i++) + for (auto i : Range(Nb)) if (w[i * (Nb + 1)].get(GRB_DoubleAttr_X) > 0.5) prob.centroids_ind.push_back(i); prob.clusters_ind.resize(Nb); - for (auto i : prob.centroids_ind) - for (int j{ 0 }; j < Nb; j++) - if (w[i + j * Nb].get(GRB_DoubleAttr_X) > 0.5) + for (auto i : Range(prob.cluster_size())) + for (auto j : Range(Nb)) + if (w[prob.centroids_ind[i] + j * Nb].get(GRB_DoubleAttr_X) > 0.5) prob.clusters_ind[j] = i; } catch (GRBException &e) { diff --git a/dtwc/mip/mip_Highs.cpp b/dtwc/mip/mip_Highs.cpp index 3fe469d..bbb92c0 100644 --- a/dtwc/mip/mip_Highs.cpp +++ b/dtwc/mip/mip_Highs.cpp @@ -33,15 +33,15 @@ void extract_mip_solution(Problem &prob, const T &solution) prob.centroids_ind.clear(); const auto Nb = prob.data.size(); - for (int i{ 0 }; i < Nb; i++) + for (auto i : Range(Nb)) if (solution[i * (Nb + 1)] > 0.5) prob.centroids_ind.push_back(i); prob.clusters_ind.resize(Nb); - for (auto i : prob.centroids_ind) - for (int j{ 0 }; j < Nb; j++) - if (solution[i * Nb + j] > 0.5) + for (auto i : Range(prob.cluster_size())) + for (auto j : Range(Nb)) + if (solution[prob.centroids_ind[i] * Nb + j] > 0.5) prob.clusters_ind[j] = i; } @@ -173,11 +173,11 @@ void MIP_clustering_byHiGHS(Problem &prob) // Get the solution information const HighsInfo &info = highs.getInfo(); - std::cout << "Simplex iteration count: " << info.simplex_iteration_count << '\n'; - std::cout << "Objective function value: " << info.objective_function_value << '\n'; - std::cout << "Primal solution 
status: " << highs.solutionStatusToString(info.primal_solution_status) << '\n'; - std::cout << "Dual solution status: " << highs.solutionStatusToString(info.dual_solution_status) << '\n'; - std::cout << "Basis: " << highs.basisValidityToString(info.basis_validity) << '\n'; + std::cout << "Simplex iteration count: " << info.simplex_iteration_count << '\n' + << "Objective function value: " << info.objective_function_value << '\n' + << "Primal solution status: " << highs.solutionStatusToString(info.primal_solution_status) << '\n' + << "Dual solution status: " << highs.solutionStatusToString(info.dual_solution_status) << '\n' + << "Basis: " << highs.basisValidityToString(info.basis_validity) << '\n'; // Get the solution values extract_mip_solution(prob, highs.getSolution().col_value); diff --git a/dtwc/scores.cpp b/dtwc/scores.cpp index 6eda4e1..6b78cf6 100644 --- a/dtwc/scores.cpp +++ b/dtwc/scores.cpp @@ -18,6 +18,7 @@ #include #include #include +#include // for pair namespace dtwc::scores { @@ -36,41 +37,47 @@ namespace dtwc::scores { */ std::vector silhouette(Problem &prob) { - const auto Nb = prob.data.size(); //!< Number of profiles + const auto Nb = prob.size(); //!< Number of profiles const auto Nc = prob.cluster_size(); //!< Number of clusters - std::vector silhouettes(Nb); + std::vector silhouettes(Nb, -1); //!< Silhouette scores for each profile initialised to -1 if (prob.centroids_ind.empty()) { std::cout << "Please cluster the data before calculating silhouette!" << std::endl; return silhouettes; } + prob.fillDistanceMatrix(); //!< We need all pairwise distance for silhouette score. 
+ auto oneTask = [&](size_t i_b) { - // auto i_c = prob.clusters_ind[i_b]; - - // if (prob.cluster_members[i_c].size() == 1) - // silhouettes[i_b] = 0; - // else { - // thread_local std::vector mean_distances(Nc); - - // for (int i = 0; i < Nb; i++) - // mean_distances[prob.clusters_ind[i]] += prob.distByInd(i, i_b); - - // auto min = std::numeric_limits::max(); - // for (int i = 0; i < Nc; i++) // Finding means: - // if (i == i_c) - // mean_distances[i] /= (prob.cluster_members[i].size() - 1); - // else { - // mean_distances[i] /= prob.cluster_members[i].size(); - // min = std::min(min, mean_distances[i]); - // } - - // silhouettes[i_b] = (min - mean_distances[i_c]) / std::max(min, mean_distances[i_c]); - // } + const auto i_c = prob.clusters_ind[i_b]; + + thread_local std::vector> mean_distances(Nc); + mean_distances.assign(Nc, { 0, 0 }); + + for (auto i : Range(prob.size())) { + mean_distances[prob.clusters_ind[i]].first++; + mean_distances[prob.clusters_ind[i]].second += prob.distByInd(i, i_b); + } + + + if (mean_distances[i_c].first == 1) // If the profile is the only member of the cluster + silhouettes[i_b] = 0; + else { + auto min = std::numeric_limits::max(); + for (int i = 0; i < Nc; i++) // Finding means: + if (i == i_c) + mean_distances[i].second /= (mean_distances[i].first - 1); + else { + mean_distances[i].second /= mean_distances[i].first; + min = std::min(min, mean_distances[i].second); + } + + silhouettes[i_b] = (min - mean_distances[i_c].second) / std::max(min, mean_distances[i_c].second); + } }; - dtwc::run(oneTask, Nb); + dtwc::run(oneTask, prob.size()); return silhouettes; } From 9a24030a6a635f6cfb5cd5974f58d406fe1d6ca3 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Mon, 18 Mar 2024 03:01:17 +0000 Subject: [PATCH 02/15] python wrapper --- .gitignore | 1 + CMakeLists.txt | 11 ++++ cmake/Dependencies.cmake | 1 - pyproject.toml | 3 + python/__init__.py | 1 + python/py_main.cpp | 50 ++++++++++++++ setup.py | 139 
+++++++++++++++++++++++++++++++++++++++ 7 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml create mode 100644 python/__init__.py create mode 100644 python/py_main.cpp create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index d35ed2d..b173473 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,4 @@ cmake-build-debug/* cmake-build-relwithdebinfo/* build/* bin/* +*.egg-info/* diff --git a/CMakeLists.txt b/CMakeLists.txt index 293fafa..023b32d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,4 +78,15 @@ endif() if(BUILD_BENCHMARK) add_subdirectory(benchmark) +endif() + +find_package(pybind11 QUIET) + +if(pybind11_FOUND) + pybind11_add_module(dtwcpp + python/py_main.cpp) + + target_link_libraries(dtwcpp PRIVATE dtwc++ project_warnings project_options armadillo) +else() + message(STATUS "pybind11 not found. Skipping the dtwcpp module.") endif() \ No newline at end of file diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 43f140e..728ee40 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -55,5 +55,4 @@ function(dtwc_setup_dependencies) OPTIONS "BUILD_SMOKE_TEST OFF" ) - endfunction() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..beb36de --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools","prebuilt_binaries", "cmake>=3.21", "pybind11", "wheel", "ninja"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/python/__init__.py b/python/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/python/__init__.py @@ -0,0 +1 @@ + diff --git a/python/py_main.cpp b/python/py_main.cpp new file mode 100644 index 0000000..f237ade --- /dev/null +++ b/python/py_main.cpp @@ -0,0 +1,50 @@ +#include +#include +#include + +#include "../dtwc/Problem.hpp" + +#include +#include +#include + +namespace py = pybind11; + +using namespace dtwc; + +PYBIND11_MODULE(dtwcpp, m) +{ + 
m.doc() = "DTWC++ (Dynamic Time Warping Clustering++ Library)"; + + py::class_(m, "Problem") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("size", &Problem::size) + .def("cluster_size", &Problem::cluster_size) + .def("get_name", (const std::string &(Problem::*)(size_t) const) & Problem::get_name, py::return_value_policy::reference) + .def("p_vec", (const std::vector &(Problem::*)(size_t) const) & Problem::p_vec, py::return_value_policy::reference) + .def("refreshDistanceMatrix", &Problem::refreshDistanceMatrix) + .def("resize", &Problem::resize) + .def("centroid_of", &Problem::centroid_of) + .def("readDistanceMatrix", &Problem::readDistanceMatrix) + .def("set_numberOfClusters", &Problem::set_numberOfClusters) + .def("set_clusters", &Problem::set_clusters) + .def("set_solver", &Problem::set_solver) + .def("set_data", &Problem::set_data) + .def("maxDistance", &Problem::maxDistance) + .def("distByInd", &Problem::distByInd) + .def("isDistanceMatrixFilled", &Problem::isDistanceMatrixFilled) + .def("fillDistanceMatrix", &Problem::fillDistanceMatrix) + .def("printDistanceMatrix", &Problem::printDistanceMatrix) + .def("writeDistanceMatrix", (void(Problem::*)(const std::string &) const) & Problem::writeDistanceMatrix) + .def("writeClusters", &Problem::writeClusters) + .def("writeMedoidMembers", &Problem::writeMedoidMembers) + .def("writeSilhouettes", &Problem::writeSilhouettes) + .def("init", &Problem::init) + .def("cluster", &Problem::cluster) + .def("cluster_and_process", &Problem::cluster_and_process) + .def("findTotalCost", &Problem::findTotalCost) + .def("assignClusters", &Problem::assignClusters) + .def("calculateMedoids", &Problem::calculateMedoids); +} \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d92f128 --- /dev/null +++ b/setup.py @@ -0,0 +1,139 @@ +import os +import re +import subprocess +import sys +from pathlib import Path + +from setuptools import Extension, setup +from 
setuptools.command.build_ext import build_ext + +# Convert distutils Windows platform specifiers to CMake -A arguments +PLAT_TO_CMAKE = { + "win32": "Win32", + "win-amd64": "x64", + "win-arm32": "ARM", + "win-arm64": "ARM64", +} + +# A CMakeExtension needs a sourcedir instead of a file list. +# The name must be the _single_ output extension from the CMake build. +# If you need multiple extensions, see scikit-build. +class CMakeExtension(Extension): + def __init__(self, name: str, sourcedir: str = "") -> None: + super().__init__(name, sources=[]) + self.sourcedir = os.fspath(Path(sourcedir).resolve()) + + +class CMakeBuild(build_ext): + def build_extension(self, ext: CMakeExtension) -> None: + # Must be in this form due to bug in .resolve() only fixed in Python 3.10+ + ext_fullpath = Path.cwd() / self.get_ext_fullpath(ext.name) + extdir = ext_fullpath.parent.resolve() + + # Using this requires trailing slash for auto-detection & inclusion of + # auxiliary "native" libs + + debug = int(os.environ.get("DEBUG", 0)) if self.debug is None else self.debug + cfg = "Debug" if debug else "Release" + + # CMake lets you override the generator - we need to check this. + # Can be set with Conda-Build, for example. + cmake_generator = os.environ.get("CMAKE_GENERATOR", "") + + # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON + # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code + # from Python. + cmake_args = [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={extdir}{os.sep}", + f"-DPYTHON_EXECUTABLE={sys.executable}", + f"-DCMAKE_BUILD_TYPE={cfg}", # not used on MSVC, but no harm + ] + build_args = [] + # Adding CMake arguments set as environment variable + # (needed e.g. to build for ARM OSx on conda-forge) + if "CMAKE_ARGS" in os.environ: + cmake_args += [item for item in os.environ["CMAKE_ARGS"].split(" ") if item] + + # In this example, we pass in the version to C++. You might not need to. 
+ cmake_args += [f"-DEXAMPLE_VERSION_INFO={self.distribution.get_version()}"] + + if self.compiler.compiler_type != "msvc": + # Using Ninja-build since it a) is available as a wheel and b) + # multithreads automatically. MSVC would require all variables be + # exported for Ninja to pick it up, which is a little tricky to do. + # Users can override the generator with CMAKE_GENERATOR in CMake + # 3.15+. + if not cmake_generator or cmake_generator == "Ninja": + try: + import ninja + + ninja_executable_path = Path(ninja.BIN_DIR) / "ninja" + cmake_args += [ + "-GNinja", + f"-DCMAKE_MAKE_PROGRAM:FILEPATH={ninja_executable_path}", + ] + except ImportError: + pass + + else: + # Single config generators are handled "normally" + single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) + + # CMake allows an arch-in-generator style for backward compatibility + contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) + + # Specify the arch if using MSVC generator, but only if it doesn't + # contain a backward-compatibility arch spec already in the + # generator name. + if not single_config and not contains_arch: + cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] + + # Multi-config generators have a different way to specify configs + if not single_config: + cmake_args += [ + f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{cfg.upper()}={extdir}" + ] + build_args += ["--config", cfg] + + if sys.platform.startswith("darwin"): + # Cross-compile support for macOS - respect ARCHFLAGS if set + archs = re.findall(r"-arch (\S+)", os.environ.get("ARCHFLAGS", "")) + if archs: + cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))] + + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level + # across all generators. + if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: + # self.parallel is a Python 3 only way to set parallel jobs by hand + # using -j in the build_ext call, not supported by pip or PyPA-build. 
+ if hasattr(self, "parallel") and self.parallel: + # CMake 3.12+ only. + build_args += [f"-j{self.parallel}"] + + build_temp = Path(self.build_temp) / ext.name + if not build_temp.exists(): + build_temp.mkdir(parents=True) + + subprocess.run( + ["cmake", ext.sourcedir, *cmake_args], cwd=build_temp, check=True + ) + subprocess.run( + ["cmake", "--build", ".", *build_args], cwd=build_temp, check=True + ) + + +# The information here can also be placed in setup.cfg - better separation of +# logic and declaration, and simpler if you include description/version in a file. +setup( + name="dtwcpp", + version="0.1.0", + author="Volkan Kumtepeli", + author_email="volkan.kumtepeli@gmail.com", + description="A test project using pybind11 and CMake", + long_description="", + ext_modules=[CMakeExtension("dtwcpp")], + cmdclass={"build_ext": CMakeBuild}, + zip_safe=False, + extras_require={"test": ["pytest>=6.0"]}, + python_requires=">=3.7", +) \ No newline at end of file From 9eb2bf7c225dba990287c7cbcca654b57fa67c82 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Tue, 19 Mar 2024 01:43:16 +0000 Subject: [PATCH 03/15] working on DataLoader and other classes. --- dtwc/scores.cpp | 69 ++++++++++++++++++++++++++++++++++++++++++++++ python/py_main.cpp | 21 +++++++++++++- test.py | 6 ++++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 test.py diff --git a/dtwc/scores.cpp b/dtwc/scores.cpp index 6b78cf6..2d1d418 100644 --- a/dtwc/scores.cpp +++ b/dtwc/scores.cpp @@ -82,4 +82,73 @@ std::vector silhouette(Problem &prob) return silhouettes; } +/** + * @brief Calculates the Davies-Bouldin index for a given clustering problem. + * + * The Davies-Bouldin index is the average, over all clusters, of each cluster's worst-case ratio of + * within-cluster scatter to between-cluster separation. It is used to evaluate the quality of a clustering solution, + * with a lower value indicating more compact, better separated clusters. 
+ * + * @param prob The clustering problem instance, which contains the data points, cluster indices, and centroids. + * @return double The Davies-Bouldin index. + * + * @note Requires that the data has already been clustered; if not, it will prompt the user to cluster the data first. + * @see https://en.wikipedia.org/wiki/Davies%E2%80%93Bouldin_index for more information on the Davies-Bouldin index. + */ +// double daviesBouldinIndex(Problem &prob) +// { +// const auto Nc = prob.cluster_size(); //!< Number of clusters + +// if (prob.centroids_ind.empty()) { +// std::cout << "Please cluster the data before calculating the Davies-Bouldin index!" << std::endl; +// return 0.0; +// } + +// prob.fillDistanceMatrix(); //!< We need all pairwise distances for the Davies-Bouldin index. + +// std::vector clusterSimilarities(Nc, 0.0); //!< Similarities between clusters +// std::vector clusterDissimilarities(Nc, 0.0); //!< Dissimilarities between clusters + +// // Calculate the similarity and dissimilarity for each cluster +// for (int i = 0; i < Nc; i++) { +// double maxSimilarity = std::numeric_limits::lowest(); + +// for (int j = 0; j < Nc; j++) { +// if (i != j) { +// double similarity = (prob.distByInd(prob.centroids_ind[i], prob.centroids_ind[i]) + prob.distByInd(prob.centroids_ind[j], prob.centroids_ind[j])) / prob.distByInd(prob.centroids_ind[i], prob.centroids_ind[j]); + +// if (similarity > maxSimilarity) { +// maxSimilarity = similarity; +// } +// } +// } + +// clusterSimilarities[i] = maxSimilarity; +// } + +// // Calculate the dissimilarity for each cluster +// for (int i = 0; i < Nc; i++) { +// double sumDissimilarity = 0.0; + +// for (int j = 0; j < Nc; j++) { +// if (i != j) { +// sumDissimilarity += prob.distByInd(prob.centroids_ind[i], prob.centroids_ind[j]); +// } +// } + +// clusterDissimilarities[i] = sumDissimilarity / (Nc - 1); +// } + +// // Calculate the Davies-Bouldin index +// double daviesBouldinIndex = 0.0; + +// for (int i = 0; i < Nc; i++) { +// 
daviesBouldinIndex += clusterSimilarities[i] + clusterDissimilarities[i]; +// } + +// daviesBouldinIndex /= Nc; + +// return daviesBouldinIndex; +// } + } // namespace dtwc::scores diff --git a/python/py_main.cpp b/python/py_main.cpp index f237ade..f99601b 100644 --- a/python/py_main.cpp +++ b/python/py_main.cpp @@ -2,7 +2,7 @@ #include #include -#include "../dtwc/Problem.hpp" +#include "../dtwc/dtwc.hpp" #include #include @@ -47,4 +47,23 @@ PYBIND11_MODULE(dtwcpp, m) .def("findTotalCost", &Problem::findTotalCost) .def("assignClusters", &Problem::assignClusters) .def("calculateMedoids", &Problem::calculateMedoids); + + + py::class_(m, "DataLoader") + .def(py::init<>()) + .def(py::init()) + .def(py::init()) + .def("startColumn", (int(DataLoader::*)()) & DataLoader::startColumn) + .def("startRow", (int(DataLoader::*)()) & DataLoader::startRow) + .def("n_data", (int(DataLoader::*)()) & DataLoader::n_data) + .def("delimiter", (char(DataLoader::*)()) & DataLoader::delimiter) + .def("path", (std::filesystem::path(DataLoader::*)()) & DataLoader::path) + .def("verbosity", (int(DataLoader::*)()) & DataLoader::verbosity) + .def("startColumn", (DataLoader & (DataLoader::*)(int)) & DataLoader::startColumn) + .def("startRow", (DataLoader & (DataLoader::*)(int)) & DataLoader::startRow) // Added missing function signature + .def("n_data", &DataLoader::n_data) + .def("delimiter", &DataLoader::delimiter) + .def("path", &DataLoader::path) + .def("verbosity", &DataLoader::verbosity) + .def("load", &DataLoader::load); } \ No newline at end of file diff --git a/test.py b/test.py new file mode 100644 index 0000000..4c97be3 --- /dev/null +++ b/test.py @@ -0,0 +1,6 @@ +import dtwcpp as dtwc + + +a = dtwc.Problem() + +print(a.size()) \ No newline at end of file From d627e7309fca4485c442169079362ef85fea00c0 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Tue, 26 Mar 2024 03:58:28 +0000 Subject: [PATCH 04/15] Update py_main.cpp --- python/py_main.cpp | 28 ++++++++++++++-------------- 1 
file changed, 14 insertions(+), 14 deletions(-) diff --git a/python/py_main.cpp b/python/py_main.cpp index f99601b..2999791 100644 --- a/python/py_main.cpp +++ b/python/py_main.cpp @@ -52,18 +52,18 @@ PYBIND11_MODULE(dtwcpp, m) py::class_(m, "DataLoader") .def(py::init<>()) .def(py::init()) - .def(py::init()) - .def("startColumn", (int(DataLoader::*)()) & DataLoader::startColumn) - .def("startRow", (int(DataLoader::*)()) & DataLoader::startRow) - .def("n_data", (int(DataLoader::*)()) & DataLoader::n_data) - .def("delimiter", (char(DataLoader::*)()) & DataLoader::delimiter) - .def("path", (std::filesystem::path(DataLoader::*)()) & DataLoader::path) - .def("verbosity", (int(DataLoader::*)()) & DataLoader::verbosity) - .def("startColumn", (DataLoader & (DataLoader::*)(int)) & DataLoader::startColumn) - .def("startRow", (DataLoader & (DataLoader::*)(int)) & DataLoader::startRow) // Added missing function signature - .def("n_data", &DataLoader::n_data) - .def("delimiter", &DataLoader::delimiter) - .def("path", &DataLoader::path) - .def("verbosity", &DataLoader::verbosity) - .def("load", &DataLoader::load); + .def(py::init()); + // .def("startColumn", (int(DataLoader::*)()) & DataLoader::startColumn) + // .def("startRow", (int(DataLoader::*)()) & DataLoader::startRow) + // .def("n_data", (int(DataLoader::*)()) & DataLoader::n_data) + // .def("delimiter", (char(DataLoader::*)()) & DataLoader::delimiter) + // .def("path", (std::filesystem::path(DataLoader::*)()) & DataLoader::path) + // .def("verbosity", (int(DataLoader::*)()) & DataLoader::verbosity) + // .def("startColumn", (DataLoader & (DataLoader::*)(int)) & DataLoader::startColumn) + // .def("startRow", (DataLoader & (DataLoader::*)(int)) & DataLoader::startRow) + // .def("n_data", &DataLoader::n_data) + // .def("delimiter", &DataLoader::delimiter) + // .def("path", &DataLoader::path) + // .def("verbosity", &DataLoader::verbosity) + // .def("load", &DataLoader::load); } \ No newline at end of file From 
e8adcbe96fb4c562b73a8b9865d71ade6dfb5051 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 22:20:58 +0100 Subject: [PATCH 05/15] change actions v2 -> v4 --- .github/workflows/documentation.yml | 2 +- .github/workflows/draft-pdf.yml | 2 +- .github/workflows/macos-unit.yml | 2 +- .github/workflows/ubuntu-unit.yml | 11 ++++------- .github/workflows/windows-unit.yml | 2 +- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index e880a7c..f43e52c 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout current branch - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Pages uses: actions/configure-pages@v2 - name: Build master with Jekyll diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml index 588b9b0..f29083b 100644 --- a/.github/workflows/draft-pdf.yml +++ b/.github/workflows/draft-pdf.yml @@ -13,7 +13,7 @@ jobs: name: Paper Draft steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Build draft PDF uses: openjournals/openjournals-draft-action@master with: diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index f25dc64..d11f730 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -16,7 +16,7 @@ jobs: runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: install openmp run: brew reinstall libomp & brew link libomp --force diff --git a/.github/workflows/ubuntu-unit.yml b/.github/workflows/ubuntu-unit.yml index ee867cf..0b68764 100644 --- a/.github/workflows/ubuntu-unit.yml +++ b/.github/workflows/ubuntu-unit.yml @@ -42,14 +42,11 @@ jobs: - CC: clang-15 CXX: clang++-15 compiler: clang-15 libomp-15-dev - # - CC: clang-16 -> we don't have these as package. 
- # CXX: clang++-16 - # compiler: clang-16 - # - CC: clang-17 - # CXX: clang++-17 - # compiler: clang-17 + - CC: clang-16 + CXX: clang++-16 + compiler: clang-16 libomp-16-dev steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: update run: sudo apt update diff --git a/.github/workflows/windows-unit.yml b/.github/workflows/windows-unit.yml index c9b0da4..a4278ea 100644 --- a/.github/workflows/windows-unit.yml +++ b/.github/workflows/windows-unit.yml @@ -16,7 +16,7 @@ jobs: runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: make build directory run: mkdir build_dir From 1e1bc3752f695ebdfdad5d30583e086f2a5728d6 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 22:24:26 +0100 Subject: [PATCH 06/15] Change versioning --- CHANGELOG.md | 11 +++++------ develop/TODO.md | 10 ++++++---- develop/conventions.md | 9 +++++++++ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bb963d..02951a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,13 +6,12 @@ This changelog contains a non-exhaustive list of new features and notable bug-fi

-# DTWC v0.1.0 +# DTWC v1.0.0 ## New features -* HiGHS solver is added for open-source alternative to Gurobi. +* HiGHS solver is added as an open-source alternative to Gurobi (Gurobi is no longer required for compilation and can be enabled with the appropriate flags). * Command line interface is added. -* Documentation is improved. - +* Documentation is improved (Doxygen website). ## Notable Bug-fixes @@ -44,9 +43,9 @@ This changelog contains a non-exhaustive list of new features and notable bug-fi * `std::filesystem::path operator+` was unnecessary and removed.

-# DTWC v0.0.2 +# DTWC v0.1.0 -DTWC with a better interface for other people's use. +A user interface is created for other people's use. ## New features / updates - Scores file with silhouette score is added. diff --git a/develop/TODO.md b/develop/TODO.md index 58b1d55..a3524af 100644 --- a/develop/TODO.md +++ b/develop/TODO.md @@ -8,13 +8,15 @@ ### Current priority: +- [ ] Numpy-like arrays for multi-dimensional DTW. - [ ] If you accidentally compile with amd64_x86 you get bad_alloc due to the insufficient RAM in 32-bit computer. So, put a warning for that. - [ ] Modernise `*.csv` reading interface, allow for rowwise or columnwise data. - [x] CMake files arranging: - [x] HiGHS 1.6.0 does not always work. Also it does not work in debug mode due to iterator level. - [ ] Commenting - [ ] MATLAB / Python integration -- [ ] JOSS paper + - [ ] Pybind11 usage for Python integration. +- [x] JOSS paper - [ ] Speed up the code - [ ] GPU programming - [ ] Lighter and faster DTW cost calculation + make the band from long side so it is more accurate. @@ -26,8 +28,8 @@ - [ ] Give error message if data could not be loaded. - Benchmarking: - [x] UCR_test_2018 is continuing. - - [ ] USR_dtai.py - - [ ] TSlearn + - [x] USR_dtai.py + - [x] TSlearn - [ ] dtwclust in R - [ ] Encapsulating Data and related functions in one folder. - [ ] Open-source solver addition. @@ -38,7 +40,7 @@ - [x] Doxygen website? - [x] Remove unnecessary warping functions. - [ ] Fix warnings. Especially, we should not get warnings from external libraries. -- [ ] Make Gurobi dependency optional. (Now it cannot be disabled. ) +- [x] Make Gurobi dependency optional. (Now it cannot be disabled. ) - [x] Consider including Eigen library for matrix operations / linear system solution. - [ ] w based DTW. - [ ] Reading memoisation matrix distMat from file instead of re-calculating DTW every time. 
diff --git a/develop/conventions.md b/develop/conventions.md index 8331cd7..e32c2e3 100644 --- a/develop/conventions.md +++ b/develop/conventions.md @@ -3,6 +3,15 @@ - Do not use "using" in global namespace. - First include standard headers then our headers. +## Versioning: + +We use semantic versioning with MAJOR.MINOR.PATCH where: + +- PATCH: Backwards compatible bug fix without introducing a new feature. +- MINOR: Backwards compatible new feature. +- MAJOR: Incompatible changes, especially on API. + +See [this link](https://www.youtube.com/watch?v=xvPiZyx0cDc) for a detailed tutorial on versioning. ### Commenting: From c88a2b98b411b7930955ee5acc6db64a4419865c Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 22:40:43 +0100 Subject: [PATCH 07/15] Update ubuntu-unit.yml --- .github/workflows/ubuntu-unit.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ubuntu-unit.yml b/.github/workflows/ubuntu-unit.yml index 0b68764..d7e9cfe 100644 --- a/.github/workflows/ubuntu-unit.yml +++ b/.github/workflows/ubuntu-unit.yml @@ -1,16 +1,14 @@ -name: Ubuntu unit -# adapted from: https://github.com/trase-cpp/trase/tree/master/.github/workflows +name: Ubuntu unit # adapted from: https://github.com/trase-cpp/trase/tree/master/.github/workflows on: push: branches: - # - main + # - main - develop pull_request: branches: - '**' jobs: - build-and-test: name: Ubuntu unit ${{matrix.CC}} runs-on: ubuntu-latest @@ -45,12 +43,22 @@ jobs: - CC: clang-16 CXX: clang++-16 compiler: clang-16 libomp-16-dev + - CC: clang-17 + CXX: clang++-17 + compiler: clang-17 libomp-17-dev steps: - uses: actions/checkout@v4 - name: update run: sudo apt update + - name: Install Clang 16 or 17 + if: matrix.CC == 'clang-16' || matrix.CC == 'clang-17' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh $(echo "${{ matrix.CC }}" | cut -d '-' -f 2) + - name: install compiler run: sudo apt install 
${{matrix.compiler}} From 451f6dc2c4f587178cd4b2039df082cfc8148e9f Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 23:11:37 +0100 Subject: [PATCH 08/15] Remove force link from macOS as unit test does not work. --- .github/workflows/macos-unit.yml | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index d11f730..493cb9b 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -3,14 +3,13 @@ name: macOS unit on: push: branches: - # - main + # - main - develop pull_request: branches: - '**' jobs: - build-and-test: name: macOS unit runs-on: macos-latest @@ -18,20 +17,23 @@ jobs: steps: - uses: actions/checkout@v4 - - name: install openmp - run: brew reinstall libomp & brew link libomp --force - - - name: make build directory + - name: Install libomp without linking + run: | + brew reinstall libomp + echo "export LDFLAGS=-L/opt/homebrew/opt/libomp/lib" >> $GITHUB_ENV + echo "export CPPFLAGS=-I/opt/homebrew/opt/libomp/include" >> $GITHUB_ENV + + - name: Make build directory run: mkdir build_dir - - name: cmake configure + - name: CMake configure run: cmake .. -DCMAKE_BUILD_TYPE=Debug -DDTWC_BUILD_TESTING=ON working-directory: build_dir - - name: cmake build + - name: CMake build run: cmake --build . 
--parallel 2 working-directory: build_dir - - name: cmake test + - name: CMake test run: ctest -j2 -C Debug --output-on-failure - working-directory: build_dir + working-directory: build_dir \ No newline at end of file From 6e6834535786f9666d00df1ac4df708657821a7f Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 23:17:12 +0100 Subject: [PATCH 09/15] export omp flags --- .github/workflows/macos-unit.yml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index 493cb9b..02f2ebf 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -20,20 +20,29 @@ jobs: - name: Install libomp without linking run: | brew reinstall libomp - echo "export LDFLAGS=-L/opt/homebrew/opt/libomp/lib" >> $GITHUB_ENV - echo "export CPPFLAGS=-I/opt/homebrew/opt/libomp/include" >> $GITHUB_ENV + echo "LDFLAGS=-L/opt/homebrew/opt/libomp/lib" >> $GITHUB_ENV + echo "CPPFLAGS=-I/opt/homebrew/opt/libomp/include" >> $GITHUB_ENV - name: Make build directory run: mkdir build_dir - name: CMake configure - run: cmake .. -DCMAKE_BUILD_TYPE=Debug -DDTWC_BUILD_TESTING=ON + run: | + export LDFLAGS=-L/opt/homebrew/opt/libomp/lib + export CPPFLAGS=-I/opt/homebrew/opt/libomp/include + cmake .. -DCMAKE_BUILD_TYPE=Debug -DDTWC_BUILD_TESTING=ON working-directory: build_dir - name: CMake build - run: cmake --build . --parallel 2 + run: | + export LDFLAGS=-L/opt/homebrew/opt/libomp/lib + export CPPFLAGS=-I/opt/homebrew/opt/libomp/include + cmake --build . 
--parallel 2 working-directory: build_dir - name: CMake test - run: ctest -j2 -C Debug --output-on-failure + run: | + export LDFLAGS=-L/opt/homebrew/opt/libomp/lib + export CPPFLAGS=-I/opt/homebrew/opt/libomp/include + ctest -j2 -C Debug --output-on-failure working-directory: build_dir \ No newline at end of file From cdc9d7e664e04e9497783ffc7cbbbf7c8a1c582f Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 23:39:21 +0100 Subject: [PATCH 10/15] export flags for libomp --- .github/workflows/macos-unit.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index 02f2ebf..d4cface 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -19,30 +19,29 @@ jobs: - name: Install libomp without linking run: | - brew reinstall libomp - echo "LDFLAGS=-L/opt/homebrew/opt/libomp/lib" >> $GITHUB_ENV - echo "CPPFLAGS=-I/opt/homebrew/opt/libomp/include" >> $GITHUB_ENV + xcode-select --install + brew install libomp + export CC=/usr/bin/clang + export CXX=/usr/bin/clang++ + export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp" + export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include" + export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include" + export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp" - name: Make build directory run: mkdir build_dir - name: CMake configure run: | - export LDFLAGS=-L/opt/homebrew/opt/libomp/lib - export CPPFLAGS=-I/opt/homebrew/opt/libomp/include cmake .. -DCMAKE_BUILD_TYPE=Debug -DDTWC_BUILD_TESTING=ON working-directory: build_dir - name: CMake build run: | - export LDFLAGS=-L/opt/homebrew/opt/libomp/lib - export CPPFLAGS=-I/opt/homebrew/opt/libomp/include cmake --build . 
--parallel 2 working-directory: build_dir - name: CMake test run: | - export LDFLAGS=-L/opt/homebrew/opt/libomp/lib - export CPPFLAGS=-I/opt/homebrew/opt/libomp/include ctest -j2 -C Debug --output-on-failure working-directory: build_dir \ No newline at end of file From 29ec4ac179028dc208a1cbb79f7ea27be79b2cda Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Sun, 28 Apr 2024 23:46:23 +0100 Subject: [PATCH 11/15] removed xcode-select --install --- .github/workflows/macos-unit.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index d4cface..67a0cc2 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -19,7 +19,6 @@ jobs: - name: Install libomp without linking run: | - xcode-select --install brew install libomp export CC=/usr/bin/clang export CXX=/usr/bin/clang++ From 0aa61881b264566b20429815986c97bf617e630d Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Mon, 29 Apr 2024 02:15:46 +0100 Subject: [PATCH 12/15] macos unit test libomp --- .github/dependabot.yml | 9 +++++++++ .github/workflows/macos-unit.yml | 10 ++++------ 2 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..489b692 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,9 @@ +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" \ No newline at end of file diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index 67a0cc2..ac9233d 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -20,12 +20,10 @@ jobs: - name: Install libomp without linking run: | brew install libomp - export CC=/usr/bin/clang - export CXX=/usr/bin/clang++ - 
export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp" - export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include" - export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include" - export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp" + echo "CC=$(brew --prefix llvm)/bin/clang" >> $GITHUB_ENV + echo "CXX=$(brew --prefix llvm)/bin/clang++" >> $GITHUB_ENV + echo "LDFLAGS=\"-L$(brew --prefix llvm)/lib\"" >> $GITHUB_ENV + echo "CPPFLAGS=\"-I$(brew --prefix llvm)/include\"" >> $GITHUB_ENV - name: Make build directory run: mkdir build_dir From ed0c94c2f8fc247d5d03043f17184a12a6c4002a Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Mon, 29 Apr 2024 02:25:52 +0100 Subject: [PATCH 13/15] Update macos-unit.yml --- .github/workflows/macos-unit.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/macos-unit.yml b/.github/workflows/macos-unit.yml index ac9233d..3f1e02b 100644 --- a/.github/workflows/macos-unit.yml +++ b/.github/workflows/macos-unit.yml @@ -19,11 +19,7 @@ jobs: - name: Install libomp without linking run: | - brew install libomp - echo "CC=$(brew --prefix llvm)/bin/clang" >> $GITHUB_ENV - echo "CXX=$(brew --prefix llvm)/bin/clang++" >> $GITHUB_ENV - echo "LDFLAGS=\"-L$(brew --prefix llvm)/lib\"" >> $GITHUB_ENV - echo "CPPFLAGS=\"-I$(brew --prefix llvm)/include\"" >> $GITHUB_ENV + brew install libomp llvm && brew link --force libomp - name: Make build directory run: mkdir build_dir From 653710daa34d6b35ba12ff265172cd89c9bc0c88 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Mon, 29 Apr 2024 02:44:19 +0100 Subject: [PATCH 14/15] UCR dataset all benchmark is enabled. 
--- CHANGELOG.md | 7 ++++--- benchmark/UCR_dtwc.cpp | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02951a3..321b4bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,9 +26,10 @@ This changelog contains a non-exhaustive list of new features and notable bug-fi ## Developer updates: * The software is now being tested via Catch2 library. +* Dependabot is added.

-# DTWC v0.0.3 +# DTWC v0.3.0 ## New features * UCR_test_2018 data integration for benchmarking. @@ -43,7 +44,7 @@ This changelog contains a non-exhaustive list of new features and notable bug-fi * `std::filesystem::path operator+` was unnecessary and removed.

-# DTWC v0.1.0 +# DTWC v0.2.0 A user interface is created for other people's use. @@ -67,7 +68,7 @@ A user interface is created for other people's use. * Required C++ standard is upgraded from C++17 to C++20.

-# DTWC v0.0.1 +# DTWC v0.1.0 This is the initial release of DTWC. diff --git a/benchmark/UCR_dtwc.cpp b/benchmark/UCR_dtwc.cpp index 429f0df..3aa0ced 100644 --- a/benchmark/UCR_dtwc.cpp +++ b/benchmark/UCR_dtwc.cpp @@ -97,6 +97,9 @@ inline void UCR_2018() //(settings::root_folder / "data/benchmark/UCRArchive_2018/AllGestureWiimoteZ/AllGestureWiimoteZ_TEST.tsv") }; + + dataofInterest = UCR_list; // Comment this out to test individual datasets. + size_t solved = 0; for (auto &file_path : dataofInterest) { dl.path(file_path); @@ -108,10 +111,8 @@ inline void UCR_2018() std::cout << "Now, number " << solved << " " << file_path << " is being solved.\n"; solved++; - // if (solved < 25) // We already calculated this part - // continue; - if (prob.data.size() > 1000) // DOnt calculate large data it is not good. For example Crop. + if (prob.data.size() > 1000) // Skip large datasets (more than 1000 series), for example Crop. continue; prob.set_numberOfClusters(Nc); // Nc = number of clusters. From e14e072b086eaeeac050f2a8c579da0db4a11bd7 Mon Sep 17 00:00:00 2001 From: Volkan Kumtepeli Date: Mon, 29 Apr 2024 02:46:35 +0100 Subject: [PATCH 15/15] version changes. --- CMakeLists.txt | 2 +- docs/Doxyfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 023b32d..0e9c3fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.21) # so that it works well with tools like CPM or other # manual dependency management -project(DTWC++ VERSION "0.1.0" +project(DTWC++ VERSION "1.0.0" DESCRIPTION "A C++ library for fast Dynamic Time Wrapping Clustering" HOMEPAGE_URL https://battery-intelligence-lab.github.io/dtw-cpp/ LANGUAGES CXX C) diff --git a/docs/Doxyfile b/docs/Doxyfile index 3e7d5e7..2472419 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -48,7 +48,7 @@ PROJECT_NAME = DTWC++ # could be handy for archiving the generated documentation or if some version # control system is used. 
-PROJECT_NUMBER = 0.1.0 +PROJECT_NUMBER = 1.0.0-rc0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a