Skip to content

Commit

Permalink
Support large dimensionality #48
Browse files Browse the repository at this point in the history
The algorithm failed when n_clusters*n_features > 256.

New limits:
 - n_clusters <= 32
 - n_features < 256
 - n_clusters * n_features <= 4096
  • Loading branch information
SylvanBrocard authored Dec 5, 2024
2 parents 8389170 + 0705c70 commit 7225122
Show file tree
Hide file tree
Showing 15 changed files with 541 additions and 328 deletions.
34 changes: 30 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ cmake_minimum_required(VERSION 3.15...3.27)
set(CMAKE_C_COMPILER "gcc")
set(CMAKE_CXX_COMPILER "g++")

project(${SKBUILD_PROJECT_NAME} VERSION ${SKBUILD_PROJECT_VERSION})
project(
${SKBUILD_PROJECT_NAME}
VERSION ${SKBUILD_PROJECT_VERSION}
LANGUAGES CXX)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# set(CMAKE_VERBOSE_MAKEFILE ON)
Expand Down Expand Up @@ -33,7 +36,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
# temporary workaround until SDK distributes fixed toolchain file
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_CURRENT_SOURCE_DIR}/src/dpu_program/dpu.cmake
)
BUILD_ALWAYS TRUE)

# =================== BUILDING THE HOST BINARY ======================

Expand All @@ -43,6 +46,7 @@ find_package(pybind11 CONFIG REQUIRED)
include(CheckIPOSupported)
include(${UPMEM_HOME}/share/upmem/cmake/include/host/DpuHost.cmake)
include(cmake/CPM.cmake)
include(cmake/CompilerWarnings.cmake)

cpmaddpackage(
NAME
Expand All @@ -56,12 +60,34 @@ cpmaddpackage(

pybind11_add_module(_core MODULE src/main.cpp src/host_program/dimm_manager.cpp
src/host_program/lloyd_iter.cpp)
target_sources(
_core
PRIVATE FILE_SET
common_headers
TYPE
HEADERS
BASE_DIRS
src
FILES
src/common.h)
target_sources(
_core
PRIVATE FILE_SET
host_headers
TYPE
HEADERS
BASE_DIRS
src
FILES
src/kmeans.hpp)
target_link_libraries(_core PRIVATE ${DPU_HOST_LIBRARIES} fmt stdc++fs)
target_include_directories(_core SYSTEM PUBLIC ${DPU_HOST_INCLUDE_DIRECTORIES})
target_link_directories(_core PUBLIC ${DPU_HOST_LINK_DIRECTORIES})

target_compile_features(_core PUBLIC c_std_99 cxx_std_17)
target_compile_options(_core PRIVATE -Wall -Wextra $<$<CONFIG:RELEASE>:-Ofast>)
target_compile_features(_core PUBLIC cxx_std_17)
target_compile_options(_core PRIVATE $<$<CONFIG:RELEASE>:-Ofast>
$<$<CONFIG:DEBUG>:-Og>)
dpu_kmeans_set_project_warnings(_core "" "")

target_compile_definitions(
_core
Expand Down
68 changes: 68 additions & 0 deletions cmake/CompilerWarnings.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# from here:
#
# https://github.com/lefticus/cppbestpractices/blob/master/02-Use_the_Tools_Available.md

# Attach the project's compiler-warning flags to a target as PRIVATE compile
# options.
#
# Arguments:
#
# * project_name   - name of the target that receives the flags
# * CLANG_WARNINGS - list of flags used when the compiler is Clang; pass "" to
#   use the default list defined below
# * GCC_WARNINGS   - list of flags used when the compiler is GCC; pass "" to
#   use the Clang list plus the GCC-only additions defined below
#
# Any other compiler gets no warning flags and an AUTHOR_WARNING is emitted.
#
# Example: dpu_kmeans_set_project_warnings(_core "" "")
function(dpu_kmeans_set_project_warnings project_name CLANG_WARNINGS
         GCC_WARNINGS)
  if("${CLANG_WARNINGS}" STREQUAL "")
    set(CLANG_WARNINGS
        -Wall
        -Wextra # reasonable and standard
        -Wshadow # warn the user if a variable declaration shadows one from a
                 # parent context
        -Wnon-virtual-dtor # warn the user if a class with virtual functions has
                           # a non-virtual destructor. This helps
                           # catch hard to track down memory errors
        -Wold-style-cast # warn for c-style casts
        -Wcast-align # warn for potential performance problem casts
        -Wunused # warn on anything being unused
        -Woverloaded-virtual # warn if you overload (not override) a virtual
                             # function
        -Wpedantic # warn if non-standard C++ is used
        -Wconversion # warn on type conversions that may lose data
        -Wsign-conversion # warn on sign conversions
        -Wnull-dereference # warn if a null dereference is detected
        -Wdouble-promotion # warn if float is implicitly promoted to double
        -Wformat=2 # warn on security issues around functions that format
                   # output (ie printf)
        -Wimplicit-fallthrough # warn on statements that fallthrough without an
                               # explicit annotation
    )
  endif()

  if("${GCC_WARNINGS}" STREQUAL "")
    # The GCC defaults extend whatever Clang list is in effect at this point
    # (the caller's list if one was given, the defaults above otherwise).
    set(GCC_WARNINGS
        ${CLANG_WARNINGS}
        -Wmisleading-indentation # warn if indentation implies blocks where
                                 # blocks do not exist
        -Wduplicated-cond # warn if if / else chain has duplicated conditions
        -Wduplicated-branches # warn if if / else branches have duplicated code
        -Wlogical-op # warn about logical operations being used where bitwise
                     # were probably wanted
        -Wuseless-cast # warn if you perform a cast to the same type
        -Wsuggest-override # warn if an overridden member function is not marked
                           # 'override' or 'final'
    )
  endif()

  # Start from an empty list: function scope inherits the caller's variables,
  # so without this reset an unrecognised compiler would silently pick up any
  # PROJECT_WARNINGS_CXX that happens to be defined in the calling scope.
  set(PROJECT_WARNINGS_CXX "")

  if(CMAKE_CXX_COMPILER_ID MATCHES ".*Clang" OR CMAKE_C_COMPILER_ID MATCHES
                                                ".*Clang")
    set(PROJECT_WARNINGS_CXX ${CLANG_WARNINGS})
  elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(PROJECT_WARNINGS_CXX ${GCC_WARNINGS})
  else()
    message(
      AUTHOR_WARNING
        "No compiler warnings set for CXX compiler: '${CMAKE_CXX_COMPILER_ID}'")
  endif()

  # use the same warning flags for C
  #
  # NOTE(review): several of the default flags (-Wnon-virtual-dtor,
  # -Wold-style-cast, -Woverloaded-virtual, -Wuseless-cast, -Wsuggest-override)
  # are C++-only, so a target that also compiles C sources will get "valid for
  # C++ but not for C" diagnostics from GCC. Consider a dedicated C list if C
  # sources are ever added to a target using this function.
  set(PROJECT_WARNINGS_C "${PROJECT_WARNINGS_CXX}")

  # The generator expressions are quoted so that each list stays inside its
  # expression as a single argument instead of being split on ';' before the
  # expression is parsed.
  target_compile_options(
    ${project_name}
    PRIVATE # C++ warnings
            "$<$<COMPILE_LANGUAGE:CXX>:${PROJECT_WARNINGS_CXX}>"
            # C warnings
            "$<$<COMPILE_LANGUAGE:C>:${PROJECT_WARNINGS_C}>")
endfunction()
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,9 @@ build-type = "Release"
[tool.scikit-build.cmake.define]
NR_TASKLETS = "16"
UPMEM_HOME = {env = "UPMEM_HOME", default = "/usr"}

[tool.ruff.lint]
ignore = ["COM812"]

[tool.ruff.lint.per-file-ignores]
"tests/*.py" = ["S101", "INP001"]
11 changes: 9 additions & 2 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
* @brief Data size constraints
*/
/**@{*/
#define ASSUMED_NR_CLUSTERS 32 /**< Maximum number of clusters */
#define ASSUMED_NR_FEATURES 34 /**< Maximum number of features */
#define ASSUMED_NR_CLUSTERS 32 /**< Maximum number of clusters */
#define ASSUMED_NR_FEATURES 128 /**< Maximum number of features */
#define WRAM_FEATURES_SIZE \
512 /**< max size of the WRAM array holding points features in bytes */
/**@}*/
Expand Down Expand Up @@ -75,3 +75,10 @@ typedef int32_t int_feature;
(MRAM_SIZE / FEATURE_TYPE * 8 / \
2) /**< How many features we fit into one DPU's MRAM. Can be increased \
further. */

/**
 * @brief Per-task size parameters packed into a single 8-byte-aligned record.
 *
 * The fields occupy 6 bytes; the aligned(8) attribute raises the struct's
 * alignment to 8 bytes, which also pads its size up to 8 bytes.
 * NOTE(review): presumably this matches the 8-byte granularity required for
 * host <-> DPU transfers - confirm against the code that copies this struct.
 */
struct __attribute__((aligned(8))) task_parameters {
  uint8_t nfeatures; /**< presumably the number of features per point; being
                        8-bit it cannot exceed 255 - confirm against callers */
  uint8_t task_size_in_points; /**< presumably the number of points handled per
                                  task (8-bit, so at most 255) - confirm */
  uint16_t task_size_in_features; /**< presumably the task size counted in
                                     features - confirm */
  uint16_t task_size_in_bytes; /**< presumably the task size counted in bytes
                                  (16-bit, so at most 65535) - confirm */
};
21 changes: 18 additions & 3 deletions src/dpu_kmeans/_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,28 @@
from __future__ import annotations
import numpy
import os
import typing
__all__ = ['Container', 'FEATURE_TYPE']
class Container:
"""
Container object to interface with the DPUs
"""
nr_dpus: int
@staticmethod
def _pybind11_conduit_v1_(*args, **kwargs):
...
def __init__(self) -> None:
...
def allocate(self) -> None:
def allocate(self, arg0: int) -> None:
...
def compute_inertia(self, arg0: numpy.ndarray[numpy.int16]) -> int:
...
def free_dpus(self) -> None:
...
def lloyd_iter(self, arg0: numpy.ndarray[numpy.int16], arg1: numpy.ndarray[numpy.int64], arg2: numpy.ndarray[numpy.int32]) -> None:
...
def load_array_data(self, arg0: numpy.ndarray[numpy.int16], arg1: int, arg2: int) -> None:
def load_array_data(self, arg0: numpy.ndarray[numpy.int16], arg1: str) -> None:
...
def load_kernel(self, arg0: os.PathLike) -> None:
...
Expand All @@ -44,12 +44,27 @@ class Container:
def reset_timer(self) -> None:
...
@property
def allocated(self) -> bool:
...
@property
def binary_path(self) -> os.PathLike | None:
...
@property
def cpu_pim_time(self) -> float:
...
@property
def data_size(self) -> int | None:
...
@property
def dpu_run_time(self) -> float:
...
@property
def hash(self) -> bytes | None:
...
@property
def nr_dpus(self) -> int:
...
@property
def pim_cpu_time(self) -> float:
...
FEATURE_TYPE: int = 16
Expand Down
100 changes: 4 additions & 96 deletions src/dpu_kmeans/_dimm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@

# pylint: disable=global-statement

import atexit
import sys

import numpy as np
import xxhash

Expand All @@ -26,18 +23,9 @@

from ._core import FEATURE_TYPE, Container

_allocated = False # whether the DPUs have been allocated
_kernel = None # name of the currently loaded binary
_data_id = None # ID of the currently loaded data
_data_checksum = None # the checksum of the currently loaded data
_data_size = None # size of the currently loaded data

_kernels_lib = {"kmeans": files("dpu_kmeans").joinpath("dpu_program/kmeans_dpu_kernel")}

ctr = Container()
ctr.nr_dpus = 0

_requested_dpus = 0


class LinearDiscretizer(TransformerMixin, BaseEstimator):
Expand Down Expand Up @@ -126,106 +114,26 @@ def inverse_transform(self, Xt):
ld = LinearDiscretizer() # linear discretization transformer


def set_n_dpu(n_dpu: int):
"""Set the number of DPUs to ask for during the allocation."""
global _allocated
global _requested_dpus
if _allocated and _requested_dpus != n_dpu:
free_dpus()
if not _allocated:
_requested_dpus = n_dpu
ctr.nr_dpus = n_dpu
ctr.allocate()
_allocated = True


def get_n_dpu():
"""Return the number of allocated DPUs."""
return ctr.nr_dpus


def load_kernel(kernel: str, verbose: int = False):
"""Load a given kernel into the allocated DPUs."""
global _kernel
global _allocated
global _data_id
global _data_checksum
global _data_size
if not _allocated:
ctr.allocate()
_allocated = True
if not _kernel == kernel:
if ctr.binary_path != kernel:
if verbose:
print(f"loading new kernel : {kernel}")
_kernel = kernel
ref = _kernels_lib[kernel]
with as_file(ref) as dpu_binary:
with as_file(_kernels_lib[kernel]) as dpu_binary:
ctr.load_kernel(dpu_binary)
_data_id = None
_data_checksum = None
_data_size = None


def load_data(X, verbose: int = False):
"""Load a dataset into the allocated DPUs."""
global _data_checksum
global _data_size

# compute the checksum of X
h = xxhash.xxh3_64()
h.update(X)
X_checksum = h.digest()

if _data_checksum != X_checksum:
if ctr.hash != X_checksum:
if verbose:
print("loading new data")
_data_checksum = X_checksum
Xt = ld.fit_transform(X)
ctr.load_array_data(
Xt,
Xt.shape[0],
Xt.shape[1],
)
_data_size = sys.getsizeof(Xt)
ctr.load_array_data(Xt, X_checksum)
elif verbose:
print("reusing previously loaded data")


def reset_timer(verbose=False):
"""Reset the DPU execution timer."""
if verbose:
print("resetting inner timer")
ctr.reset_timer()


def get_dpu_run_time():
"""Return the DPU execution timer."""
return ctr.dpu_run_time


def get_cpu_pim_time():
"""Return the time to load the data to the DPU memory."""
return ctr.cpu_pim_time


def get_pim_cpu_time():
"""Return the time to get the inertia from the DPU memory."""
return ctr.pim_cpu_time


def free_dpus(verbose: int = False):
"""Frees all allocated DPUs."""
global _allocated
global _kernel
global _data_id
global _data_checksum
global _data_size
if _allocated:
if verbose:
print("freeing dpus")
ctr.free_dpus()
_allocated = False
_kernel = None
_data_id = None
_data_checksum = None
_data_size = None
Loading

0 comments on commit 7225122

Please sign in to comment.