diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
index b130ea3..b3124a6 100644
--- a/.github/workflows/publish_pypi.yml
+++ b/.github/workflows/publish_pypi.yml
@@ -74,7 +74,7 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}"
           CIBW_ARCHS: ${{ matrix.cibw_archs }}
           CIBW_BEFORE_TEST_LINUX: yum -y install maven java
-          CIBW_TEST_REQUIRES: bfio requests numpy ome_zarr
+          CIBW_TEST_REQUIRES: bfio requests numpy==1.24.0 ome_zarr
           CIBW_TEST_COMMAND: python -W default -m unittest discover -s {project}/tests -v

       - name: Install Dependencies
@@ -128,7 +128,7 @@ jobs:
           CIBW_ENVIRONMENT_MACOS: REPAIR_LIBRARY_PATH="/tmp/bfiocpp_bld/local_install/lib:/tmp/bfiocpp_bld/local_install/lib64" ON_GITHUB="TRUE" BFIOCPP_DEP_DIR="/tmp/bfiocpp_bld/local_install" CMAKE_ARGS="-DTENSORSTORE_USE_SYSTEM_JPEG=ON"
           CIBW_REPAIR_WHEEL_COMMAND_MACOS: DYLD_LIBRARY_PATH=$REPAIR_LIBRARY_PATH delocate-listdeps {wheel} && DYLD_LIBRARY_PATH=$REPAIR_LIBRARY_PATH delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}
           CIBW_ARCHS: ${{ matrix.cibw_archs }}
-          CIBW_TEST_REQUIRES: bfio requests numpy ome_zarr
+          CIBW_TEST_REQUIRES: bfio requests numpy==1.24.0 ome_zarr
           CIBW_TEST_COMMAND: python -W default -m unittest discover -s {project}/tests -v

       - name: Install Dependencies
diff --git a/.github/workflows/wheel_build.yml b/.github/workflows/wheel_build.yml
index 1971d5a..35b801b 100644
--- a/.github/workflows/wheel_build.yml
+++ b/.github/workflows/wheel_build.yml
@@ -72,7 +72,7 @@ jobs:
           CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "delvewheel repair -w {dest_dir} {wheel}"
           CIBW_ARCHS: ${{ matrix.cibw_archs }}
           CIBW_BEFORE_TEST_LINUX: yum -y install maven java
-          CIBW_TEST_REQUIRES: bfio requests numpy ome_zarr
+          CIBW_TEST_REQUIRES: bfio requests numpy==1.24.0 ome_zarr
           CIBW_TEST_COMMAND: python -W default -m unittest discover -s {project}/tests -v

       - name: Upload Artifact
@@ -124,7 +124,7 @@ jobs:
           CIBW_ENVIRONMENT_MACOS: REPAIR_LIBRARY_PATH="/tmp/bfiocpp_bld/local_install/lib:/tmp/bfiocpp_bld/local_install/lib64" ON_GITHUB="TRUE" BFIOCPP_DEP_DIR="/tmp/bfiocpp_bld/local_install" CMAKE_ARGS="-DTENSORSTORE_USE_SYSTEM_JPEG=ON"
           CIBW_REPAIR_WHEEL_COMMAND_MACOS: DYLD_LIBRARY_PATH=$REPAIR_LIBRARY_PATH delocate-listdeps {wheel} && DYLD_LIBRARY_PATH=$REPAIR_LIBRARY_PATH delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}
           CIBW_ARCHS: ${{ matrix.cibw_archs }}
-          CIBW_TEST_REQUIRES: bfio requests numpy ome_zarr
+          CIBW_TEST_REQUIRES: bfio requests numpy==1.24.0 ome_zarr zarr
           CIBW_TEST_COMMAND: python -W default -m unittest discover -s {project}/tests -v

       - name: Upload Artifact
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aba792a..43dc4e5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,7 +24,8 @@ set(SOURCE
     src/cpp/ts_driver/ometiff/driver.cc
     src/cpp/interface/interface.cpp
     src/cpp/reader/tsreader.cpp
-    src/cpp/reader/utilities.cpp
+    src/cpp/utilities/utilities.cpp
+    src/cpp/writer/tswriter.cpp
 )

 include(FetchContent)
@@ -68,4 +69,4 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 endif()

 target_link_libraries(libbfiocpp PRIVATE tensorstore::tensorstore tensorstore::all_drivers)
-target_link_libraries(libbfiocpp PRIVATE ${Build_LIBRARIES})
+target_link_libraries(libbfiocpp PRIVATE ${Build_LIBRARIES})
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 09f4936..fed64e8 100644
--- a/setup.py
+++ b/setup.py
@@ -104,6 +104,6 @@ def build_extension(self, ext):
     zip_safe=False,
     python_requires=">=3.8",
     install_requires=[
-        "numpy",
+        "numpy<2.0.0",
     ],
 )
diff --git a/src/cpp/interface/interface.cpp b/src/cpp/interface/interface.cpp
index a6a6f15..2911755 100644
--- a/src/cpp/interface/interface.cpp
+++ b/src/cpp/interface/interface.cpp
@@ -1,10 +1,12 @@
 #include
 #include
+#include
 #include
 #include "../reader/tsreader.h"
 #include "../reader/sequence.h"
-#include "../reader/utilities.h"
+#include "../utilities/utilities.h"
+#include "../writer/tswriter.h"

 namespace py = pybind11;

 using bfiocpp::Seq;
@@ -116,4 +118,10 @@ PYBIND11_MODULE(libbfiocpp, m) {
         .export_values();

     m.def("get_ome_xml", &bfiocpp::GetOmeXml);
-}
\ No newline at end of file
+
+
+    // Writer class
+    py::class_<bfiocpp::TsWriterCPP, std::shared_ptr<bfiocpp::TsWriterCPP>>(m, "TsWriterCPP")
+        .def(py::init<const std::string&, const std::vector<std::int64_t>&, const std::vector<std::int64_t>&, const std::string&>())
+        .def("write", &bfiocpp::TsWriterCPP::write_image);
+}
diff --git a/src/cpp/reader/tsreader.cpp b/src/cpp/reader/tsreader.cpp
index 2dc36b2..6171426 100644
--- a/src/cpp/reader/tsreader.cpp
+++ b/src/cpp/reader/tsreader.cpp
@@ -7,7 +7,7 @@
 #include "tensorstore/open.h"

 #include "tsreader.h"
-#include "utilities.h"
+#include "../utilities/utilities.h"
 #include "type_info.h"
diff --git a/src/cpp/reader/utilities.cpp b/src/cpp/utilities/utilities.cpp
similarity index 60%
rename from src/cpp/reader/utilities.cpp
rename to src/cpp/utilities/utilities.cpp
index b88c67f..f360512 100644
--- a/src/cpp/reader/utilities.cpp
+++ b/src/cpp/utilities/utilities.cpp
@@ -4,6 +4,7 @@
 #include "utilities.h"
 #include
 #include
+#include <thread>

 namespace bfiocpp {
 tensorstore::Spec GetOmeTiffSpecToRead(const std::string& filename){
@@ -29,7 +30,6 @@ tensorstore::Spec GetZarrSpecToRead(const std::string& filename){
 }

-

 uint16_t GetDataTypeCode (std::string_view type_name){

     if (type_name == std::string_view{"uint8"}) {return 1;}
@@ -108,4 +108,53 @@ std::string GetOmeXml(const std::string& file_path){
     return OmeXmlInfo;
 }

+tensorstore::Spec GetZarrSpecToWrite(const std::string& filename,
+                                     const std::vector<std::int64_t>& image_shape,
+                                     const std::vector<std::int64_t>& chunk_shape,
+                                     const std::string& dtype){
+    return tensorstore::Spec::FromJson({{"driver", "zarr"},
+                            {"kvstore", {{"driver", "file"},
+                                         {"path", filename}}
+                            },
+                            {"context", {
+                                {"cache_pool", {{"total_bytes_limit", 1000000000}}},
+                                {"data_copy_concurrency", {{"limit", std::thread::hardware_concurrency()}}},
+                                {"file_io_concurrency", {{"limit", std::thread::hardware_concurrency()}}},
+                            }},
+                            {"metadata", {
+                                {"zarr_format", 2},
+                                {"shape", image_shape},
+                                {"chunks", chunk_shape},
+                                {"dtype", dtype},
+                                },
+                            }}).value();
+}
+
+// Function to get the TensorStore DataType based on a string identifier
+tensorstore::DataType GetTensorStoreDataType(const std::string& type_str) {
+    if (type_str == "uint8") {
+        return tensorstore::dtype_v<std::uint8_t>;
+    } else if (type_str == "uint16") {
+        return tensorstore::dtype_v<std::uint16_t>;
+    } else if (type_str == "uint32") {
+        return tensorstore::dtype_v<std::uint32_t>;
+    } else if (type_str == "uint64") {
+        return tensorstore::dtype_v<std::uint64_t>;
+    } else if (type_str == "int8") {
+        return tensorstore::dtype_v<std::int8_t>;
+    } else if (type_str == "int16") {
+        return tensorstore::dtype_v<std::int16_t>;
+    } else if (type_str == "int32") {
+        return tensorstore::dtype_v<std::int32_t>;
+    } else if (type_str == "int64") {
+        return tensorstore::dtype_v<std::int64_t>;
+    } else if (type_str == "float") {
+        return tensorstore::dtype_v<float>;
+    } else if (type_str == "double" || type_str == "float64") { // handle float64 from numpy
+        return tensorstore::dtype_v<double>;
+    } else {
+        throw std::invalid_argument("Unknown data type string: " + type_str);
+    }
+}
+
 } // ns bfiocpp
\ No newline at end of file
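For reference, the spec GetZarrSpecToWrite assembles is an ordinary zarr v2 TensorStore spec. Below is a minimal sketch of the same spec expressed as a Python dict and opened with the optional tensorstore Python package; the path, shape, chunking, and dtype are placeholder values and the cache/concurrency "context" entries are omitted.

import tensorstore as ts

# Placeholder spec mirroring what GetZarrSpecToWrite builds on the C++ side.
spec = {
    "driver": "zarr",
    "kvstore": {"driver": "file", "path": "out.zarr"},
    "metadata": {
        "zarr_format": 2,
        "shape": [1, 1, 1, 2700, 2702],
        "chunks": [1, 1, 1, 2700, 2702],
        "dtype": "|u1",  # zarr v2 encoding of uint8
    },
}

# Open an existing store written with this spec and inspect it.
store = ts.open(spec, open=True).result()
print(store.shape, store.dtype)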
diff --git a/src/cpp/reader/utilities.h b/src/cpp/utilities/utilities.h
similarity index 63%
rename from src/cpp/reader/utilities.h
rename to src/cpp/utilities/utilities.h
index 29ed949..047b49c 100644
--- a/src/cpp/reader/utilities.h
+++ b/src/cpp/utilities/utilities.h
@@ -18,4 +18,9 @@ uint16_t GetDataTypeCode (std::string_view type_name);
 std::string GetUTCString();
 std::string GetOmeXml(const std::string& file_path);
 std::tuple, std::optional, std::optional>ParseMultiscaleMetadata(const std::string& axes_list, int len);
+tensorstore::DataType GetTensorStoreDataType(const std::string& type_str);
+tensorstore::Spec GetZarrSpecToWrite(const std::string& filename,
+                                     const std::vector<std::int64_t>& image_shape,
+                                     const std::vector<std::int64_t>& chunk_shape,
+                                     const std::string& dtype);
 } // ns bfiocpp
\ No newline at end of file
diff --git a/src/cpp/writer/tswriter.cpp b/src/cpp/writer/tswriter.cpp
new file mode 100644
index 0000000..f907ef6
--- /dev/null
+++ b/src/cpp/writer/tswriter.cpp
@@ -0,0 +1,142 @@
+#include "tswriter.h"
+
+#include "../utilities/utilities.h"
+
+#include
+#include
+
+using ::tensorstore::internal_zarr::ChooseBaseDType;
+
+namespace bfiocpp {
+
+TsWriterCPP::TsWriterCPP(
+    const std::string& fname,
+    const std::vector<std::int64_t>& image_shape,
+    const std::vector<std::int64_t>& chunk_shape,
+    const std::string& dtype_str
+): _filename(fname), _image_shape(image_shape), _chunk_shape(chunk_shape) {
+
+    _dtype_code = GetDataTypeCode(dtype_str);
+
+    std::string dtype_str_converted = (dtype_str == "float64") ? "double" : dtype_str; // change float64 numpy type to double
+
+    auto dtype = GetTensorStoreDataType(dtype_str_converted);
+
+    auto dtype_base = ChooseBaseDType(dtype).value().encoded_dtype;
+
+    auto spec = GetZarrSpecToWrite(_filename, image_shape, chunk_shape, dtype_base);
+
+    TENSORSTORE_CHECK_OK_AND_ASSIGN(_source, tensorstore::Open(
+        spec,
+        tensorstore::OpenMode::create |
+        tensorstore::OpenMode::delete_existing,
+        tensorstore::ReadWriteMode::write).result());
+}
+
+
+void TsWriterCPP::write_image(py::array& py_image) {
+
+    // use switch instead of template to avoid creating functions for each datatype
+    switch(_dtype_code)
+    {
+        case (1): {
+            auto data_array = tensorstore::Array(py_image.mutable_unchecked<std::uint8_t>().data(0), _image_shape, tensorstore::c_order);
+
+            // Write data array to TensorStore
+            auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result();
+
+            if (!write_result.ok()) {
+                std::cerr << "Error writing image: " << write_result.status() << std::endl;
+            }
+
+            break;
+        }
+        case (2): {
+            auto data_array = tensorstore::Array(py_image.mutable_unchecked<std::uint16_t>().data(0), _image_shape, tensorstore::c_order);
+
+            auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result();
+            if (!write_result.ok()) {
+                std::cerr << "Error writing image: " << write_result.status() << std::endl;
+            }
+            break;
+        }
+        case (4): {
+            auto data_array = tensorstore::Array(py_image.mutable_unchecked<std::uint32_t>().data(0), _image_shape, tensorstore::c_order);
+
+            auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result();
+            if (!write_result.ok()) {
+                std::cerr << "Error writing image: " << write_result.status() << std::endl;
+            }
+            break;
+        }
+        case (8): {
+            auto data_array = tensorstore::Array(py_image.mutable_unchecked<std::uint64_t>().data(0), _image_shape, tensorstore::c_order);
+
+            auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result();
+            if (!write_result.ok()) {
+                std::cerr << "Error writing image: " << write_result.status() << std::endl;
+            }
+ break; + } + case (16): { + auto data_array = tensorstore::Array(py_image.mutable_unchecked().data(0), _image_shape, tensorstore::c_order); + + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result(); + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } + break; + } + case (32): { + auto data_array = tensorstore::Array(py_image.mutable_unchecked().data(0), _image_shape, tensorstore::c_order); + + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result(); + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } + break; + } + case (64): { + auto data_array = tensorstore::Array(py_image.mutable_unchecked().data(0), _image_shape, tensorstore::c_order); + + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result(); + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } + break; + } + case (128): { + auto data_array = tensorstore::Array(py_image.mutable_unchecked().data(0), _image_shape, tensorstore::c_order); + + // Write data array to TensorStore + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result(); + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } + break; + } + case (256): { + auto data_array = tensorstore::Array(py_image.mutable_unchecked().data(0), _image_shape, tensorstore::c_order); + + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result(); + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } + break; + } + case (512): { + auto data_array = tensorstore::Array(py_image.mutable_unchecked().data(0), _image_shape, tensorstore::c_order); + + auto write_result = tensorstore::Write(tensorstore::UnownedToShared(data_array), _source).result(); + if (!write_result.ok()) { + std::cerr << "Error writing image: " << write_result.status() << std::endl; + } + break; + } + default: { + // should not be reached + std::cerr << "Error writing image: unsupported data type" << std::endl; + } + } + } +} diff --git a/src/cpp/writer/tswriter.h b/src/cpp/writer/tswriter.h new file mode 100644 index 0000000..63e59e2 --- /dev/null +++ b/src/cpp/writer/tswriter.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include "../reader/sequence.h" + +#include "tensorstore/tensorstore.h" +#include "tensorstore/context.h" +#include "tensorstore/array.h" +#include "tensorstore/driver/zarr/dtype.h" +#include "tensorstore/index_space/dim_expression.h" +#include "tensorstore/kvstore/kvstore.h" +#include "tensorstore/open.h" + +#include +#include +#include + +namespace py = pybind11; + +namespace bfiocpp{ + +class TsWriterCPP{ +public: + TsWriterCPP(const std::string& fname, const std::vector& image_shape, const std::vector& chunk_shape, const std::string& dtype); + + void write_image(py::array& py_image); + +private: + std::string _filename; + + std::vector _image_shape, _chunk_shape; + + uint16_t _dtype_code; + + tensorstore::TensorStore _source; + +}; +} + diff --git a/src/python/bfiocpp/__init__.py b/src/python/bfiocpp/__init__.py index 955243a..5e89052 100644 --- a/src/python/bfiocpp/__init__.py +++ b/src/python/bfiocpp/__init__.py @@ -1,4 +1,5 @@ from .tsreader 
diff --git a/src/python/bfiocpp/__init__.py b/src/python/bfiocpp/__init__.py
index 955243a..5e89052 100644
--- a/src/python/bfiocpp/__init__.py
+++ b/src/python/bfiocpp/__init__.py
@@ -1,4 +1,5 @@
 from .tsreader import TSReader, Seq, FileType, get_ome_xml  # NOQA: F401
+from .tswriter import TSWriter  # NOQA: F401
 from . import _version

 __version__ = _version.get_versions()["version"]
diff --git a/src/python/bfiocpp/tswriter.py b/src/python/bfiocpp/tswriter.py
new file mode 100644
index 0000000..f43a402
--- /dev/null
+++ b/src/python/bfiocpp/tswriter.py
@@ -0,0 +1,62 @@
+import numpy as np
+from .libbfiocpp import TsWriterCPP
+
+
+class TSWriter:
+
+    def __init__(
+        self, file_name: str, image_shape: list, chunk_shape: list, dtype: np.dtype
+    ):
+        """Initialize tensorstore Zarr writer
+
+        file_name: Path to write file to
+        """
+
+        self._image_writer: TsWriterCPP = TsWriterCPP(
+            file_name, image_shape, chunk_shape, str(dtype)
+        )
+
+    def write_image(self, image_data: np.ndarray):
+        """Write image data to file
+
+        image_data: 5d numpy array containing image data
+        """
+
+        if not isinstance(image_data, np.ndarray):
+            raise ValueError("Image data must be a 5d numpy array")
+
+        try:
+            self._image_writer.write(image_data.flatten())
+        except Exception as e:
+            raise RuntimeError(f"Error writing image data: {e}")
+
+    def close(self):
+
+        pass
+
+    def __enter__(self) -> "TSWriter":
+        """Handle entrance to a context manager.
+
+        This code is called when a `with` statement is used.
+        """
+
+        return self
+
+    def __del__(self) -> None:
+        """Handle object deletion.
+
+        This code runs when the object is deleted.
+        """
+
+        self.close()
+
+    def __exit__(self, type_class, value, traceback) -> None:
+        """Handle exit from the context manager.
+
+        This code runs when exiting a `with` statement.
+        """
+
+        self.close()
diff --git a/tests/test_read.py b/tests/test_read.py
index dff293b..c45c556 100644
--- a/tests/test_read.py
+++ b/tests/test_read.py
@@ -59,12 +59,11 @@
     ) as bw:
         bw[:] = br[:]

-
 def tearDownModule():
     """Remove test images"""

     logger.info("teardown - Removing test images...")
-    # shutil.rmtree(TEST_DIR)
+    shutil.rmtree(TEST_DIR)


 class TestOmeTiffRead(unittest.TestCase):
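Because TSWriter defines __enter__ and __exit__, it can also be used as a context manager. A short sketch with placeholder path, shape, and dtype values:

import numpy as np
from bfiocpp import TSWriter

data = np.zeros((1, 1, 1, 128, 128), dtype=np.uint16)  # placeholder image

with TSWriter("scratch.zarr", data.shape, data.shape, data.dtype) as bw:
    bw.write_image(data)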
diff --git a/tests/test_write.py b/tests/test_write.py
new file mode 100644
index 0000000..55a6af4
--- /dev/null
+++ b/tests/test_write.py
@@ -0,0 +1,100 @@
+from bfiocpp import TSReader, TSWriter, Seq, FileType
+import unittest
+import requests, pathlib, shutil, logging, sys
+import bfio
+import numpy as np
+import tempfile, os
+from ome_zarr.utils import download as zarr_download
+
+TEST_IMAGES = {
+    "5025551.zarr": "https://uk1s3.embassy.ebi.ac.uk/idr/zarr/v0.4/idr0054A/5025551.zarr",
+}
+
+TEST_DIR = pathlib.Path(__file__).with_name("data")
+
+logging.basicConfig(
+    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
+    datefmt="%d-%b-%y %H:%M:%S",
+)
+logger = logging.getLogger("bfio.test")
+
+if "-v" in sys.argv:
+    logger.setLevel(logging.INFO)
+
+
+def setUpModule():
+    """Download images for testing"""
+    TEST_DIR.mkdir(exist_ok=True)
+
+    for file, url in TEST_IMAGES.items():
+        logger.info(f"setup - Downloading: {file}")
+
+        if not file.endswith(".zarr"):
+            if TEST_DIR.joinpath(file).exists():
+                continue
+
+            r = requests.get(url)
+
+            with open(TEST_DIR.joinpath(file), "wb") as fw:
+                fw.write(r.content)
+        else:
+            if TEST_DIR.joinpath(file).exists():
+                shutil.rmtree(TEST_DIR.joinpath(file))
+            zarr_download(url, str(TEST_DIR))
+
+
+def tearDownModule():
+    """Remove test images"""
+
+    logger.info("teardown - Removing test images...")
+    shutil.rmtree(TEST_DIR)
+
+
+class TestZarrWrite(unittest.TestCase):
+
+    def test_write_zarr_2d(self):
+        """test_write_zarr_2d - Write Zarr using TSWriter"""
+        br = TSReader(
+            str(TEST_DIR.joinpath("5025551.zarr/0")),
+            FileType.OmeZarr,
+            "",
+        )
+        assert br._X == 2702
+        assert br._Y == 2700
+        assert br._Z == 1
+        assert br._C == 27
+        assert br._T == 1
+
+        rows = Seq(0, br._Y - 1, 1)
+        cols = Seq(0, br._X - 1, 1)
+        layers = Seq(0, 0, 1)
+        channels = Seq(0, 0, 1)
+        tsteps = Seq(0, 0, 1)
+        tmp = br.data(rows, cols, layers, channels, tsteps)
+
+        with tempfile.TemporaryDirectory() as dir:
+            # Use the temporary directory
+            test_file_path = os.path.join(dir, "out/test.ome.zarr")
+
+            bw = TSWriter(test_file_path, tmp.shape, tmp.shape, str(tmp.dtype))
+            bw.write_image(tmp)
+            bw.close()
+
+            br = TSReader(
+                str(test_file_path),
+                FileType.OmeZarr,
+                "",
+            )
+
+            rows = Seq(0, br._Y - 1, 1)
+            cols = Seq(0, br._X - 1, 1)
+            layers = Seq(0, 0, 1)
+            channels = Seq(0, 0, 1)
+            tsteps = Seq(0, 0, 1)
+            tmp = br.data(rows, cols, layers, channels, tsteps)
+
+            assert tmp.dtype == np.uint8
+            assert tmp.sum() == 183750394
+            assert tmp.shape == (1, 1, 1, 2700, 2702)
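Since the writer emits a plain zarr v2 array, the output can also be sanity-checked with the zarr package that the wheel-test requirements above now include. A sketch, assuming a store already written to a placeholder path:

import zarr

arr = zarr.open("path/to/test.ome.zarr", mode="r")  # placeholder path
print(arr.shape, arr.dtype, arr[:].sum())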