Skip to content

Commit

Permalink
Endianness support in binary2zarr and ncvar2zarr
Browse files Browse the repository at this point in the history
  • Loading branch information
LDeakin committed Mar 17, 2024
1 parent 3bf902b commit de39699
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 46 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- `zarrs_filter`: apply simple image filters (transformations) to an array
- `zarrs_ome`: convert an array to OME-Zarr
- Add `--endianness` option to `zarrs_binary2zarr`

### Changed
- **Breaking**: put various tools behind feature flags
- Bump `rayon_iter_concurrent_limit` to 0.2.0
- Various minor fixes to clap help
- `zarrs_reencode`: add `--attributes-append` to re-encoding options

### Fixed
- Correctly account for endianness of netCDF variable in `zarrs_ncvar2zarr`

## [0.3.0] - 2024-02-22

### Added
Expand Down
84 changes: 45 additions & 39 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ sysinfo = "0.30.6"
tempfile = "3.10.1"
thiserror = "1.0.57"
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread"] }
zarrs = { version = "0.12.3", features = [] }
zarrs = { version = "0.12.5", features = [] }

[[bin]]
name = "zarrs_benchmark_read_sync"
Expand Down
3 changes: 2 additions & 1 deletion TODO.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

## TODO
- zarrs_reencode convert data type
- `zarrs_reencode`: convert data type
- `zarrs_filter`: merge multiple images, channel first or last
36 changes: 33 additions & 3 deletions src/bin/zarrs_binary2zarr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ use zarrs_tools::{get_array_builder, ZarrEncodingArgs};

use zarrs::{
array::{
codec::{ArrayCodecTraits, CodecOptionsBuilder},
codec::{
array_to_bytes::bytes::{reverse_endianness, Endianness},
ArrayCodecTraits, CodecOptionsBuilder,
},
concurrency::RecommendedConcurrency,
Array, DimensionName,
},
Expand All @@ -22,6 +25,10 @@ use zarrs::{
#[command(author, version)]
#[allow(rustdoc::bare_urls)]
struct Cli {
/// The endianness of the binary data. If unspecified, it is assumed to match the host endianness.
#[arg(long, value_parser = parse_endianness)]
endianness: Option<Endianness>,

#[command(flatten)]
encoding: ZarrEncodingArgs,

Expand Down Expand Up @@ -56,7 +63,24 @@ struct Cli {
// file: Vec<PathBuf>,
}

fn stdin_to_array(array: &Array<FilesystemStore>, concurrent_chunks: Option<usize>) -> usize {
/// Parse the value supplied to the `--endianness` command line option.
///
/// Only the exact (case-sensitive) strings `"little"` and `"big"` are accepted;
/// they map to [`Endianness::Little`] and [`Endianness::Big`] respectively.
///
/// # Errors
/// Returns a [`std::io::Error`] of kind [`std::io::ErrorKind::Other`] for any other input.
fn parse_endianness(endianness: &str) -> std::io::Result<Endianness> {
    match endianness {
        "little" => Ok(Endianness::Little),
        "big" => Ok(Endianness::Big),
        // Anything else is rejected with a fixed message.
        _ => Err(std::io::Error::new(
            std::io::ErrorKind::Other,
            "Endianness must be little or big",
        )),
    }
}

fn stdin_to_array(
array: &Array<FilesystemStore>,
endianness: Option<Endianness>,
concurrent_chunks: Option<usize>,
) -> usize {
let data_type_size = array.data_type().size();
let dimensionality = array.chunk_grid().dimensionality();
let array_shape = array.shape();
Expand Down Expand Up @@ -128,6 +152,12 @@ fn stdin_to_array(array: &Array<FilesystemStore>, concurrent_chunks: Option<usiz

drop(idxm);

if let Some(endianness) = endianness {
if !endianness.is_native() {
reverse_endianness(&mut subset_bytes, array.data_type());
}
}

array
.store_array_subset_opt(&array_subset, subset_bytes, &codec_options)
.unwrap();
Expand Down Expand Up @@ -160,7 +190,7 @@ fn main() {

// Read stdin to the array and write chunks/shards
let start = std::time::Instant::now();
let bytes_read: usize = stdin_to_array(&array, cli.concurrent_chunks);
let bytes_read: usize = stdin_to_array(&array, cli.endianness, cli.concurrent_chunks);
let duration_s = start.elapsed().as_secs_f32();

// Output stats
Expand Down
Loading

0 comments on commit de39699

Please sign in to comment.