From 329134e59f0a1ffe8fb3bec58702ba5cd47e79af Mon Sep 17 00:00:00 2001 From: Tim Coalson Date: Fri, 1 Mar 2024 17:26:46 -0600 Subject: [PATCH] simplify chunked IO, fix vertex/voxel reading uniqueness checks, check for multiple label dims (warn on reading, error on writing) --- cifti_read.m | 62 ++++++++++++++++----------------------- cifti_write.m | 54 ++++++++++++---------------------- private/cifti_parse_xml.m | 14 +++++++-- private/cifti_write_xml.m | 7 +++++ 4 files changed, 63 insertions(+), 74 deletions(-) diff --git a/cifti_read.m b/cifti_read.m index deaae61..6471498 100644 --- a/cifti_read.m +++ b/cifti_read.m @@ -93,51 +93,38 @@ if(fseek(fid, hdr.vox_offset, 'bof') ~= 0) error(['failed to seek to start of data in file ' filename]); end - %always output as float32, maybe add a feature later + %always convert to float32, maybe add a feature later %use 'cdata' to be compatible with old ciftiopen max_elems = 128 * 1024 * 1024 / 4; %when reading as float32, use only 128MiB extra memory when reading (or the size of a row, if that manages to be larger) - if prod(hdr.dim(6:(hdr.dim(1) + 1))) <= max_elems + if prod(dims_c) <= max_elems %file is small, use the simple code to read it all in one call %permute to match ciftiopen: cifti "rows" matching matlab rows %hack: 3:2 produces empty array, 3:3 produces [3] outstruct.cdata = permute(do_nifti_scaling(fread_excepting(fid, hdr.dim(6:(hdr.dim(1) + 1)), [intype '=>float32'], filename), hdr), [2 1 3:(hdr.dim(1) - 4)]); else - outstruct.cdata = myzeros(dims_m); %avoid inconsistent 1-dimension handling, allow column vector, default to float32 - max_rows = max(1, min(hdr.dim(7), floor(max_elems / hdr.dim(6)))); - switch hdr.dim(1) %dim[0] - case 6 - %even out the passes to use about the same memory - num_passes = ceil(hdr.dim(7) / max_rows); - chunk_rows = ceil(hdr.dim(7) / num_passes); - for i = 1:chunk_rows:hdr.dim(7) - outstruct.cdata(i:min(hdr.dim(7), i + chunk_rows - 1), :) = do_nifti_scaling(fread_excepting(fid, [hdr.dim(6), min(chunk_rows, hdr.dim(7) - i + 1)], [intype '=>float32']), hdr)'; + %matlab indexing modes can't handle swapping first two dims while doing "some full planes plus a partial plane" per fread() + %reshape at the end would hit double the memory usage, and we want to avoid that + %so to avoid slow row-at-a-time loops, two cases: less than a plane, and multiple full planes + %with implicit third index and implicit flattening on final specified dimension, we can do these generically in a way that should work even for 4D+ (if it is ever supported) + outstruct.cdata = myzeros(dims_m); + max_rows = max(1, floor(max_elems / dims_c(1))); + if max_rows < dims_c(2) %less than a plane at a time, don't read cross-plane + num_passes = ceil(dims_c(2) / max_rows); %even out the passes + chunk_rows = ceil(prod(dims_c(2:end)) / num_passes); + total_planes = prod(dims_c(3:end)); + for plane = 1:total_planes + for chunkstart = 1:chunk_rows:dims_c(2) + outstruct.cdata(chunkstart:min(dims_c(2), chunkstart + chunk_rows - 1), :, plane) = do_nifti_scaling(fread_excepting(fid, [dims_c(1), min(chunk_rows, dims_c(2) - chunkstart + 1)], [intype '=>float32']), hdr)'; end - case 7 - %3D - this is all untested - if max_rows < hdr.dim(7) - %keep it simple, chunk each plane independently - num_passes = ceil(hdr.dim(7) / max_rows); - chunk_rows = ceil(hdr.dim(7) / num_passes); - for j = 1:hdr.dim(8) - for i = 1:chunk_rows:hdr.dim(7) - outstruct.cdata(i:min(hdr.dim(7), i + chunk_rows - 1), :, j) = do_nifti_scaling(fread_excepting(fid, [hdr.dim(6), min(chunk_rows, hdr.dim(7) - i + 1)], [intype '=>float32']), hdr)'; - end - end - else - %read multiple full planes per call - plane_elems = hdr.dim(7) * hdr.dim(6); - max_planes = max(1, min(hdr.dim(8), floor(max_elems / plane_elems))); - num_passes = ceil(hdr.dim(8) / max_planes); - chunk_planes = ceil(hdr.dim(8) / num_passes); - for j = 1:chunk_planes:hdr.dim(8) - outstruct.cdata(:, :, j:min(hdr.dim(8), j + chunk_planes - 1)) = permute(do_nifti_scaling(fread_excepting(fid, [hdr.dim(6:7), min(chunk_planes, hdr.dim(8) - j + 1)], [intype '=>float32']), hdr), [2 1 3]); - end - end - otherwise - %4D and beyond is not in the cifti-2 standard and is treated as an error in sanity_check_cdata - %but, if it ever is supported, warn and read it the memory-intensive way anyway - warning('cifti reading for 4 or more dimensions currently peaks at double the memory'); - outstruct.cdata = permute(do_nifti_scaling(fread_excepting(fid, hdr.dim(6:(hdr.dim(1) + 1)), [intype '=>float32'], filename), hdr), [2 1 3:(hdr.dim(1) - 4)]); + end + else + max_planes = max(1, floor(max_rows / dims_c(2))); %just in case the division does something dumb + total_planes = prod(dims_c(3:end)); + num_passes = ceil(total_planes / max_planes); + chunk_planes = ceil(total_planes / num_passes); + for chunkstart = 1:chunk_planes:total_planes + outstruct.cdata(:, :, chunkstart:min(total_planes, chunkstart + chunk_planes - 1)) = permute(do_nifti_scaling(fread_excepting(fid, [dims_c(1:2), min(chunk_planes, total_planes - chunkstart + 1)], [intype '=>float32']), hdr), [2 1 3]); + end end end end @@ -158,3 +145,4 @@ outzeros = zeros(dimarray(:)', 'single'); end end + diff --git a/cifti_write.m b/cifti_write.m index 0b40270..687f2d5 100644 --- a/cifti_write.m +++ b/cifti_write.m @@ -70,44 +70,28 @@ function cifti_write(cifti, filename, varargin) %FIXME: if we allow setting nifti scale/intercept, that needs to be added to this code max_elems = 128 * 1024 * 1024 / 4; %assuming float32, use only 128MiB extra memory when writing (or the size of a row, if that manages to be larger) if numel(cifti.cdata) <= max_elems - %file is small, use simple 'permute' writing code + %file is small, use simple whole-array writing code fwrite_excepting(fid, permute(cifti.cdata, [2 1 3:length(size(cifti.cdata))]), 'float32'); else - max_rows = max(1, min(size(cifti.cdata, 1), floor(max_elems / size(cifti.cdata, 2)))); - switch length(size(cifti.cdata)) - case 2 - %even out the passes to use about the same memory - num_passes = ceil(size(cifti.cdata, 1) / max_rows); - chunk_rows = ceil(size(cifti.cdata, 1) / num_passes); - for i = 1:chunk_rows:size(cifti.cdata, 1) - fwrite_excepting(fid, cifti.cdata(i:min(size(cifti.cdata, 1), i + chunk_rows - 1), :)', 'float32'); + max_rows = max(1, floor(max_elems / dims_c(1))); + if max_rows < dims_c(2) %less than a plane at a time, don't write cross-plane + num_passes = ceil(dims_c(2) / max_rows); %even out the passes + chunk_rows = ceil(prod(dims_c(2:end)) / num_passes); + total_planes = prod(dims_c(3:end)); + for plane = 1:total_planes + for chunkstart = 1:chunk_rows:dims_c(2) + %since it is part of one plane, technically matlab will return a 2D matrix, and we could just use transpose... + fwrite_excepting(fid, permute(cifti.cdata(chunkstart:min(dims_c(2), chunkstart + chunk_rows - 1), :, plane), [2 1 3]), 'float32'); end - case 3 - %3D - this is all untested - if max_rows < size(cifti.cdata, 1) - %keep it simple, chunk each plane independently - num_passes = ceil(size(cifti.cdata, 1) / max_rows); - chunk_rows = ceil(size(cifti.cdata, 1) / num_passes); - for j = 1:size(cifti.cdata, 3) - for i = 1:chunk_rows:size(cifti.cdata, 1) - fwrite_excepting(fid, cifti.cdata(i:min(size(cifti.cdata, 1), i + chunk_rows - 1), :, j)', 'float32'); - end - end - else - %write multiple full planes per call - plane_elems = size(cifti.cdata, 1) * size(cifti.cdata, 2); - max_planes = max(1, min(size(cifti.cdata, 3), floor(max_elems / plane_elems))); - num_passes = ceil(size(cifti.cdata, 3) / max_planes); - chunk_planes = ceil(size(cifti.cdata, 3) / num_passes); - for j = 1:chunk_planes:size(cifti.cdata, 3) - fwrite_excepting(fid, permute(cifti.cdata(:, :, j:min(size(cifti.cdata, 3), j + chunk_planes - 1)), [2 1 3]), 'float32'); - end - end - otherwise - %4D and beyond is not in the cifti-2 standard and is treated as an error in sanity_check_cdata - %but, if it ever is supported, warn and write it the memory-intensive way anyway - warning('cifti writing for 4 or more dimensions currently peaks at double the memory'); - fwrite_excepting(fid, permute(cifti.cdata, [2 1 3:length(size(cifti.cdata))]), 'float32'); + end + else + max_planes = max(1, floor(max_rows / dims_c(2))); %just in case the division does something dumb + total_planes = prod(dims_c(3:end)); %flatten all dimensions 3+ + num_passes = ceil(total_planes / max_planes); + chunk_planes = ceil(total_planes / num_passes); + for chunkstart = 1:chunk_planes:total_planes + fwrite_excepting(fid, permute(cifti.cdata(:, :, chunkstart:min(total_planes, chunkstart + chunk_planes - 1)), [2 1 3]), 'float32'); + end end end end diff --git a/private/cifti_parse_xml.m b/private/cifti_parse_xml.m index 325b74b..271edc9 100644 --- a/private/cifti_parse_xml.m +++ b/private/cifti_parse_xml.m @@ -24,8 +24,15 @@ outstruct.metadata = cifti_parse_metadata(tree, find(tree, '/CIFTI/Matrix/MetaData'), filename); map_uids = find(tree, '/CIFTI/Matrix/MatrixIndicesMap'); outstruct.diminfo = {}; + haveLabels = false; for map_uid = map_uids [map, applies] = cifti_parse_map(tree, map_uid, filename); + if strcmp(map.type, 'labels') + if haveLabels || length(applies) > 1 + mywarn('more than one labels dimension', filename); + end + haveLabels = true; + end appliesmod = applies; appliesmod(applies < 3) = 3 - applies(applies < 3); %NOTE: swap 1 and 2 to match ciftiopen cdata convention... outstruct.diminfo(appliesmod) = {map}; %lhs {} doesn't support multi-assignment, but () on cell does... @@ -182,6 +189,9 @@ function mywarn(msg, filename) if length(model.vertlist) ~= model.count myerror('VertexIndices does not match IndexCount', filename); end + if length(unique(model.vertlist)) ~= length(model.vertlist) + myerror('brain models have repeated or overlapping voxels', filename); + end case 'vox' if isfield(model, 'numvert') mywarn('SurfaceNumberOfVertices attribute present in Voxel type BrainModel', filename); @@ -234,7 +244,7 @@ function mywarn(msg, filename) if length(unique(allstructs)) ~= length(allstructs) myerror('brain models specify a structure more than once', filename); end - if size(unique(allvox, 'rows'), 1) ~= size(allvox, 1) + if size(unique(allvox', 'rows'), 1) ~= size(allvox, 2) myerror('brain models have repeated or overlapping voxels', filename); end end @@ -436,7 +446,7 @@ function mywarn(msg, filename) map.parcels(i) = thisparcel; end map.length = length(map.parcels); - allvox = horzcat(map.parcels.voxlist); + allvox = [map.parcels.voxlist]; if size(unique(allvox', 'rows'), 1) ~= size(allvox, 2) myerror('parcels have repeated or overlapping voxels', filename); end diff --git a/private/cifti_write_xml.m b/private/cifti_write_xml.m index b5ec46e..04a1a37 100644 --- a/private/cifti_write_xml.m +++ b/private/cifti_write_xml.m @@ -31,8 +31,15 @@ end function tree = cifti_write_maps(cifti, tree, matrix_uid) + haveLabels = false; mapused = false(length(cifti.diminfo), 1); for i = 1:length(cifti.diminfo) + if strcmp(cifti.diminfo{i}.type, 'labels') + if haveLabels + error('cifti files are not allowed to have more than one labels dimension, change one of them to scalar'); + end + haveLabels = true; + end if mapused(i) continue; end