diff --git a/bio2zarr/plink.py b/bio2zarr/plink.py index 3aa9bcf..11e45b2 100644 --- a/bio2zarr/plink.py +++ b/bio2zarr/plink.py @@ -86,7 +86,7 @@ def convert( # we're not using the best Blosc settings for genotypes here. default_compressor = numcodecs.Blosc(cname="zstd", clevel=7) - a = root.create_dataset( + a = root.array( "sample_id", data=bed.iid, shape=bed.iid.shape, @@ -99,7 +99,7 @@ def convert( # TODO encode these in slices - but read them in one go to avoid # fetching repeatedly from bim file - a = root.create_dataset( + a = root.array( "variant_position", data=bed.bp_position, shape=bed.bp_position.shape, @@ -111,7 +111,7 @@ def convert( logger.debug("encoded variant_position") alleles = np.stack([bed.allele_1, bed.allele_2], axis=1) - a = root.create_dataset( + a = root.array( "variant_allele", data=alleles, shape=alleles.shape, diff --git a/bio2zarr/vcf2zarr/vcz.py b/bio2zarr/vcf2zarr/vcz.py index e9bdbd0..53b2592 100644 --- a/bio2zarr/vcf2zarr/vcz.py +++ b/bio2zarr/vcf2zarr/vcz.py @@ -572,7 +572,7 @@ def init( def encode_samples(self, root): if self.schema.samples != self.icf.metadata.samples: raise ValueError("Subsetting or reordering samples not supported currently") - array = root.create_dataset( + array = root.array( "sample_id", data=[sample.id for sample in self.schema.samples], shape=len(self.schema.samples), @@ -584,7 +584,7 @@ def encode_samples(self, root): logger.debug("Samples done") def encode_contig_id(self, root): - array = root.create_dataset( + array = root.array( "contig_id", data=[contig.id for contig in self.schema.contigs], shape=len(self.schema.contigs), @@ -593,7 +593,7 @@ def encode_contig_id(self, root): ) array.attrs["_ARRAY_DIMENSIONS"] = ["contigs"] if all(contig.length is not None for contig in self.schema.contigs): - array = root.create_dataset( + array = root.array( "contig_length", data=[contig.length for contig in self.schema.contigs], shape=len(self.schema.contigs), @@ -605,7 +605,7 @@ def encode_contig_id(self, root): def encode_filter_id(self, root): # TODO need a way to store description also # https://github.com/sgkit-dev/vcf-zarr-spec/issues/19 - array = root.create_dataset( + array = root.array( "filter_id", data=[filt.id for filt in self.schema.filters], shape=len(self.schema.filters), @@ -955,7 +955,7 @@ def create_index(self): kwargs = {} if not zarr_v3(): kwargs["dimension_separator"] = self.metadata.dimension_separator - array = root.create_dataset( + array = root.array( "region_index", data=index, shape=index.shape,