-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataset.jl
155 lines (124 loc) · 4.72 KB
/
dataset.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# Name of the dataset, as reported by Zarr for the underlying group.
function CDM.name(v::ZarrDataset)
    return Zarr.zname(v.zgroup)
end
# Keys of the dataset are the names of the arrays held by its Zarr group.
function Base.keys(ds::ZarrDataset)
    return keys(ds.zgroup.arrays)
end
# Whether the Zarr group holds an array named `varname` (given as a symbol or
# a string; it is converted to a `String` before the lookup).
function Base.haskey(ds::ZarrDataset, varname::SymbolOrString)
    key = String(varname)
    return haskey(ds.zgroup.arrays, key)
end
# Look up the Zarr array stored under `varname` and wrap it, together with the
# dataset it belongs to, in a `ZarrVariable`. The wrapper's type parameters
# are the array's element type, its number of dimensions, the concrete array
# type and the concrete dataset type.
function CDM.variable(ds::ZarrDataset, varname::SymbolOrString)
    za = ds.zgroup.arrays[String(varname)]
    T = eltype(za)
    N = ndims(za)
    return ZarrVariable{T,N,typeof(za),typeof(ds)}(za, ds)
end
# Names of all dimensions known to the dataset, returned as a tuple of strings
# (the dimension table itself is keyed by symbols).
function CDM.dimnames(ds::ZarrDataset)
    return Tuple(String(dn) for dn in keys(ds.dimensions))
end
# function CDM.unlimited(ds::ZarrDataset)
# ul = ds.unlimited
# if ds.parentdataset != nothing
# append!(ul,unlimited(ds.parentdataset))
# end
# return ul
# end
# function _dim(ds::ZarrDataset,dimname::SymbolOrString)
# if haskey(ds.dimensions,name)
# return ds.dimensions[name]
# elseif ds.parentdataset !== nothing
# return _dim(ds.parentdataset,name)
# end
# return nothing
# end
# Length of the dimension `dimname`; the name is converted to a `Symbol`
# because that is how the dimension table is keyed.
function CDM.dim(ds::ZarrDataset, dimname::SymbolOrString)
    key = Symbol(dimname)
    return ds.dimensions[key]
end
# Register a new dimension `dimname` of length `dimlen` in the dataset's
# dimension table and return `dimlen`.
function CDM.defDim(ds::ZarrDataset, dimname::SymbolOrString, dimlen)
    dn = Symbol(dimname)

    # A dimension may only be defined once.
    @assert !haskey(ds.dimensions,dn)

    ds.dimensions[dn] = dimlen
    return dimlen
end
# Names of the variables, i.e. of the arrays stored in the Zarr group.
function CDM.varnames(ds::ZarrDataset)
    return keys(ds.zgroup.arrays)
end
# Names of the attributes attached to the dataset's Zarr group.
function CDM.attribnames(ds::ZarrDataset)
    return keys(ds.zgroup.attrs)
end
# Value of the group attribute `name` (looked up by its string form).
function CDM.attrib(ds::ZarrDataset, name::SymbolOrString)
    key = String(name)
    return ds.zgroup.attrs[key]
end
# Set the group attribute `name` to `value` and persist the change: the
# complete attribute dictionary is re-serialized as JSON and written to the
# ".zattrs" entry of the group's storage. Returns the bytes that were written.
function CDM.defAttrib(ds::ZarrDataset, name::SymbolOrString, value)
    # Attributes can only be defined on datasets opened for writing.
    @assert iswritable(ds)

    group = ds.zgroup
    group.attrs[String(name)] = value

    buf = IOBuffer()
    JSON.print(buf, group.attrs)
    payload = take!(buf)

    group.storage[group.path, ".zattrs"] = payload
    return payload
end
# Names of the sub-groups contained in the dataset's Zarr group.
function CDM.groupnames(ds::ZarrDataset)
    return keys(ds.zgroup.groups)
end
# Build the child dataset for the sub-group `name`, passing `ds` along as the
# third argument (presumably the parent dataset).
# NOTE(review): this relies on a three-argument method
# `ZarrDataset(groups, name::String, parent)` which is not defined in this
# part of the file -- confirm that it exists elsewhere.
function CDM.group(ds::ZarrDataset, name::SymbolOrString)
    subgroups = ds.zgroup.groups
    return ZarrDataset(subgroups, String(name), ds)
end
# Accessor for the `parentdataset` field of the dataset.
function CDM.parentdataset(ds::ZarrDataset)
    return ds.parentdataset
end
# Accessor for the `iswritable` flag stored in the dataset.
function CDM.iswritable(ds::ZarrDataset)
    return ds.iswritable
end
# Accessor for the `maskingvalue` stored in the dataset.
function CDM.maskingvalue(ds::ZarrDataset)
    return ds.maskingvalue
end
"""
    ds = ZarrDataset(url::AbstractString,mode = "r";
                     _omitcode = 404,
                     maskingvalue = missing)
    ZarrDataset(f::Function,url::AbstractString,mode = "r";
                     maskingvalue = missing)

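Open the Zarr dataset at the URL or path `url`. The read mode (`mode = "r"`,
the default) opens an existing dataset read-only; the create mode
(`mode = "c"`) sets up a new, writable dataset backed by a local directory
store at `url`. `ds` supports the API of
[JuliaGeo/CommonDataModel.jl](https://github.com/JuliaGeo/CommonDataModel.jl).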
The experimental `_omitcode` keyword allows working around servers that return
an HTTP error code other than 404 for missing chunks.
Example:
```julia
using ZarrDatasets
url = "https://s3.waw3-1.cloudferro.com/mdl-arco-time-035/arco/MEDSEA_MULTIYEAR_PHY_006_004/med-cmcc-ssh-rean-d_202012/timeChunked.zarr"
ds = ZarrDataset(url);
# see the metadata
display(ds)
# load the variable time
time = ds["time"][:]
# load the attribute long_name for the variable zos
zos_long_name = ds["zos"].attrib["long_name"]
# load the global attribute
comment = ds.attrib["comment"]
# query the dimension of the variable zos
size(ds["zos"])
close(ds)
```
Example with a `do`-block:
```julia
using ZarrDatasets
url = "https://s3.waw3-1.cloudferro.com/mdl-arco-time-035/arco/MEDSEA_MULTIYEAR_PHY_006_004/med-cmcc-ssh-rean-d_202012/timeChunked.zarr"
zos1 = ZarrDataset(url) do ds
ds["zos"][:,:,end,1]
end # implicit call to close(ds)
```
"""
function ZarrDataset(url::AbstractString,mode = "r";
                     parentdataset = nothing,
                     _omitcode = 404,
                     maskingvalue = missing,
                     attrib = Dict(),
                     )
    # Reject unsupported modes up front. Without this check any mode other
    # than "r" or "c" skipped both branches below, left `zg` unassigned and
    # failed with an unhelpful `UndefVarError` at the final constructor call.
    if !(mode == "r" || mode == "c")
        throw(ArgumentError(
            "unsupported mode $(repr(mode)): use \"r\" (read) or \"c\" (create)"))
    end

    # Dimension name => length, kept in the order of first appearance.
    dimensions = OrderedDict{Symbol,Int}()
    iswritable = false

    if mode == "r"
        zg = Zarr.zopen(url,mode)

        # For HTTP-backed stores, register `_omitcode` as the status code that
        # signals a missing chunk (some servers do not answer 404 for those).
        if (zg.storage isa Zarr.HTTPStore) ||
            (zg.storage isa Zarr.ConsolidatedStore{Zarr.HTTPStore})
            @debug "omit chunks on HTTP error" _omitcode
            Zarr.missing_chunk_return_code!(zg.storage,_omitcode)
        end

        # Derive the dimension table from the `_ARRAY_DIMENSIONS` attribute of
        # every array. The names are reversed before being paired with
        # `size(zarray)` -- presumably because the attribute lists them in
        # row-major order while `size` is column-major (TODO confirm).
        for (_,zarray) in zg.arrays
            for (dimname,dimlen) in zip(reverse(zarray.attrs["_ARRAY_DIMENSIONS"]),size(zarray))
                dn = Symbol(dimname)
                if haskey(dimensions,dn)
                    # A dimension shared by several arrays must have the same
                    # length in all of them.
                    @assert dimensions[dn] == dimlen
                else
                    dimensions[dn] = dimlen
                end
            end
        end
    else
        # mode == "c": create a new group in a directory store at `url`,
        # initialised with the attributes given in `attrib`.
        store = Zarr.DirectoryStore(url)
        zg = zgroup(store, "",attrs = Dict(attrib))
        iswritable = true
    end

    return ZarrDataset(zg,parentdataset,dimensions,iswritable,maskingvalue)
end
# Multi-file entry point: an array of paths/URLs is handed, together with all
# remaining positional and keyword arguments, to `MFDataset`, with
# `ZarrDataset` as its first argument.
function ZarrDataset(fnames::AbstractArray{<:AbstractString,N}, args...; kwargs...) where N
    return MFDataset(ZarrDataset, fnames, args...; kwargs...)
end
# `do`-block form: open the dataset with the given arguments, apply `f` to it
# and return the result of `f`. The dataset is closed afterwards in all cases,
# including when `f` throws.
function ZarrDataset(f::Function, args...; kwargs...)
    dataset = ZarrDataset(args...; kwargs...)
    try
        return f(dataset)
    finally
        close(dataset)
    end
end