From 89764b07c6c43eb3cc9563e18805783b83dd6c1e Mon Sep 17 00:00:00 2001
From: Sam Han
Date: Sat, 27 Nov 2021 19:32:52 +0800
Subject: [PATCH 1/2] pdef and template

---
 xgrads/io.py | 109 ++++++++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 53 deletions(-)

diff --git a/xgrads/io.py b/xgrads/io.py
index 75678f0..401556a 100644
--- a/xgrads/io.py
+++ b/xgrads/io.py
@@ -60,18 +60,18 @@ def open_mfdataset(paths, parallel=False, encoding='GBK'):
         open_ = dask.delayed(open_CtlDataset)
     else:
         open_ = open_CtlDataset
-    
+
     datasets = [open_(p, encoding=encoding) for p in paths]
 
     if parallel:
         # calling compute here will return the datasets/file_objs lists,
         # the underlying datasets will still be stored as dask arrays
         datasets = dask.compute(datasets)
-        
+
         return xr.concat(datasets[0], dim='time')
-    
+
     combined = xr.concat(datasets, dim='time')
-    
+
     return combined
 
 
@@ -81,14 +81,14 @@ def open_CtlDataset(desfile, returnctl=False, encoding='GBK'):
     Open a 4D dataset with a descriptor file end with .ctl and
     return a xarray.Dataset.  This also uses the dask to chunk
     very large dataset, which is similar to the xarray.open_dataset.
-    
+
     Parameters
     ----------
     desfile: string
         Path to the descriptor file end with .ctl or .cts
     returnctl: bool
         Return dset and ctl as a tuple
-    
+
     Returns
     -------
     dset : xarray.Dataset
@@ -99,34 +99,34 @@ def open_CtlDataset(desfile, returnctl=False, encoding='GBK'):
     if isinstance(desfile, str):
         if not desfile.endswith('.ctl'):
             raise Exception('unsupported file, suffix should be .ctl')
-        
+
         ctl = CtlDescriptor(encoding=encoding, file=desfile)
     elif isinstance(desfile, CtlDescriptor):
         ctl = desfile
     else:
         raise Exception('unsupported type of input ('+str(type(desfile))+'), '
                         '[CtlDescriptor or str] are allowed')
-    
+
     if ctl.template:
         tcount = len(ctl.tdef.samples) # number of total time count
         tcPerf = [] # number of time count per file
-        
+
         for file in ctl.dsetPath:
             fsize = os.path.getsize(file)
-            
+
             if fsize % ctl.tRecLength != 0:
                 raise Exception('incomplete file for ' + file +
                                 ' (not multiple of ' + str(ctl.tRecLength) +
                                 ' bytes)')
-            
+
             tcPerf.append(fsize // ctl.tRecLength)
-        
+
         total_size = sum(tcPerf)
-        
+
         if total_size < tcount:
             raise Exception('no enough files for ' + str(tcount) +
                             ' time records')
-        
+
         # get time record number in each file
         rem = tcount
         idx = 0
@@ -138,25 +138,25 @@ def open_CtlDataset(desfile, returnctl=False, encoding='GBK'):
         tcPerf_m = tcPerf[:idx+1]
         tcPerf_m[idx] = tcPerf[idx ] + rem
-        
+
         # print(ctl.dsetPath)
         # print(tcPerf)
         # print(tcPerf_m)
-        
+
         binData = __read_template_as_dask(ctl, tcPerf_m)
-        
+
     else:
         expect = ctl.tRecLength * ctl.tdef.length()
         actual = os.path.getsize(ctl.dsetPath)
-        
+
         if expect != actual:
             print('WARNING: expected binary file size: {0}, actual size: {1}'
                   .format(expect, actual))
-        
+
         binData = __read_as_dask(ctl)
 
     variables = []
-    
+
     if ctl.pdef is None:
         for m, v in enumerate(ctl.vdef):
             if v.dependZ:
@@ -182,18 +182,18 @@ def open_CtlDataset(desfile, returnctl=False, encoding='GBK'):
         else:
             PDEF = ctl.pdef
-        
+
         if PDEF.proj in ['lcc', 'lccr']:
             ycoord = np.linspace(0, (PDEF.jsize-1) * PDEF.dx, PDEF.jsize)
             xcoord = np.linspace(0, (PDEF.isize-1) * PDEF.dy, PDEF.isize)
-        
+
         elif PDEF.proj in ['sps', 'nps']:
             inc = PDEF.gridinc * 1000 # change unit from km to m
 
             ycoord = np.linspace(-(PDEF.jpole), (PDEF.jsize-PDEF.jpole),
                                  PDEF.jsize) * inc
             xcoord = np.linspace(-(PDEF.ipole), (PDEF.isize-PDEF.ipole),
                                  PDEF.isize) * inc
-        
+
         for m, v in enumerate(ctl.vdef):
             if v.dependZ:
                 da = xr.DataArray(name=v.name, data=binData[m],
@@ -225,10 +225,10 @@ def open_CtlDataset(desfile, returnctl=False, encoding='GBK'):
     dset.attrs['title'] = ctl.title
     dset.attrs['undef'] = ctl.undef
     dset.attrs['pdef' ] = 'None'
-    
+
     if ctl.pdef:
         dset.attrs['pdef' ] = ctl.pdef.proj
-    
+
     if returnctl:
         return dset, ctl
     else:
@@ -250,7 +250,7 @@ def __read_as_dask(dd):
 
     totalNum = sum([reduce(lambda x, y: x*y, (t,v.zcount,y,x))
                     for v in dd.vdef])
-    
+
     if dd.sequential:
         sequentialSize = x * y + 2
     else:
@@ -259,17 +259,17 @@ def __read_as_dask(dd):
     # print(totalNum * 4.0 / 1024.0 / 1024.0)
 
     binData = []
-    
+
     dtype = 'f4'
 
     for m, v in enumerate(dd.vdef):
         name = '@miniufo_' + tokenize(v, m)
-        
+
         if totalNum < (100 * 100 * 100 * 10): # about 40 MB, chunk all
             # print('small')
             chunk = (t, v.zcount, y, x)
             shape = (t, v.zcount, y, x)
-            
+
             dsk = {(name, 0, 0, 0, 0):
                    (__read_var, dd.dsetPath, v, dd.tRecLength,
                     None, None, dtype, sequentialSize)}
@@ -281,7 +281,7 @@ def __read_as_dask(dd):
             # print('large')
             chunk = (1, 1, y, x)
             shape = (t, v.zcount, y, x)
-            
+
             dsk = {(name, l, k, 0, 0):
                    (__read_var, dd.dsetPath, v, dd.tRecLength,
                     l, k, dtype, sequentialSize)
@@ -295,7 +295,7 @@ def __read_as_dask(dd):
             # print('between')
             chunk = (1, v.zcount, y, x)
             shape = (t, v.zcount, y, x)
-            
+
             dsk = {(name, l, 0, 0, 0):
                    (__read_var, dd.dsetPath, v, dd.tRecLength,
                     l, None, dtype, sequentialSize)
@@ -311,11 +311,14 @@ def __read_template_as_dask(dd, tcPerf):
     """
     Read template binary data and return as a dask array
     """
-    t, y, x = dd.tdef.length(), dd.ydef.length(), dd.xdef.length()
+    if dd.pdef is None:
+        t, y, x = dd.tdef.length(), dd.ydef.length(), dd.xdef.length()
+    else:
+        t, y, x = dd.tdef.length(), dd.pdef.jsize, dd.pdef.isize
 
     totalNum = sum([reduce(lambda x, y: x*y, (tcPerf[0],v.zcount,y,x))
                     for v in dd.vdef])
-    
+
     if dd.sequential:
         sequentialSize = x * y + 2
     else:
@@ -324,17 +327,17 @@ def __read_template_as_dask(dd, tcPerf):
     # print(totalNum * 4.0 / 1024.0 / 1024.0)
 
     binData = []
-    
+
     dtype = 'f4'
 
     for m, v in enumerate(dd.vdef):
         name = '@miniufo_' + tokenize(v, m)
-        
+
         if totalNum > (200 * 100 * 100 * 100): # about 800 MB, chunk 2D slice
             # print('large')
             chunk = (1, 1, y, x)
             shape = (t, v.zcount, y, x)
-            
+
             dsk = {(name, l + sum(tcPerf[:m]), k, 0, 0):
                    (__read_var, f, v, dd.tRecLength,
                     l, k, dtype, sequentialSize)
@@ -349,7 +352,7 @@ def __read_template_as_dask(dd, tcPerf):
             # print('between')
             chunk = (1, v.zcount, y, x)
             shape = (t, v.zcount, y, x)
-            
+
             dsk = {(name, l + sum(tcPerf[:m]), 0, 0, 0):
                    (__read_var, f, v, dd.tRecLength,
                     l, None, dtype, sequentialSize)
@@ -394,7 +397,7 @@ def __read_var(file, var, tstride, tstep, zstep, dtype, sequentialSize=-1):
             pos = var.strPos
             return __read_continuous(file, pos, shape, dtype,
                                      sequentialShape=seqShp)
-        
+
         elif zstep is None and tstep is not None:
             shape = (1, var.zcount, var.ycount, var.xcount)
             if sequentialSize != -1:
@@ -404,10 +407,10 @@ def __read_var(file, var, tstride, tstep, zstep, dtype, sequentialSize=-1):
             pos = var.strPos
             return __read_continuous(file, pos, shape, dtype,
                                      sequentialShape=seqShp)
-        
+
         elif tstep is None and zstep is not None:
             raise Exception('not implemented in -1,20')
-        
+
         else:
             shape = (1, 1, var.ycount, var.xcount)
             zstri = var.ycount * var.xcount * 4
@@ -419,7 +422,7 @@ def __read_var(file, var, tstride, tstep, zstep, dtype, sequentialSize=-1):
             pos = var.strPos + zstri * zstep
             return __read_continuous(file, pos, shape, dtype,
                                      sequentialShape=seqShp)
-        
+
     elif var.storage in ['99', '0', '00', '000', '1', '11', '111']:
     # elif var.storage == '99' or var.storage == '0':
         if tstep is None and zstep is None:
@@ -430,14 +433,14 @@ def __read_var(file, var, tstride, tstep, zstep, dtype, sequentialSize=-1):
             seqShp = shape
             pos = var.strPos
             data = []
-            
+
             for l in range(var.tcount):
                 data.append(__read_continuous(file, pos, shape, dtype,
                                               sequentialShape=seqShp))
                 pos += tstride
-            
+
             return np.concatenate(data)
-        
+
         elif zstep is None and tstep is not None:
             shape = (1, var.zcount, var.ycount, var.xcount)
             if sequentialSize != -1:
@@ -447,12 +450,12 @@ def __read_var(file, var, tstride, tstep, zstep, dtype, sequentialSize=-1):
             pos = var.strPos + tstride * tstep
             data = __read_continuous(file, pos, shape, dtype,
                                      sequentialShape=seqShp)
-            
+
             return data
-        
+
         elif tstep is None and zstep is not None:
             raise Exception('not implemented in 0,99')
-        
+
         else:
             shape = (1, 1, var.ycount, var.xcount)
             zstri = var.ycount * var.xcount * 4
@@ -464,7 +467,7 @@ def __read_var(file, var, tstride, tstep, zstep, dtype, sequentialSize=-1):
             pos = var.strPos + tstride * tstep + zstri * zstep
             return __read_continuous(file, pos, shape, dtype,
                                      sequentialShape=seqShp)
-    
+
     else:
         raise Exception('invalid storage ' + var.storage +
                         ', only "99" or "-1,20" are supported')
@@ -493,15 +496,15 @@ def __read_continuous(file, offset=0, shape=None, dtype='

Date: Sat, 27 Nov 2021 19:53:05 +0800
Subject: [PATCH 2/2] pdef and template ,

---
 .DS_Store        | Bin 0 -> 10244 bytes
 ctls/.DS_Store   | Bin 0 -> 6148 bytes
 ctls/test8_5.ctl |  12 ++++++++++++
 ctls/test8_6.ctl |  12 ++++++++++++
 4 files changed, 24 insertions(+)
 create mode 100644 .DS_Store
 create mode 100644 ctls/.DS_Store
 create mode 100644 ctls/test8_5.ctl
 create mode 100644 ctls/test8_6.ctl

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..ba397f4f3351b1aec7e5f38234a05b3de880576b
GIT binary patch
literal 10244

literal 0
HcmV?d00001

diff --git a/ctls/.DS_Store b/ctls/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
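For context, here is a minimal usage sketch of what the io.py changes enable: opening a dataset whose descriptor uses `options template` (data split across several binary files) together with a `pdef` projected grid. It is not part of the patch; it assumes the patched xgrads is installed and that the `ctls/test8_5.ctl` descriptor added in the second patch is a template/pdef descriptor with its binary files in place.

```python
# Sketch only (not part of the patch): exercising template + pdef handling.
# Assumes the patched xgrads is importable and that ctls/test8_5.ctl exists
# alongside the binary files its dset/template entry points to.
from xgrads import open_CtlDataset

# With 'options template', the patched open_CtlDataset sizes every binary
# file, rejects any file that is not a whole multiple of the per-time record
# length, and works out how many time records to read from each file.
dset, ctl = open_CtlDataset('ctls/test8_5.ctl', returnctl=True)

# With a pdef entry, the y/x coordinates are built from the projected grid
# (pdef isize/jsize) instead of xdef/ydef, and the projection name is
# recorded on the dataset ('lcc', 'lccr', 'nps', 'sps', ...).
print(dset.attrs['pdef'])
print(dset.dims)
```

If the descriptor has no `pdef` entry, the same call still works and `dset.attrs['pdef']` is the string `'None'`, matching the attribute set earlier in `open_CtlDataset`.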