diff --git a/notebooks/advanced_indexing.ipynb b/notebooks/advanced_indexing.ipynb
index eba6b5880..eb2365a11 100644
--- a/notebooks/advanced_indexing.ipynb
+++ b/notebooks/advanced_indexing.ipynb
@@ -24,12 +24,17 @@
     }
    ],
    "source": [
+    "import cProfile\n",
     "import sys\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
     "sys.path.insert(0, '..')\n",
+    "\n",
     "import zarr\n",
-    "import numpy as np\n",
+    "\n",
     "np.random.seed(42)\n",
-    "import cProfile\n",
+    "\n",
     "zarr.__version__"
    ]
   },
@@ -975,7 +980,7 @@
    "source": [
     "a = np.array([(b'aaa', 1, 4.2),\n",
     "              (b'bbb', 2, 8.4),\n",
-    "              (b'ccc', 3, 12.6)], \n",
+    "              (b'ccc', 3, 12.6)],\n",
     "             dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])\n",
     "za = zarr.array(a, chunks=2, fill_value=None)\n",
     "za[:]"
    ]
   },
@@ -1437,9 +1442,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import tempfile\n",
-    "import cProfile\n",
     "import pstats\n",
+    "import tempfile\n",
+    "\n",
     "\n",
     "def profile(statement, sort='time', restrictions=(7,)):\n",
     "    with tempfile.NamedTemporaryFile() as f:\n",
@@ -2637,8 +2642,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import h5py\n",
-    "import tempfile"
+    "import h5py"
    ]
   },
@@ -2733,7 +2737,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# # this is pathological, takes minutes \n",
+    "# # this is pathological, takes minutes\n",
     "# %time hc[ix_dense_bool]"
    ]
   },
diff --git a/notebooks/blosc_microbench.ipynb b/notebooks/blosc_microbench.ipynb
index 9361d8e95..fbcfaa674 100644
--- a/notebooks/blosc_microbench.ipynb
+++ b/notebooks/blosc_microbench.ipynb
@@ -20,7 +20,9 @@
    ],
    "source": [
     "import numpy as np\n",
+    "\n",
     "import zarr\n",
+    "\n",
     "zarr.__version__"
    ]
   },
@@ -101,10 +103,13 @@
     }
    ],
    "source": [
-    "import numpy as np\n",
     "import sys\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
     "sys.path.insert(0, '..')\n",
     "import zarr\n",
+    "\n",
     "zarr.__version__"
    ]
   },
diff --git a/notebooks/dask_2d_subset.ipynb b/notebooks/dask_2d_subset.ipynb
index 6e88b510d..839e30662 100644
--- a/notebooks/dask_2d_subset.ipynb
+++ b/notebooks/dask_2d_subset.ipynb
@@ -24,10 +24,14 @@
     }
    ],
    "source": [
-    "import zarr; print('zarr', zarr.__version__)\n",
-    "import dask; print('dask', dask.__version__)\n",
-    "import dask.array as da\n",
-    "import numpy as np"
+    "import dask\n",
+    "import dask.array as da\n",
+    "import numpy as np\n",
+    "\n",
+    "import zarr\n",
+    "\n",
+    "print('zarr', zarr.__version__)\n",
+    "print('dask', dask.__version__)\n"
    ]
   },
@@ -367,6 +371,7 @@
    "source": [
     "# what's taking so long?\n",
     "import cProfile\n",
+    "\n",
     "cProfile.run('gd[dim0_condition][:, dim1_indices]', sort='time')"
    ]
   },
diff --git a/notebooks/dask_copy.ipynb b/notebooks/dask_copy.ipynb
index ba4391737..41cc07ae6 100644
--- a/notebooks/dask_copy.ipynb
+++ b/notebooks/dask_copy.ipynb
@@ -133,25 +133,27 @@
     }
    ],
    "source": [
+    "import multiprocessing\n",
     "import sys\n",
+    "\n",
+    "import bcolz\n",
+    "import dask.array as da\n",
+    "import h5py\n",
+    "import numpy as np\n",
+    "from bokeh.io import output_notebook\n",
+    "from dask.diagnostics import Profiler, ResourceProfiler\n",
+    "from dask.diagnostics.profile_visualize import visualize\n",
+    "\n",
     "sys.path.insert(0, '..')\n",
+    "\n",
     "import zarr\n",
+    "\n",
     "print('zarr', zarr.__version__)\n",
-    "from zarr import blosc\n",
-    "import numpy as np\n",
-    "import h5py\n",
-    "import bcolz\n",
-    "# don't let bcolz use multiple threads internally, we want to \n",
+    "\n",
+    "# don't let bcolz use multiple threads internally, we want to\n",
     "# see whether dask can make good use of multiple CPUs\n",
     "bcolz.set_nthreads(1)\n",
-    "import multiprocessing\n",
-    "import dask\n",
-    "import dask.array as da\n",
-    "from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler\n",
-    "from dask.diagnostics.profile_visualize import visualize\n",
-    "from cachey import nbytes\n",
-    "import bokeh\n",
-    "from bokeh.io import output_notebook\n",
+    "\n",
     "output_notebook()"
    ]
   },
@@ -163,9 +165,10 @@
    },
    "outputs": [],
    "source": [
-    "import tempfile\n",
     "import operator\n",
+    "import tempfile\n",
     "from functools import reduce\n",
+    "\n",
     "from zarr.util import human_readable_size\n",
     "\n",
     "\n",
@@ -188,7 +191,7 @@
     "\n",
     "def h5d_diagnostics(d):\n",
     "    \"\"\"Print some diagnostics on an HDF5 dataset.\"\"\"\n",
-    "    \n",
+    "\n",
     "    print(d)\n",
     "    nbytes = reduce(operator.mul, d.shape) * d.dtype.itemsize\n",
     "    cbytes = d._id.get_storage_size()\n",
@@ -196,15 +199,14 @@
     "        ratio = nbytes / cbytes\n",
     "    else:\n",
     "        ratio = np.inf\n",
-    "    r = ' compression: %s' % d.compression\n",
-    "    r += '; compression_opts: %s' % d.compression_opts\n",
-    "    r += '; shuffle: %s' % d.shuffle\n",
-    "    r += '\\n nbytes: %s' % human_readable_size(nbytes)\n",
-    "    r += '; nbytes_stored: %s' % human_readable_size(cbytes)\n",
-    "    r += '; ratio: %.1f' % ratio\n",
-    "    r += '; chunks: %s' % str(d.chunks)\n",
-    "    print(r)\n",
-    "    "
+    "    r = f' compression: {d.compression}'\n",
+    "    r += f'; compression_opts: {d.compression_opts}'\n",
+    "    r += f'; shuffle: {d.shuffle}'\n",
+    "    r += f'\\n nbytes: {human_readable_size(nbytes)}'\n",
+    "    r += f'; nbytes_stored: {human_readable_size(cbytes)}'\n",
+    "    r += f'; ratio: {ratio:.1f}'\n",
+    "    r += f'; chunks: {d.chunks}'\n",
+    "    print(r)\n"
    ]
   },
@@ -219,8 +221,7 @@
     "    dsrc = da.from_array(src, chunks=chunks)\n",
     "    with Profiler() as prof, ResourceProfiler(dt=dt) as rprof:\n",
     "        da.store(dsrc, dst, num_workers=num_workers, lock=lock)\n",
-    "    visualize([prof, rprof], min_border_top=60, min_border_bottom=60)\n",
-    "    "
+    "    visualize([prof, rprof], min_border_top=60, min_border_bottom=60)\n"
    ]
   },
@@ -567,7 +568,7 @@
    "source": [
-    "z1 = zarr.array(a1, chunks=chunks, compression='blosc', \n",
+    "z1 = zarr.array(a1, chunks=chunks, compression='blosc',\n",
     "                compression_opts=dict(cname='lz4', clevel=1, shuffle=2))\n",
     "z1"
    ]
   },
@@ -934,8 +935,8 @@
    "source": [
-    "h2 = h5f.create_dataset('h2', shape=h1.shape, chunks=h1.chunks, \n",
-    "                        compression=h1.compression, compression_opts=h1.compression_opts, \n",
+    "h2 = h5f.create_dataset('h2', shape=h1.shape, chunks=h1.chunks,\n",
+    "                        compression=h1.compression, compression_opts=h1.compression_opts,\n",
     "                        shuffle=h1.shuffle)\n",
     "h5d_diagnostics(h2)"
    ]
   },
@@ -1218,7 +1219,7 @@
    "source": [
-    "c2 = bcolz.zeros(a1.shape, chunklen=chunks[0], dtype=a1.dtype, \n",
+    "c2 = bcolz.zeros(a1.shape, chunklen=chunks[0], dtype=a1.dtype,\n",
     "                 cparams=bcolz.cparams(cname='lz4', clevel=1, shuffle=2))\n",
     "c2"
    ]
   },
diff --git a/notebooks/dask_count_alleles.ipynb b/notebooks/dask_count_alleles.ipynb
index 8b9b7cec6..64312c572 100644
--- a/notebooks/dask_count_alleles.ipynb
+++ b/notebooks/dask_count_alleles.ipynb
@@ -127,24 +127,19 @@
    ],
    "source": [
     "import sys\n",
+    "\n",
+    "import allel\n",
+    "import h5py\n",
+    "from bokeh.io import output_notebook\n",
+    "from dask.diagnostics import Profiler, ResourceProfiler\n",
+    "from dask.diagnostics.profile_visualize import visualize\n",
+    "\n",
     "sys.path.insert(0, '..')\n",
+    "\n",
     "import zarr\n",
+    "\n",
     "print('zarr', zarr.__version__)\n",
-    "from zarr import blosc\n",
-    "import numpy as np\n",
-    "import h5py\n",
-    "import multiprocessing\n",
-    "import dask\n",
-    "import dask.array as da\n",
-    "from dask.diagnostics import Profiler, ResourceProfiler, CacheProfiler\n",
-    "from dask.diagnostics.profile_visualize import visualize\n",
-    "from cachey import nbytes\n",
-    "import bokeh\n",
-    "from bokeh.io import output_notebook\n",
-    "output_notebook()\n",
-    "from functools import reduce\n",
-    "import operator\n",
-    "import allel"
+    "output_notebook()"
    ]
   },
diff --git a/notebooks/genotype_benchmark_compressors.ipynb b/notebooks/genotype_benchmark_compressors.ipynb
index b262e63fa..e575bd047 100644
--- a/notebooks/genotype_benchmark_compressors.ipynb
+++ b/notebooks/genotype_benchmark_compressors.ipynb
@@ -17,18 +17,23 @@
     }
    ],
    "source": [
-    "import sys\n",
-    "sys.path.insert(0, '..')\n",
     "import functools\n",
+    "import sys\n",
     "import timeit\n",
-    "import zarr\n",
-    "print('zarr', zarr.__version__)\n",
-    "from zarr import blosc\n",
-    "print('blosc', blosc.version())\n",
-    "import numpy as np\n",
+    "\n",
     "import h5py\n",
+    "import numpy as np\n",
+    "\n",
     "%matplotlib inline\n",
-    "import matplotlib.pyplot as plt"
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "sys.path.insert(0, '..')\n",
+    "\n",
+    "import zarr\n",
+    "from zarr import blosc\n",
+    "\n",
+    "print('zarr', zarr.__version__)\n",
+    "print('blosc', blosc.version())"
    ]
   },
@@ -133,8 +138,8 @@
    "source": [
     "# 1M chunks of first dimension\n",
-    "chunks = (int(2**20 / (genotype_sample.shape[1] * genotype_sample.shape[2])), \n",
-    "          genotype_sample.shape[1], \n",
+    "chunks = (int(2**20 / (genotype_sample.shape[1] * genotype_sample.shape[2])),\n",
+    "          genotype_sample.shape[1],\n",
     "          genotype_sample.shape[2])\n",
     "chunks"
    ]
   },
@@ -280,11 +285,11 @@
    },
    "outputs": [],
    "source": [
-    "@functools.lru_cache(maxsize=None)\n",
+    "@functools.cache\n",
     "def compression_ratios():\n",
     "    x = list()\n",
     "    for compression, compression_opts in compression_configs:\n",
-    "        z = zarr.array(genotype_sample, chunks=chunks, compression=compression, \n",
+    "        z = zarr.array(genotype_sample, chunks=chunks, compression=compression,\n",
     "                       compression_opts=compression_opts)\n",
     "        ratio = z.nbytes / z.nbytes_stored\n",
     "        x.append(ratio)\n",
@@ -311,8 +316,8 @@
    "source": [
-    "ratios = compression_ratios() \n",
-    "labels = ['%s - %s' % (c, o)\n",
+    "ratios = compression_ratios()\n",
+    "labels = [f'{c} - {o}'\n",
     "          for c, o in compression_configs]\n",
     "\n",
     "fig = plt.figure(figsize=(12, len(compression_configs)*.3))\n",
@@ -347,34 +352,33 @@
    },
    "outputs": [],
    "source": [
-    "@functools.lru_cache(maxsize=None)\n",
+    "@functools.cache\n",
     "def compression_decompression_times(repeat=3, number=1):\n",
     "    c = list()\n",
     "    d = list()\n",
     "    for compression, compression_opts in compression_configs:\n",
-    "        \n",
+    "\n",
     "        def compress():\n",
-    "            zarr.array(genotype_sample, chunks=chunks, compression=compression, \n",
+    "            zarr.array(genotype_sample, chunks=chunks, compression=compression,\n",
     "                       compression_opts=compression_opts)\n",
-    "        \n",
+    "\n",
     "        t = timeit.Timer(stmt=compress, globals=locals())\n",
     "        compress_times = t.repeat(repeat=repeat, number=number)\n",
     "        c.append(compress_times)\n",
-    "        \n",
-    "        z = zarr.array(genotype_sample, chunks=chunks, compression=compression, \n",
+    "\n",
+    "        z = zarr.array(genotype_sample, chunks=chunks, compression=compression,\n",
     "                       compression_opts=compression_opts)\n",
-    "        \n",
+    "\n",
     "        def decompress():\n",
     "            z[:]\n",
-    "        \n",
+    "\n",
     "        t = timeit.Timer(stmt=decompress, globals=locals())\n",
     "        decompress_times = t.repeat(repeat=repeat, number=number)\n",
     "        d.append(decompress_times)\n",
-    "        \n",
+    "\n",
     "        log(compression, compression_opts, compress_times, decompress_times)\n",
-    "        \n",
-    "    return c, d\n",
-    "    "
+    "\n",
+    "    return c, d\n"
    ]
   },
diff --git a/notebooks/object_arrays.ipynb b/notebooks/object_arrays.ipynb
index 714d02490..e64965615 100644
--- a/notebooks/object_arrays.ipynb
+++ b/notebooks/object_arrays.ipynb
@@ -14,9 +14,7 @@
    "execution_count": 1,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "import numpy as np"
-   ]
+   "source": []
   },
   {
    "cell_type": "code",
@@ -36,6 +34,7 @@
    ],
    "source": [
     "import zarr\n",
+    "\n",
     "zarr.__version__"
    ]
   },
@@ -57,6 +56,7 @@
    ],
    "source": [
     "import numcodecs\n",
+    "\n",
     "numcodecs.__version__"
    ]
   },
@@ -293,6 +293,7 @@
    ],
    "source": [
     "from numcodecs.tests.common import greetings\n",
+    "\n",
     "z = zarr.array(greetings, chunks=5, dtype=object, object_codec=numcodecs.MsgPack())\n",
     "z[:]"
    ]
   },
diff --git a/notebooks/repr_tree.ipynb b/notebooks/repr_tree.ipynb
index dde8bbb9a..9ec0b5f55 100644
--- a/notebooks/repr_tree.ipynb
+++ b/notebooks/repr_tree.ipynb
@@ -18,6 +18,7 @@
    ],
    "source": [
     "import zarr\n",
+    "\n",
     "zarr.__version__"
    ]
   },
diff --git a/notebooks/store_benchmark.ipynb b/notebooks/store_benchmark.ipynb
index 869e7df60..a95a5d3da 100644
--- a/notebooks/store_benchmark.ipynb
+++ b/notebooks/store_benchmark.ipynb
@@ -32,6 +32,7 @@
    ],
    "source": [
     "import zarr\n",
+    "\n",
     "zarr.__version__"
    ]
   },
@@ -53,6 +54,7 @@
    ],
    "source": [
     "import bsddb3\n",
+    "\n",
     "bsddb3.__version__"
    ]
   },
@@ -74,6 +76,7 @@
    ],
    "source": [
     "import lmdb\n",
+    "\n",
     "lmdb.__version__"
    ]
   },
@@ -104,6 +107,7 @@
    "source": [
     "import os\n",
     "import shutil\n",
+    "\n",
     "bench_dir = '../data/bench'\n",
     "\n",
     "\n",
@@ -112,10 +116,10 @@
     "    shutil.rmtree(bench_dir)\n",
     "    os.makedirs(bench_dir)\n",
     "\n",
-    "    \n",
+    "\n",
     "def setup(a, name='foo/bar'):\n",
     "    global fdict_z, hdict_z, lmdb_z, gdbm_z, ndbm_z, bdbm_btree_z, bdbm_hash_z, zip_z, dir_z\n",
-    "    \n",
+    "\n",
     "    clean()\n",
     "    fdict_root = zarr.group(store=dict())\n",
     "    hdict_root = zarr.group(store=zarr.DictStore())\n",
@@ -140,8 +144,7 @@
     "    # check compression ratio\n",
     "    fdict_z[:] = a\n",
     "    return fdict_z.info\n",
-    "    \n",
-    "    "
+    "\n"
    ]
   },
@@ -164,8 +167,8 @@
     "    z[:] = a\n",
     "    if hasattr(z.store, 'flush'):\n",
     "        z.store.flush()\n",
-    "    \n",
-    "    \n",
+    "\n",
+    "\n",
     "def load(z, a):\n",
     "    z.get_basic_selection(out=a)\n"
    ]
   },
@@ -933,8 +936,7 @@
     "    result = (d // 2) * 2\n",
     "    da.store(result, sink, lock=False)\n",
     "    if hasattr(sink.store, 'flush'):\n",
-    "        sink.store.flush()\n",
-    "        "
+    "        sink.store.flush()\n"
    ]
   },
diff --git a/notebooks/zip_benchmark.ipynb b/notebooks/zip_benchmark.ipynb
index 680555242..d14a7c2ea 100644
--- a/notebooks/zip_benchmark.ipynb
+++ b/notebooks/zip_benchmark.ipynb
@@ -20,8 +20,10 @@
    ],
    "source": [
     "import sys\n",
+    "\n",
     "sys.path.insert(0, '..')\n",
     "import zarr\n",
+    "\n",
     "zarr.__version__"
    ]
   },
@@ -135,6 +137,7 @@
    ],
    "source": [
     "import cProfile\n",
+    "\n",
     "cProfile.run('z[:10]', sort='cumtime')"
    ]
   },
@@ -159,6 +162,7 @@
    "source": [
     "import dask\n",
     "import dask.array as da\n",
+    "\n",
     "dask.__version__"
    ]
   },