diff --git a/xarray.ipynb b/xarray.ipynb index 3e77935..117d72d 100644 --- a/xarray.ipynb +++ b/xarray.ipynb @@ -17,6 +17,34 @@ "This is a national water model: https://registry.opendata.aws/nwm-archive/" ] }, + { + "cell_type": "markdown", + "id": "8185966d-6659-482b-bcbb-826b8f30b1e3", + "metadata": { + "tags": [] + }, + "source": [ + "## Load NWM data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8b1749a-0d64-4278-823c-892120bf1a5b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import xarray as xr\n", + "\n", + "ds = xr.open_zarr(\n", + " \"s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr\",\n", + " consolidated=True,\n", + ").drop_encoding()\n", + "ds" + ] + }, { "cell_type": "markdown", "id": "5dd71599-465f-4c97-baaa-19d900d2a070", @@ -39,7 +67,7 @@ "import coiled\n", "\n", "cluster = coiled.Cluster(\n", - " n_workers=40,\n", + " n_workers=100,\n", " region=\"us-east-1\",\n", ")\n", "client = cluster.get_client()" @@ -47,99 +75,104 @@ }, { "cell_type": "markdown", - "id": "8185966d-6659-482b-bcbb-826b8f30b1e3", - "metadata": {}, + "id": "0911fb96-7c08-4ca6-a35a-22e2a5a908cd", + "metadata": { + "tags": [] + }, "source": [ - "## Load NWM data" + "## Compute average over space" ] }, { "cell_type": "code", "execution_count": null, - "id": "e8b1749a-0d64-4278-823c-892120bf1a5b", + "id": "2a6fb91d-6a02-4afc-8d8a-ec3529f805f4", "metadata": { "tags": [] }, "outputs": [], "source": [ - "import xarray as xr\n", - "\n", - "ds = xr.open_zarr(\n", - " \"s3://noaa-nwm-retrospective-2-1-zarr-pds/rtout.zarr\",\n", - " consolidated=True,\n", - ")\n", - "ds" + "subset = ds.zwattablrt.sel(time=slice(\"2001-01-01\", \"2001-03-31\"))\n", + "subset" ] }, { "cell_type": "code", "execution_count": null, - "id": "2147fc5c-60ee-4409-8c22-69c5e68a4c63", + "id": "8ae07b31-383c-4cc9-b94a-cbbb68369746", "metadata": { "tags": [] }, "outputs": [], "source": [ - "ds.nbytes / 1e12 # half-petabyte" + "avg = subset.mean(dim=[\"x\", 
\"y\"]).compute()\n", + "avg.plot()" ] }, { "cell_type": "markdown", - "id": "0911fb96-7c08-4ca6-a35a-22e2a5a908cd", - "metadata": { - "tags": [] - }, + "id": "b237d221-a2db-44fb-924d-6003cd73f933", + "metadata": {}, "source": [ - "## Compute average over space" + "## Rechunk" ] }, { "cell_type": "code", "execution_count": null, - "id": "2a6fb91d-6a02-4afc-8d8a-ec3529f805f4", - "metadata": { - "tags": [] - }, + "id": "da1eca49-8362-42c6-aea7-ac986df36ef3", + "metadata": {}, "outputs": [], "source": [ - "subset = ds.zwattablrt.sel(time=slice(\"2001-01-01\", \"2001-12-31\"))\n", - "subset" + "import dask\n", + "\n", + "dask.config.set({\n", + " \"array.rechunk.method\": \"p2p\",\n", + " \"optimization.fuse.active\": False,\n", + "});" ] }, { "cell_type": "code", "execution_count": null, - "id": "8ae07b31-383c-4cc9-b94a-cbbb68369746", - "metadata": { - "tags": [] - }, + "id": "8ac6e24d-24d6-445d-a532-438b9d3a13f9", + "metadata": {}, "outputs": [], "source": [ - "avg = subset.mean(dim=[\"x\", \"y\"]).persist()" + "result = subset.chunk({\"time\": \"auto\", \"x\": -1, \"y\": \"auto\"})\n", + "result" ] }, { "cell_type": "code", "execution_count": null, - "id": "c42ef712-c60c-4049-816f-fcbd115a27a5", - "metadata": { - "tags": [] - }, + "id": "657ce639-b644-42ea-b98b-b70c2cb3170a", + "metadata": {}, "outputs": [], "source": [ - "cluster.scale(300)" + "%%time\n", + "\n", + "result.to_zarr(\"s3://oss-scratch-space/nwm-x-optimized.zarr\", mode=\"w\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "fdb5ac6f-48c3-4457-bf80-aab4336598f3", - "metadata": { - "tags": [] - }, + "id": "1d615e48-29dc-48b8-a185-c2fd20c3fdda", + "metadata": {}, "outputs": [], "source": [ - "avg.compute().plot()" + "# TODO: follow-up rechunk step was left unfinished (dangling `result.chunk(` call)" + ] + }, + { + "cell_type": "markdown", + "id": "268ccde2-7e83-4e97-9fb7-4887a52adbe6", + "metadata": {}, + "source": [ + "## Cleanup if you like\n", + "\n", + "(but we'll clean up automatically eventually)" ] }, { @@ -155,9 +188,9 @@ ], "metadata": { 
"kernelspec": { - "display_name": "Python [conda env:deepak]", + "display_name": "Python [conda env:coiled]", "language": "python", - "name": "conda-env-deepak-py" + "name": "conda-env-coiled-py" }, "language_info": { "codemirror_mode": { @@ -169,7 +202,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.11.8" } }, "nbformat": 4,