diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 443e236..5d0f096 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,29 +13,12 @@ repos: - id: debug-statements - id: mixed-line-ending - - repo: https://github.com/asottile/pyupgrade - rev: v3.19.0 - hooks: - - id: pyupgrade - args: - - "--py38-plus" - - - repo: https://github.com/psf/black - rev: 24.10.0 - hooks: - - id: black - - id: black-jupyter - - - repo: https://github.com/keewis/blackdoc - rev: v0.3.9 - hooks: - - id: blackdoc - - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.8.1" hooks: - id: ruff args: ["--fix"] + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-prettier rev: v4.0.0-alpha.8 diff --git a/carbonplan_data/utils.py b/carbonplan_data/utils.py index 30797b5..dd3dfb4 100644 --- a/carbonplan_data/utils.py +++ b/carbonplan_data/utils.py @@ -246,7 +246,7 @@ def get_versions( "intake", "rasterio", "zarr", - ] + ], ) -> dict[str, str]: """Helper to fetch commonly used package versions Parameters diff --git a/pyproject.toml b/pyproject.toml index 98d2570..646c355 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,13 +24,7 @@ classifiers = [ ] dynamic = ["version"] -dependencies = [ - "intake<=0.7.0", - "validators", - "wget", - "numpy", - "xarray", -] +dependencies = ["intake<=0.7.0", "validators", "wget", "numpy", "xarray"] [project.urls] repository = "https://github.com/carbonplan/data" @@ -53,6 +47,7 @@ skip-string-normalization = true line-length = 100 target-version = "py39" builtins = ["ellipsis"] +extend-include = ["*.ipynb"] # Exclude a variety of commonly ignored directories. exclude = [ ".bzr", @@ -75,11 +70,13 @@ exclude = [ "node_modules", "venv", ] +[tool.ruff.lint] per-file-ignores = {} -# E402: module level import not at top of file -# E501: line too long - let black worry about that -# E731: do not assign a lambda expression, use a def -ignore = ["E402", "E501", "E731"] +ignore = [ + "E721", # Comparing types instead of isinstance + "E741", # Ambiguous variable names + "E501", # Conflicts with ruff format +] select = [ # Pyflakes "F", @@ -93,10 +90,10 @@ select = [ ] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 18 -[tool.ruff.isort] +[tool.ruff.lint.isort] known-first-party = ["carbonplan_data"] [tool.pytest.ini_options] diff --git a/scripts/fia/01_raw_to_parquet.ipynb b/scripts/fia/01_raw_to_parquet.ipynb index b62fb96..7a65303 100644 --- a/scripts/fia/01_raw_to_parquet.ipynb +++ b/scripts/fia/01_raw_to_parquet.ipynb @@ -32,7 +32,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "import gcsfs\n", "import pandas as pd\n", "\n", diff --git a/scripts/fia/01_raw_to_parquet_part2.ipynb b/scripts/fia/01_raw_to_parquet_part2.ipynb index 4e1af8d..eb3ed03 100644 --- a/scripts/fia/01_raw_to_parquet_part2.ipynb +++ b/scripts/fia/01_raw_to_parquet_part2.ipynb @@ -89,9 +89,6 @@ " \"PHASE\": \"object\",\n", " \"PILE_TL_ADJ\": \"float64\",\n", " \"PILE_TL_COND\": \"float64\",\n", - " \"CWD_TL_UNADJ\": \"float64\",\n", - " \"PILE_TL_ADJ\": \"float64\",\n", - " \"PILE_TL_COND\": \"float64\",\n", " },\n", " \"dwm_coarse_woody_debris\": {\n", " \"CONDID\": \"float64\",\n", diff --git a/scripts/gridmet/01_gridmet_to_zarr.ipynb b/scripts/gridmet/01_gridmet_to_zarr.ipynb index e9c4566..bb5550b 100644 --- a/scripts/gridmet/01_gridmet_to_zarr.ipynb +++ b/scripts/gridmet/01_gridmet_to_zarr.ipynb @@ -14,7 +14,7 @@ "\n", "**Inputs:**\n", "\n", - "- inake catalog: `climate.gridmet_opendap`\n", + "- intake catalog: `climate.gridmet_opendap`\n", "\n", "**Outputs:**\n", "\n", diff --git a/scripts/iiasa/01_raw_to_parquet.ipynb b/scripts/iiasa/01_raw_to_parquet.ipynb index 27d81cf..99168b7 100644 --- a/scripts/iiasa/01_raw_to_parquet.ipynb +++ b/scripts/iiasa/01_raw_to_parquet.ipynb @@ -33,8 +33,6 @@ "metadata": {}, "outputs": [], "source": [ - "import io\n", - "import os.path\n", "import pathlib\n", "\n", "import gcsfs\n", @@ -108,10 +106,8 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ "# TODO:\n", "\n", diff --git a/scripts/mtbs/02_downsampling_and_reprojection.ipynb b/scripts/mtbs/02_downsampling_and_reprojection.ipynb index e47249f..2beeb00 100644 --- a/scripts/mtbs/02_downsampling_and_reprojection.ipynb +++ b/scripts/mtbs/02_downsampling_and_reprojection.ipynb @@ -103,7 +103,7 @@ " resampling = \"mode\"\n", " else:\n", " resampling = \"near\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/mtbs/05_monthly_downsampling.ipynb b/scripts/mtbs/05_monthly_downsampling.ipynb index acc4809..3989e5f 100644 --- a/scripts/mtbs/05_monthly_downsampling.ipynb +++ b/scripts/mtbs/05_monthly_downsampling.ipynb @@ -89,7 +89,9 @@ " f = get_file(region, fire, year, month)\n", " crs, extent = projections(\"albers\", region)\n", " resampling = \"average\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\n", + " \"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\"\n", + " ).format(\n", " crs,\n", " extent,\n", " resolution,\n", @@ -127,7 +129,7 @@ " files = [get_file(\"conus\", \"vlf\", year, month + 1)[\"source\"] for month in range(12)]\n", " crs, extent = projections(\"albers\", region)\n", " resampling = \"sum\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb b/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb index 586dc3e..93dbe35 100644 --- a/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb +++ b/scripts/mtbs/05_monthly_mtbs_to_zarr.ipynb @@ -36,14 +36,10 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", "import gcsfs\n", "import numpy as np\n", "import rasterio\n", - "import rioxarray\n", "import xarray as xr\n", - "import zarr\n", "from numcodecs.zlib import Zlib\n", "from rasterio import Affine\n", "from rasterio.crs import CRS\n", @@ -125,74 +121,63 @@ "\n", "def prepare_mtbs(year, resolution, return_ds=True):\n", " src_path_year = f\"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/severity/{year}.tif\"\n", - " with rasterio.open(src_path_year, \"r\") as src_raster_year:\n", + " with rasterio.open(src_path_year, \"r\") as src_raster:\n", " src_transform = src_raster.meta[\"transform\"]\n", " src_crs = src_raster.meta[\"crs\"]\n", " src_band = src_raster.read(1)\n", " src_resolution = resolution\n", "\n", - " dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(\n", - " src_band, src_resolution\n", - " )\n", + " dst_band, dst_transform, dst_crs, dst_shape = make_dst_band(src_band, src_resolution)\n", " print(\"calc_coords\")\n", " coords = calc_coords(dst_shape, dst_transform, dst_crs)\n", - " \n", - " for month in range(12):\n", - " src_path_month = f\"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{year}.{month+1}.tif\"\n", - " with rasterio.open(src_path_month, \"r\") as src_raster_month:\n", "\n", - " src_nodata = 6\n", - " resampling = Resampling.average\n", - " # set moderate or high burn severity to 1 and others to 1\n", - " src_band_tmp = ((src_band == 3) | (src_band == 4)).astype(\"uint8\")\n", - " # set masked regions to nodata value\n", - " src_band_tmp[src_band == src_nodata] = src_nodata\n", - " src_band = src_band_tmp\n", - " dst_band = dst_band.astype(\n", - " \"float32\"\n", - " ) # convert to float for averaging\n", + " for month in range(12):\n", + " src_path_month = f\"/Users/freeman/workdir/carbonplan-data/raw/mtbs/conus/30m/area/{year}.{month+1}.tif\"\n", + " with rasterio.open(src_path_month, \"r\"):\n", + " src_nodata = 6\n", + " resampling = Resampling.average\n", + " # set moderate or high burn severity to 1 and others to 1\n", + " src_band_tmp = ((src_band == 3) | (src_band == 4)).astype(\"uint8\")\n", + " # set masked regions to nodata value\n", + " src_band_tmp[src_band == src_nodata] = src_nodata\n", + " src_band = src_band_tmp\n", + " dst_band = dst_band.astype(\"float32\") # convert to float for averaging\n", "\n", - " print(\"reproject\")\n", - " # this seems to require rasterio=1.0.25 and gdal=2.4.2\n", - " reproject(\n", - " src_band,\n", - " dst_band,\n", - " src_transform=src_transform,\n", - " src_crs=src_crs,\n", - " dst_transform=dst_transform,\n", - " dst_crs=dst_crs,\n", - " resampling=resampling,\n", - " src_nodata=src_nodata,\n", - " dst_nodata=src_raster.meta[\"nodata\"],\n", - " )\n", + " print(\"reproject\")\n", + " # this seems to require rasterio=1.0.25 and gdal=2.4.2\n", + " reproject(\n", + " src_band,\n", + " dst_band,\n", + " src_transform=src_transform,\n", + " src_crs=src_crs,\n", + " dst_transform=dst_transform,\n", + " dst_crs=dst_crs,\n", + " resampling=resampling,\n", + " src_nodata=src_nodata,\n", + " dst_nodata=src_raster.meta[\"nodata\"],\n", + " )\n", "\n", - " meta = src_raster.meta\n", - " meta.update(\n", - " width=dst_shape[0],\n", - " height=dst_shape[1],\n", - " dtype=str(dst_band.dtype),\n", - " crs=dst_crs.to_wkt(),\n", - " transform=list(dst_transform),\n", - " nodata=src_raster.meta[\"nodata\"],\n", - " )\n", + " meta = src_raster.meta\n", + " meta.update(\n", + " width=dst_shape[0],\n", + " height=dst_shape[1],\n", + " dtype=str(dst_band.dtype),\n", + " crs=dst_crs.to_wkt(),\n", + " transform=list(dst_transform),\n", + " nodata=src_raster.meta[\"nodata\"],\n", + " )\n", "\n", " varname = f\"{year}\"\n", " chunks = {\"x\": 512, \"y\": 512}\n", - " ds = xr.DataArray(dst_band, dims=(\"y\", \"x\"), attrs=meta).to_dataset(\n", - " name=varname\n", - " )\n", + " ds = xr.DataArray(dst_band, dims=(\"y\", \"x\"), attrs=meta).to_dataset(name=varname)\n", " ds = ds.assign_coords(coords).chunk(chunks)\n", "\n", " if return_ds:\n", " return ds\n", " else:\n", - " fs = gcsfs.GCSFileSystem(\n", - " project=\"carbonplan\", token=\"cloud\", requester_pays=True\n", - " )\n", - " mapper = fs.get_mapper(scratch + f\"/MTBS.{year}.{resolution}m.zarr\")\n", - " ds.to_zarr(\n", - " store=mapper, mode=\"w\", encoding={varname: {\"compressor\": Zlib()}}\n", - " )" + " fs = gcsfs.GCSFileSystem(project=\"carbonplan\", token=\"cloud\", requester_pays=True)\n", + " mapper = fs.get_mapper(\"scratch\" + f\"/MTBS.{year}.{resolution}m.zarr\")\n", + " ds.to_zarr(store=mapper, mode=\"w\", encoding={varname: {\"compressor\": Zlib()}})" ] }, { diff --git a/scripts/mtbs/06_annual_downsampling.py b/scripts/mtbs/06_annual_downsampling.py index a262992..c37e2fc 100644 --- a/scripts/mtbs/06_annual_downsampling.py +++ b/scripts/mtbs/06_annual_downsampling.py @@ -4,9 +4,10 @@ from rio_cogeo.cogeo import cog_translate from rio_cogeo.profiles import cog_profiles +from carbonplan_data.utils import projections, setup + dst_profile = cog_profiles.get("deflate") -from carbonplan_data.utils import projections, setup workdir, upload = setup("jeremy") workdir diff --git a/scripts/nftd/02_downsampling_and_reprojection.ipynb b/scripts/nftd/02_downsampling_and_reprojection.ipynb index 8e5ed5f..2ef0ba7 100644 --- a/scripts/nftd/02_downsampling_and_reprojection.ipynb +++ b/scripts/nftd/02_downsampling_and_reprojection.ipynb @@ -103,7 +103,7 @@ " resampling = \"mode\"\n", " else:\n", " resampling = \"near\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", @@ -186,8 +186,8 @@ " dst.write(out, 1)\n", "\n", " cmd = (\n", - " \"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"-ot Float32 \" \"%s \" \"%s\"\n", - " ) % (\n", + " \"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"-ot Float32 \" \"{} \" \"{}\"\n", + " ).format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/nlcd/02_downsampling_and_reprojection.ipynb b/scripts/nlcd/02_downsampling_and_reprojection.ipynb index 867a992..b623d88 100644 --- a/scripts/nlcd/02_downsampling_and_reprojection.ipynb +++ b/scripts/nlcd/02_downsampling_and_reprojection.ipynb @@ -122,7 +122,7 @@ " resampling = \"near\"\n", " else:\n", " resampling = \"mode\"\n", - " cmd = (\"gdalwarp \" \"-t_srs '%s' \" \"-te %s \" \"-tr %s %s \" \"-r %s \" \"%s \" \"%s\") % (\n", + " cmd = (\"gdalwarp \" \"-t_srs '{}' \" \"-te {} \" \"-tr {} {} \" \"-r {} \" \"{} \" \"{}\").format(\n", " crs,\n", " extent,\n", " resolution,\n", @@ -208,14 +208,14 @@ "\n", " cmd = (\n", " \"gdalwarp \"\n", - " \"-t_srs '%s' \"\n", - " \"-te %s \"\n", - " \"-tr %s %s \"\n", - " \"-r %s \"\n", + " \"-t_srs '{}' \"\n", + " \"-te {} \"\n", + " \"-tr {} {} \"\n", + " \"-r {} \"\n", " \"-ot Float32 \"\n", - " \"%s \"\n", - " \"%s\"\n", - " ) % (\n", + " \"{} \"\n", + " \"{}\"\n", + " ).format(\n", " crs,\n", " extent,\n", " resolution,\n", diff --git a/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb b/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb index 18d0497..79347b4 100644 --- a/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb +++ b/scripts/terraclimate/01_terraclimate_to_zarr3.ipynb @@ -160,6 +160,9 @@ "metadata": {}, "outputs": [], "source": [ + "from numcodecs import Blosc\n", + "\n", + "\n", "def apply_mask(key, da):\n", " \"\"\"helper function to mask DataArrays based on a threshold value\"\"\"\n", " if mask_opts.get(key, None):\n", diff --git a/scripts/terraclimate/02_terraclimate_regrid.ipynb b/scripts/terraclimate/02_terraclimate_regrid.ipynb index 60aa7a8..12a9787 100644 --- a/scripts/terraclimate/02_terraclimate_regrid.ipynb +++ b/scripts/terraclimate/02_terraclimate_regrid.ipynb @@ -29,7 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "pip install -U xarray==0.16.0 --no-deps" + "%pip install -U xarray==0.16.0 --no-deps" ] }, { @@ -207,9 +207,11 @@ "metadata": {}, "outputs": [], "source": [ - "mapper2 = fsspec.get_mapper(target_location)\n", "import zarr\n", "\n", + "mapper2 = fsspec.get_mapper(target_location)\n", + "\n", + "\n", "zarr.consolidate_metadata(mapper2)" ] } diff --git a/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb b/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb index 052ea9d..4a07934 100644 --- a/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb +++ b/scripts/terraclimate/02_terraclimate_to_fiaplots.ipynb @@ -339,10 +339,11 @@ "metadata": {}, "outputs": [], "source": [ + "from dask.diagnostics import ProgressBar\n", + "\n", "bucket = \"gs://carbonplan-scratch/terraclimate-fia-cond-ann-3.zarr\"\n", "mapper3 = fsspec.get_mapper(bucket, create=True)\n", "\n", - "from dask.diagnostics import ProgressBar\n", "\n", "with ProgressBar():\n", " ds_cond_ann.to_zarr(mapper3, mode=\"w\", consolidated=True, encoding=encoding)"