Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dataset/dataset/domain-cells-for-different-bands #76

Merged
merged 6 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,10 @@ Dataset
"""""""
* fix the un-updated array dimension bug in the crop method when the mask is a vector mask and the touch parameter is
True.


0.5.5 (2024-01-04)
------------------
Dataset
"""""""
* Count domain cells for a specific band.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Installing pyramids
Installing `pyramids` from the `conda-forge` channel can be achieved by:

```
conda install -c conda-forge pyramids=0.5.4
conda install -c conda-forge pyramids=0.5.5
```

It is possible to list all the versions of `pyramids` available on your platform with:
Expand All @@ -68,7 +68,7 @@ pip install git+https://github.com/Serapieum-of-alex/pyramids
To install the latest release, you can use pip:

```
pip install pyramids-gis==0.5.4
pip install pyramids-gis==0.5.5
```

Quick start
Expand Down
29 changes: 29 additions & 0 deletions docs/dataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,35 @@ To extract the

ExtractedValues, Cells = R.OverlayMap(Path+"DepthMax22489.zip", BaseMapF,ExcludedValue, Compressed,OccupiedCellsOnly)

count_domain_cells
------------------
- Count the number of cells in a raster whose value is not equal to the `no_data_value`.

Parameters
^^^^^^^^^^
band: [int]
band index. Default is 0.

Returns
^^^^^^^
int:
Number of cells

.. code:: py

path = "examples/data/dem/DEM5km_Rhine_burned_fill.tif"
dataset = Dataset.read_file(path)
cells = dataset.count_domain_cells()
print(f"Number of cells = {cells}")

Number of cells = 6374

If the dataset is a multi-band raster, you can specify the band index.

.. code:: py

cells = dataset.count_domain_cells(band=1)


Mathematical operations
=======================
Expand Down
132 changes: 68 additions & 64 deletions pyramids/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1068,18 +1068,22 @@ def get_variables(self):

return variables

def count_domain_cells(self):
def count_domain_cells(self, band: int = 0) -> int:
"""Count cells inside the domain

Parameters
----------
band: [int]
band index. Default is 0.

Returns
-------
int:
Number of cells
"""
# count cells inside the domain
arr = self.raster.ReadAsArray()
arr = self.read_array(band=band)
domain_count = np.size(arr[:, :]) - np.count_nonzero(
(arr[np.isclose(arr, self.no_data_value[0], rtol=0.001)])
(arr[np.isclose(arr, self.no_data_value[band], rtol=0.001)])
)
return domain_count

Expand Down Expand Up @@ -3353,7 +3357,7 @@ def read_multiple_files(
cls,
path: Union[str, List[str]],
with_order: bool = False,
regex_string=r"\d{4}.\d{2}.\d{2}",
regex_string: str = r"\d{4}.\d{2}.\d{2}",
date: bool = True,
file_name_data_fmt: str = None,
start: str = None,
Expand All @@ -3363,74 +3367,74 @@ def read_multiple_files(
):
r"""read_multiple_files.

- reads rasters from a folder and creates a 3d array with the same 2d dimensions of the first raster in
the folder and length as the number of files.
- reads rasters from a folder and creates a 3d array with the same 2d dimensions of the first raster in
the folder and length as the number of files.

inside the folder.
- All rasters should have the same dimensions
- If you want to read the rasters with a certain order, then all raster file names should have a date that follows
the same format (YYYY.MM .DD / YYYY-MM-DD or YYYY_MM_DD) (i.e. "MSWEP_1979.01.01.tif").
inside the folder.
- All rasters should have the same dimensions
- If you want to read the rasters with a certain order, then all raster file names should have a date that
follows the same format (YYYY.MM .DD / YYYY-MM-DD or YYYY_MM_DD) (i.e. "MSWEP_1979.01.01.tif").

Parameters
----------
path:[str/list]
path of the folder that contains all the rasters, ora list contains the paths of the rasters to read.
with_order: [bool]
` True if the rasters names' follows a certain order, then the rasters' names should have a date that follows
the same format (YYYY.MM.DD / YYYY-MM-DD or YYYY_MM_DD).
>>> "MSWEP_1979.01.01.tif"
>>> "MSWEP_1979.01.02.tif"
>>> ...
>>> "MSWEP_1979.01.20.tif"
regex_string: [str]
a regex string that we can use to locate the date in the file names.Default is r"\d{4}.\d{
2}.\d{2}".
>>> fname = "MSWEP_YYYY.MM.DD.tif"
>>> regex_string = r"\d{4}.\d{2}.\d{2}"
- or
>>> fname = "MSWEP_YYYY_M_D.tif"
>>> regex_string = r"\d{4}_\d{1}_\d{1}"
- if there is a number at the beginning of the name
>>> fname = "1_MSWEP_YYYY_M_D.tif"
>>> regex_string = r"\d+"
date: [bool]
True if the number in the file name is a date. Default is True.
file_name_data_fmt : [str]
if the files names' have a date and you want to read them ordered .Default is None
>>> "MSWEP_YYYY.MM.DD.tif"
>>> file_name_data_fmt = "%Y.%m.%d"
start: [str]
start date if you want to read the input raster for a specific period only and not all rasters,
if not given all rasters in the given path will be read.
end: [str]
end date if you want to read the input temperature for a specific period only,
if not given all rasters in the given path will be read.
fmt: [str]
format of the given date in the start/end parameter.
extension: [str]
the extension of the files you want to read from the given path. Default is ".tif".
Parameters
----------
path:[str/list]
path of the folder that contains all the rasters, or a list containing the paths of the rasters to read.
with_order: [bool]
True if the rasters names' follows a certain order, then the rasters' names should have a date that follows
the same format (YYYY.MM.DD / YYYY-MM-DD or YYYY_MM_DD).
>>> "MSWEP_1979.01.01.tif"
>>> "MSWEP_1979.01.02.tif"
>>> ...
>>> "MSWEP_1979.01.20.tif"
regex_string: [str]
a regex string that we can use to locate the date in the file names. Default is
r"\d{4}.\d{2}.\d{2}".
>>> fname = "MSWEP_YYYY.MM.DD.tif"
>>> regex_string = r"\d{4}.\d{2}.\d{2}"
- or
>>> fname = "MSWEP_YYYY_M_D.tif"
>>> regex_string = r"\d{4}_\d{1}_\d{1}"
- if there is a number at the beginning of the name
>>> fname = "1_MSWEP_YYYY_M_D.tif"
>>> regex_string = r"\d+"
date: [bool]
True if the number in the file name is a date. Default is True.
file_name_data_fmt : [str]
if the file names have a date and you want to read them in order. Default is None.
>>> "MSWEP_YYYY.MM.DD.tif"
>>> file_name_data_fmt = "%Y.%m.%d"
start: [str]
start date if you want to read the input raster for a specific period only and not all rasters,
if not given all rasters in the given path will be read.
end: [str]
end date if you want to read the input raster for a specific period only,
if not given all rasters in the given path will be read.
fmt: [str]
format of the given date in the start/end parameter.
extension: [str]
the extension of the files you want to read from the given path. Default is ".tif".

Returns
-------
DataCube:
instance of the datacube class.
Returns
-------
DataCube:
instance of the datacube class.

Example
-------
>>> from pyramids.dataset import Datacube
>>> raster_folder = "examples/GIS/data/raster-folder"
>>> prec = Datacube.read_multiple_files(raster_folder)

>>> import glob
>>> search_criteria = "*.tif"
>>> file_list = glob.glob(os.path.join(raster_folder, search_criteria))
>>> prec = Datacube.read_multiple_files(file_list, with_order=False)
Example
-------
>>> from pyramids.dataset import Datacube
>>> raster_folder = "examples/GIS/data/raster-folder"
>>> prec = Datacube.read_multiple_files(raster_folder)

>>> import glob
>>> search_criteria = "*.tif"
>>> file_list = glob.glob(os.path.join(raster_folder, search_criteria))
>>> prec = Datacube.read_multiple_files(file_list, with_order=False)
"""
if not isinstance(path, str) and not isinstance(path, list):
raise TypeError(f"path input should be string/list type, given{type(path)}")

if isinstance(path, str):
# check wither the path exists or not
# check whether the path exists or not
if not os.path.exists(path):
raise FileNotFoundError("The path you have provided does not exist")
# get a list of all files
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name="pyramids-gis",
version="0.5.4",
version="0.5.5",
description="GIS utility package",
author="Mostafa Farrag",
author_email="moah.farag@gmail.come",
Expand Down
12 changes: 12 additions & 0 deletions tests/dataset/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,18 @@ def test_ascii(
pass


class TestCountDomainCells:
    """Check the value returned by ``Dataset.count_domain_cells``."""

    def test_single_band(self, src: gdal.Dataset):
        # Wrap the GDAL fixture and count with no explicit band argument.
        dataset = Dataset(src)
        n_cells = dataset.count_domain_cells()
        assert n_cells == 89

    def test_multi_band(self, era5_image: gdal.Dataset):
        # Same call on the era5 fixture; no band argument is passed here either.
        dataset = Dataset(era5_image)
        n_cells = dataset.count_domain_cells()
        assert n_cells == 5


class TestGetCellCoordsAndCreateCellGeometry:
def test_cell_center_masked_cells(
self,
Expand Down
Loading