From 0baf500da9b391c3023c688fb06d9aad69b08588 Mon Sep 17 00:00:00 2001 From: Gabriel Stefanini Vicente Date: Mon, 26 Feb 2024 12:16:09 -0500 Subject: [PATCH] Add pre-commit (#14) --- .gitignore | 4 +- .pre-commit-config.yaml | 39 + build_commands.txt | 2 +- docs/novel_context.md | 10 +- docs/urbanization_comparison.md | 10 +- .../JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb | 101 ++- .../ECA_Urban_Extents.ipynb | 98 ++- .../KAZ_Urbanization_Review.ipynb | 430 ++++++---- .../Urban_metrics_Combine_All.ipynb | 62 +- .../Urban_metrics_Fullness.ipynb | 136 +-- .../Urban_metrics_Shape.ipynb | 193 +++-- .../Urban_metrics_Sprawl.ipynb | 216 ++--- .../Urban_metrics_Structure.ipynb | 111 ++- .../Summarize_Urban.ipynb | 72 +- notebooks/Implementations/README.md | 2 +- .../Slum_Mapping/Imagery_Search.ipynb | 24 +- .../slumML/OpenBuildings2FeatureClass.py | 72 +- .../Slum_Mapping/slumML/STEP1.ipynb | 486 ++++++++++- .../Slum_Mapping/slumML/STEP1.py | 204 ++--- .../Slum_Mapping/slumML/STEP2.ipynb | 779 +++++++++++++++++- .../Slum_Mapping/slumML/STEP2.py | 105 +-- .../URB_DECAT_B_ExploringGHSSMODcode.ipynb | 110 +-- .../URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb | 198 +++-- .../Create_Mosaick_Datasets.ipynb | 99 +-- .../GHSL_Standardize_To_Country.ipynb | 46 +- .../MAP_Urbanization.ipynb | 100 ++- .../URB_SEAU1_NovelUrbanization/README.md | 19 +- .../URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb | 348 ++++---- .../WBGAPI_Extract_urbanization_GDP.ipynb | 58 +- .../novelUrbanization.py | 674 +++++++++------ .../Data Preparation.ipynb | 194 +++-- .../URB_SURDR_ZAF_Energy_Transition/README.md | 2 +- .../Zonal_statistics.ipynb | 217 ++--- notebooks/Implementations/WSF/wsfdata.py | 123 ++- notebooks/Tutorials/LEI_Example.ipynb | 68 +- notebooks/Tutorials/Untitled.ipynb | 8 +- .../Tutorials/UrbanAreas_tutorials.ipynb | 28 +- .../UrbanRural_extents_from_griddedPop.ipynb | 44 +- notebooks/URB_DECAT_ExtractByISO3.ipynb | 54 +- notebooks/Untitled.ipynb | 14 +- pyproject.toml | 2 +- src/GOSTurban/LEI.py | 170 ++-- src/GOSTurban/UrbanRaster.py | 378 +++++---- src/GOSTurban/country_helper.py | 255 +++--- src/GOSTurban/urban_helper.py | 596 ++++++++------ 45 files changed, 4523 insertions(+), 2438 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.gitignore b/.gitignore index 853a986..6634f56 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,6 @@ dmypy.json .pyre/ # Project-specific -_build/ +_build/ docs/api/ -src/GOSTurban/_version.py \ No newline at end of file +src/GOSTurban/_version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ddb6da7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,39 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-ast + - id: check-json + - id: detect-aws-credentials + args: [--allow-missing-credentials] + - id: detect-private-key + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.39.0 + hooks: + - id: markdownlint + name: Markdownlint + files: \.(md|mdown|markdown)$ + args: [ + "--disable=MD013", # line-length + "--disable=MD033", # no-inline-html + ] + - repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + name: codespell + description: Checks for common misspellings in text files + - repo: 
https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.2 + hooks: + - id: ruff + types_or: [python, pyi, jupyter] + args: [--fix] + - id: ruff-format + types_or: [python, pyi, jupyter] diff --git a/build_commands.txt b/build_commands.txt index 81ce80d..7540164 100644 --- a/build_commands.txt +++ b/build_commands.txt @@ -1,5 +1,5 @@ # Commit to github -git status +git status git add -A git commit -m "FUBAR" git push diff --git a/docs/novel_context.md b/docs/novel_context.md index c441ed9..b7ad988 100755 --- a/docs/novel_context.md +++ b/docs/novel_context.md @@ -4,10 +4,10 @@ The European Commission developed a globally consistent, people-centric definition of urban areas. The basic approach is to apply a threshold to population grids on both the minimum population density, and then on the minimum total population of the resulting settlements. While the team at the EC continues to advance and iterate on their methodology, we rely on the original definitions of urban they produced: -| Urban area | Min Pop Density | Min Settlement Pop | +| Urban area | Min Pop Density | Min Settlement Pop | | --- | --- | --- | -| Urban areas | 300 people/km2 | 5000 people | -| High density urban areas | 1500 people/km2 | 50000 people | +| Urban areas | 300 people/km2 | 5000 people | +| High density urban areas | 1500 people/km2 | 50000 people | ## [Bellefon (2021)](https://www.sciencedirect.com/science/article/pii/S0094119019301032) @@ -21,6 +21,6 @@ This method eschews the absolute density thresholds of the EC methodology and in | Urban area | Definition | | --- | --- | -| Urban areas | contiguous pixels for which the density is above the 95th percentile of the counterfactual | +| Urban areas | contiguous pixels for which the density is above the 95th percentile of the counterfactual | | Cores | contiguous pixels within urban areas that are above the 95th percentile of the counterfactual within the urban core | -| Cities | urban areas that have a core | +| Cities | urban areas that have a core | diff --git a/docs/urbanization_comparison.md b/docs/urbanization_comparison.md index b5d18ff..1a6213d 100755 --- a/docs/urbanization_comparison.md +++ b/docs/urbanization_comparison.md @@ -4,18 +4,18 @@ Map comparison is a complicated process, as there are many tools and approaches. ## Degree of Urbanization -| Urban area | Min Pop Density | Min Settlement Pop | +| Urban area | Min Pop Density | Min Settlement Pop | | --- | --- | --- | -| Urban areas | 300 people/km2 | 5000 people | -| High density urban areas | 1500 people/km2 | 50000 people | +| Urban areas | 300 people/km2 | 5000 people | +| High density urban areas | 1500 people/km2 | 50000 people | ## Dartboard | Urban area | Definition | | --- | --- | -| Urban areas | contiguous pixels for which the density is above the 95th percentile of the counterfactual | +| Urban areas | contiguous pixels for which the density is above the 95th percentile of the counterfactual | | Cores | contiguous pixels within urban areas that are above the 95th percentile of the counterfactual within the urban core | -| Cities | urban areas that have a core | +| Cities | urban areas that have a core | Any attempt to compare these methods has to start with a question of what do we compare: the DoU method produces two layers, and the DB method produces three. After investigating the data it is clear that the DoU urban area is equivalent to the DB urban areas. However, the comparison of the DoU high density to the DB could be to either the Cores or the cities. 
In the figures below you can see the comparison to both, however, let's look at the nature of the comparison: diff --git a/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb b/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb index 7c1f5a4..36cf301 100755 --- a/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb +++ b/notebooks/Implementations/JUP_SURGP_GLO_B_D__LEI_Evaluation.ipynb @@ -28,22 +28,21 @@ } ], "source": [ - "import os, sys, logging\n", + "import os\n", + "import sys\n", "\n", - "import geojson, rasterio\n", + "import rasterio\n", "import rasterio.features\n", "\n", "import pandas as pd\n", "import numpy as np\n", "\n", - "from shapely.geometry import shape, GeometryCollection\n", - "from shapely.wkt import loads\n", "from matplotlib import pyplot\n", - "from rasterio.plot import show, show_hist\n", + "from rasterio.plot import show\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../\")\n", - "from src.LEI import *\n" + "from src.LEI import *" ] }, { @@ -54,16 +53,25 @@ }, "outputs": [], "source": [ - "LEI_folder = '/home/wb411133/data/Projects/LEI'\n", + "LEI_folder = \"/home/wb411133/data/Projects/LEI\"\n", "results = {}\n", "GHSL_files = []\n", "for root, dirs, files in os.walk(LEI_folder):\n", " if os.path.exists(os.path.join(root, \"GHSL.tif\")):\n", " GHSL_files.append(os.path.join(root, \"GHSL.tif\"))\n", " try:\n", - " results[os.path.basename(root)] = [len(files), os.stat(os.path.join(root, \"GHSL.tif\")).st_size]\n", + " results[os.path.basename(root)] = [\n", + " len(files),\n", + " os.stat(os.path.join(root, \"GHSL.tif\")).st_size,\n", + " ]\n", " if len(files) != 6:\n", - " print(\"%s - %s\" % (os.path.basename(root), os.stat(os.path.join(root, \"GHSL.tif\")).st_size))\n", + " print(\n", + " \"%s - %s\"\n", + " % (\n", + " os.path.basename(root),\n", + " os.stat(os.path.join(root, \"GHSL.tif\")).st_size,\n", + " )\n", + " )\n", " except:\n", " pass" ] @@ -82,13 +90,13 @@ "metadata": {}, "outputs": [], "source": [ - "root = '/home/wb411133/data/Projects/LEI/634/'\n", + "root = \"/home/wb411133/data/Projects/LEI/634/\"\n", "inputGHSL = os.path.join(root, \"GHSL.tif\")\n", "inRaster = rasterio.open(inputGHSL)\n", "inR = inRaster.read()\n", "\n", - "newR = (inR == 3).astype('int')\n", - "oldR = (np.isin(inR, [4,5,6])).astype('int')" + "newR = (inR == 3).astype(\"int\")\n", + "oldR = (np.isin(inR, [4, 5, 6])).astype(\"int\")" ] }, { @@ -97,9 +105,9 @@ "metadata": {}, "outputs": [], "source": [ - "fig, (axr, axg) = pyplot.subplots(1, 2, figsize=(20,20))\n", - "show(oldR, ax=axr, title='OLD')\n", - "show(newR, ax=axg, title='NEW')" + "fig, (axr, axg) = pyplot.subplots(1, 2, figsize=(20, 20))\n", + "show(oldR, ax=axr, title=\"OLD\")\n", + "show(newR, ax=axg, title=\"NEW\")" ] }, { @@ -108,11 +116,11 @@ "metadata": {}, "outputs": [], "source": [ - "#write out raster to file\n", + "# write out raster to file\n", "outProperties = inRaster.profile\n", - "outRaster = outRaster.astype('int32')\n", - "outProperties['dtype'] = 'int32'\n", - "with rasterio.open(inputGHSL.replace(\".tif\", \"_LEI.tif\"), 'w', **outProperties) as out:\n", + "outRaster = outRaster.astype(\"int32\")\n", + "outProperties[\"dtype\"] = \"int32\"\n", + "with rasterio.open(inputGHSL.replace(\".tif\", \"_LEI.tif\"), \"w\", **outProperties) as out:\n", " out.write(outRaster)" ] }, @@ -133,12 +141,12 @@ "outputs": [], "source": [ "for ghsl_file in GHSL_files:\n", - " print(f'{ghsl_file}')\n", + " 
print(f\"{ghsl_file}\")\n", " out_file = ghsl_file.replace(\".tif\", \"new_LEI_90_00.csv\")\n", " if not os.path.exists(out_file):\n", - " lei = calculate_LEI(ghsl_file, old_list = [5,6], new_list=[4])\n", - " xx = pd.DataFrame(lei, columns=['geometry', 'old', 'total'])\n", - " xx['LEI'] = xx['old'] / xx['total']\n", + " lei = calculate_LEI(ghsl_file, old_list=[5, 6], new_list=[4])\n", + " xx = pd.DataFrame(lei, columns=[\"geometry\", \"old\", \"total\"])\n", + " xx[\"LEI\"] = xx[\"old\"] / xx[\"total\"]\n", " xx.to_csv(out_file)" ] }, @@ -149,12 +157,12 @@ "outputs": [], "source": [ "# Process LEI results\n", - "base_folder = '/home/wb411133/data/Projects/LEI'\n", + "base_folder = \"/home/wb411133/data/Projects/LEI\"\n", "all_results_files = []\n", "for root, folders, files in os.walk(base_folder):\n", " for f in files:\n", " if \"GHSLnew_LEI_90_00\" in f:\n", - " all_results_files.append(os.path.join(root, f))\n" + " all_results_files.append(os.path.join(root, f))" ] }, { @@ -178,7 +186,7 @@ "source": [ "all_results = pd.DataFrame(summarized_results).transpose()\n", "# Old test to determine which files were not processed correctly\n", - "#bas_res = all_results[all_results['Expansion'] == 123282000.0].index\n", + "# bas_res = all_results[all_results['Expansion'] == 123282000.0].index\n", "all_results.head()" ] }, @@ -225,8 +233,8 @@ "outputs": [], "source": [ "xx = pd.DataFrame(all_res).head().transpose()\n", - "xx.columns = ['built75', 'built90', 'built00', 'built14']\n", - "#xx[xx.index.isin(['1'])]\n", + "xx.columns = [\"built75\", \"built90\", \"built00\", \"built14\"]\n", + "# xx[xx.index.isin(['1'])]\n", "xx.head()" ] }, @@ -254,17 +262,23 @@ "outputs": [], "source": [ "csv_files = [x for x in os.listdir(LEI_folder) if x[-4:] == \".csv\"]\n", - "lei0014 = pd.read_csv(os.path.join(LEI_folder, 'Summarized_LEI_Results.csv'),index_col=0)\n", + "lei0014 = pd.read_csv(\n", + " os.path.join(LEI_folder, \"Summarized_LEI_Results.csv\"), index_col=0\n", + ")\n", "lei0014.columns = [\"%s_0014\" % x for x in lei0014.columns]\n", "\n", - "lei9014 = pd.read_csv(os.path.join(LEI_folder, 'Summarized_LEI_Results_90_0014.csv'),index_col=0)\n", + "lei9014 = pd.read_csv(\n", + " os.path.join(LEI_folder, \"Summarized_LEI_Results_90_0014.csv\"), index_col=0\n", + ")\n", "lei9014.columns = [\"%s_9014\" % x for x in lei9014.columns]\n", "\n", - "lei9000 = pd.read_csv(os.path.join(LEI_folder, 'Summarized_LEI_Results_90_00.csv'),index_col=0)\n", + "lei9000 = pd.read_csv(\n", + " os.path.join(LEI_folder, \"Summarized_LEI_Results_90_00.csv\"), index_col=0\n", + ")\n", "lei9000.columns = [\"%s_9000\" % x for x in lei9000.columns]\n", "\n", - "built_area = pd.read_csv(\"/home/wb411133/temp/LEI_cities_built.csv\",index_col=0)\n", - "built_area.columns = [\"%s_BUILT\" % x for x in built_area.columns]\n" + "built_area = pd.read_csv(\"/home/wb411133/temp/LEI_cities_built.csv\", index_col=0)\n", + "built_area.columns = [\"%s_BUILT\" % x for x in built_area.columns]" ] }, { @@ -282,7 +296,7 @@ "metadata": {}, "outputs": [], "source": [ - "combined_results.to_csv(os.path.join(LEI_folder, 'LEI_COMBINED.csv'))" + "combined_results.to_csv(os.path.join(LEI_folder, \"LEI_COMBINED.csv\"))" ] }, { @@ -291,7 +305,12 @@ "metadata": {}, "outputs": [], "source": [ - "combined_results['Expansion_0014'] + combined_results['Infill_0014'] + combined_results['Leapfrog_0014'] - (combined_results['built14_BUILT'] - combined_results['built00_BUILT'])" + "(\n", + " combined_results[\"Expansion_0014\"]\n", + " + 
combined_results[\"Infill_0014\"]\n", + " + combined_results[\"Leapfrog_0014\"]\n", + " - (combined_results[\"built14_BUILT\"] - combined_results[\"built00_BUILT\"])\n", + ")" ] }, { @@ -319,7 +338,7 @@ "source": [ "in_ghsl = \"/home/wb411133/data/Projects/LEI/1/GHSL.tif\"\n", "inR = rasterio.open(in_ghsl)\n", - "inD = inR.read()\n" + "inD = inR.read()" ] }, { @@ -357,8 +376,8 @@ "metadata": {}, "outputs": [], "source": [ - "lei_2000_2014 = calculate_LEI(in_ghsl, old_list = [4,5,6], new_list=[3])\n", - "lei_1990_2000 = calculate_LEI(in_ghsl, old_list = [5,6], new_list=[4])" + "lei_2000_2014 = calculate_LEI(in_ghsl, old_list=[4, 5, 6], new_list=[3])\n", + "lei_1990_2000 = calculate_LEI(in_ghsl, old_list=[5, 6], new_list=[4])" ] }, { @@ -367,8 +386,8 @@ "metadata": {}, "outputs": [], "source": [ - "xx = pd.DataFrame(lei, columns=['geometry', 'old', 'total'])\n", - "xx['LEI'] = xx['old'] / xx['total'] " + "xx = pd.DataFrame(lei, columns=[\"geometry\", \"old\", \"total\"])\n", + "xx[\"LEI\"] = xx[\"old\"] / xx[\"total\"]" ] }, { diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb index 9fcae36..edbc094 100644 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/ECA_Urban_Extents.ipynb @@ -15,8 +15,12 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, importlib, json, boto3, multiprocessing\n", - "import rasterio, geojson\n", + "import sys\n", + "import os\n", + "import json\n", + "import boto3\n", + "import multiprocessing\n", + "import rasterio\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", @@ -24,7 +28,6 @@ "from botocore.config import Config\n", "from botocore import UNSIGNED\n", "from shapely.geometry import Point\n", - "from geopy.geocoders import Nominatim, GeoNames\n", "\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "import GOSTRocks.rasterMisc as rMisc\n", @@ -32,20 +35,17 @@ "from GOSTRocks.misc import tPrint\n", "\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", - "import GOST_Urban.urban_helper as urban_helper\n", "import GOST_Urban.country_helper as country_helper\n", - "import GOST_Urban.urban_helper as clippy\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", "# read in local important parameters\n", "local_json = \"/home/wb411133/Code/urbanParameters.json\"\n", - "with open(local_json, 'r') as inJ:\n", + "with open(local_json, \"r\") as inJ:\n", " important_vars = json.load(inJ)\n", - " \n", - "s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))" + "\n", + "s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))" ] }, { @@ -56,14 +56,18 @@ "source": [ "global_population_ghs_file = \"/home/public/Data/GLOBAL/Population/GHS/2022_1km/GHS_POP_E2020_GLOBE_R2022A_54009_1000_V1_0.tif\"\n", "global_admin = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", - "global_ghsl_folder = '/home/public/Data/GLOBAL/GHSL/v2022/'\n", + "global_ghsl_folder = \"/home/public/Data/GLOBAL/GHSL/v2022/\"\n", "\n", "output_folder = \"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents\"\n", "if not os.path.exists(output_folder):\n", " os.makedirs(output_folder)\n", - " \n", + "\n", "ntl_files = ntl.aws_search_ntl()\n", - "ghsl_files = [os.path.join(global_ghsl_folder, x) for x in os.listdir(global_ghsl_folder) if x.endswith(\".tif\")]\n", + "ghsl_files 
= [\n", + " os.path.join(global_ghsl_folder, x)\n", + " for x in os.listdir(global_ghsl_folder)\n", + " if x.endswith(\".tif\")\n", + "]\n", "ghsl_files.sort()" ] }, @@ -76,7 +80,10 @@ "inR = rasterio.open(global_population_ghs_file)\n", "\n", "allAdmin = gpd.read_file(global_admin)\n", - "inAdmin = allAdmin.loc[(allAdmin['Region'] == 'Europe & Central Asia') | (allAdmin['ISO3'].isin(['RUS','ROU','HRV']))]\n", + "inAdmin = allAdmin.loc[\n", + " (allAdmin[\"Region\"] == \"Europe & Central Asia\")\n", + " | (allAdmin[\"ISO3\"].isin([\"RUS\", \"ROU\", \"HRV\"]))\n", + "]\n", "inAdmin = inAdmin.to_crs(inR.crs)" ] }, @@ -127,10 +134,10 @@ "source": [ "all_args = []\n", "for idx, row in inAdmin.iterrows():\n", - " iso3 = row['ISO3']\n", - " tPrint(f'*********STARTING {iso3}')\n", - " sel_country = gpd.GeoDataFrame(inAdmin.loc[inAdmin['ISO3'] == iso3], crs=inR.crs)\n", - " sel_country['geometry'] = sel_country['geometry'].apply(lambda x:x.buffer(0))\n", + " iso3 = row[\"ISO3\"]\n", + " tPrint(f\"*********STARTING {iso3}\")\n", + " sel_country = gpd.GeoDataFrame(inAdmin.loc[inAdmin[\"ISO3\"] == iso3], crs=inR.crs)\n", + " sel_country[\"geometry\"] = sel_country[\"geometry\"].apply(lambda x: x.buffer(0))\n", " cur_folder = os.path.join(output_folder, iso3)\n", " if not os.path.exists(cur_folder):\n", " os.makedirs(cur_folder)\n", @@ -138,9 +145,10 @@ " if not os.path.exists(pop_file):\n", " rMisc.clipRaster(inR, sel_country, pop_file)\n", " inP = rasterio.open(pop_file)\n", - " if iso3 == 'HRV':\n", - " all_args.append([iso3, sel_country, cur_folder, pop_file, ntl_files, ghsl_files])\n", - " " + " if iso3 == \"HRV\":\n", + " all_args.append(\n", + " [iso3, sel_country, cur_folder, pop_file, ntl_files, ghsl_files]\n", + " )" ] }, { @@ -151,9 +159,9 @@ "source": [ "def run_extractor(iso3, sel_country, cur_folder, inP, ntl_files, ghsl_files):\n", " extractor = country_helper.urban_country(iso3, sel_country, cur_folder, inP)\n", - " #extractor.delete_urban_data()\n", + " # extractor.delete_urban_data()\n", " extractor.calculate_urban_extents()\n", - " extractor.summarize_ntl(ntl_files = ntl_files)\n", + " extractor.summarize_ntl(ntl_files=ntl_files)\n", " extractor.summarize_ghsl(ghsl_files, clip_raster=True, binary_calc=True)" ] }, @@ -267,7 +275,6 @@ } ], "source": [ - "\n", "with multiprocessing.Pool(len(all_args)) as pool:\n", " pool.starmap(run_extractor, all_args)" ] @@ -306,9 +313,9 @@ "for cFile in center_files:\n", " curD = gpd.read_file(cFile)\n", " iso3 = os.path.basename(cFile)[:3]\n", - " curD['ISO3'] = iso3\n", + " curD[\"ISO3\"] = iso3\n", " all_res.append(curD)\n", - " \n", + "\n", "final_center = pd.concat(all_res)" ] }, @@ -323,9 +330,9 @@ "for cFile in hd_files:\n", " curD = gpd.read_file(cFile)\n", " iso3 = os.path.basename(cFile)[:3]\n", - " curD['ISO3'] = iso3\n", + " curD[\"ISO3\"] = iso3\n", " all_res.append(curD)\n", - " \n", + "\n", "hd_center = pd.concat(all_res)" ] }, @@ -472,10 +479,15 @@ ], "source": [ "# match cities to centers\n", - "inCities = pd.read_csv(\"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/CITIES/worldcities.csv\")\n", - "geoms = [Point(x) for x in zip(inCities['lng'], inCities['lat'])]\n", + "inCities = pd.read_csv(\n", + " \"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/CITIES/worldcities.csv\"\n", + ")\n", + "geoms = [Point(x) for x in zip(inCities[\"lng\"], inCities[\"lat\"])]\n", "inCities = gpd.GeoDataFrame(inCities, geometry=geoms, crs=4326)\n", - 
"inCities.to_file(\"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/CITIES/worldcities.geojson\", driver='GeoJSON')\n", + "inCities.to_file(\n", + " \"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/CITIES/worldcities.geojson\",\n", + " driver=\"GeoJSON\",\n", + ")\n", "inCities.head()" ] }, @@ -618,16 +630,16 @@ } ], "source": [ - "final_center['wCity'] = ''\n", + "final_center[\"wCity\"] = \"\"\n", "final_center.reset_index(inplace=True)\n", "\n", "for idx, row in final_center.iterrows():\n", " try:\n", - " sel_city = inCities.loc[inCities.intersects(row['geometry'])]\n", + " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"])]\n", " except:\n", - " sel_city = inCities.loc[inCities.intersects(row['geometry'].buffer(0))]\n", + " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"].buffer(0))]\n", " if sel_city.shape[0] > 0:\n", - " final_center.loc[idx, 'wCity'] = sel_city['city'].iloc[0]" + " final_center.loc[idx, \"wCity\"] = sel_city[\"city\"].iloc[0]" ] }, { @@ -636,7 +648,9 @@ "metadata": {}, "outputs": [], "source": [ - "final_center.to_file(os.path.join(output_folder, \"all_urban_centers.geojson\"), driver='GeoJSON')" + "final_center.to_file(\n", + " os.path.join(output_folder, \"all_urban_centers.geojson\"), driver=\"GeoJSON\"\n", + ")" ] }, { @@ -645,15 +659,15 @@ "metadata": {}, "outputs": [], "source": [ - "hd_center['wCity'] = ''\n", + "hd_center[\"wCity\"] = \"\"\n", "hd_center.reset_index(inplace=True)\n", "for idx, row in hd_center.iterrows():\n", " try:\n", - " sel_city = inCities.loc[inCities.intersects(row['geometry'])]\n", + " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"])]\n", " except:\n", - " sel_city = inCities.loc[inCities.intersects(row['geometry'].buffer(0))]\n", + " sel_city = inCities.loc[inCities.intersects(row[\"geometry\"].buffer(0))]\n", " if sel_city.shape[0] > 0:\n", - " hd_center.loc[idx, 'wCity'] = sel_city['city'].iloc[0]\n", + " hd_center.loc[idx, \"wCity\"] = sel_city[\"city\"].iloc[0]\n", " break" ] }, @@ -914,7 +928,9 @@ "metadata": {}, "outputs": [], "source": [ - "hd_center.to_file(os.path.join(output_folder, \"all_hd_urban_centers.geojson\"), driver='GeoJSON')" + "hd_center.to_file(\n", + " os.path.join(output_folder, \"all_hd_urban_centers.geojson\"), driver=\"GeoJSON\"\n", + ")" ] }, { @@ -1185,7 +1201,7 @@ " for f in files:\n", " if f.endswith(\"100_V1_0.tif\"):\n", " bad_files.append(os.path.join(root, f))\n", - " \n", + "\n", "bad_files" ] }, diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/KAZ_Urbanization_Review.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/KAZ_Urbanization_Review.ipynb index dbe7d3c..7513e0f 100644 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/KAZ_Urbanization_Review.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/KAZ_Urbanization_Review.ipynb @@ -35,8 +35,13 @@ } ], "source": [ - "import sys, os, importlib, json, boto3\n", - "import rasterio, geojson, folium\n", + "import sys\n", + "import os\n", + "import importlib\n", + "import json\n", + "import boto3\n", + "import rasterio\n", + "import folium\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", @@ -45,14 +50,12 @@ "\n", "from botocore.config import Config\n", "from botocore import UNSIGNED\n", - "from shapely.geometry import Point, mapping, LineString\n", - "from geopy.geocoders import Nominatim, GeoNames\n", + "from shapely.geometry import Point, mapping\n", "from scipy import ndimage\n", - "from tqdm import tqdm\n", "\n", 
"sys.path.insert(0, \"/home/wb411133/Code/GOSTNets_Raster/src\")\n", "import GOSTNetsRaster.market_access as ma\n", - "#import GOSTNetsRaster.conversion_tables as speed_tables\n", + "# import GOSTNetsRaster.conversion_tables as speed_tables\n", "\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "import GOSTRocks.rasterMisc as rMisc\n", @@ -68,10 +71,10 @@ "\n", "# read in local important parameters\n", "local_json = \"/home/wb411133/Code/urbanParameters.json\"\n", - "with open(local_json, 'r') as inJ:\n", + "with open(local_json, \"r\") as inJ:\n", " important_vars = json.load(inJ)\n", - " \n", - "s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))" + "\n", + "s3 = boto3.client(\"s3\", config=Config(signature_version=UNSIGNED))" ] }, { @@ -81,64 +84,70 @@ "outputs": [], "source": [ "# Define input data\n", - "iso3 = 'KAZ'\n", - "global_population_file = \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + "iso3 = \"KAZ\"\n", + "global_population_file = (\n", + " \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + ")\n", "global_population_ghs_file = \"/home/public/Data/GLOBAL/Population/GHS/2022_1km/GHS_POP_E2020_GLOBE_R2022A_54009_1000_V1_0.tif\"\n", - "global_ghsl_folder = '/home/public/Data/GLOBAL/GHSL/v2022/'\n", - "global_friction_surface = '/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff'\n", + "global_ghsl_folder = \"/home/public/Data/GLOBAL/GHSL/v2022/\"\n", + "global_friction_surface = \"/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff\"\n", "admin_bounds = \"/home/public/Data/COUNTRY/KAZ/ADMIN/kaz_districts.shp\"\n", "\n", "output_folder = \"/home/wb411133/projects/KAZ_SCADR_Urbanization\"\n", "output_data = os.path.join(output_folder, \"DATA\")\n", "worldpop_urban = os.path.join(output_data, \"WorldPop_Urban\")\n", - "ghspop_urban = os.path.join(output_data, \"GHS_Urban\")\n", - "ghsl_folder = os.path.join(output_data, \"GHSL\")\n", - "ma_folder = os.path.join(output_data, \"MARKET_ACCESS\")\n", + "ghspop_urban = os.path.join(output_data, \"GHS_Urban\")\n", + "ghsl_folder = os.path.join(output_data, \"GHSL\")\n", + "ma_folder = os.path.join(output_data, \"MARKET_ACCESS\")\n", "\n", "if not os.path.exists(ma_folder):\n", " os.makedirs(ma_folder)\n", - " \n", + "\n", "# Define output files\n", - "local_population = os.path.join(output_data, f\"{iso3}_ppp_2020_1km_aggregated.tif\")\n", - "local_ghs_population = os.path.join(output_data, f\"{iso3}_ghs_pop_2020.tif\")\n", - "local_friction = os.path.join(output_data, f\"{iso3}_2020_motorized_travel.tif\")\n", - "urban_tt_result = os.path.join(output_data, f\"urban_travel_time.csv\")\n", + "local_population = os.path.join(output_data, f\"{iso3}_ppp_2020_1km_aggregated.tif\")\n", + "local_ghs_population = os.path.join(output_data, f\"{iso3}_ghs_pop_2020.tif\")\n", + "local_friction = os.path.join(output_data, f\"{iso3}_2020_motorized_travel.tif\")\n", + "urban_tt_result = os.path.join(output_data, \"urban_travel_time.csv\")\n", "\n", - "'''\n", + "\"\"\"\n", "urban_extents_file = os.path.join(worldpop_urban, f\"{iso3}_urban_extents.geojson\")\n", "urban_extents_raster_file = os.path.join(worldpop_urban, f\"{iso3}_urban_extents.tif\")\n", "urban_extents_hd_file = os.path.join(worldpop_urban, f\"{iso3}_urban_extents_hd.geojson\")\n", "urban_extents_hd_raster_file = os.path.join(worldpop_urban, f\"{iso3}_urban_extents_hd.tif\")\n", "admin_urban_summary = 
os.path.join(worldpop_urban, \"adm2_urban_summary.shp\")\n", "urban_admin_summary = os.path.join(worldpop_urban, f\"{iso3}_ADM2_urban_summary.csv\")\n", - "'''\n", - "urban_extents_file = os.path.join(ghspop_urban, f\"{iso3}_urban_extents.geojson\")\n", + "\"\"\"\n", + "urban_extents_file = os.path.join(ghspop_urban, f\"{iso3}_urban_extents.geojson\")\n", "urban_extents_raster_file = os.path.join(ghspop_urban, f\"{iso3}_urban_extents.tif\")\n", - "urban_extents_hd_file = os.path.join(ghspop_urban, f\"{iso3}_urban_extents_hd.geojson\")\n", - "urban_extents_hd_raster_file = os.path.join(ghspop_urban, f\"{iso3}_urban_extents_hd.tif\")\n", - "admin_urban_summary = os.path.join(ghspop_urban, \"adm2_urban_summary.shp\")\n", - "urban_admin_summary = os.path.join(ghspop_urban, f\"{iso3}_ADM2_urban_summary.csv\")\n", + "urban_extents_hd_file = os.path.join(ghspop_urban, f\"{iso3}_urban_extents_hd.geojson\")\n", + "urban_extents_hd_raster_file = os.path.join(\n", + " ghspop_urban, f\"{iso3}_urban_extents_hd.tif\"\n", + ")\n", + "admin_urban_summary = os.path.join(ghspop_urban, \"adm2_urban_summary.shp\")\n", + "urban_admin_summary = os.path.join(ghspop_urban, f\"{iso3}_ADM2_urban_summary.csv\")\n", "\n", - "urban_viirs_summary = os.path.join(output_folder, f\"{iso3}_urban_viirs_summary.csv\")\n", - "urban_hd_viirs_summary = os.path.join(output_folder, f\"{iso3}_urban_hd_viirs_summary.csv\")\n", - "admin_viirs_summary = os.path.join(output_folder, f\"{iso3}_admin_viirs_summary.csv\")\n", + "urban_viirs_summary = os.path.join(output_folder, f\"{iso3}_urban_viirs_summary.csv\")\n", + "urban_hd_viirs_summary = os.path.join(\n", + " output_folder, f\"{iso3}_urban_hd_viirs_summary.csv\"\n", + ")\n", + "admin_viirs_summary = os.path.join(output_folder, f\"{iso3}_admin_viirs_summary.csv\")\n", "\n", - "urban_ghsl_summary = os.path.join(output_folder, f\"{iso3}_urban_ghsl_summary.csv\")\n", - "urban_hd_ghsl_summary = os.path.join(output_folder, f\"{iso3}_urban_hd_ghsl_summary.csv\")\n", - "admin_ghsl_summary = os.path.join(output_folder, f\"{iso3}_admin_ghsl_summary.csv\")\n", + "urban_ghsl_summary = os.path.join(output_folder, f\"{iso3}_urban_ghsl_summary.csv\")\n", + "urban_hd_ghsl_summary = os.path.join(output_folder, f\"{iso3}_urban_hd_ghsl_summary.csv\")\n", + "admin_ghsl_summary = os.path.join(output_folder, f\"{iso3}_admin_ghsl_summary.csv\")\n", "\n", "admin_final = os.path.join(output_folder, \"admin_summarized.shp\")\n", "urban_final = os.path.join(output_folder, \"urban_summarized.shp\")\n", "urban_hd_final = os.path.join(output_folder, \"urban_hd_summarized.shp\")\n", "focal_cities = os.path.join(output_folder, \"FOCAL_AOIs.shp\")\n", "\n", - "#Define market access output\n", - "all_routes_file = os.path.join(ma_folder, \"all_routes.shp\")\n", - "time_matrix = os.path.join(ma_folder, \"all_routes_time_minutes.csv\")\n", - "dist_matrix = os.path.join(ma_folder, \"all_routes_distance_km.csv\")\n", + "# Define market access output\n", + "all_routes_file = os.path.join(ma_folder, \"all_routes.shp\")\n", + "time_matrix = os.path.join(ma_folder, \"all_routes_time_minutes.csv\")\n", + "dist_matrix = os.path.join(ma_folder, \"all_routes_distance_km.csv\")\n", "dist_all_routes_file = os.path.join(ma_folder, \"all_routes.shp\")\n", - "dist_time_matrix = os.path.join(ma_folder, \"district_routes_time_minutes.csv\")\n", - "dist_dist_matrix = os.path.join(ma_folder, \"district_routes_distance_km.csv\")" + "dist_time_matrix = os.path.join(ma_folder, \"district_routes_time_minutes.csv\")\n", + "dist_dist_matrix 
= os.path.join(ma_folder, \"district_routes_distance_km.csv\")" ] }, { @@ -152,11 +161,11 @@ "if not os.path.exists(local_population):\n", " globalP = rasterio.open(global_population_file)\n", " rMisc.clipRaster(globalP, inAdmin, local_population)\n", - " \n", + "\n", "if not os.path.exists(local_ghs_population):\n", " globalP = rasterio.open(global_population_ghs_file)\n", " rMisc.clipRaster(globalP, inAdmin, local_ghs_population)\n", - " \n", + "\n", "inP = rasterio.open(local_population)\n", "inP_ghs = rasterio.open(local_ghs_population)" ] @@ -181,12 +190,22 @@ "# 1. Create urban extents for WorldPop\n", "if not os.path.exists(urban_extents_file):\n", " urban_calculator = urban.urbanGriddedPop(inP)\n", - " urban_extents = urban_calculator.calculateUrban(densVal=300, totalPopThresh=5000, \n", - " smooth=False, queen=False,\n", - " verbose=True, raster=urban_extents_raster_file)\n", - " urban_extents_hd = urban_calculator.calculateUrban(densVal=1500, totalPopThresh=50000, \n", - " smooth=True, queen=False,\n", - " verbose=True, raster=urban_extents_hd_raster_file)\n", + " urban_extents = urban_calculator.calculateUrban(\n", + " densVal=300,\n", + " totalPopThresh=5000,\n", + " smooth=False,\n", + " queen=False,\n", + " verbose=True,\n", + " raster=urban_extents_raster_file,\n", + " )\n", + " urban_extents_hd = urban_calculator.calculateUrban(\n", + " densVal=1500,\n", + " totalPopThresh=50000,\n", + " smooth=True,\n", + " queen=False,\n", + " verbose=True,\n", + " raster=urban_extents_hd_raster_file,\n", + " )\n", " # Name urban extents\n", " urban_extents = urban.geocode_cities(urban_extents)\n", " urban_extents_hd = urban.geocode_cities(urban_extents_hd)\n", @@ -208,17 +227,27 @@ "# 1b. Create urban extents for GHS_Pop\n", "if not os.path.exists(urban_extents_file):\n", " urban_calculator = urban.urbanGriddedPop(inP_ghs)\n", - " urban_extents = urban_calculator.calculateUrban(densVal=300, totalPopThresh=5000, \n", - " smooth=False, queen=False,\n", - " verbose=True, raster=urban_extents_raster_file)\n", + " urban_extents = urban_calculator.calculateUrban(\n", + " densVal=300,\n", + " totalPopThresh=5000,\n", + " smooth=False,\n", + " queen=False,\n", + " verbose=True,\n", + " raster=urban_extents_raster_file,\n", + " )\n", " if urban_extents.crs.to_epsg() != 4326:\n", " urban_extents = urban_extents.to_crs(4326)\n", " urban_extents = urban.geocode_cities(urban_extents)\n", " urban_extents.to_file(urban_extents_file, driver=\"GeoJSON\")\n", - "if not os.path.exists(urban_extents_hd_file): \n", - " urban_extents_hd = urban_calculator.calculateUrban(densVal=1500, totalPopThresh=50000, \n", - " smooth=True, queen=False,\n", - " verbose=True, raster=urban_extents_hd_raster_file)\n", + "if not os.path.exists(urban_extents_hd_file):\n", + " urban_extents_hd = urban_calculator.calculateUrban(\n", + " densVal=1500,\n", + " totalPopThresh=50000,\n", + " smooth=True,\n", + " queen=False,\n", + " verbose=True,\n", + " raster=urban_extents_hd_raster_file,\n", + " )\n", " if urban_extents_hd.crs.to_epsg() != 4326:\n", " urban_extents_hd = urban_extents_hd.to_crs(4326)\n", " # Name urban extents\n", @@ -237,12 +266,19 @@ "source": [ "# 2. 
Calculate urban population in admin areas\n", "if not os.path.exists(urban_admin_summary):\n", - " pop_worker = clippy.summarize_population(local_ghs_population, inAdmin, urban_extents_raster_file, urban_extents_hd_raster_file)\n", + " pop_worker = clippy.summarize_population(\n", + " local_ghs_population,\n", + " inAdmin,\n", + " urban_extents_raster_file,\n", + " urban_extents_hd_raster_file,\n", + " )\n", " summarized_urban = pop_worker.calculate_zonal()\n", - " urban_res = summarized_urban.loc[:,[x for x in summarized_urban.columns if \"SUM\" in x]]\n", - " urban_res.columns = ['TOTAL_POP', \"URBAN_POP\", \"URBAN_HD_POP\"]\n", - " urban_res['district_c'] = inAdmin['district_c']\n", - " urban_res['district'] = inAdmin['district']\n", + " urban_res = summarized_urban.loc[\n", + " :, [x for x in summarized_urban.columns if \"SUM\" in x]\n", + " ]\n", + " urban_res.columns = [\"TOTAL_POP\", \"URBAN_POP\", \"URBAN_HD_POP\"]\n", + " urban_res[\"district_c\"] = inAdmin[\"district_c\"]\n", + " urban_res[\"district\"] = inAdmin[\"district\"]\n", " urban_res.to_csv(urban_admin_summary)" ] }, @@ -263,34 +299,33 @@ "\n", "urbanD = gpd.read_file(urban_extents_file)\n", "urbanHD = gpd.read_file(urban_extents_hd_file)\n", - " \n", + "\n", "for ntl_file in ntl_files:\n", " name = ntl_file.split(\"/\")[-1].split(\"_\")[2][:8]\n", - " inR = rasterio.open(ntl_file) \n", + " inR = rasterio.open(ntl_file)\n", " tPrint(\"Processing %s\" % name)\n", " urban_res_file = os.path.join(viirs_folder, f\"URBAN_{name}.csv\")\n", " urban_hd_res_file = os.path.join(viirs_folder, f\"HD_URBAN_{name}.csv\")\n", " admin_res_file = os.path.join(viirs_folder, f\"ADMIN_{name}.csv\")\n", - " \n", + "\n", " # Urban Summary\n", " if not os.path.exists(urban_res_file):\n", " urban_res = rMisc.zonalStats(urbanD, inR, minVal=0.1)\n", - " col_names = [f'URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " col_names = [f\"URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n", " urban_df = pd.DataFrame(urban_res, columns=col_names)\n", " urban_df.to_csv(urban_res_file)\n", " # HD Urban Summary\n", " if not os.path.exists(urban_hd_res_file):\n", " hd_urban_res = rMisc.zonalStats(urbanHD, inR, minVal=0.1)\n", - " col_names = [f'HD_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " col_names = [f\"HD_URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n", " hd_urban_df = pd.DataFrame(hd_urban_res, columns=col_names)\n", " hd_urban_df.to_csv(urban_hd_res_file)\n", " # admin Summary\n", " if not os.path.exists(admin_res_file):\n", " admin_res = rMisc.zonalStats(inAdmin, inR, minVal=0.1)\n", - " col_names = [f'ADM_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']]\n", + " col_names = [f\"ADM_URBAN_{name}_{x}\" for x in [\"SUM\", \"MIN\", \"MAX\", \"MEAN\"]]\n", " admin_df = pd.DataFrame(admin_res, columns=col_names)\n", - " admin_df.to_csv(admin_res_file)\n", - " " + " admin_df.to_csv(admin_res_file)" ] }, { @@ -303,21 +338,21 @@ "urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"URBAN\")]\n", "for x in urb_files:\n", " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", - " urbanD[x[:-4]] = tempD.iloc[:,0]\n", + " urbanD[x[:-4]] = tempD.iloc[:, 0]\n", "\n", "hd_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"HD_URBAN\")]\n", "for x in hd_urb_files:\n", " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", - " urbanHD[x[:-4]] = tempD.iloc[:,0]\n", - " \n", + " urbanHD[x[:-4]] = tempD.iloc[:, 0]\n", + "\n", 
"admin_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith(\"ADMIN\")]\n", "for x in admin_urb_files:\n", " tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0)\n", - " inAdmin[x[:-4]] = tempD.iloc[:,0]\n", + " inAdmin[x[:-4]] = tempD.iloc[:, 0]\n", "\n", - "urbanD.drop(['geometry'], axis=1).to_csv(urban_viirs_summary)\n", - "urbanHD.drop(['geometry'], axis=1).to_csv(urban_hd_viirs_summary)\n", - "inAdmin.drop(['geometry'], axis=1).to_csv(admin_viirs_summary)" + "urbanD.drop([\"geometry\"], axis=1).to_csv(urban_viirs_summary)\n", + "urbanHD.drop([\"geometry\"], axis=1).to_csv(urban_hd_viirs_summary)\n", + "inAdmin.drop([\"geometry\"], axis=1).to_csv(admin_viirs_summary)" ] }, { @@ -334,17 +369,20 @@ "outputs": [], "source": [ "# List all files in ghsl folder\n", - "ghsl_files = [os.path.join(global_ghsl_folder, x) for x in os.listdir(global_ghsl_folder) if x.endswith(\".tif\")]\n", + "ghsl_files = [\n", + " os.path.join(global_ghsl_folder, x)\n", + " for x in os.listdir(global_ghsl_folder)\n", + " if x.endswith(\".tif\")\n", + "]\n", "\n", "for file_def in [\n", - " #[admin_bounds, admin_ghsl_summary],\n", - " [urban_extents_file, urban_ghsl_summary],\n", - " [urban_extents_file, urban_ghsl_summary],\n", - " ]:\n", - "\n", + " # [admin_bounds, admin_ghsl_summary],\n", + " [urban_extents_file, urban_ghsl_summary],\n", + " [urban_extents_file, urban_ghsl_summary],\n", + "]:\n", " resG = gpd.read_file(file_def[0])\n", "\n", - " for ghsl_file in ghsl_files: \n", + " for ghsl_file in ghsl_files:\n", " date = os.path.basename(ghsl_file).split(\"_\")[3]\n", " inR = rasterio.open(ghsl_file)\n", " if resG.crs != inR.crs:\n", @@ -353,10 +391,10 @@ " if not os.path.exists(local_file):\n", " rMisc.clipRaster(inR, resG, local_file)\n", " res = rMisc.zonalStats(resG, inR, minVal=0)\n", - " res = pd.DataFrame(res, columns=[\"SUM\",\"MIN\",\"MAX\",\"MEAN\"])\n", - " resG[f'ghsl_{date}'] = res['SUM']\n", - " print(date) \n", - " pd.DataFrame(resG.drop(['geometry'], axis=1)).to_csv(file_def[1])" + " res = pd.DataFrame(res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])\n", + " resG[f\"ghsl_{date}\"] = res[\"SUM\"]\n", + " print(date)\n", + " pd.DataFrame(resG.drop([\"geometry\"], axis=1)).to_csv(file_def[1])" ] }, { @@ -386,32 +424,35 @@ "metadata": {}, "outputs": [], "source": [ - "def get_majority_polygon(shp, shp2, pop_layer, area_name='zz_area', zonal_sum_name='zz_sum'):\n", - " ''' Intersect shp(single polygon) with shp2(GeoDataFrame) to determine which row in shp2 has\n", - " the highest zonal sum (ie - population)\n", - " \n", - " Args:\n", - " shp: shapely polygon\n", - " shp2: GeoDataFrame\n", - " pop_layer: rasterio reader\n", - " returns\n", - " shp2 GeoDataFrame with two additional columns: area and zonal_sum\n", - " '''\n", + "def get_majority_polygon(\n", + " shp, shp2, pop_layer, area_name=\"zz_area\", zonal_sum_name=\"zz_sum\"\n", + "):\n", + " \"\"\"Intersect shp(single polygon) with shp2(GeoDataFrame) to determine which row in shp2 has\n", + " the highest zonal sum (ie - population)\n", + "\n", + " Args:\n", + " shp: shapely polygon\n", + " shp2: GeoDataFrame\n", + " pop_layer: rasterio reader\n", + " returns\n", + " shp2 GeoDataFrame with two additional columns: area and zonal_sum\n", + " \"\"\"\n", " temp_shp = shp2.copy()\n", " for idx, row in temp_shp.iterrows():\n", " # Convert geometry in shp2 to the intersection with shp1\n", - " xx = row['geometry'].intersection(shp.buffer(0)).buffer(0)\n", - " temp_shp.loc[[idx], 'geometry'] = 
gpd.GeoDataFrame(geometry=[xx]).geometry.values\n", - "\n", + " xx = row[\"geometry\"].intersection(shp.buffer(0)).buffer(0)\n", + " temp_shp.loc[[idx], \"geometry\"] = gpd.GeoDataFrame(\n", + " geometry=[xx]\n", + " ).geometry.values\n", "\n", " # Run zonal analysis on pop_layer\n", " res = rMisc.zonalStats(temp_shp, pop_layer, reProj=True)\n", - " res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])\n", + " res = pd.DataFrame(res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])\n", + "\n", + " shp2[zonal_sum_name] = res[\"SUM\"].values\n", + " shp2[area_name] = temp_shp[\"geometry\"].apply(lambda x: x.area)\n", "\n", - " shp2[zonal_sum_name] = res['SUM'].values\n", - " shp2[area_name] = temp_shp['geometry'].apply(lambda x: x.area)\n", - " \n", - " return(shp2.sort_values(zonal_sum_name, ascending = False))" + " return shp2.sort_values(zonal_sum_name, ascending=False)" ] }, { @@ -422,33 +463,33 @@ }, "outputs": [], "source": [ - "inUrban['HD_ID'] = ''\n", - "inUrban['Admin1_ID'] = ''\n", - "inUrban['Admin1_Pop'] = 0\n", - "inUrban['Admin2_ID'] = ''\n", - "inUrban['Admin2_Pop'] = 0\n", + "inUrban[\"HD_ID\"] = \"\"\n", + "inUrban[\"Admin1_ID\"] = \"\"\n", + "inUrban[\"Admin1_Pop\"] = 0\n", + "inUrban[\"Admin2_ID\"] = \"\"\n", + "inUrban[\"Admin2_Pop\"] = 0\n", "\n", "\n", "for idx, row in inUrban.iterrows():\n", " tPrint(idx)\n", " # Identify intersecting HD urban areas\n", - " selHD = inHD.loc[inHD.intersects(row['geometry'])]\n", + " selHD = inHD.loc[inHD.intersects(row[\"geometry\"])]\n", " if selHD.shape[0] == 1:\n", - " inUrban.loc[idx, 'HD_ID'] = selHD['ID'].iloc[0]\n", + " inUrban.loc[idx, \"HD_ID\"] = selHD[\"ID\"].iloc[0]\n", " elif selHD.shape[0] > 1:\n", - " selHD = get_majority_polygon(row['geometry'], selHD, inP_ghs)\n", - " inUrban.loc[idx, 'HD_ID'] = selHD['ID'].iloc[0]\n", - " \n", + " selHD = get_majority_polygon(row[\"geometry\"], selHD, inP_ghs)\n", + " inUrban.loc[idx, \"HD_ID\"] = selHD[\"ID\"].iloc[0]\n", + "\n", " # Identify intersecting admin areas\n", - " selAdmin = inAdmin.loc[inAdmin.intersects(row['geometry'])]\n", + " selAdmin = inAdmin.loc[inAdmin.intersects(row[\"geometry\"])]\n", " if selAdmin.shape[0] == 1:\n", - " inUrban.loc[idx, 'Admin1_ID'] = selAdmin['district_c'].iloc[0]\n", + " inUrban.loc[idx, \"Admin1_ID\"] = selAdmin[\"district_c\"].iloc[0]\n", " elif selAdmin.shape[0] > 1:\n", - " selAdmin = get_majority_polygon(row['geometry'], selAdmin, inP_ghs)\n", - " inUrban.loc[idx, 'Admin1_ID'] = selAdmin['district_c'].iloc[0]\n", - " inUrban.loc[idx, 'Admin1_Pop'] = selAdmin['zz_sum'].iloc[0]\n", - " inUrban.loc[idx, 'Admin2_ID'] = selAdmin['district_c'].iloc[1]\n", - " inUrban.loc[idx, 'Admin2_Pop'] = selAdmin['zz_sum'].iloc[1]" + " selAdmin = get_majority_polygon(row[\"geometry\"], selAdmin, inP_ghs)\n", + " inUrban.loc[idx, \"Admin1_ID\"] = selAdmin[\"district_c\"].iloc[0]\n", + " inUrban.loc[idx, \"Admin1_Pop\"] = selAdmin[\"zz_sum\"].iloc[0]\n", + " inUrban.loc[idx, \"Admin2_ID\"] = selAdmin[\"district_c\"].iloc[1]\n", + " inUrban.loc[idx, \"Admin2_Pop\"] = selAdmin[\"zz_sum\"].iloc[1]" ] }, { @@ -466,7 +507,9 @@ "metadata": {}, "outputs": [], "source": [ - "pd.DataFrame(inUrban.drop(['geometry'], axis=1)).to_csv(urban_extents_file.replace(\".geojson\", '_named.csv'))" + "pd.DataFrame(inUrban.drop([\"geometry\"], axis=1)).to_csv(\n", + " urban_extents_file.replace(\".geojson\", \"_named.csv\")\n", + ")" ] }, { @@ -493,8 +536,8 @@ " x_range = b[2] - b[0]\n", " y_range = b[3] - b[1]\n", "\n", - " x_coord = b[0] + x_range * 
(centroid_coords[1]/curP.shape[1])\n", - " y_coord = b[1] + y_range * (centroid_coords[2]/curP.shape[2])\n", + " x_coord = b[0] + x_range * (centroid_coords[1] / curP.shape[1])\n", + " y_coord = b[1] + y_range * (centroid_coords[2] / curP.shape[2])\n", " final_geom = Point(x_coord, y_coord)\n", " inAdmin_centroids.loc[idx, \"geometry\"] = final_geom\n", "inAdmin_centroids.to_file(admin_final.replace(\".shp\", \"_centroids.shp\"))" @@ -518,9 +561,9 @@ " rMisc.clipRaster(globalP, inAdmin, local_friction)\n", "\n", "dests = gpd.read_file(urban_extents_file)\n", - "dests['geometry'] = dests['geometry'].apply(lambda x: x.centroid)\n", + "dests[\"geometry\"] = dests[\"geometry\"].apply(lambda x: x.centroid)\n", "inR = rasterio.open(local_friction)\n", - "frictionD = inR.read()[0,:,:]\n", + "frictionD = inR.read()[0, :, :]\n", "frictionD = frictionD * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] @@ -3667,10 +3710,9 @@ "source": [ "importlib.reload(ma)\n", "# Calculate travel time between all urban areas\n", - "all_rts = ma.get_linear_routes(inR, frictionD, dests, dests, \n", - " \"ID\", \"ID\", verbose=True)\n", + "all_rts = ma.get_linear_routes(inR, frictionD, dests, dests, \"ID\", \"ID\", verbose=True)\n", "all_rts = all_rts.to_crs(3857)\n", - "all_rts['length_km'] = all_rts['geometry'].apply(lambda x: x.length/1000)" + "all_rts[\"length_km\"] = all_rts[\"geometry\"].apply(lambda x: x.length / 1000)" ] }, { @@ -3679,9 +3721,9 @@ "metadata": {}, "outputs": [], "source": [ - "#all_rts.to_file(all_routes_file)\n", - "pd.pivot_table(all_rts, \"cost\", \"origin\", 'destination').to_csv(time_matrix)\n", - "pd.pivot_table(all_rts, \"length_km\", \"origin\", 'destination').to_csv(dist_matrix)" + "# all_rts.to_file(all_routes_file)\n", + "pd.pivot_table(all_rts, \"cost\", \"origin\", \"destination\").to_csv(time_matrix)\n", + "pd.pivot_table(all_rts, \"length_km\", \"origin\", \"destination\").to_csv(dist_matrix)" ] }, { @@ -67181,10 +67223,11 @@ "importlib.reload(ma)\n", "# Calculate travel time between all urban areas and district centroids\n", "inAdmin_centroids = gpd.read_file(admin_final.replace(\".shp\", \"_centroids.shp\"))\n", - "dist_all_rts = ma.get_linear_routes_mp(inR, frictionD, dests, inAdmin_centroids, \n", - " \"ID\", \"district_c\", verbose=True)\n", + "dist_all_rts = ma.get_linear_routes_mp(\n", + " inR, frictionD, dests, inAdmin_centroids, \"ID\", \"district_c\", verbose=True\n", + ")\n", "dist_all_rts = dist_all_rts.to_crs(3857)\n", - "dist_all_rts['length_km'] = dist_all_rts['geometry'].apply(lambda x: x.length/1000)" + "dist_all_rts[\"length_km\"] = dist_all_rts[\"geometry\"].apply(lambda x: x.length / 1000)" ] }, { @@ -67308,9 +67351,11 @@ "metadata": {}, "outputs": [], "source": [ - "#dist_all_rts.to_file(dist_all_routes_file)\n", - "pd.pivot_table(dist_all_rts, \"cost\", \"origin\", 'destination').to_csv(dist_time_matrix)\n", - "pd.pivot_table(dist_all_rts, \"length_km\", \"origin\", 'destination').to_csv(dist_dist_matrix)" + "# dist_all_rts.to_file(dist_all_routes_file)\n", + "pd.pivot_table(dist_all_rts, \"cost\", \"origin\", \"destination\").to_csv(dist_time_matrix)\n", + "pd.pivot_table(dist_all_rts, \"length_km\", \"origin\", \"destination\").to_csv(\n", + " dist_dist_matrix\n", + ")" ] }, { @@ -67323,9 +67368,9 @@ "source": [ "# For each urban area, generate a travel time to the centroid, and then sample for all the other areas\n", "urban_output_matrix = np.zeros([dests.shape[0], dests.shape[0]])\n", - "for idx, row in dests.iterrows(): \n", + "for idx, row in 
dests.iterrows():\n", " costs, trace = ma.calculate_travel_time(inR, mcp, row.to_frame().transpose())\n", - " cur_res = dests['geometry'].apply(lambda x: costs[inR.index(x.x, x.y)])\n", + " cur_res = dests[\"geometry\"].apply(lambda x: costs[inR.index(x.x, x.y)])\n", " output_matrix[idx,] = cur_res\n", " tPrint(f\"{idx} of {dests.shape[0]} completed\")" ] @@ -67338,8 +67383,8 @@ }, "outputs": [], "source": [ - "tt_res = pd.DataFrame(output_matrix, columns=[f'urb_{x}' for x in dests['ID']])\n", - "tt_res.index = dests['ID']\n", + "tt_res = pd.DataFrame(output_matrix, columns=[f\"urb_{x}\" for x in dests[\"ID\"]])\n", + "tt_res.index = dests[\"ID\"]\n", "tt_res.to_csv(urban_tt_result)" ] }, @@ -67457,13 +67502,15 @@ ], "source": [ "urbanD = gpd.read_file(urban_extents_file)\n", - "eca_kaz = gpd.read_file('/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/KAZ/KAZ_urban_extents.geojson')\n", - "urbanD['eca_id'] = 0\n", + "eca_kaz = gpd.read_file(\n", + " \"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/KAZ/KAZ_urban_extents.geojson\"\n", + ")\n", + "urbanD[\"eca_id\"] = 0\n", "for idx, row in urbanD.iterrows():\n", - " sel_eca = eca_kaz.loc[eca_kaz.intersects(row['geometry'].centroid)]\n", + " sel_eca = eca_kaz.loc[eca_kaz.intersects(row[\"geometry\"].centroid)]\n", " if sel_eca.shape[0] > 0:\n", - " urbanD.loc[idx, 'eca_id'] = sel_eca['ID'].iloc[0]\n", - "urbanD.to_file(urban_extents_file, driver='GeoJSON')" + " urbanD.loc[idx, \"eca_id\"] = sel_eca[\"ID\"].iloc[0]\n", + "urbanD.to_file(urban_extents_file, driver=\"GeoJSON\")" ] }, { @@ -67473,13 +67520,15 @@ "outputs": [], "source": [ "urbanHD = gpd.read_file(urban_extents_hd_file)\n", - "eca_kaz = gpd.read_file('/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/KAZ/KAZ_urban_extents_hd.geojson')\n", - "urbanHD['eca_id'] = 0\n", + "eca_kaz = gpd.read_file(\n", + " \"/home/wb411133/projects/KAZ_SCADR_Urbanization/DATA/ECA_Extents/KAZ/KAZ_urban_extents_hd.geojson\"\n", + ")\n", + "urbanHD[\"eca_id\"] = 0\n", "for idx, row in urbanHD.iterrows():\n", - " sel_eca = eca_kaz.loc[eca_kaz.intersects(row['geometry'].centroid)]\n", + " sel_eca = eca_kaz.loc[eca_kaz.intersects(row[\"geometry\"].centroid)]\n", " if sel_eca.shape[0] > 0:\n", - " urbanHD.loc[idx, 'eca_id'] = sel_eca['ID'].iloc[0]\n", - "urbanHD.to_file(urban_extents_hd_file, driver='GeoJSON')" + " urbanHD.loc[idx, \"eca_id\"] = sel_eca[\"ID\"].iloc[0]\n", + "urbanHD.to_file(urban_extents_hd_file, driver=\"GeoJSON\")" ] }, { @@ -67495,18 +67544,22 @@ "metadata": {}, "outputs": [], "source": [ - "world_filepath = gpd.datasets.get_path('naturalearth_lowres')\n", + "world_filepath = gpd.datasets.get_path(\"naturalearth_lowres\")\n", "world = gpd.read_file(world_filepath)\n", - "sel_country = world.loc[world['name'] == \"Kenya\"]\n", + "sel_country = world.loc[world[\"name\"] == \"Kenya\"]\n", "\n", "local_friction = \"/home/wb411133/temp/KEN_friction.tif\"\n", "if not os.path.exists(local_friction):\n", " globalP = rasterio.open(global_friction_surface)\n", " rMisc.clipRaster(globalP, sel_country, local_friction)\n", - " \n", - "inAdmin = gpd.read_file(\"/home/public/Data/COUNTRY/KEN/ADMIN/geoBoundaries-KEN-ADM1.geojson\")\n", + "\n", + "inAdmin = gpd.read_file(\n", + " \"/home/public/Data/COUNTRY/KEN/ADMIN/geoBoundaries-KEN-ADM1.geojson\"\n", + ")\n", "inAdmin_centroids = inAdmin.copy()\n", - "inAdmin_centroids['geometry'] = inAdmin_centroids['geometry'].apply(lambda x: x.centroid)" + "inAdmin_centroids[\"geometry\"] = 
inAdmin_centroids[\"geometry\"].apply(\n", + " lambda x: x.centroid\n", + ")" ] }, { @@ -67539,7 +67592,7 @@ "outputs": [], "source": [ "inR = rasterio.open(local_friction)\n", - "frictionD = inR.read()[0,:,:]\n", + "frictionD = inR.read()[0, :, :]\n", "frictionD = frictionD * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] @@ -67608,10 +67661,17 @@ "source": [ "importlib.reload(ma)\n", "\n", - "all_rts = ma.get_linear_routes(inR, frictionD, inAdmin_centroids, inAdmin_centroids, \n", - " \"shapeName\", \"shapeName\", verbose=True)\n", + "all_rts = ma.get_linear_routes(\n", + " inR,\n", + " frictionD,\n", + " inAdmin_centroids,\n", + " inAdmin_centroids,\n", + " \"shapeName\",\n", + " \"shapeName\",\n", + " verbose=True,\n", + ")\n", "all_rts = all_rts.to_crs(3857)\n", - "all_rts['length_km'] = all_rts['geometry'].apply(lambda x: x.length/1000)\n" + "all_rts[\"length_km\"] = all_rts[\"geometry\"].apply(lambda x: x.length / 1000)" ] }, { @@ -67638,19 +67698,23 @@ "source": [ "# Map resulting route\n", "centre = sel_country.unary_union.centroid\n", - "m = folium.Map(location=[centre.y,centre.x], zoom_start=4)\n", + "m = folium.Map(location=[centre.y, centre.x], zoom_start=4)\n", "orig_map = inAdmin_centroids.iloc[0]\n", - "rts = folium.GeoJson(mapping(all_rts.unary_union), style_function=lambda feature: {\n", - " 'color':'red',\n", - " 'weight':1\n", - "}) \n", + "rts = folium.GeoJson(\n", + " mapping(all_rts.unary_union),\n", + " style_function=lambda feature: {\"color\": \"red\", \"weight\": 1},\n", + ")\n", "\n", - "folium.CircleMarker(location=[orig_map.geometry.y, orig_map.geometry.x], \n", - " radius=2, weight=4, color='blue').add_to(m)\n", + "folium.CircleMarker(\n", + " location=[orig_map.geometry.y, orig_map.geometry.x],\n", + " radius=2,\n", + " weight=4,\n", + " color=\"blue\",\n", + ").add_to(m)\n", "\n", "rts.add_to(m)\n", "\n", - "m\n" + "m" ] }, { @@ -69122,7 +69186,7 @@ } ], "source": [ - "pd.pivot(all_rts, \"origin\", 'destination', 'cost')" + "pd.pivot(all_rts, \"origin\", \"destination\", \"cost\")" ] }, { @@ -70594,7 +70658,7 @@ } ], "source": [ - "pd.pivot(all_rts, \"origin\", 'destination', 'length_km')" + "pd.pivot(all_rts, \"origin\", \"destination\", \"length_km\")" ] }, { @@ -70604,19 +70668,23 @@ "outputs": [], "source": [ "# Generate h3 grid around dedicated city\n", - "m = folium.Map(location=[row.geometry.y,row.geometry.x], zoom_start=4)\n", + "m = folium.Map(location=[row.geometry.y, row.geometry.x], zoom_start=4)\n", "\n", - "folium.CircleMarker(location=[y_range[0], x_range[0]], \n", - " radius=2, weight=4, color='red').add_to(m)\n", + "folium.CircleMarker(\n", + " location=[y_range[0], x_range[0]], radius=2, weight=4, color=\"red\"\n", + ").add_to(m)\n", "\n", - "folium.CircleMarker(location=[y_range[0], x_range[-1]], \n", - " radius=2, weight=4, color='blue').add_to(m)\n", + "folium.CircleMarker(\n", + " location=[y_range[0], x_range[-1]], radius=2, weight=4, color=\"blue\"\n", + ").add_to(m)\n", "\n", - "folium.CircleMarker(location=[y_range[-1], x_range[-1]], \n", - " radius=2, weight=4, color='orange').add_to(m)\n", + "folium.CircleMarker(\n", + " location=[y_range[-1], x_range[-1]], radius=2, weight=4, color=\"orange\"\n", + ").add_to(m)\n", "\n", - "folium.CircleMarker(location=[y_range[-1], x_range[0]], \n", - " radius=2, weight=4, color='green').add_to(m)\n", + "folium.CircleMarker(\n", + " location=[y_range[-1], x_range[0]], radius=2, weight=4, color=\"green\"\n", + ").add_to(m)\n", "\n", "\n", "m" diff --git 
a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb index 7783048..f246c0a 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Combine_All.ipynb @@ -16,11 +16,7 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, inspect, logging, importlib, time\n", - "import pandas as pd\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np" + "import geopandas as gpd" ] }, { @@ -29,10 +25,10 @@ "metadata": {}, "outputs": [], "source": [ - "#folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", - "#folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", - "#folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", - "#folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", + "# folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", + "# folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", + "# folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", + "# folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", "folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", "folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", "folder = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_extents_results_5k_up\"" @@ -44,9 +40,9 @@ "metadata": {}, "outputs": [], "source": [ - "#file_base = r\"\\ECA_russia_urban_metrics_100k_\"\n", - "#file_base = r\"\\UZB_only_urban_metrics_FUAs_\"\n", - "#file_base = r\"\\UZB_only_urban_metrics_urban_extents_\"\n", + "# file_base = r\"\\ECA_russia_urban_metrics_100k_\"\n", + "# file_base = r\"\\UZB_only_urban_metrics_FUAs_\"\n", + "# file_base = r\"\\UZB_only_urban_metrics_urban_extents_\"\n", "file_base = r\"\\all_urban_metrics_5k_up_\"" ] }, @@ -56,7 +52,23 @@ "metadata": {}, "outputs": [], "source": [ - "metrics_dict = {\"shape\":['ProximityIndex','RoundnessIndex','CohesionIndex','SpinIndex','PerimIndex','DepthIndex','GirthIndex','DispersionIndex','DispersionIndex','RangeIndex'], \"fullness\":['fullness_index'], \"structure\":['circuity_avg','intersection_density_km','street_density_km'], \"sprawl\":['sprawl_index']}" + "metrics_dict = {\n", + " \"shape\": [\n", + " \"ProximityIndex\",\n", + " \"RoundnessIndex\",\n", + " \"CohesionIndex\",\n", + " \"SpinIndex\",\n", + " \"PerimIndex\",\n", + " \"DepthIndex\",\n", + " \"GirthIndex\",\n", + " \"DispersionIndex\",\n", + " \"DispersionIndex\",\n", + " \"RangeIndex\",\n", + " ],\n", + " \"fullness\": [\"fullness_index\"],\n", + " \"structure\": [\"circuity_avg\", \"intersection_density_km\", \"street_density_km\"],\n", + " 
\"sprawl\": [\"sprawl_index\"],\n", + "}" ] }, { @@ -66,13 +78,19 @@ "outputs": [], "source": [ "my_data = {}\n", - "for num, metric in enumerate(metrics_dict.items()): \n", + "for num, metric in enumerate(metrics_dict.items()):\n", " # https://github.com/geopandas/geopandas/issues/1234\n", - " my_data[metric[0]] = gpd.read_file(folder+file_base+metric[0]+\".csv\", GEOM_POSSIBLE_NAMES=\"geometry\", KEEP_GEOM_COLUMNS=\"NO\")\n", + " my_data[metric[0]] = gpd.read_file(\n", + " folder + file_base + metric[0] + \".csv\",\n", + " GEOM_POSSIBLE_NAMES=\"geometry\",\n", + " KEEP_GEOM_COLUMNS=\"NO\",\n", + " )\n", " if num == 0:\n", " merged_df = my_data[metric[0]]\n", " if num > 0:\n", - " merged_df = merged_df.merge(my_data[metric[0]][metric[1]], how='left', left_index=True, right_index=True)" + " merged_df = merged_df.merge(\n", + " my_data[metric[0]][metric[1]], how=\"left\", left_index=True, right_index=True\n", + " )" ] }, { @@ -527,15 +545,15 @@ "outputs": [], "source": [ "# save as shapefile\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_extents_results_5k_up\"\n", - "#merged_df.to_file(output + r\"\\ECA_russia_urban_metrics_100k_all.shp\")\n", - "#merged_df.to_file(output + r\"\\UZB_only_urban_metrics_urban_extents_all.shp\")\n", - "#merged_df.to_file(output + r\"\\eca_urban_metrics_results_wo_rus_all.shp\")\n", + "# merged_df.to_file(output + r\"\\ECA_russia_urban_metrics_100k_all.shp\")\n", + "# merged_df.to_file(output + r\"\\UZB_only_urban_metrics_urban_extents_all.shp\")\n", + "# merged_df.to_file(output + r\"\\eca_urban_metrics_results_wo_rus_all.shp\")\n", "merged_df.to_file(output + r\"\\all_urban_extents_results_5k_up.shp\")" ] }, diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb index f97f213..b32cc81 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Fullness.ipynb @@ -33,7 +33,8 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, inspect, logging, importlib, time" + "import sys\n", + "import time" ] }, { @@ -44,8 +45,7 @@ "source": [ "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", - "import math, random" + "import numpy as np" ] }, { @@ 
-54,7 +54,6 @@ "metadata": {}, "outputs": [], "source": [ - "import shapely\n", "from shapely.geometry import mapping\n", "from shapely.geometry import Point" ] @@ -66,7 +65,7 @@ "outputs": [], "source": [ "# Get reference to GOSTNets\n", - "sys.path.append(r'C:\\repos\\INFRA_SAP')\n", + "sys.path.append(r\"C:\\repos\\INFRA_SAP\")\n", "from infrasap.urban_metrics import *" ] }, @@ -78,7 +77,7 @@ "source": [ "import rasterio\n", "from rasterio.mask import mask\n", - "from rasterio import Affine # or from affine import Affine" + "from rasterio import Affine # or from affine import Affine" ] }, { @@ -87,7 +86,6 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", "start_time = time.time()" ] }, @@ -106,7 +104,7 @@ "outputs": [], "source": [ "built_up_layer = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\GHS_BUILT_LDS2014_GLOBE_R2018A_54009_250_V2_0\\GHS_BUILT_LDS2014_GLOBE_R2018A_54009_250_V2_0.tif\"\n", - "#built_up_layer = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\2015_250m_GHS_Built\\UZB_2015_GHS_built_merged.tif\"" + "# built_up_layer = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\2015_250m_GHS_Built\\UZB_2015_GHS_built_merged.tif\"" ] }, { @@ -115,12 +113,12 @@ "metadata": {}, "outputs": [], "source": [ - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_mollweide.shp\"\n", - "#shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_mollweide2.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_FUAs_Project_Mollweide.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_mollweide_geom_fixed_greater_50k.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_ghs_built_up_extents_mollweide_geom_fixed.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_molleweide.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_mollweide.shp\"\n", + "# shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_mollweide2.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_FUAs_Project_Mollweide.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_mollweide_geom_fixed_greater_50k.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_ghs_built_up_extents_mollweide_geom_fixed.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_molleweide.shp\"\n", "shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_clusters_5k_up_molleweide.shp\"" ] }, @@ -16483,83 +16481,85 @@ "%%time\n", "\n", "with rasterio.open(built_up_layer) as src:\n", 
- " pixelSizeX, pixelSizeY = src.res\n", - " #print(pixelSizeX, pixelSizeY)\n", + " pixelSizeX, pixelSizeY = src.res\n", + " # print(pixelSizeX, pixelSizeY)\n", " pixel_area = 250**2\n", - " \n", + "\n", " input_shapes_gpd = gpd.read_file(shpName)\n", - " \n", + "\n", " # psuedocode\n", " # For each Shape:\n", - " # Select all built-up pixels that are mostly within shape\n", - " # Area of shape = sum of all pixels * area of each pixel\n", - " # Built-up Area = (sum pixel value of each pixel) / 100\n", - " # Fullness index = Built-up Area / Area of Shape\n", + " # Select all built-up pixels that are mostly within shape\n", + " # Area of shape = sum of all pixels * area of each pixel\n", + " # Built-up Area = (sum pixel value of each pixel) / 100\n", + " # Fullness index = Built-up Area / Area of Shape\n", "\n", - " #for entry in input_shapes_gpd.head(5).iterrows():\n", + " # for entry in input_shapes_gpd.head(5).iterrows():\n", " for entry in input_shapes_gpd.iterrows():\n", " print(entry[0])\n", - " #print(row[1]['geometry'])\n", + " # print(row[1]['geometry'])\n", "\n", " # extract the geometry in GeoJSON format\n", - " geometry = entry[1]['geometry'] # list of shapely geometries\n", - " #geometry = geoms[0] # shapely geometry\n", + " geometry = entry[1][\"geometry\"] # list of shapely geometries\n", + " # geometry = geoms[0] # shapely geometry\n", " geoms = [mapping(geometry)]\n", "\n", - " # extract the raster values values within the polygon \n", + " # extract the raster values values within the polygon\n", " out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0)\n", - " data = out_image[0,:,:]\n", - " \n", - " row, col = np.where(data != -9999.0) \n", + " data = out_image[0, :, :]\n", + "\n", + " row, col = np.where(data != -9999.0)\n", " val = np.extract(data != -9999.0, data)\n", "\n", - " T1 = out_transform * Affine.translation(0.5, 0.5) # reference the pixel centre\n", - " rc2xy = lambda r, c: (c, r) * T1 \n", - " \n", - " d = gpd.GeoDataFrame({'col':col,'row':row,'val':val})\n", - " \n", + " T1 = out_transform * Affine.translation(0.5, 0.5) # reference the pixel centre\n", + " rc2xy = lambda r, c: (c, r) * T1\n", + "\n", + " d = gpd.GeoDataFrame({\"col\": col, \"row\": row, \"val\": val})\n", + "\n", " # coordinate transformation\n", - " d['x'] = d.apply(lambda row: rc2xy(row.row,row.col)[0], axis=1)\n", - " d['y'] = d.apply(lambda row: rc2xy(row.row,row.col)[1], axis=1)\n", - " \n", + " d[\"x\"] = d.apply(lambda row: rc2xy(row.row, row.col)[0], axis=1)\n", + " d[\"y\"] = d.apply(lambda row: rc2xy(row.row, row.col)[1], axis=1)\n", + "\n", " # geometry\n", - " d['geometry'] = d.apply(lambda row: Point(row['x'], row['y']), axis=1)\n", - " \n", + " d[\"geometry\"] = d.apply(lambda row: Point(row[\"x\"], row[\"y\"]), axis=1)\n", + "\n", " # Area of shape = sum of all pixels * area of each pixel\n", " area_of_shape = pixel_area * d.count()[0]\n", " # Built-up Area = (sum pixel value of each pixel) / 100\n", - " d2 = d.val/100\n", + " d2 = d.val / 100\n", " built_up = sum(d2) * pixel_area\n", " # Fullness index = Built-up Area / Area of Shape\n", " fullness_index = built_up / area_of_shape\n", " print(f\"fullness index: {fullness_index}\")\n", - " \n", + "\n", " # creates a temporary GDF for just the row's shape\n", " temp_gdf = input_shapes_gpd.iloc[[entry[0]]]\n", - " \n", - " #print(\"print temp_gdf\")\n", - " #print(temp_gdf)\n", - " \n", + "\n", + " # print(\"print temp_gdf\")\n", + " # print(temp_gdf)\n", + "\n", " # Put all metrics in a DataFrame\n", " 
metrics_scalar = {}\n", - " metrics_scalar['fullness_index'] = [fullness_index]\n", + " metrics_scalar[\"fullness_index\"] = [fullness_index]\n", " metrics_df = pd.DataFrame(metrics_scalar)\n", - " \n", - " #print(\"print metrics_scalar\")\n", - " #print(metrics_scalar)\n", - " \n", + "\n", + " # print(\"print metrics_scalar\")\n", + " # print(metrics_scalar)\n", + "\n", " # and concatinate it with the row's shape\n", " new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", - " \n", - " #print(\"print new_temp_gdf\")\n", - " #print(new_temp_gdf)\n", - " #print(entry[0])\n", + "\n", + " # print(\"print new_temp_gdf\")\n", + " # print(new_temp_gdf)\n", + " # print(entry[0])\n", " # put the results of each row into a new DataFrame\n", " if entry[0] == 0:\n", " print(\"new_temp_gdf\")\n", " output_new_temp_gdf = new_temp_gdf\n", " else:\n", - " output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True) " + " output_new_temp_gdf = output_new_temp_gdf.append(\n", + " new_temp_gdf, ignore_index=True\n", + " )" ] }, { @@ -16601,7 +16601,7 @@ "outputs": [], "source": [ "# make the GeoDataFrame unprojected\n", - "output_new_temp_gdf = output_new_temp_gdf.to_crs('epsg:4326')" + "output_new_temp_gdf = output_new_temp_gdf.to_crs(\"epsg:4326\")" ] }, { @@ -16801,12 +16801,12 @@ "metadata": {}, "outputs": [], "source": [ - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_extents_results_5k_up\"" ] }, @@ -16817,11 +16817,11 @@ "outputs": [], "source": [ "# save as CSV\n", - "#output_new_temp_gdf.to_csv(output + r\"\\ECA_russia_urban_metrics_100k_fullness.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_FUAs_fullness.csv\")\n", - "#output_new_temp_gdf.to_csv(output + 
r\"\\UZB_only_urban_metrics_urban_extents_fullness.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_fullness.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\ECA_wo_rus_urban_metrics_urban_extents_all_fullness.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\ECA_russia_urban_metrics_100k_fullness.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_FUAs_fullness.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_fullness.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_fullness.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\ECA_wo_rus_urban_metrics_urban_extents_all_fullness.csv\")\n", "output_new_temp_gdf.to_csv(output + r\"\\all_urban_metrics_5k_up_fullness.csv\")" ] }, diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb index e6e697c..44b38f6 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Shape.ipynb @@ -23,7 +23,8 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, inspect, logging, importlib, time" + "import sys\n", + "import time" ] }, { @@ -41,12 +42,9 @@ } ], "source": [ - "import osmnx as ox\n", "import pandas as pd\n", "import geopandas as gpd\n", - "import networkx as nx\n", - "import numpy as np\n", - "import math, random" + "import math" ] }, { @@ -54,9 +52,7 @@ "execution_count": 4, "metadata": {}, "outputs": [], - "source": [ - "import shapely" - ] + "source": [] }, { "cell_type": "code", @@ -65,7 +61,7 @@ "outputs": [], "source": [ "# Get reference to GOSTNets\n", - "sys.path.append(r'C:\\repos\\INFRA_SAP')\n", + "sys.path.append(r\"C:\\repos\\INFRA_SAP\")\n", "from infrasap.urban_metrics import *" ] }, @@ -75,7 +71,6 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", "start_time = time.time()" ] }, @@ -96,15 +91,15 @@ "metadata": {}, "outputs": [], "source": [ - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_4326.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_4326.shp\"\n", "\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\sample_shps_3.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UBZ_only_FUAs2_geom_fixed.shp\"\n", - "#shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_4326_2.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed_greater_50k.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_4326.shp\"\n", - "shpName = 
r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_clusters_5k_up_4326.shp\"\n" + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\sample_shps_3.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UBZ_only_FUAs2_geom_fixed.shp\"\n", + "# shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_4326_2.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed_greater_50k.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_4326.shp\"\n", + "shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_clusters_5k_up_4326.shp\"" ] }, { @@ -123,7 +118,7 @@ "metadata": {}, "outputs": [], "source": [ - "#input_shapes_gpd" + "# input_shapes_gpd" ] }, { @@ -154,7 +149,7 @@ ], "source": [ "# proj\n", - "#vars(input_shapes_gpd)\n", + "# vars(input_shapes_gpd)\n", "input_shapes_gpd._crs" ] }, @@ -186,7 +181,7 @@ ], "source": [ "# make the GeoDataFrame unprojected\n", - "input_shapes_gpd = input_shapes_gpd.to_crs('epsg:4326')\n", + "input_shapes_gpd = input_shapes_gpd.to_crs(\"epsg:4326\")\n", "input_shapes_gpd._crs" ] }, @@ -384,7 +379,7 @@ "metadata": {}, "outputs": [], "source": [ - "#-------------------------------------\n", + "# -------------------------------------\n", "# SET UP TEMP WORKSPACE...\n", "TempWS = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\Shape_Metrics_Temp\"\n", "\n", @@ -24714,31 +24709,33 @@ "\n", " # creates a temporary GDF for just the row's shape\n", " temp_gdf = input_shapes_gpd.iloc[[index]]\n", - " \n", + "\n", " # finds its correct UTM zone projection and reprojects it\n", " temp_gdf_proj = project_gdf(temp_gdf)\n", "\n", " A = temp_gdf_proj.iloc[0].geometry.area\n", " P = temp_gdf_proj.iloc[0].geometry.length\n", - " \n", + "\n", " # Equal area circle radius...\n", - " r = (temp_gdf_proj.iloc[0].geometry.area / math.pi)**.5 # radius of equal area circle (circle with area equal to shape area) (derived from A = pi * r squared)\n", + " r = (\n", + " (temp_gdf_proj.iloc[0].geometry.area / math.pi) ** 0.5\n", + " ) # radius of equal area circle (circle with area equal to shape area) (derived from A = pi * r squared)\n", " print(f\"print r: {r}\")\n", - " p = 2 * math.pi * r # Equal area circle perimeter\n", + " p = 2 * math.pi * r # Equal area circle perimeter\n", "\n", " # LIST OF COORDINATES OF FEATURE VERTICES (for single part features)...\n", - " pntLst = [] # stores feature array...\n", + " pntLst = [] # stores feature array...\n", " subVLst = []\n", "\n", " # Step through exterior part of the feature\n", " for coord in temp_gdf_proj.iloc[0].geometry.exterior.coords:\n", " # Print the part number\n", - " #print(\"coord {}:\".format(coord))\n", + " # print(\"coord {}:\".format(coord))\n", " # Step through each vertex in the feature\n", " # Print x,y coordinates of current point\n", - " #print(\"{}, {}\".format(coord[0], coord[1]))\n", - " X, Y = 
coord[0], coord[1] # get point XY \n", - " subVLst.append([X,Y]) # add XY to list \n", + " # print(\"{}, {}\".format(coord[0], coord[1]))\n", + " X, Y = coord[0], coord[1] # get point XY\n", + " subVLst.append([X, Y]) # add XY to list\n", "\n", " pntLst.append(subVLst)\n", "\n", @@ -24749,101 +24746,111 @@ " subVLst = []\n", " # Step through each part of the feature\n", " for coord in poly.coords:\n", - " #print(\"coord {}:\".format(coord))\n", + " # print(\"coord {}:\".format(coord))\n", " # Step through each vertex in the feature\n", " # Print x,y coordinates of current point\n", - " #print(\"{}, {}\".format(coord[0], coord[1]))\n", - " X, Y = coord[0], coord[1] # get point XY \n", - " subVLst.append([X,Y]) # add XY to list \n", - " #print(subVLst)\n", + " # print(\"{}, {}\".format(coord[0], coord[1]))\n", + " X, Y = coord[0], coord[1] # get point XY\n", + " subVLst.append([X, Y]) # add XY to list\n", + " # print(subVLst)\n", " subVLst.reverse()\n", - " #print(subVLst)\n", + " # print(subVLst)\n", " pntLst.append(subVLst)\n", "\n", " # desired shape area in pixels...\n", " numPix = 20000\n", "\n", " # calculate pixel size...\n", - " cellsize = (A / numPix)**.5\n", + " cellsize = (A / numPix) ** 0.5\n", "\n", " # get min and max XY values\n", - " minX, minY, maxX, maxY = temp_gdf_proj.iloc[0].geometry.bounds[0],temp_gdf_proj.iloc[0].geometry.bounds[1],temp_gdf_proj.iloc[0].geometry.bounds[2],temp_gdf_proj.iloc[0].geometry.bounds[3]\n", + " minX, minY, maxX, maxY = (\n", + " temp_gdf_proj.iloc[0].geometry.bounds[0],\n", + " temp_gdf_proj.iloc[0].geometry.bounds[1],\n", + " temp_gdf_proj.iloc[0].geometry.bounds[2],\n", + " temp_gdf_proj.iloc[0].geometry.bounds[3],\n", + " )\n", "\n", " # offset grid by half a pixel...\n", " minX -= cellsize / 2\n", " maxY += cellsize / 2\n", "\n", " # centroid coordinates\n", - " centroidXY = temp_gdf_proj.iloc[0].geometry.centroid.x,temp_gdf_proj.iloc[0].geometry.centroid.y\n", - " x_offset, y_offset = 0,0\n", - " Xc, Yc = centroidXY[0]-x_offset, centroidXY[1]-y_offset\n", + " centroidXY = (\n", + " temp_gdf_proj.iloc[0].geometry.centroid.x,\n", + " temp_gdf_proj.iloc[0].geometry.centroid.y,\n", + " )\n", + " x_offset, y_offset = 0, 0\n", + " Xc, Yc = centroidXY[0] - x_offset, centroidXY[1] - y_offset\n", "\n", " # generates a list of points within the shape\n", - " featPntLst = generate_featPntLst(pntLst, minX, minY, maxX, maxY, cellsize, gridTxtFile)\n", + " featPntLst = generate_featPntLst(\n", + " pntLst, minX, minY, maxX, maxY, cellsize, gridTxtFile\n", + " )\n", "\n", " # NOTE: THE CENTROID IS CURRENTLY USED AS THE CENTER\n", " # calculate distance of feature points to center...\n", - " D_to_Center, EAC_pix = proximity(featPntLst,Xc,Yc,r)\n", + " D_to_Center, EAC_pix = proximity(featPntLst, Xc, Yc, r)\n", "\n", " # Proximity index (circle / shape)\n", " # avg distance to center for equal area circle...\n", - " circD = r * (2.0/3.0)\n", - " #print(f\"print circD: {circD}\")\n", - " #print(f\"print D_to_Center: {D_to_Center}\")\n", + " circD = r * (2.0 / 3.0)\n", + " # print(f\"print circD: {circD}\")\n", + " # print(f\"print D_to_Center: {D_to_Center}\")\n", " ProximityIndex = circD / D_to_Center\n", - " metrics['ProximityIndex'] = ProximityIndex\n", + " metrics[\"ProximityIndex\"] = ProximityIndex\n", "\n", " # Roundness (exchange-index)\n", " inArea = EAC_pix * cellsize**2\n", " areaExchange = inArea / A\n", - " metrics['RoundnessIndex'] = areaExchange\n", + " metrics[\"RoundnessIndex\"] = areaExchange\n", "\n", - " # Cohesion index \n", + " # 
Cohesion index\n", " # custom tool calculates approx. average interpoint distances between\n", " # samples of points in shape...\n", " shp_interD = interpointDistance(featPntLst)\n", "\n", " # average interpoint distance for equal area circle...\n", - " circ_interD = r * .9054\n", + " circ_interD = r * 0.9054\n", "\n", " # cohesion index is ratio of avg interpoint distance of circle to\n", " # avg interpoint distance of shape...\n", " CohesionIndex = circ_interD / shp_interD\n", "\n", - " metrics['CohesionIndex'] = CohesionIndex\n", + " metrics[\"CohesionIndex\"] = CohesionIndex\n", "\n", " # Spin index\n", " # custom tool calculates moment of inertia for shape...\n", - " shpMOI = spin(featPntLst,Xc,Yc)\n", + " shpMOI = spin(featPntLst, Xc, Yc)\n", "\n", " # moment of inertia for equal area circle...\n", - " circ_MOI = .5 * r**2\n", + " circ_MOI = 0.5 * r**2\n", "\n", " # calculate spin index (circle / shape)...\n", " Spin = circ_MOI / shpMOI\n", "\n", - " metrics['SpinIndex'] = Spin\n", - " \n", + " metrics[\"SpinIndex\"] = Spin\n", + "\n", " # Perimeter index (circle / shape)\n", - " PerimIndex = p / P # The Perimeter Index\n", - " metrics['PerimIndex'] = PerimIndex\n", + " PerimIndex = p / P # The Perimeter Index\n", + " metrics[\"PerimIndex\"] = PerimIndex\n", "\n", " # Pre-calculations for Depth, Girth, and Dispersion indices\n", "\n", - " #print(f\"print first 3 of pntLst: {pntLst[0][:3]}\")\n", + " # print(f\"print first 3 of pntLst: {pntLst[0][:3]}\")\n", "\n", " # get list of points evenly distributed along perimeter...\n", " perimPntLst = PerimeterPnts(pntLst, 500)\n", "\n", - " #print(f\"print first of perimPntLst: {perimPntLst[0]}\")\n", + " # print(f\"print first of perimPntLst: {perimPntLst[0]}\")\n", "\n", - " #------------------------------------------------------------------------------\n", + " # ------------------------------------------------------------------------------\n", " # SECTION 7: CALCULATE DISTANCE OF INTERIOR SHAPE POINTS TO PERIMETER POINTS...\n", "\n", " # custom tool calculates distance of each interior point to nearest perimeter point...\n", " pt_dToE = pt_distToEdge(featPntLst, perimPntLst)\n", "\n", - " #print(f\"print max pt_dToE: {pt_dToE[-1]}\")\n", + " # print(f\"print max pt_dToE: {pt_dToE[-1]}\")\n", "\n", " # Depth index\n", " # custom tool calculates average distance from interior pixels to nearest edge pixels...\n", @@ -24854,7 +24861,7 @@ "\n", " # calculate depth index (shape / circle)...\n", " depthIndex = shp_depth / EAC_depth\n", - " metrics['DepthIndex'] = depthIndex\n", + " metrics[\"DepthIndex\"] = depthIndex\n", "\n", " # Girth index\n", " # custom tool calculates shape girth (distance from edge to innermost point)\n", @@ -24862,15 +24869,15 @@ " shp_Girth = girth(pt_dToE)\n", "\n", " # calculate girth index (shape / circle)...\n", - " girthIndex = shp_Girth / r # girth of a circle is its radius\n", - " #print(f\"print shp_Girth: {shp_Girth}\")\n", - " #print(f\"print r: {r}\")\n", - " metrics['GirthIndex'] = girthIndex\n", + " girthIndex = shp_Girth / r # girth of a circle is its radius\n", + " # print(f\"print shp_Girth: {shp_Girth}\")\n", + " # print(f\"print r: {r}\")\n", + " metrics[\"GirthIndex\"] = girthIndex\n", "\n", " # Dispersion index\n", " # custom tool calculates average distance between proximate center and edge points...\n", " dispersionIndex, avgD = dispersion([Xc, Yc], perimPntLst[0])\n", - " metrics['DispersionIndex'] = dispersionIndex\n", + " metrics[\"DispersionIndex\"] = dispersionIndex\n", "\n", " # Detour 
index\n", " # custom tool creates list of points in the exterior polygon shape\n", @@ -24878,7 +24885,7 @@ "\n", " # calculate detour index (circle / shape)...\n", " detourIndex = p / hullPerim\n", - " metrics['DispersionIndex''DispersionIndex'] = detourIndex\n", + " metrics[\"DispersionIndex\" \"DispersionIndex\"] = detourIndex\n", "\n", " # Range index\n", " # custom tool identifies perimeter points that are farthest apart, outputs\n", @@ -24886,8 +24893,8 @@ " circumCircD = Range(pntLst[0])\n", "\n", " # calculate range index (circle / shape)\n", - " rangeIndex = (2*r) / circumCircD\n", - " metrics['RangeIndex'] = rangeIndex\n", + " rangeIndex = (2 * r) / circumCircD\n", + " metrics[\"RangeIndex\"] = rangeIndex\n", "\n", " # Put all metrics in a DataFrame\n", " metrics_scalar = {}\n", @@ -24896,26 +24903,30 @@ " metrics_df = pd.DataFrame(metrics_scalar)\n", "\n", " # and concatinate it with the row's shape\n", - " new_temp_gdf_proj = pd.concat([temp_gdf_proj.reset_index(drop=True), metrics_df], axis=1)\n", + " new_temp_gdf_proj = pd.concat(\n", + " [temp_gdf_proj.reset_index(drop=True), metrics_df], axis=1\n", + " )\n", "\n", - " #print(\"print new_temp_gdf_proj\")\n", - " #print(new_temp_gdf_proj)\n", + " # print(\"print new_temp_gdf_proj\")\n", + " # print(new_temp_gdf_proj)\n", "\n", " # make it unprojected\n", - " temp_gdf_proj_4326 = new_temp_gdf_proj.to_crs('epsg:4326')\n", + " temp_gdf_proj_4326 = new_temp_gdf_proj.to_crs(\"epsg:4326\")\n", "\n", " # put the results of each row into a new DataFrame\n", " if index == 0:\n", - " #print(\"creating output_shapes_gpd_4326\")\n", + " # print(\"creating output_shapes_gpd_4326\")\n", " output_shapes_gpd_4326 = temp_gdf_proj_4326\n", " else:\n", - " #print(f\"print output_shapes_gpd_4326, and index is {index}\")\n", - " #print(output_shapes_gpd_4326)\n", - " #print(\"to append temp_gdf_proj_4326\")\n", - " #print(temp_gdf_proj_4326)\n", - " output_shapes_gpd_4326 = output_shapes_gpd_4326.append(temp_gdf_proj_4326, ignore_index=True)\n", - " #print(\"output_shapes_gpd_4326 after append\")\n", - " #print(output_shapes_gpd_4326)" + " # print(f\"print output_shapes_gpd_4326, and index is {index}\")\n", + " # print(output_shapes_gpd_4326)\n", + " # print(\"to append temp_gdf_proj_4326\")\n", + " # print(temp_gdf_proj_4326)\n", + " output_shapes_gpd_4326 = output_shapes_gpd_4326.append(\n", + " temp_gdf_proj_4326, ignore_index=True\n", + " )\n", + " # print(\"output_shapes_gpd_4326 after append\")\n", + " # print(output_shapes_gpd_4326)" ] }, { @@ -25249,11 +25260,11 @@ "metadata": {}, "outputs": [], "source": [ - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", + "# output = 
r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_extents_results_5k_up\"" ] }, @@ -25264,9 +25275,9 @@ "outputs": [], "source": [ "# save as CSV\n", - "#output_shapes_gpd_4326.to_csv(output + r\"\\ECA_russia_urban_metrics_100k_shape.csv\")\n", - "#output_shapes_gpd_4326.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_shape.csv\")\n", - "#output_shapes_gpd_4326.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_shape.csv\")\n", + "# output_shapes_gpd_4326.to_csv(output + r\"\\ECA_russia_urban_metrics_100k_shape.csv\")\n", + "# output_shapes_gpd_4326.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_shape.csv\")\n", + "# output_shapes_gpd_4326.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_shape.csv\")\n", "output_shapes_gpd_4326.to_csv(output + r\"\\all_urban_metrics_5k_up_shape.csv\")" ] }, diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb index 9de5241..569ecb7 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Sprawl.ipynb @@ -39,7 +39,8 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, inspect, logging, importlib, time" + "import sys\n", + "import time" ] }, { @@ -50,8 +51,7 @@ "source": [ "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", - "import math, random" + "import numpy as np" ] }, { @@ -60,7 +60,6 @@ "metadata": {}, "outputs": [], "source": [ - "import shapely\n", "from shapely.geometry import mapping\n", "from shapely.geometry import Point" ] @@ -72,7 +71,7 @@ "outputs": [], "source": [ "# Get reference to GOSTNets\n", - "sys.path.append(r'C:\\repos\\INFRA_SAP')\n", + "sys.path.append(r\"C:\\repos\\INFRA_SAP\")\n", "from infrasap.urban_metrics import *" ] }, @@ -84,7 +83,7 @@ "source": [ "import rasterio\n", "from rasterio.mask import mask\n", - "from rasterio import Affine # or from affine import Affine" + "from rasterio import Affine # or from affine import Affine" ] }, { @@ -93,7 +92,6 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", "start_time = time.time()" ] }, @@ -112,8 +110,8 @@ "outputs": [], "source": [ "GHS_pop = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0\\GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif\"\n", - "#GHS_pop = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\clipped_eca_no_russia_1km.tif\"\n", - "#GHS_pop = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\2015_1km_GHS_Pop\\GHS_POP_2015_UZB_merged.tif\"" + "# GHS_pop = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\clipped_eca_no_russia_1km.tif\"\n", + "# GHS_pop = 
r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\2015_1km_GHS_Pop\\GHS_POP_2015_UZB_merged.tif\"" ] }, { @@ -122,13 +120,13 @@ "metadata": {}, "outputs": [], "source": [ - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\Final_urban_extent_metrics\\ECA_all_urban_extents_100k_mollweide.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_mollweide.shp\"\n", - "#shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_mollweide2.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_FUAs_Project_Mollweide.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_mollweide_geom_fixed_greater_50k.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_ghs_built_up_extents_mollweide_geom_fixed.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_molleweide.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\Final_urban_extent_metrics\\ECA_all_urban_extents_100k_mollweide.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_mollweide.shp\"\n", + "# shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_mollweide2.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_FUAs_Project_Mollweide.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_mollweide_geom_fixed_greater_50k.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_ghs_built_up_extents_mollweide_geom_fixed.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_molleweide.shp\"\n", "shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_clusters_5k_up_molleweide.shp\"" ] }, @@ -147,14 +145,14 @@ "source": [ "# Psuedocode\n", "\n", - "#pop_values = []\n", + "# pop_values = []\n", "# For each Shape/FUA:\n", - " # Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)\n", - " #For each pixel:\n", - " # pop_values.append(pixel value)\n", - " \n", - " \n", - "#cell_group_median = median of pop_values" + "# Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)\n", + "# For each pixel:\n", + "# pop_values.append(pixel value)\n", + "\n", + "\n", + "# cell_group_median = median of pop_values" ] }, { @@ -44500,53 +44498,52 @@ "with rasterio.open(GHS_pop) as src:\n", " pixelSizeX, pixelSizeY = src.res\n", " print(pixelSizeX, pixelSizeY)\n", - " \n", + "\n", " input_shapes_gpd = gpd.read_file(shpName)\n", "\n", - " #pop_values 
= []\n", + " # pop_values = []\n", " pop_values = []\n", - " #for entry in input_shapes_gpd.head(2).iterrows():\n", + " # for entry in input_shapes_gpd.head(2).iterrows():\n", " for entry in input_shapes_gpd.iterrows():\n", " print(entry[0])\n", - " \n", + "\n", " # extract the geometry in GeoJSON format\n", - " geometry = entry[1]['geometry'] # list of shapely geometries\n", - " #geometry = geoms[0] # shapely geometry\n", + " geometry = entry[1][\"geometry\"] # list of shapely geometries\n", + " # geometry = geoms[0] # shapely geometry\n", " # converts to geojson format\n", " geoms = [mapping(geometry)]\n", "\n", - " # extract the raster values values within the polygon \n", + " # extract the raster values values within the polygon\n", " out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0)\n", - " data = out_image[0,:,:]\n", - " \n", - " row, col = np.where(data != -9999.0) \n", + " data = out_image[0, :, :]\n", + "\n", + " row, col = np.where(data != -9999.0)\n", " val = np.extract(data != -9999.0, data)\n", "\n", " # Adding the x,y, and geometry columns is not necessary\n", - " T1 = out_transform * Affine.translation(0.5, 0.5) # reference the pixel centre\n", + " T1 = out_transform * Affine.translation(0.5, 0.5) # reference the pixel centre\n", " # row,column to x,y\n", - " rc2xy = lambda r, c: (c, r) * T1 \n", - " \n", - " d = gpd.GeoDataFrame({'col':col,'row':row,'val':val})\n", - " \n", + " rc2xy = lambda r, c: (c, r) * T1\n", + "\n", + " d = gpd.GeoDataFrame({\"col\": col, \"row\": row, \"val\": val})\n", + "\n", " # coordinate transformation\n", - " d['x'] = d.apply(lambda row: rc2xy(row.row,row.col)[0], axis=1)\n", - " d['y'] = d.apply(lambda row: rc2xy(row.row,row.col)[1], axis=1)\n", - " \n", + " d[\"x\"] = d.apply(lambda row: rc2xy(row.row, row.col)[0], axis=1)\n", + " d[\"y\"] = d.apply(lambda row: rc2xy(row.row, row.col)[1], axis=1)\n", + "\n", " # geometry\n", - " d['geometry'] = d.apply(lambda row: Point(row['x'], row['y']), axis=1)\n", - " \n", + " d[\"geometry\"] = d.apply(lambda row: Point(row[\"x\"], row[\"y\"]), axis=1)\n", + "\n", " # exlude pixels with value less than 77\n", " print(len(d))\n", - " \n", - " #print(d)\n", + "\n", + " # print(d)\n", " print(d.val[d.val > 77].to_list())\n", - " \n", + "\n", " print(len(d[d.val > 77]))\n", - " \n", - " #extend values to pop_values\n", - " pop_values.extend(d.val[d.val > 77].to_list())\n", - " " + "\n", + " # extend values to pop_values\n", + " pop_values.extend(d.val[d.val > 77].to_list())" ] }, { @@ -44556,6 +44553,7 @@ "outputs": [], "source": [ "import statistics\n", + "\n", "UZB_pop_median = statistics.median(pop_values)" ] }, @@ -44595,14 +44593,14 @@ "# Psuedocode\n", "\n", "# for each Shape/FUA:\n", - " # pixel_count_below_median = 0\n", - " # pixel_count_above_median = 0\n", - " \n", - " # Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)\n", - " # calculate pixel_share_below_median and pixel_share_above_median\n", - " \n", - " # Sprawl = ((L%−H%)+1)*0.5\n", - " # Sprawl = ((pixel_share_below_median-pixel_share_above_median)+1)*.5" + "# pixel_count_below_median = 0\n", + "# pixel_count_above_median = 0\n", + "\n", + "# Select all built-up pixels that are mostly within shape (and exclude pixels less than 77 per square km)\n", + "# calculate pixel_share_below_median and pixel_share_above_median\n", + "\n", + "# Sprawl = ((L%−H%)+1)*0.5\n", + "# Sprawl = ((pixel_share_below_median-pixel_share_above_median)+1)*.5" ] }, { @@ -77230,78 +77228,80 @@ 
"with rasterio.open(GHS_pop) as src:\n", " pixelSizeX, pixelSizeY = src.res\n", " print(pixelSizeX, pixelSizeY)\n", - " \n", + "\n", " input_shapes_gpd = gpd.read_file(shpName)\n", "\n", " # pixel_count_below_median = 0\n", " pixel_count_below_median = 0\n", " # pixel_count_above_median = 0\n", " pixel_count_above_median = 0\n", - " \n", - " #for entry in input_shapes_gpd.head(3).iterrows():\n", + "\n", + " # for entry in input_shapes_gpd.head(3).iterrows():\n", " for entry in input_shapes_gpd.iterrows():\n", " print(entry[0])\n", - " \n", + "\n", " # extract the geometry in GeoJSON format\n", - " geometry = entry[1]['geometry'] # list of shapely geometries\n", - " #geometry = geoms[0] # shapely geometry\n", + " geometry = entry[1][\"geometry\"] # list of shapely geometries\n", + " # geometry = geoms[0] # shapely geometry\n", " geoms = [mapping(geometry)]\n", "\n", - " # extract the raster values values within the polygon \n", + " # extract the raster values values within the polygon\n", " out_image, out_transform = mask(src, geoms, crop=True, nodata=-9999.0)\n", - " data = out_image[0,:,:]\n", - " \n", - " row, col = np.where(data != -9999.0) \n", + " data = out_image[0, :, :]\n", + "\n", + " row, col = np.where(data != -9999.0)\n", " val = np.extract(data != -9999.0, data)\n", - " \n", - " d = gpd.GeoDataFrame({'col':col,'row':row,'val':val})\n", - " \n", + "\n", + " d = gpd.GeoDataFrame({\"col\": col, \"row\": row, \"val\": val})\n", + "\n", " # exlude pixels with value less than 77\n", " d = d[d.val > 77]\n", " d_count = len(d)\n", - " #print(f\"d_count is {d_count}\")\n", - " \n", - " #print(d.val[d.val < UZB_pop_median])\n", - " #print(len(d.val[d.val < UZB_pop_median]))\n", + " # print(f\"d_count is {d_count}\")\n", + "\n", + " # print(d.val[d.val < UZB_pop_median])\n", + " # print(len(d.val[d.val < UZB_pop_median]))\n", " pixel_share_below_median = len(d.val[d.val < UZB_pop_median]) / d_count\n", " print(f\"pixel_share_below_median is: {pixel_share_below_median}\")\n", - " \n", - " #print(d.val[d.val > UZB_pop_median])\n", - " #print(len(d.val[d.val > UZB_pop_median]))\n", + "\n", + " # print(d.val[d.val > UZB_pop_median])\n", + " # print(len(d.val[d.val > UZB_pop_median]))\n", " pixel_share_above_median = len(d.val[d.val > UZB_pop_median]) / d_count\n", " print(f\"pixel_share_above_median is: {pixel_share_above_median}\")\n", - " \n", + "\n", " # Sprawl = ((L%−H%)+1)*0.5\n", " # Sprawl = ((pixel_count_below_median-pixel_count_above_median)+1)*.5\n", - " Sprawl = ((pixel_share_below_median-pixel_share_above_median)+1)*.5\n", + " Sprawl = ((pixel_share_below_median - pixel_share_above_median) + 1) * 0.5\n", " print(f\"Sprawl index is: {Sprawl}\")\n", - " \n", + "\n", " # creates a temporary GDF for just the row's shape\n", " temp_gdf = input_shapes_gpd.iloc[[entry[0]]]\n", - " \n", - " #print(\"print temp_gdf\")\n", - " #print(temp_gdf)\n", - " \n", + "\n", + " # print(\"print temp_gdf\")\n", + " # print(temp_gdf)\n", + "\n", " # Put all metrics in a DataFrame\n", " metrics_scalar = {}\n", - " metrics_scalar['sprawl_index'] = [Sprawl]\n", + " metrics_scalar[\"sprawl_index\"] = [Sprawl]\n", " metrics_df = pd.DataFrame(metrics_scalar)\n", - " \n", - " #print(\"print metrics_scalar\")\n", - " #print(metrics_scalar)\n", - " \n", + "\n", + " # print(\"print metrics_scalar\")\n", + " # print(metrics_scalar)\n", + "\n", " # and concatinate it with the row's shape\n", " new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", - " \n", - " #print(\"print 
new_temp_gdf\")\n", - " #print(new_temp_gdf)\n", - " #print(entry[0])\n", + "\n", + " # print(\"print new_temp_gdf\")\n", + " # print(new_temp_gdf)\n", + " # print(entry[0])\n", " # put the results of each row into a new DataFrame\n", " if entry[0] == 0:\n", " print(\"new_temp_gdf\")\n", " output_new_temp_gdf = new_temp_gdf\n", " else:\n", - " output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True) " + " output_new_temp_gdf = output_new_temp_gdf.append(\n", + " new_temp_gdf, ignore_index=True\n", + " )" ] }, { @@ -77502,24 +77502,24 @@ "outputs": [], "source": [ "# make the GeoDataFrame unprojected\n", - "output_new_temp_gdf = output_new_temp_gdf.to_crs('epsg:4326')\n", + "output_new_temp_gdf = output_new_temp_gdf.to_crs(\"epsg:4326\")\n", "\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_extents_results_5k_up\"\n", "\n", "# save as CSV\n", "\n", - "#output_new_temp_gdf.to_csv(output + r\"\\ECA_all_urban_metrics_100k_sprawl.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_FUAs_sprawl.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_sprawl.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_sprawl.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\ECA_wo_rus_urban_metrics_urban_extents_all_sprawl.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\ECA_all_urban_metrics_100k_sprawl.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_FUAs_sprawl.csv\")\n", + "# 
output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_sprawl.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_sprawl.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\ECA_wo_rus_urban_metrics_urban_extents_all_sprawl.csv\")\n", "output_new_temp_gdf.to_csv(output + r\"\\all_urban_metrics_5k_up_sprawl.csv\")" ] }, diff --git a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb index 389d001..cb2980f 100755 --- a/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb +++ b/notebooks/Implementations/KAZ_SCADR_UrbanizationReview/Urban_metrics_Structure.ipynb @@ -23,17 +23,11 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, inspect, logging, importlib\n", + "import logging\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import numpy as np\n", - "import osmnx as ox\n", - "import networkx as nx\n", - "from shapely.ops import split, unary_union\n", - "from shapely.geometry import box, Point\n", - "\n", - "import matplotlib.pyplot as plt" + "import osmnx as ox" ] }, { @@ -42,8 +36,7 @@ "metadata": {}, "outputs": [], "source": [ - "import logging\n", - "logging.basicConfig(filename='urban_structure_metrics.log',level=logging.INFO)" + "logging.basicConfig(filename=\"urban_structure_metrics.log\", level=logging.INFO)" ] }, { @@ -53,6 +46,7 @@ "outputs": [], "source": [ "import time\n", + "\n", "start_time = time.time()" ] }, @@ -62,9 +56,9 @@ "metadata": {}, "outputs": [], "source": [ - "inputFolder = r'input_folder'\n", + "inputFolder = r\"input_folder\"\n", "# define output folder\n", - "outputFolder = r'output_folder'" + "outputFolder = r\"output_folder\"" ] }, { @@ -81,12 +75,12 @@ "outputs": [], "source": [ "# read extents in WGS84\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_4326.shp\"\n", - "#shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_4326_3.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UBZ_only_FUAs2_geom_fixed.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed_greater_50k.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed.shp\"\n", - "#shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_4326.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\russia_urban_extents_merged_4326.shp\"\n", + "# shpName = r\"C:\\repos\\GOST_Urban\\Notebooks\\Implementations\\eca_wo_rus_urban_clusters_ghs_pop_smooth_100k_4326_3.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UBZ_only_FUAs2_geom_fixed.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed_greater_50k.shp\"\n", + "# shpName = 
r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_ghs_built_up_extents_4326\\UZB_only_ghs_built_up_extents_4326_geom_fixed.shp\"\n", + "# shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\ECA_wo_rus_urban_extents\\eca_wo_rus_built_up_extents_4326.shp\"\n", "shpName = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_clusters_5k_up_4326.shp\"" ] }, @@ -284,7 +278,7 @@ "metadata": {}, "outputs": [], "source": [ - "#input_shapes_gpd = input_shapes_gpd[137:]" + "# input_shapes_gpd = input_shapes_gpd[137:]" ] }, { @@ -36038,18 +36032,18 @@ "\n", "error_list = []\n", "\n", - "#for entry in input_shapes_gpd.head(2).iterrows():\n", + "# for entry in input_shapes_gpd.head(2).iterrows():\n", "for entry in input_shapes_gpd.iterrows():\n", " try:\n", " print(entry[0])\n", " # extract the geometry in GeoJSON format\n", - " geometry = entry[1]['geometry'] # list of shapely geometries\n", + " geometry = entry[1][\"geometry\"] # list of shapely geometries\n", "\n", " gdf = gpd.GeoDataFrame()\n", - " gdf['geometry'] = None\n", - " gdf.loc[0, 'geometry'] = geometry\n", + " gdf[\"geometry\"] = None\n", + " gdf.loc[0, \"geometry\"] = geometry\n", " # Set the GeoDataFrame's coordinate system to WGS84\n", - " gdf.crs = 'epsg:4326'\n", + " gdf.crs = \"epsg:4326\"\n", " area = ox.project_gdf(gdf).unary_union.area\n", " print(area)\n", "\n", @@ -36062,49 +36056,50 @@ " # creates a temporary GDF for just the row's shape\n", " temp_gdf = input_shapes_gpd.loc[[entry[0]]]\n", "\n", - " #print(\"print temp_gdf\")\n", - " #print(temp_gdf)\n", + " # print(\"print temp_gdf\")\n", + " # print(temp_gdf)\n", "\n", " # Put all metrics in a DataFrame\n", " metrics_scalar = {}\n", - " metrics_scalar['circuity_avg'] = [stats['circuity_avg']]\n", - " metrics_scalar['intersection_density_km'] = [stats['intersection_density_km']]\n", - " metrics_scalar['street_density_km'] = [stats['street_density_km']]\n", + " metrics_scalar[\"circuity_avg\"] = [stats[\"circuity_avg\"]]\n", + " metrics_scalar[\"intersection_density_km\"] = [stats[\"intersection_density_km\"]]\n", + " metrics_scalar[\"street_density_km\"] = [stats[\"street_density_km\"]]\n", " metrics_df = pd.DataFrame(metrics_scalar)\n", "\n", - " #print(\"print metrics_scalar\")\n", - " #print(metrics_scalar)\n", + " # print(\"print metrics_scalar\")\n", + " # print(metrics_scalar)\n", "\n", " # and concatinate it with the row's shape\n", " new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", "\n", - " #print(\"print new_temp_gdf\")\n", - " #print(new_temp_gdf)\n", - " #print(entry[0])\n", + " # print(\"print new_temp_gdf\")\n", + " # print(new_temp_gdf)\n", + " # print(entry[0])\n", " # put the results of each row into a new DataFrame\n", " if entry[0] == 0:\n", " print(\"new_temp_gdf\")\n", " output_new_temp_gdf = new_temp_gdf\n", " else:\n", - " output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True) \n", + " output_new_temp_gdf = output_new_temp_gdf.append(\n", + " new_temp_gdf, ignore_index=True\n", + " )\n", " except:\n", " print(f\"error with entry {entry[0]}\")\n", " error_list.append(entry[0])\n", - " #logging.INFO(f\"error with entry {entry[0]}\" + \"\\n\" + f\"entry information: {entry[1]}\")\n", - " \n", + " # logging.INFO(f\"error with entry {entry[0]}\" + \"\\n\" + f\"entry information: {entry[1]}\")\n", + "\n", " # creates a temporary GDF for just the row's shape\n", - "# 
temp_gdf = input_shapes_gpd.loc[[entry[0]]]\n", + " # temp_gdf = input_shapes_gpd.loc[[entry[0]]]\n", " # Put all metrics in a DataFrame\n", - "# metrics_scalar = {}\n", - "# metrics_scalar['circuity_avg'] = 0\n", - "# metrics_scalar['intersection_density_km'] = 0\n", - "# metrics_scalar['street_density_km'] = 0\n", - "# metrics_df = pd.DataFrame(metrics_scalar)\n", + " # metrics_scalar = {}\n", + " # metrics_scalar['circuity_avg'] = 0\n", + " # metrics_scalar['intersection_density_km'] = 0\n", + " # metrics_scalar['street_density_km'] = 0\n", + " # metrics_df = pd.DataFrame(metrics_scalar)\n", " # and concatinate it with the row's shape\n", - "# new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", - "# output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True) \n", - " continue\n", - " " + " # new_temp_gdf = pd.concat([temp_gdf.reset_index(drop=True), metrics_df], axis=1)\n", + " # output_new_temp_gdf = output_new_temp_gdf.append(new_temp_gdf, ignore_index=True)\n", + " continue" ] }, { @@ -37074,12 +37069,12 @@ "metadata": {}, "outputs": [], "source": [ - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", - "#output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_metrics_results_russia\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_FUAs_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\UZB_only_GHS_urban_extents_results_all\"\n", + "# output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\eca_urban_metrics_results_wo_rus_all\"\n", "output = r\"C:\\Users\\war-machine\\Documents\\world_bank_work\\UZB_project\\metrics_shape_tool\\all_urban_extents_results_5k_up\"" ] }, @@ -37090,11 +37085,11 @@ "outputs": [], "source": [ "# save as CSV\n", - "#output_new_temp_gdf.to_csv(output + r\"\\ECA_russia_urban_metrics_100k_structure_to_146.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_FUAs_structure.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_structure.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_structure.csv\")\n", - "#output_new_temp_gdf.to_csv(output + r\"\\ECA_wo_rus_urban_metrics_urban_extents_all_structure.csv\")\n", + "# 
output_new_temp_gdf.to_csv(output + r\"\\ECA_russia_urban_metrics_100k_structure_to_146.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_FUAs_structure.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_structure.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\UZB_only_urban_metrics_urban_extents_all_structure.csv\")\n", + "# output_new_temp_gdf.to_csv(output + r\"\\ECA_wo_rus_urban_metrics_urban_extents_all_structure.csv\")\n", "output_new_temp_gdf.to_csv(output + r\"\\all_urban_metrics_5k_up_structure.csv\")" ] }, diff --git a/notebooks/Implementations/POV_EEAPV_COG_urban_assessment/Summarize_Urban.ipynb b/notebooks/Implementations/POV_EEAPV_COG_urban_assessment/Summarize_Urban.ipynb index 52b8529..f7437b9 100644 --- a/notebooks/Implementations/POV_EEAPV_COG_urban_assessment/Summarize_Urban.ipynb +++ b/notebooks/Implementations/POV_EEAPV_COG_urban_assessment/Summarize_Urban.ipynb @@ -15,31 +15,19 @@ } ], "source": [ - "import sys, os, importlib, shutil, pathlib, datetime, math\n", - "import requests\n", - "import rasterio, elevation, richdem\n", + "import sys\n", + "import os\n", + "import rasterio\n", "import rasterio.warp\n", - "from rasterio import features\n", - "from datetime import datetime\n", "\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", "\n", - "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", - "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", "import GOSTRocks.dataMisc as dataMisc\n", - "import GOSTRocks.mapMisc as mapMisc\n", - "import GOSTRocks.rasterMisc as rMisc\n", - "import GOSTRocks.metadataMisc as meta\n", - "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", "import GOST_Urban.urban_helper as helper\n", "\n", "%load_ext autoreload\n", @@ -52,10 +40,12 @@ "metadata": {}, "outputs": [], "source": [ - "pop_urban_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/COG_URBAN_DATA_new_naming\"\n", - "in_bounds = dataMisc.get_geoboundaries('COG', 'ADM1')\n", + "pop_urban_folder = (\n", + " \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/COG_URBAN_DATA_new_naming\"\n", + ")\n", + "in_bounds = dataMisc.get_geoboundaries(\"COG\", \"ADM1\")\n", "\n", - "out_folder = '/home/wb411133/temp'" + "out_folder = \"/home/wb411133/temp\"" ] }, { @@ -65,19 +55,21 @@ "outputs": [], "source": [ "# summarize worldpop urbanization and population\n", - "class urban_summary():\n", + "class urban_summary:\n", " def __init__(self, f_base, in_folder):\n", " self.f_base = f_base\n", " self.in_folder = in_folder\n", - " \n", - " self.pop_layer = os.path.join(in_folder, f'{f_base}.tif')\n", - " self.hd_layer = os.path.join(in_folder, f'{f_base}_urban_hd.tif')\n", - " self.urb_layer = os.path.join(in_folder, f'{f_base}_urban.tif')\n", - " \n", + "\n", + " self.pop_layer = os.path.join(in_folder, f\"{f_base}.tif\")\n", + " self.hd_layer = os.path.join(in_folder, f\"{f_base}_urban_hd.tif\")\n", + " self.urb_layer = os.path.join(in_folder, f\"{f_base}_urban.tif\")\n", + "\n", " def compare_populations(self, boundaries):\n", - " calculator = helper.summarize_population(self.pop_layer, boundaries, self.urb_layer, self.hd_layer)\n", + " calculator = helper.summarize_population(\n", + " self.pop_layer, 
boundaries, self.urb_layer, self.hd_layer\n", + " )\n", " pop_res = calculator.calculate_zonal()\n", - " return(pop_res)" + " return pop_res" ] }, { @@ -87,8 +79,8 @@ "outputs": [], "source": [ "source_folder = os.path.join(pop_urban_folder, \"FINAL_STANDARD\")\n", - "comp1_base = 'cog_cpo20'\n", - "comp2_base = 'cog_cpo20_WB'\n", + "comp1_base = \"cog_cpo20\"\n", + "comp2_base = \"cog_cpo20_WB\"\n", "\n", "calculator = urban_summary(comp1_base, source_folder)\n", "cpo20_pop_res = calculator.compare_populations(in_bounds)\n", @@ -96,7 +88,9 @@ "calculator = urban_summary(comp2_base, source_folder)\n", "cpo20WB_pop_res = calculator.compare_populations(in_bounds)\n", "\n", - "combo_res = cpo20_pop_res.loc[:,[x for x in cpo20_pop_res.columns if \"SUM\" in x]].join(cpo20WB_pop_res.loc[:,[x for x in cpo20WB_pop_res.columns if \"SUM\" in x]])\n", + "combo_res = cpo20_pop_res.loc[:, [x for x in cpo20_pop_res.columns if \"SUM\" in x]].join(\n", + " cpo20WB_pop_res.loc[:, [x for x in cpo20WB_pop_res.columns if \"SUM\" in x]]\n", + ")\n", "combo_res.to_csv(os.path.join(out_folder, \"COG_250m_urban_population_summary.csv\"))" ] }, @@ -427,8 +421,8 @@ "outputs": [], "source": [ "source_folder = os.path.join(pop_urban_folder, \"FINAL_STANDARD_1KM\")\n", - "comp1_base = 'cog1k_cpo20'\n", - "comp2_base = 'cog1k_cpo20_WB'\n", + "comp1_base = \"cog1k_cpo20\"\n", + "comp2_base = \"cog1k_cpo20_WB\"\n", "\n", "calculator = urban_summary(comp1_base, source_folder)\n", "cpo20_pop_res = calculator.compare_populations(in_bounds)\n", @@ -436,7 +430,9 @@ "calculator = urban_summary(comp2_base, source_folder)\n", "cpo20WB_pop_res = calculator.compare_populations(in_bounds)\n", "\n", - "combo_res = cpo20_pop_res.loc[:,[x for x in cpo20_pop_res.columns if \"SUM\" in x]].join(cpo20WB_pop_res.loc[:,[x for x in cpo20WB_pop_res.columns if \"SUM\" in x]])\n", + "combo_res = cpo20_pop_res.loc[:, [x for x in cpo20_pop_res.columns if \"SUM\" in x]].join(\n", + " cpo20WB_pop_res.loc[:, [x for x in cpo20WB_pop_res.columns if \"SUM\" in x]]\n", + ")\n", "combo_res.to_csv(os.path.join(out_folder, \"COG_1km_urban_population_summary.csv\"))" ] }, @@ -456,11 +452,11 @@ "outputs": [], "source": [ "source_folder = os.path.join(pop_urban_folder, \"FINAL_STANDARD\")\n", - "comp1_base = 'cog_cpo20'\n", - "comp2_base = 'cog_cpo20_WB'\n", + "comp1_base = \"cog_cpo20\"\n", + "comp2_base = \"cog_cpo20_WB\"\n", "\n", - "com1_pop = rasterio.open(os.path.join(source_folder, f'{comp1_base}.tif')).read()\n", - "com2_pop = rasterio.open(os.path.join(source_folder, f'{comp2_base}.tif')).read()" + "com1_pop = rasterio.open(os.path.join(source_folder, f\"{comp1_base}.tif\")).read()\n", + "com2_pop = rasterio.open(os.path.join(source_folder, f\"{comp2_base}.tif\")).read()" ] }, { diff --git a/notebooks/Implementations/README.md b/notebooks/Implementations/README.md index cc218e2..2942e71 100644 --- a/notebooks/Implementations/README.md +++ b/notebooks/Implementations/README.md @@ -1,2 +1,2 @@ # Implementations -The primary role of the GOST team in the World Bank Group is to support operational teams in their exploitation of geospatial data. This happens in many different ways, and the notebooks herein present examples of the specific support the team has provided to various investigations of urbanization. \ No newline at end of file +The primary role of the GOST team in the World Bank Group is to support operational teams in their exploitation of geospatial data. 
This happens in many different ways, and the notebooks herein present examples of the specific support the team has provided to various investigations of urbanization. diff --git a/notebooks/Implementations/Slum_Mapping/Imagery_Search.ipynb b/notebooks/Implementations/Slum_Mapping/Imagery_Search.ipynb index eae212e..aa82e3c 100644 --- a/notebooks/Implementations/Slum_Mapping/Imagery_Search.ipynb +++ b/notebooks/Implementations/Slum_Mapping/Imagery_Search.ipynb @@ -6,21 +6,13 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, importlib, math, multiprocessing\n", - "import rasterio, geojson\n", + "import sys\n", + "import os\n", "\n", - "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "\n", - "from h3 import h3\n", - "from tqdm import tqdm\n", - "from shapely.geometry import Polygon\n", "\n", - "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", - "import GOSTRocks.rasterMisc as rMisc\n", - "import GOSTRocks.ntlMisc as ntl\n", - "from GOSTRocks.misc import tPrint" + "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")" ] }, { @@ -30,7 +22,7 @@ "outputs": [], "source": [ "input_folder = \"/home/wb411133/projects/URB_slum_mapping/data/\"\n", - "boundary_folder = os.path.join(input_folder, 'boundary')\n", + "boundary_folder = os.path.join(input_folder, \"boundary\")\n", "wb_footprints = os.path.join(input_folder, \"WB_outlines_footgeo.geojson\")\n", "\n", "in_shps = []\n", @@ -39,7 +31,7 @@ " if f.endswith(\".shp\"):\n", " in_shps.append(os.path.join(root, f))\n", "in_wb = gpd.read_file(wb_footprints)\n", - "in_wb['geometry'] = in_wb['geometry'].apply(lambda x: x.buffer(0))" + "in_wb[\"geometry\"] = in_wb[\"geometry\"].apply(lambda x: x.buffer(0))" ] }, { @@ -176,10 +168,10 @@ "source": [ "all_size = 0\n", "for shp, images in good_res.items():\n", - " city = os.path.basename(os.path.dirname(shp)) \n", + " city = os.path.basename(os.path.dirname(shp))\n", " for idx, row in images.iterrows():\n", " print(f\"{row['location']}\")\n", - " all_size += float(row['zippedSize'])" + " all_size += float(row[\"zippedSize\"])" ] }, { @@ -219,7 +211,7 @@ } ], "source": [ - "all_size/1024/1024/1024" + "all_size / 1024 / 1024 / 1024" ] }, { diff --git a/notebooks/Implementations/Slum_Mapping/slumML/OpenBuildings2FeatureClass.py b/notebooks/Implementations/Slum_Mapping/slumML/OpenBuildings2FeatureClass.py index 9cb98ff..ac48dc4 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/OpenBuildings2FeatureClass.py +++ b/notebooks/Implementations/Slum_Mapping/slumML/OpenBuildings2FeatureClass.py @@ -12,75 +12,81 @@ ### Set the workplace and read data here--------------------------------------------- -data = 'Cairo_metropolitan_area.csv'#Target Google OpenBuilding CSV file. Alter the path if necessary. -gdb = 'D:/GoogleBuildings.gdb'#Geodatabase to store the transformed data. -fc_name = 'Cairo_metropolitan_area_TEST'#The name to be used for the new feature class. -arcpy.env.workspace = gdb#ArcGIS Pro workplace setting. Keep it as it is unless you need any specific adjustment. -spRef = arcpy.SpatialReference(4326)#Specify the spatial reference for the process. For OpenBuilding, EPSG:4326 -tarConf = 0.5#Confidence threshold, if necessary. If you want all records, insert 0.0. - -with open(data, 'r', encoding="utf-8_sig", ) as F: +data = "Cairo_metropolitan_area.csv" # Target Google OpenBuilding CSV file. Alter the path if necessary. +gdb = "D:/GoogleBuildings.gdb" # Geodatabase to store the transformed data. 
+fc_name = ( + "Cairo_metropolitan_area_TEST" # The name to be used for the new feature class. +) +arcpy.env.workspace = gdb # ArcGIS Pro workplace setting. Keep it as it is unless you need any specific adjustment. +spRef = arcpy.SpatialReference( + 4326 +) # Specify the spatial reference for the process. For OpenBuilding, EPSG:4326 +tarConf = ( + 0.5 # Confidence threshold, if necessary. If you want all records, insert 0.0. +) + +with open( + data, + "r", + encoding="utf-8_sig", +) as F: df = pd.read_csv(F, sep=",") - ###---------------------------------------------------------------------------------- ### Specify a target field list for the InsertCursor function below. ### This list should be exactly same as 'fields_desc' below except for 'SHAPE@' token. fields = [ - 'latitude', - 'longitude', - 'areaSize_m2', - 'confidence', - 'fullPlus_code', - 'SHAPE@' - ] + "latitude", + "longitude", + "areaSize_m2", + "confidence", + "fullPlus_code", + "SHAPE@", +] ### Create a new empty feature class here-------------------------------------------- # Set fields definition to be created within an empty feature class: fields_desc = [ - ['latitude', 'Double'], - ['longitude', 'Double'], - ['areaSize_m2', 'Double'], - ['confidence', 'Double'], - ['fullPlus_code', 'Text'] - ] + ["latitude", "Double"], + ["longitude", "Double"], + ["areaSize_m2", "Double"], + ["confidence", "Double"], + ["fullPlus_code", "Text"], +] arcpy.management.CreateFeatureclass(gdb, fc_name, "Polygon", "", "", "", spRef) arcpy.management.AddFields(fc_name, fields_desc) - ### Cleaning the raw table and mask by target confidence level----------------------- -df_clean = df[df['geometry'].str.contains('|'.join(['POLYGON']))].copy() +df_clean = df[df["geometry"].str.contains("|".join(["POLYGON"]))].copy() # Select records with a valid geometry (that starts with 'POLYGON'). # If a record starts with invalid texts (such as 'EMPTY'), the record will be removed. -df_conf = df_clean[df_clean['confidence'] > tarConf].copy() +df_conf = df_clean[df_clean["confidence"] > tarConf].copy() # Mask the table by confidence level. 
### TO TEST THE CODE with a small chunk of data: -#df_test = df_conf.iloc[0:100, :].copy() -#df_test.reset_index(inplace=True, drop=True) - +# df_test = df_conf.iloc[0:100, :].copy() +# df_test.reset_index(inplace=True, drop=True) ### Main loop - Convert the CSV data to a feature class:----------------------------- for i, r in df_conf.iterrows(): - - geomet = arcpy.FromWKT(r['geometry'], spRef) + geomet = arcpy.FromWKT(r["geometry"], spRef) lat = r[0] long = r[1] area = r[2] conf = r[3] plus = r[5] - + rowList = [lat, long, area, conf, plus, geomet] - + with arcpy.da.InsertCursor(fc_name, fields) as cursor: cursor.insertRow(rowList) -print('END PROCESS.') \ No newline at end of file +print("END PROCESS.") diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb index 4b9a56d..b56f15e 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.ipynb @@ -1 +1,485 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"STEP1.ipynb","provenance":[],"collapsed_sections":[],"mount_file_id":"1i8cjeXnEH4KWK49QPqea_3tfmAHKbLBO","authorship_tag":"ABX9TyPFrE7NgvLX3bil/AKToTSa"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# **STEP-1**"],"metadata":{"id":"lwK54umtCYgr"}},{"cell_type":"markdown","source":["Import modules"],"metadata":{"id":"uMrArolCC_HU"}},{"cell_type":"code","execution_count":1,"metadata":{"id":"q42KpdZ3CHcc","executionInfo":{"status":"ok","timestamp":1649162226834,"user_tz":-540,"elapsed":343,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"outputs":[],"source":["import sys\n","sys.path.append('/content/drive/MyDrive/Colab Notebooks/modules')"]},{"cell_type":"code","source":["# -*- coding: utf-8 -*-\n","import pandas as pd\n","import time\n","import geopandas as gpd\n","from scipy import spatial\n","import numpy as np\n","from sklearn.neighbors import KDTree\n","from multiprocessing import Pool\n","import multiprocessing"],"metadata":{"id":"kD_wjFLZDDqi","executionInfo":{"status":"ok","timestamp":1649162263940,"user_tz":-540,"elapsed":35924,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":2,"outputs":[]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","## **Initial file setting**\n"],"metadata":{"id":"LuzVCHyjDYXE"}},{"cell_type":"code","source":["f = '/content/drive/MyDrive/Colab Notebooks/slumML/data/Bambari/Bambari_DA_shape.shp'#Input shapefile path\n","outf = '/content/drive/MyDrive/Colab Notebooks/slumML/data/Bambari/Bambari_DA_morphology.shp'#Output shapefile path"],"metadata":{"id":"E0K1hsfADnHC","executionInfo":{"status":"ok","timestamp":1649162272100,"user_tz":-540,"elapsed":282,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["# Spatial reference setting\n","WGS = 'epsg:4326'\n","UTM = 'epsg:32629'\n","\n","# Options for multi-processor process (currently not used)\n","save_thresh = 100000 # save progress every [] rows \n","print_thresh = 10000 # print out calculation process every [] rows for each processor\n","cpuPower = 
1/2"],"metadata":{"id":"HQkkty3zDiwu","executionInfo":{"status":"ok","timestamp":1649162274384,"user_tz":-540,"elapsed":2,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","## **Data treatment**"],"metadata":{"id":"GupC1EmVEMAY"}},{"cell_type":"code","source":["# Prepare the original shape file\n","original = gpd.read_file(f)# Read ESEI shapefile\n","if original.crs != WGS:\n"," original = original.to_crs(WGS)# Convert the spatial referenct to WGS if it is not\n"," \n","original['PID'] = original.index + 1\n","\n","\n","# Create a deep copy of 'original' \n","fil = original.copy()\n","\n","fil = fil.to_crs(UTM)# Convert the spatial reference to UTM\n","# Adding attributes to the shapefile: area, geomerty, and PID (unique IDs)\n","fil['area'] = fil.area\n","fil['centroid'] = fil['geometry'].centroid\n","\n","fil = fil.to_crs(WGS)# Convert back to WGS\n","fil = fil[['PID','centroid','area']]\n","\n","#short = fil[:50000]# Activate this line and diactivate the below line if you want to test the code with a smaller records.\n","short = fil\n","\n","# Generate KD tree matrix\n","area_dict = dict(zip(list(short.index), list(short['area'])))\n","matrix = list(zip(short.centroid.apply(lambda x: x.x),short.centroid.apply(lambda x: x.y)))\n","KD_tree = KDTree(matrix)"],"metadata":{"id":"gKQaWpAkERnc","executionInfo":{"status":"ok","timestamp":1649162303460,"user_tz":-540,"elapsed":27127,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":5,"outputs":[]},{"cell_type":"markdown","source":["**Morphology generation function**"],"metadata":{"id":"GnJRw1cnEncn"}},{"cell_type":"code","source":["def Main(passed_dict):\n"," \n"," # unpack passed dict into local variables for this thread.\n"," short = passed_dict['df']\n"," thread_no = passed_dict['thread_no']\n"," print_thresh = passed_dict['print_thresh']\n"," save_thresh = passed_dict['save_thresh']\n"," \n"," # set up some counters / timings\n"," t = time.time()\n"," counter = 1\n"," \n"," bundle = []\n"," \n"," # iterate through each row of the passed DataFrame of housing polygons.\n"," for index, row in short.iterrows():\n"," \n"," # identify the x and y coordinates of the house's centroid\n"," y = row.centroid.y\n"," x = row.centroid.x\n"," \n"," # Query the KD tree for the first 26 objects (1 will be the house itself.)\n"," # this returns a dataframe of the nearest 26 objects, their distances, and their indices. 
\n"," distances, indices = KD_tree.query([(x,y)], k = 26)\n","\n"," # Distance calculations - closest 5\n"," # here, we subset the distances frame for the first 5 neighbours, and calculate summary stats\n"," nearest_5_distances = list(distances[0])[1:6] # subset / slice\n"," min_5 = min(nearest_5_distances) # closest neighbour of the 5 closest (min distance to another building)\n"," max_5 = max(nearest_5_distances) # furthest neighbour of the 5 closest (min distance to another building)\n"," mean_5 = np.mean(nearest_5_distances) # average distance of centroids of 5 nearest neighbours\n"," median_5 = np.median(nearest_5_distances) # median distance of centroids of 5 nearest neighbours\n"," dist_5_std = np.std(nearest_5_distances) # standard deviation of centroids of 5 nearest neighbours\n","\n"," # Distance calculations - closest 25\n"," # here, we subset the distances frame for the first 25 neighbours, and calculate summary stats\n"," nearest_25_distances = list(distances[0])[1:]\n"," min_25 = min(nearest_25_distances)\n"," max_25 = max(nearest_25_distances)\n"," mean_25 = np.mean(nearest_25_distances)\n"," median_25 = np.median(nearest_25_distances)\n"," dist_25_std = np.std(nearest_5_distances)\n","\n"," # Areal calculations - closest 5\n"," # here, instead of the distances frame we generated via the KD tree, we use the area_dict \n"," # and query it with the indices from the KD tree step\n"," indices_5 = list(indices[0])[1:6]\n"," areas = [area_dict[x] for x in indices_5] \n"," area_5_mean = np.mean(areas) # mean area of 5 nearest neighbours\n"," area_5_med = np.median(areas) # median area of 5 nearest neighbours\n"," area_5_stdev = np.std(areas) # standard deviation of area of 5 nearest neighbours\n","\n"," # Areal calculations - closest 25\n"," # repeat above block for closest 25\n"," indices_25 = list(indices[0])[1:]\n"," areas = [area_dict[x] for x in indices_25]\n"," area_25_mean = np.mean(areas)\n"," area_25_med = np.median(areas)\n"," area_25_stdev = np.std(areas)\n","\n"," # Count\n"," # here we turn the process on its head, and identify all objects within certain distance thresholds\n"," count_25m = KD_tree.query_radius([(x,y)], r = 25, count_only = True)[0] # count of buildings in 25m radius\n"," count_50m = KD_tree.query_radius([(x,y)], r = 50, count_only = True)[0] # count of buildings in 50m radius\n"," count_100m = KD_tree.query_radius([(x,y)], r = 100, count_only = True)[0] # count of buildings in 100m radius\n"," \n"," # add these stats to a dictionary called 'ans'\n"," ans = {'PID':row.PID,\n"," 'area':row.area,\n"," 'D5_min':min_5,\n"," 'D5_max':max_5,\n"," 'D5_mean':mean_5,\n"," 'D5_med':median_5,\n"," 'D5_std':dist_5_std,\n"," 'A5_mean':area_5_mean,\n"," 'A5_med':area_5_med,\n"," 'A5_std':area_5_stdev,\n"," 'D25_min':min_25,\n"," 'D25_max':max_25,\n"," 'D25_mean':mean_25,\n"," 'D25_med':median_25,\n"," 'D25_std':dist_25_std,\n"," 'A25_mean':area_25_mean,\n"," 'A25_med':area_25_med,\n"," 'A25_std':area_25_stdev,\n"," 'Count_25m':count_25m,\n"," 'Count_50m':count_50m,\n"," 'Count_100m':count_100m\n"," }\n","\n"," bundle.append(ans)\n"," \n"," # keep track of progress via this row\n"," if counter % print_thresh == 0:\n"," print('%s rows completed at %s' % (counter, time.ctime()))\n"," \n"," '''\n"," # this functionality saves progress in case the process cannot be finished in one sitting. \n"," # ideally, finish the processing in one sitting. 
\n"," old = 0\n"," if counter % save_thresh == 0:\n"," saver = pd.DataFrame(bundle)\n"," saver = saver[list(bundle[0].keys())]\n"," if saver.crs != WGS:\n"," saver = saver.to_crs(WGS)\n"," saver = saver.set_index('PID')\n"," saver = saver.set_index('PID')\n"," saver['geometry'] = saver['geometry']\n"," saver = gpd.GeoDataFrame(saver, geometry = 'geometry', crs = WGS)\n"," saver.to_file(os.path.join(pth, 'output_%s_to_%s_thread_%s.shp' % (old, counter, thread_no)), driver = 'ESRI Shapefile')\n"," bundle = []\n"," old = counter\n"," '''\n"," \n"," counter+=1\n"," \n"," return(bundle)\n"," \n"," print('Task completed in %s seconds' % (time.time() - t))\n","###"],"metadata":{"id":"yAP7uTfxEl6W","executionInfo":{"status":"ok","timestamp":1649162306665,"user_tz":-540,"elapsed":243,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":6,"outputs":[]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","## **Generating building morphology**"],"metadata":{"id":"h1RaBLPyE7Pz"}},{"cell_type":"code","source":["d = {}\n","\n","d = {\n"," 'df':short,\n"," 'thread_no':1,\n"," 'print_thresh':print_thresh,\n"," 'save_thresh':save_thresh\n","}\n","\n","result = Main(d)\n","out_df = pd.DataFrame(result)"],"metadata":{"id":"Auvs_07qFGcw","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1649162355336,"user_tz":-540,"elapsed":44491,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"97a8ee7e-8b4d-4230-a13f-ba8ceeda67a6"},"execution_count":7,"outputs":[{"output_type":"stream","name":"stdout","text":["10000 rows completed at Tue Apr 5 12:38:46 2022\n","20000 rows completed at Tue Apr 5 12:39:00 2022\n","30000 rows completed at Tue Apr 5 12:39:14 2022\n"]}]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","## **Post-analytical process**"],"metadata":{"id":"GAuJYnwEFJS9"}},{"cell_type":"code","source":["### Final data output----------------------------------------------------------------\n","original = original.set_index('PID')# Reset the index of 'original' DF.\n","out_df = out_df.set_index('PID')# Reset the index of 'out_df'\n","out_df['geometry'] = original['geometry']# Copy the original geometry to the geometry col of 'out_df'\n","out_df = gpd.GeoDataFrame(out_df, geometry = 'geometry', crs = WGS)\n","out_df.to_file(outf, driver = 'ESRI Shapefile')"],"metadata":{"id":"zO0O_WyhFMbY","executionInfo":{"status":"ok","timestamp":1649162510022,"user_tz":-540,"elapsed":28874,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":8,"outputs":[]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "lwK54umtCYgr" + }, + "source": [ + "# **STEP-1**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uMrArolCC_HU" + }, + "source": [ + "Import modules" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "executionInfo": { + "elapsed": 343, + "status": "ok", + "timestamp": 1649162226834, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "q42KpdZ3CHcc" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"/content/drive/MyDrive/Colab Notebooks/modules\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "executionInfo": { + "elapsed": 35924, + "status": "ok", + "timestamp": 1649162263940, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + 
}, + "id": "kD_wjFLZDDqi" + }, + "outputs": [], + "source": [ + "# -*- coding: utf-8 -*-\n", + "import pandas as pd\n", + "import time\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "from sklearn.neighbors import KDTree" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LuzVCHyjDYXE" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "## **Initial file setting**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 282, + "status": "ok", + "timestamp": 1649162272100, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "E0K1hsfADnHC" + }, + "outputs": [], + "source": [ + "f = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Bambari/Bambari_DA_shape.shp\" # Input shapefile path\n", + "outf = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Bambari/Bambari_DA_morphology.shp\" # Output shapefile path" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 2, + "status": "ok", + "timestamp": 1649162274384, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "HQkkty3zDiwu" + }, + "outputs": [], + "source": [ + "# Spatial reference setting\n", + "WGS = \"epsg:4326\"\n", + "UTM = \"epsg:32629\"\n", + "\n", + "# Options for multi-processor process (currently not used)\n", + "save_thresh = 100000 # save progress every [] rows\n", + "print_thresh = 10000 # print out calculation process every [] rows for each processor\n", + "cpuPower = 1 / 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GupC1EmVEMAY" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "## **Data treatment**" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "executionInfo": { + "elapsed": 27127, + "status": "ok", + "timestamp": 1649162303460, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "gKQaWpAkERnc" + }, + "outputs": [], + "source": [ + "# Prepare the original shape file\n", + "original = gpd.read_file(f) # Read ESEI shapefile\n", + "if original.crs != WGS:\n", + " original = original.to_crs(WGS) # Convert the spatial referenct to WGS if it is not\n", + "\n", + "original[\"PID\"] = original.index + 1\n", + "\n", + "\n", + "# Create a deep copy of 'original'\n", + "fil = original.copy()\n", + "\n", + "fil = fil.to_crs(UTM) # Convert the spatial reference to UTM\n", + "# Adding attributes to the shapefile: area, geomerty, and PID (unique IDs)\n", + "fil[\"area\"] = fil.area\n", + "fil[\"centroid\"] = fil[\"geometry\"].centroid\n", + "\n", + "fil = fil.to_crs(WGS) # Convert back to WGS\n", + "fil = fil[[\"PID\", \"centroid\", \"area\"]]\n", + "\n", + "# short = fil[:50000]# Activate this line and diactivate the below line if you want to test the code with a smaller records.\n", + "short = fil\n", + "\n", + "# Generate KD tree matrix\n", + "area_dict = dict(zip(list(short.index), list(short[\"area\"])))\n", + "matrix = list(\n", + " zip(short.centroid.apply(lambda x: x.x), short.centroid.apply(lambda x: x.y))\n", + ")\n", + "KD_tree = KDTree(matrix)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GnJRw1cnEncn" + }, + "source": [ + "**Morphology generation function**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "executionInfo": { + "elapsed": 243, + "status": "ok", + "timestamp": 
1649162306665, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "yAP7uTfxEl6W" + }, + "outputs": [], + "source": [ + "def Main(passed_dict):\n", + " # unpack passed dict into local variables for this thread.\n", + " short = passed_dict[\"df\"]\n", + " thread_no = passed_dict[\"thread_no\"]\n", + " print_thresh = passed_dict[\"print_thresh\"]\n", + " save_thresh = passed_dict[\"save_thresh\"]\n", + "\n", + " # set up some counters / timings\n", + " t = time.time()\n", + " counter = 1\n", + "\n", + " bundle = []\n", + "\n", + " # iterate through each row of the passed DataFrame of housing polygons.\n", + " for index, row in short.iterrows():\n", + " # identify the x and y coordinates of the house's centroid\n", + " y = row.centroid.y\n", + " x = row.centroid.x\n", + "\n", + " # Query the KD tree for the first 26 objects (1 will be the house itself.)\n", + " # this returns a dataframe of the nearest 26 objects, their distances, and their indices.\n", + " distances, indices = KD_tree.query([(x, y)], k=26)\n", + "\n", + " # Distance calculations - closest 5\n", + " # here, we subset the distances frame for the first 5 neighbours, and calculate summary stats\n", + " nearest_5_distances = list(distances[0])[1:6] # subset / slice\n", + " min_5 = min(\n", + " nearest_5_distances\n", + " ) # closest neighbour of the 5 closest (min distance to another building)\n", + " max_5 = max(\n", + " nearest_5_distances\n", + " ) # furthest neighbour of the 5 closest (min distance to another building)\n", + " mean_5 = np.mean(\n", + " nearest_5_distances\n", + " ) # average distance of centroids of 5 nearest neighbours\n", + " median_5 = np.median(\n", + " nearest_5_distances\n", + " ) # median distance of centroids of 5 nearest neighbours\n", + " dist_5_std = np.std(\n", + " nearest_5_distances\n", + " ) # standard deviation of centroids of 5 nearest neighbours\n", + "\n", + " # Distance calculations - closest 25\n", + " # here, we subset the distances frame for the first 25 neighbours, and calculate summary stats\n", + " nearest_25_distances = list(distances[0])[1:]\n", + " min_25 = min(nearest_25_distances)\n", + " max_25 = max(nearest_25_distances)\n", + " mean_25 = np.mean(nearest_25_distances)\n", + " median_25 = np.median(nearest_25_distances)\n", + " dist_25_std = np.std(nearest_5_distances)\n", + "\n", + " # Areal calculations - closest 5\n", + " # here, instead of the distances frame we generated via the KD tree, we use the area_dict\n", + " # and query it with the indices from the KD tree step\n", + " indices_5 = list(indices[0])[1:6]\n", + " areas = [area_dict[x] for x in indices_5]\n", + " area_5_mean = np.mean(areas) # mean area of 5 nearest neighbours\n", + " area_5_med = np.median(areas) # median area of 5 nearest neighbours\n", + " area_5_stdev = np.std(\n", + " areas\n", + " ) # standard deviation of area of 5 nearest neighbours\n", + "\n", + " # Areal calculations - closest 25\n", + " # repeat above block for closest 25\n", + " indices_25 = list(indices[0])[1:]\n", + " areas = [area_dict[x] for x in indices_25]\n", + " area_25_mean = np.mean(areas)\n", + " area_25_med = np.median(areas)\n", + " area_25_stdev = np.std(areas)\n", + "\n", + " # Count\n", + " # here we turn the process on its head, and identify all objects within certain distance thresholds\n", + " count_25m = KD_tree.query_radius([(x, y)], r=25, count_only=True)[\n", + " 0\n", + " ] # count of buildings in 25m radius\n", + " count_50m = KD_tree.query_radius([(x, y)], 
r=50, count_only=True)[\n", + " 0\n", + " ] # count of buildings in 50m radius\n", + " count_100m = KD_tree.query_radius([(x, y)], r=100, count_only=True)[\n", + " 0\n", + " ] # count of buildings in 100m radius\n", + "\n", + " # add these stats to a dictionary called 'ans'\n", + " ans = {\n", + " \"PID\": row.PID,\n", + " \"area\": row.area,\n", + " \"D5_min\": min_5,\n", + " \"D5_max\": max_5,\n", + " \"D5_mean\": mean_5,\n", + " \"D5_med\": median_5,\n", + " \"D5_std\": dist_5_std,\n", + " \"A5_mean\": area_5_mean,\n", + " \"A5_med\": area_5_med,\n", + " \"A5_std\": area_5_stdev,\n", + " \"D25_min\": min_25,\n", + " \"D25_max\": max_25,\n", + " \"D25_mean\": mean_25,\n", + " \"D25_med\": median_25,\n", + " \"D25_std\": dist_25_std,\n", + " \"A25_mean\": area_25_mean,\n", + " \"A25_med\": area_25_med,\n", + " \"A25_std\": area_25_stdev,\n", + " \"Count_25m\": count_25m,\n", + " \"Count_50m\": count_50m,\n", + " \"Count_100m\": count_100m,\n", + " }\n", + "\n", + " bundle.append(ans)\n", + "\n", + " # keep track of progress via this row\n", + " if counter % print_thresh == 0:\n", + " print(\"%s rows completed at %s\" % (counter, time.ctime()))\n", + "\n", + " \"\"\"\n", + " # this functionality saves progress in case the process cannot be finished in one sitting. \n", + " # ideally, finish the processing in one sitting. \n", + " old = 0\n", + " if counter % save_thresh == 0:\n", + " saver = pd.DataFrame(bundle)\n", + " saver = saver[list(bundle[0].keys())]\n", + " if saver.crs != WGS:\n", + " saver = saver.to_crs(WGS)\n", + " saver = saver.set_index('PID')\n", + " saver = saver.set_index('PID')\n", + " saver['geometry'] = saver['geometry']\n", + " saver = gpd.GeoDataFrame(saver, geometry = 'geometry', crs = WGS)\n", + " saver.to_file(os.path.join(pth, 'output_%s_to_%s_thread_%s.shp' % (old, counter, thread_no)), driver = 'ESRI Shapefile')\n", + " bundle = []\n", + " old = counter\n", + " \"\"\"\n", + "\n", + " counter += 1\n", + "\n", + " return bundle\n", + "\n", + " print(\"Task completed in %s seconds\" % (time.time() - t))\n", + "\n", + "\n", + "###" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h1RaBLPyE7Pz" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "## **Generating building morphology**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 44491, + "status": "ok", + "timestamp": 1649162355336, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "Auvs_07qFGcw", + "outputId": "97a8ee7e-8b4d-4230-a13f-ba8ceeda67a6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10000 rows completed at Tue Apr 5 12:38:46 2022\n", + "20000 rows completed at Tue Apr 5 12:39:00 2022\n", + "30000 rows completed at Tue Apr 5 12:39:14 2022\n" + ] + } + ], + "source": [ + "d = {}\n", + "\n", + "d = {\n", + " \"df\": short,\n", + " \"thread_no\": 1,\n", + " \"print_thresh\": print_thresh,\n", + " \"save_thresh\": save_thresh,\n", + "}\n", + "\n", + "result = Main(d)\n", + "out_df = pd.DataFrame(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GAuJYnwEFJS9" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "## **Post-analytical process**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "executionInfo": { + "elapsed": 28874, + "status": "ok", + "timestamp": 1649162510022, + "user": { + 
"displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "zO0O_WyhFMbY" + }, + "outputs": [], + "source": [ + "### Final data output----------------------------------------------------------------\n", + "original = original.set_index(\"PID\") # Reset the index of 'original' DF.\n", + "out_df = out_df.set_index(\"PID\") # Reset the index of 'out_df'\n", + "out_df[\"geometry\"] = original[\n", + " \"geometry\"\n", + "] # Copy the original geometry to the geometry col of 'out_df'\n", + "out_df = gpd.GeoDataFrame(out_df, geometry=\"geometry\", crs=WGS)\n", + "out_df.to_file(outf, driver=\"ESRI Shapefile\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyPFrE7NgvLX3bil/AKToTSa", + "collapsed_sections": [], + "mount_file_id": "1i8cjeXnEH4KWK49QPqea_3tfmAHKbLBO", + "name": "STEP1.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py index b0e22f6..d93f560 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP1.py @@ -1,13 +1,10 @@ # -*- coding: utf-8 -*- import pandas as pd import geopandas as gpd -import sys, os -from scipy import spatial +import os import numpy as np from sklearn.neighbors import KDTree import time -from multiprocessing import Pool -import multiprocessing ### Credit: The base code of STEP1 and STEP2 were coded by Alex Chunet ### @@ -15,13 +12,13 @@ start_time = time.time() pth = os.getcwd() -WGS = {'init':'epsg:4326'} -UTM = {'init':'epsg:32629'} -save_thresh = 100000 # save progress every [] rows -print_thresh = 10000 # print out calculation process every [] rows for each processor +WGS = {"init": "epsg:4326"} +UTM = {"init": "epsg:32629"} +save_thresh = 100000 # save progress every [] rows +print_thresh = 10000 # print out calculation process every [] rows for each processor -cpuPower = 1/2 -shpName = '/Niamey_data/DA_Niamey.shp' +cpuPower = 1 / 2 +shpName = "/Niamey_data/DA_Niamey.shp" ### @@ -31,55 +28,65 @@ fil = gpd.read_file(f) if fil.crs != WGS: fil = fil.to_crs(WGS) -fil = fil.to_crs(UTM) -fil['area'] = fil.area -fil['centroid'] = fil['geometry'].centroid +fil = fil.to_crs(UTM) +fil["area"] = fil.area +fil["centroid"] = fil["geometry"].centroid fil = fil.to_crs(WGS) -fil = fil[['PID','centroid','area']] +fil = fil[["PID", "centroid", "area"]] -#short = fil[:50000] +# short = fil[:50000] short = fil -area_dict = dict(zip(list(short.index), list(short['area']))) -matrix = list(zip(short.centroid.apply(lambda x: x.x),short.centroid.apply(lambda x: x.y))) +area_dict = dict(zip(list(short.index), list(short["area"]))) +matrix = list( + zip(short.centroid.apply(lambda x: x.x), short.centroid.apply(lambda x: x.y)) +) KD_tree = KDTree(matrix) ### def Main(passed_dict): - # unpack passed dict into local variables for this thread. 
- short = passed_dict['df'] - thread_no = passed_dict['thread_no'] - print_thresh = passed_dict['print_thresh'] - save_thresh = passed_dict['save_thresh'] - + short = passed_dict["df"] + thread_no = passed_dict["thread_no"] + print_thresh = passed_dict["print_thresh"] + save_thresh = passed_dict["save_thresh"] + # set up some counters / timings t = time.time() counter = 1 - + bundle = [] - + # iterate through each row of the passed DataFrame of housing polygons. for index, row in short.iterrows(): - # identify the x and y coordinates of the house's centroid y = row.centroid.y x = row.centroid.x - + # Query the KD tree for the first 26 objects (1 will be the house itself.) - # this returns a dataframe of the nearest 26 objects, their distances, and their indices. - distances, indices = KD_tree.query([(x,y)], k = 26) + # this returns a dataframe of the nearest 26 objects, their distances, and their indices. + distances, indices = KD_tree.query([(x, y)], k=26) # Distance calculations - closest 5 # here, we subset the distances frame for the first 5 neighbours, and calculate summary stats nearest_5_distances = list(distances[0])[1:6] # subset / slice - min_5 = min(nearest_5_distances) # closest neighbour of the 5 closest (min distance to another building) - max_5 = max(nearest_5_distances) # furthest neighbour of the 5 closest (min distance to another building) - mean_5 = np.mean(nearest_5_distances) # average distance of centroids of 5 nearest neighbours - median_5 = np.median(nearest_5_distances) # median distance of centroids of 5 nearest neighbours - dist_5_std = np.std(nearest_5_distances) # standard deviation of centroids of 5 nearest neighbours + min_5 = min( + nearest_5_distances + ) # closest neighbour of the 5 closest (min distance to another building) + max_5 = max( + nearest_5_distances + ) # furthest neighbour of the 5 closest (min distance to another building) + mean_5 = np.mean( + nearest_5_distances + ) # average distance of centroids of 5 nearest neighbours + median_5 = np.median( + nearest_5_distances + ) # median distance of centroids of 5 nearest neighbours + dist_5_std = np.std( + nearest_5_distances + ) # standard deviation of centroids of 5 nearest neighbours # Distance calculations - closest 25 # here, we subset the distances frame for the first 25 neighbours, and calculate summary stats @@ -91,13 +98,15 @@ def Main(passed_dict): dist_25_std = np.std(nearest_5_distances) # Areal calculations - closest 5 - # here, instead of the distances frame we generated via the KD tree, we use the area_dict + # here, instead of the distances frame we generated via the KD tree, we use the area_dict # and query it with the indices from the KD tree step indices_5 = list(indices[0])[1:6] - areas = [area_dict[x] for x in indices_5] + areas = [area_dict[x] for x in indices_5] area_5_mean = np.mean(areas) # mean area of 5 nearest neighbours area_5_med = np.median(areas) # median area of 5 nearest neighbours - area_5_stdev = np.std(areas) # standard deviation of area of 5 nearest neighbours + area_5_stdev = np.std( + areas + ) # standard deviation of area of 5 nearest neighbours # Areal calculations - closest 25 # repeat above block for closest 25 @@ -109,43 +118,50 @@ def Main(passed_dict): # Count # here we turn the process on its head, and identify all objects within certain distance thresholds - count_25m = KD_tree.query_radius([(x,y)], r = 25, count_only = True)[0] # count of buildings in 25m radius - count_50m = KD_tree.query_radius([(x,y)], r = 50, count_only = True)[0] # count of 
buildings in 50m radius - count_100m = KD_tree.query_radius([(x,y)], r = 100, count_only = True)[0] # count of buildings in 100m radius - + count_25m = KD_tree.query_radius([(x, y)], r=25, count_only=True)[ + 0 + ] # count of buildings in 25m radius + count_50m = KD_tree.query_radius([(x, y)], r=50, count_only=True)[ + 0 + ] # count of buildings in 50m radius + count_100m = KD_tree.query_radius([(x, y)], r=100, count_only=True)[ + 0 + ] # count of buildings in 100m radius + # add these stats to a dictionary called 'ans' - ans = {'PID':row.PID, - 'area':row.area, - 'D5_min':min_5, - 'D5_max':max_5, - 'D5_mean':mean_5, - 'D5_med':median_5, - 'D5_std':dist_5_std, - 'A5_mean':area_5_mean, - 'A5_med':area_5_med, - 'A5_std':area_5_stdev, - 'D25_min':min_25, - 'D25_max':max_25, - 'D25_mean':mean_25, - 'D25_med':median_25, - 'D25_std':dist_25_std, - 'A25_mean':area_25_mean, - 'A25_med':area_25_med, - 'A25_std':area_25_stdev, - 'Count_25m':count_25m, - 'Count_50m':count_50m, - 'Count_100m':count_100m - } + ans = { + "PID": row.PID, + "area": row.area, + "D5_min": min_5, + "D5_max": max_5, + "D5_mean": mean_5, + "D5_med": median_5, + "D5_std": dist_5_std, + "A5_mean": area_5_mean, + "A5_med": area_5_med, + "A5_std": area_5_stdev, + "D25_min": min_25, + "D25_max": max_25, + "D25_mean": mean_25, + "D25_med": median_25, + "D25_std": dist_25_std, + "A25_mean": area_25_mean, + "A25_med": area_25_med, + "A25_std": area_25_stdev, + "Count_25m": count_25m, + "Count_50m": count_50m, + "Count_100m": count_100m, + } bundle.append(ans) - + # keep track of progress via this row if counter % print_thresh == 0: - print('%s rows completed at %s' % (counter, time.ctime())) - - ''' - # this functionality saves progress in case the process cannot be finished in one sitting. - # ideally, finish the processing in one sitting. + print("%s rows completed at %s" % (counter, time.ctime())) + + """ + # this functionality saves progress in case the process cannot be finished in one sitting. + # ideally, finish the processing in one sitting. old = 0 if counter % save_thresh == 0: saver = pd.DataFrame(bundle) @@ -159,19 +175,20 @@ def Main(passed_dict): saver.to_file(os.path.join(pth, 'output_%s_to_%s_thread_%s.shp' % (old, counter, thread_no)), driver = 'ESRI Shapefile') bundle = [] old = counter - ''' - - counter+=1 - - return(bundle) - - print('Task completed in %s seconds' % (time.time() - t)) -### + """ + + counter += 1 + return bundle + print("Task completed in %s seconds" % (time.time() - t)) -#threads = multiprocessing.cpu_count() # limit this line if on the JNB to avoid consuming 100% of resources! -''' + +### + + +# threads = multiprocessing.cpu_count() # limit this line if on the JNB to avoid consuming 100% of resources! 
+""" cpu = multiprocessing.cpu_count() threads = int(cpu * cpuPower) @@ -181,30 +198,29 @@ def Main(passed_dict): len_total_df = len(short) chunk = int(np.ceil(len_total_df / threads)) d_f = short[(chunk*(i-1)):(chunk*i)] - + processor_input_dict = { 'df':d_f, 'thread_no':i, 'print_thresh':print_thresh, 'save_thresh':save_thresh } - + d.append(processor_input_dict) with Pool(threads) as pool: results = pool.map(Main, d, chunksize = 1) -''' - +""" d = {} d = { - 'df':short, - 'thread_no':1, - 'print_thresh':print_thresh, - 'save_thresh':save_thresh + "df": short, + "thread_no": 1, + "print_thresh": print_thresh, + "save_thresh": save_thresh, } @@ -216,15 +232,13 @@ def Main(passed_dict): orig_fil = gpd.read_file(f) if orig_fil.crs != WGS: orig_fil = orig_fil.to_crs(WGS) -orig_fil = orig_fil.set_index('PID') - -out_df = out_df.set_index('PID') -out_df['geometry'] = orig_fil['geometry'] -out_df = gpd.GeoDataFrame(out_df, geometry = 'geometry', crs = WGS) -out_df.to_file(os.path.join(pth, 'buildings_altered.shp'), driver = 'ESRI Shapefile') +orig_fil = orig_fil.set_index("PID") +out_df = out_df.set_index("PID") +out_df["geometry"] = orig_fil["geometry"] +out_df = gpd.GeoDataFrame(out_df, geometry="geometry", crs=WGS) +out_df.to_file(os.path.join(pth, "buildings_altered.shp"), driver="ESRI Shapefile") elapsed_time = (time.time() - start_time) / 60 -print ("elapsed_time:{0}".format(elapsed_time) + "[Min]") - +print("elapsed_time:{0}".format(elapsed_time) + "[Min]") diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb index d30ce24..917264d 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.ipynb @@ -1 +1,778 @@ -{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"STEP2.ipynb","provenance":[],"collapsed_sections":[],"mount_file_id":"1qBfdsFv3exlWCp9Tdm8STEdKPypVpEvO","authorship_tag":"ABX9TyPDUwW/OThJXd73gqgjAayU"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","source":["# **STEP-2**"],"metadata":{"id":"ng-6APvaCTjz"}},{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BaLbga1UeeID","executionInfo":{"status":"ok","timestamp":1652081221317,"user_tz":-540,"elapsed":10476,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"ddc2866d-511e-4f78-ad37-5e91d75a9e05"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"markdown","source":["**OPTIONAL:** Install required modules (geopandas, pyproj, h2o) if not yet installed.\n","Create 'modules' directory under 'Colab\\ Notebooks' (or specify a directory you like).\n","\n","https://ggcs.io/2020/06/22/google-colab-pip-install/ (Japanese)"],"metadata":{"id":"1lY0lVSWE86u"}},{"cell_type":"code","source":["!pip install --target /content/drive/MyDrive/Colab\\ Notebooks/modules geopandas pyproj h2o"],"metadata":{"id":"5sJCm-ZyD_a3"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["Import modules"],"metadata":{"id":"jPbQEJvZIXas"}},{"cell_type":"code","source":["import sys\n","sys.path.append('/content/drive/MyDrive/Colab 
Notebooks/modules')"],"metadata":{"id":"noTOcSfdEmLJ","executionInfo":{"status":"ok","timestamp":1652081234256,"user_tz":-540,"elapsed":209,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","execution_count":3,"metadata":{"id":"R_-1zemWBe3V","executionInfo":{"status":"ok","timestamp":1652081288302,"user_tz":-540,"elapsed":53111,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"outputs":[],"source":["# -*- coding: utf-8 -*-\n","import os, sys, time\n","import pyproj\n","import pandas as pd\n","import geopandas as gpd\n","import h2o\n","from h2o.automl import H2OAutoML\n","from h2o.frame import H2OFrame"]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","# Initial file setting\n","Do not forget to mount your GoogleDrive! All necessary files need to be uploaded in directories on your GoogleDrive. You can easily mount your GoogleDrive from the left pane, click the folder icon and select mount drive. Also see: https://colab.research.google.com/notebooks/io.ipynb"],"metadata":{"id":"QpsPI01sCAU_"}},{"cell_type":"code","source":["pth = '/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/'#Directory to save model, ouputs\n","building_file = '/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_DA_morphology.shp'# Specify the processed building footprint data\n","sample_file = '/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_sample_data.shp'# Specify the sample data"],"metadata":{"id":"x0Rl8cyiTiDT","executionInfo":{"status":"ok","timestamp":1652081337297,"user_tz":-540,"elapsed":191,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["# Read a Building Footprint layer processed at STEP-1\n","proj_4326 = pyproj.Proj(4326)\n","\n","building_df = gpd.read_file(building_file)\n","building_df = building_df.to_crs(4326)\n","\n","# Read a Sample Area layer\n","sample_area = gpd.read_file(sample_file)\n","sample_area = sample_area.to_crs(4326)"],"metadata":{"id":"d5A4MLXLB7Ad","executionInfo":{"status":"ok","timestamp":1652081468022,"user_tz":-540,"elapsed":128736,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":5,"outputs":[]},{"cell_type":"markdown","source":["Here, adjust your prediction and response variables.\n","Modify the code below to satisfy your needs.Current setting is very basic: Apply all variables in the building_df."],"metadata":{"id":"DS0NcJ-VSCbj"}},{"cell_type":"code","source":["# Urban classes to be used in the sample layer and for classification\n","# Assign unique integer for each class by yourself here.\n","#class_map = {'High Income':1,'Middle income':2,'Industrial':5, 'Informal':3, 'Commercial':4}\n","#class_map = {'High income':1,'Middle income':2,'Informal':3, 'Commercial':4}\n","#class_map = {'commercial':1,'industrial':2,'ins_admin':3, 'res':4, 'res_detached':5, 'slum':6}#Nairobi\n","#class_map = {'Commercial zone':1,'Formal':2,'Informal':3, 'middle income':4}#Bangui\n","#class_map = {'High Income':1,'Middle Income':2,'Low Income':3, 'Slum':4, 'Com Admin':5, 'Industrial':6}#Libreville, Brazzaville, & PointeNoire\n","#class_map = {'commercial':1,'informal':2,'mid/low income':3}#Bambari\n","class_map = {'Administrative':0,'Bidonville':1,'Commercial':2, 'High income':3, 'Industrial':4, 'Middle/Low income':5}#Douala, 
Yaounde\n"],"metadata":{"id":"S9tRIOowjSIH","executionInfo":{"status":"ok","timestamp":1652081494317,"user_tz":-540,"elapsed":204,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":6,"outputs":[]},{"cell_type":"code","source":["# Here, adjust your prediction and response variables. Modify the code below to satisfy your needs.\n","# Current setting is very basic: Apply all variables in the building_df.\n","col = building_df.columns\n","predictors = list(col[1:22])\n","response = 'type'"],"metadata":{"id":"Tn-1WOeK0dY8","executionInfo":{"status":"ok","timestamp":1652081504301,"user_tz":-540,"elapsed":192,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["### OPTIONAL ###\n","# The col defining the urban space type should have 'type' name. So, if not, rename it here.\n","sample_area = sample_area.rename(columns={'class': 'type'})"],"metadata":{"id":"6zGujLDrGMWJ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["### Generate a training data by intersecting 'building_df' and 'sample_area'\n","# Set urban class default as 'unknown'\n","\n","source_df = building_df.copy()\n","\n","source_df['type'] = 'unknown'\n","\n","# Create an empty DF for append\n","training_data = pd.DataFrame()\n","\n","# 'training_data' is now our official 'training data' for the ML model. \n","for index, row in sample_area.iterrows():\n"," x = row.geometry\n"," y = row.type\n"," \n"," df_temp = source_df[source_df.intersects(x)].copy()\n"," df_temp['type'] = y\n"," \n"," training_data = training_data.append(df_temp)\n"," \n","training_data['type'] = training_data['type'].map(class_map)\n"],"metadata":{"id":"HcEecyh9R3DP","executionInfo":{"status":"ok","timestamp":1652081629044,"user_tz":-540,"elapsed":91882,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}}},"execution_count":11,"outputs":[]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","# **h2o Pipeline**"],"metadata":{"id":"82ku5ND2SOWr"}},{"cell_type":"markdown","source":["Initialize h2o"],"metadata":{"id":"nvN08ZHeTSmd"}},{"cell_type":"code","source":["h2o.init()"],"metadata":{"id":"ekizevAsSxFQ","colab":{"base_uri":"https://localhost:8080/","height":575},"executionInfo":{"status":"ok","timestamp":1652081655900,"user_tz":-540,"elapsed":15589,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"79207c6e-d35a-4b71-bd9e-e42de8ed0218"},"execution_count":12,"outputs":[{"output_type":"stream","name":"stdout","text":["Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.\n","Attempting to start a local H2O server...\n"," Java Version: openjdk version \"11.0.15\" 2022-04-19; OpenJDK Runtime Environment (build 11.0.15+10-Ubuntu-0ubuntu0.18.04.1); OpenJDK 64-Bit Server VM (build 11.0.15+10-Ubuntu-0ubuntu0.18.04.1, mixed mode, sharing)\n"," Starting server from /content/drive/MyDrive/Colab Notebooks/modules/h2o/backend/bin/h2o.jar\n"," Ice root: /tmp/tmps3fyhcp5\n"," JVM stdout: /tmp/tmps3fyhcp5/h2o_unknownUser_started_from_python.out\n"," JVM stderr: /tmp/tmps3fyhcp5/h2o_unknownUser_started_from_python.err\n"," Server is running at http://127.0.0.1:54321\n","Connecting to H2O server at http://127.0.0.1:54321 ... 
successful.\n","Warning: Your H2O cluster version is too old (3 months and 13 days)!Please download and install the latest version from http://h2o.ai/download/\n"]},{"output_type":"display_data","data":{"text/plain":["-------------------------- ----------------------------------------------------------------------------\n","H2O_cluster_uptime: 04 secs\n","H2O_cluster_timezone: Etc/UTC\n","H2O_data_parsing_timezone: UTC\n","H2O_cluster_version: 3.36.0.2\n","H2O_cluster_version_age: 3 months and 13 days !!!\n","H2O_cluster_name: H2O_from_python_unknownUser_b0cajz\n","H2O_cluster_total_nodes: 1\n","H2O_cluster_free_memory: 3.172 Gb\n","H2O_cluster_total_cores: 2\n","H2O_cluster_allowed_cores: 2\n","H2O_cluster_status: locked, healthy\n","H2O_connection_url: http://127.0.0.1:54321\n","H2O_connection_proxy: {\"http\": null, \"https\": null}\n","H2O_internal_security: False\n","H2O_API_Extensions: Amazon S3, XGBoost, Algos, Infogram, AutoML, Core V3, TargetEncoder, Core V4\n","Python_version: 3.7.13 final\n","-------------------------- ----------------------------------------------------------------------------"],"text/html":["
\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","
H2O_cluster_uptime:04 secs
H2O_cluster_timezone:Etc/UTC
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.36.0.2
H2O_cluster_version_age:3 months and 13 days !!!
H2O_cluster_name:H2O_from_python_unknownUser_b0cajz
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:3.172 Gb
H2O_cluster_total_cores:2
H2O_cluster_allowed_cores:2
H2O_cluster_status:locked, healthy
H2O_connection_url:http://127.0.0.1:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
H2O_API_Extensions:Amazon S3, XGBoost, Algos, Infogram, AutoML, Core V3, TargetEncoder, Core V4
Python_version:3.7.13 final
"]},"metadata":{}}]},{"cell_type":"markdown","source":["Data prep for the h2o pipeline, autoML model selection.\n","The best-performed model will be saved in the directory specified by 'pth.'"],"metadata":{"id":"ICa4GXDWTY1o"}},{"cell_type":"code","source":["# Convert the training data to an h2o frame.\n","# NOTE that this process will be inefficien if the original data has many NaNs.\n","hf = H2OFrame(training_data)\n","\n","\n","# This block of code is fairly h2o standard. It trains 20 models on this data,\n","# limiting the runtime to 1 hour. At the end of an hour or training 20 models,\n","# whichever is first, it returns a DataFrame of predictions as preds, ordered by the quality of their predictions.\n","\n","# Split 'hf' into a taraining frame and validation frame.\n","train, valid = hf.split_frame(ratios = [.8], seed = 10)\n","\n","# Identify predictors and response\n","x = predictors\n","y = response\n","\n","## For binary classification, response should be a factor\n","train[y] = train[y].asfactor()\n","valid[y] = valid[y].asfactor()\n","\n","# Run AutoML for 20 base models (limited to 1 hour max runtime by default)\n","aml = H2OAutoML(max_models = 20, seed =1)\n","aml.train(x = x,\n"," y = y,\n"," training_frame = train)\n","\n","# View the AutoML Leaderboard\n","lb = aml.leaderboard\n","\n","\n","# Print all rows instead of default (10 rows)\n","lb.head(rows=lb.nrows)\n","\n","print(\"** Model validation with 'valid' hf **\")\n","preds = aml.leader.predict(valid)\n","\n","# Here, we print out the performance of our top performing model.\n","res = aml.leader.model_performance(valid)\n","\n","print(res)\n","\n","# We save the model down to its own save location.\n","model_path = h2o.save_model(model = aml.leader,\n"," path = pth,\n"," force = True)"],"metadata":{"id":"74Z9NlvxSUBm","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1652083949200,"user_tz":-540,"elapsed":2286285,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"ebb4410d-e186-4f06-a6d8-38bb4a581833"},"execution_count":13,"outputs":[{"output_type":"stream","name":"stdout","text":["Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%\n","** Model validation with 'valid' hf **\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n","\n","ModelMetricsMultinomialGLM: stackedensemble\n","** Reported on test data. 
**\n","\n","MSE: 0.01759988910980335\n","RMSE: 0.13266457368040405\n","\n"]}]},{"cell_type":"markdown","source":["### **Supervised ML classification based on the selected model**\n","h2o struggled to generate predictions for more than 100,000 rows at a time.Thus, we split the original DataFrame into 100,000 row chunks, run the predictions on the h2o version of the frame, then send these to file."],"metadata":{"id":"pnH6Xw8QT5ee"}},{"cell_type":"code","source":["max_row_size = 100000\n","\n","chunk_num = int(len(building_df) / max_row_size)\n","chunk_mod = len(building_df) % max_row_size\n","\n","building_df['type'] = 'unknown'\n","\n","\n","def MLpred(df):\n"," df_input = df[predictors]\n"," # Extract predictor cols only (specified by the 'predictors' LIST)\n"," hf_temp = H2OFrame(df_input)\n"," \n"," preds_temp = aml.leader.predict(hf_temp)\n"," pred_df_temp = preds_temp.as_data_frame()\n"," \n"," # add 'PID' to 'pred_df_temp' so that it will be merged to the original 'df.'\n"," df.reset_index(inplace = True)\n"," pred_df_temp['PID'] = df.PID\n"," \n"," ans = pd.merge(df, pred_df_temp, on = \"PID\")\n"," \n"," return(ans)\n"," \n","\n","# Create an empty DF for append\n","prediction_df = pd.DataFrame()\n","\n","# If the total number of building footprints is smaller than 100,000:\n","if len(building_df) < 100000:\n"," # Prediction process here\n"," pred_x = MLpred(building_df)\n"," prediction_df = prediction_df.append(pred_x)\n","\n","else:\n"," for i in range(0, chunk_num):\n"," if i == 0:\n"," print(\"Processing Chunk No. {} ----> row 0–{}\".format(i+1, max_row_size))\n"," df_temp2 = building_df[0:max_row_size].copy()\n"," \n"," # Prediction process here\n"," pred_x = MLpred(df_temp2)\n"," \n"," prediction_df = prediction_df.append(pred_x)\n"," \n"," else:\n"," start = (i * max_row_size)\n"," stop = (i * max_row_size) + max_row_size\n"," print(\"Processing Chunk No. {} ----> row {}–{}\".format(i+1, start, stop))\n"," df_temp2 = building_df[start:stop].copy()\n"," \n"," # Prediction process here\n"," pred_x = MLpred(df_temp2)\n"," \n"," prediction_df = prediction_df.append(pred_x)\n","\n"," \n"," if chunk_mod > 0:\n"," start = (chunk_num * max_row_size)\n"," print(\"Processing Chunk No. {} ----> row {} till the end\".format(i + 1, start)) \n"," df_temp2 = building_df[start:].copy()\n"," \n"," # Prediction process here\n"," pred_x = MLpred(df_temp2)\n"," \n"," prediction_df = prediction_df.append(pred_x)"],"metadata":{"id":"HuWvDj9YT2dY","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1652084321443,"user_tz":-540,"elapsed":334981,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"99160640-2aaa-4242-aaf3-db2347dd4db8"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["Processing Chunk No. 1 ----> row 0–100000\n","Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n","Processing Chunk No. 2 ----> row 100000–200000\n","Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n","Processing Chunk No. 
3 ----> row 200000–300000\n","Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n","Processing Chunk No. 4 ----> row 300000–400000\n","Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n","Processing Chunk No. 5 ----> row 400000–500000\n","Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n","Processing Chunk No. 5 ----> row 500000 till the end\n","Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n","stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n"]}]},{"cell_type":"markdown","source":["\n","\n","---\n","\n","\n","### **Post-analytical process**\n"],"metadata":{"id":"FqwpmSsrUIY_"}},{"cell_type":"code","source":["### Exporting\n","print('Exporting reulst to shapefile...')\n","output_path = pth + '\\prediction_result.shp'\n","prediction_df.to_file(output_path, driver = 'ESRI Shapefile')"],"metadata":{"id":"wuP096anUHzu","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1652085093365,"user_tz":-540,"elapsed":715775,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"dd0cb167-5cb1-432a-d4b3-468e2590ca74"},"execution_count":15,"outputs":[{"output_type":"stream","name":"stdout","text":["Exporting reulst to shapefile...\n"]}]},{"cell_type":"code","source":["### Refreshing H2O cluster (if necessary) \n","h2o.shutdown(prompt = True)"],"metadata":{"id":"3uhE9cHGH-DI","colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"status":"ok","timestamp":1649758011062,"user_tz":-540,"elapsed":4234,"user":{"displayName":"立石英吾","userId":"17884307298980796766"}},"outputId":"33c0d7ab-5ffc-4e70-a59b-a3c8c2cd005e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: H2ODeprecationWarning: Deprecated, use ``h2o.cluster().shutdown()``.\n"," \n"]},{"output_type":"stream","name":"stdout","text":["Are you sure you want to shutdown the H2O instance running at http://127.0.0.1:54321 (Y/N)? 
Y\n","H2O session _sid_95c4 closed.\n"]}]}]} \ No newline at end of file +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ng-6APvaCTjz" + }, + "source": [ + "# **STEP-2**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 10476, + "status": "ok", + "timestamp": 1652081221317, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "BaLbga1UeeID", + "outputId": "ddc2866d-511e-4f78-ad37-5e91d75a9e05" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "\n", + "drive.mount(\"/content/drive\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1lY0lVSWE86u" + }, + "source": [ + "**OPTIONAL:** Install required modules (geopandas, pyproj, h2o) if not yet installed.\n", + "Create 'modules' directory under 'Colab\\ Notebooks' (or specify a directory you like).\n", + "\n", + "https://ggcs.io/2020/06/22/google-colab-pip-install/ (Japanese)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5sJCm-ZyD_a3" + }, + "outputs": [], + "source": [ + "!pip install --target /content/drive/MyDrive/Colab\\ Notebooks/modules geopandas pyproj h2o" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jPbQEJvZIXas" + }, + "source": [ + "Import modules" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "executionInfo": { + "elapsed": 209, + "status": "ok", + "timestamp": 1652081234256, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "noTOcSfdEmLJ" + }, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "sys.path.append(\"/content/drive/MyDrive/Colab Notebooks/modules\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "executionInfo": { + "elapsed": 53111, + "status": "ok", + "timestamp": 1652081288302, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "R_-1zemWBe3V" + }, + "outputs": [], + "source": [ + "# -*- coding: utf-8 -*-\n", + "import sys\n", + "import pyproj\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "import h2o\n", + "from h2o.automl import H2OAutoML\n", + "from h2o.frame import H2OFrame" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QpsPI01sCAU_" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "# Initial file setting\n", + "Do not forget to mount your GoogleDrive! All necessary files need to be uploaded in directories on your GoogleDrive. You can easily mount your GoogleDrive from the left pane, click the folder icon and select mount drive. 
Also see: https://colab.research.google.com/notebooks/io.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "executionInfo": { + "elapsed": 191, + "status": "ok", + "timestamp": 1652081337297, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "x0Rl8cyiTiDT" + }, + "outputs": [], + "source": [ + "pth = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/\" # Directory to save model, ouputs\n", + "building_file = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_DA_morphology.shp\" # Specify the processed building footprint data\n", + "sample_file = \"/content/drive/MyDrive/Colab Notebooks/slumML/data/Yaounde/Yaounde_sample_data.shp\" # Specify the sample data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "executionInfo": { + "elapsed": 128736, + "status": "ok", + "timestamp": 1652081468022, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "d5A4MLXLB7Ad" + }, + "outputs": [], + "source": [ + "# Read a Building Footprint layer processed at STEP-1\n", + "proj_4326 = pyproj.Proj(4326)\n", + "\n", + "building_df = gpd.read_file(building_file)\n", + "building_df = building_df.to_crs(4326)\n", + "\n", + "# Read a Sample Area layer\n", + "sample_area = gpd.read_file(sample_file)\n", + "sample_area = sample_area.to_crs(4326)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DS0NcJ-VSCbj" + }, + "source": [ + "Here, adjust your prediction and response variables.\n", + "Modify the code below to satisfy your needs.Current setting is very basic: Apply all variables in the building_df." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "executionInfo": { + "elapsed": 204, + "status": "ok", + "timestamp": 1652081494317, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "S9tRIOowjSIH" + }, + "outputs": [], + "source": [ + "# Urban classes to be used in the sample layer and for classification\n", + "# Assign unique integer for each class by yourself here.\n", + "# class_map = {'High Income':1,'Middle income':2,'Industrial':5, 'Informal':3, 'Commercial':4}\n", + "# class_map = {'High income':1,'Middle income':2,'Informal':3, 'Commercial':4}\n", + "# class_map = {'commercial':1,'industrial':2,'ins_admin':3, 'res':4, 'res_detached':5, 'slum':6}#Nairobi\n", + "# class_map = {'Commercial zone':1,'Formal':2,'Informal':3, 'middle income':4}#Bangui\n", + "# class_map = {'High Income':1,'Middle Income':2,'Low Income':3, 'Slum':4, 'Com Admin':5, 'Industrial':6}#Libreville, Brazzaville, & PointeNoire\n", + "# class_map = {'commercial':1,'informal':2,'mid/low income':3}#Bambari\n", + "class_map = {\n", + " \"Administrative\": 0,\n", + " \"Bidonville\": 1,\n", + " \"Commercial\": 2,\n", + " \"High income\": 3,\n", + " \"Industrial\": 4,\n", + " \"Middle/Low income\": 5,\n", + "} # Douala, Yaounde" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "executionInfo": { + "elapsed": 192, + "status": "ok", + "timestamp": 1652081504301, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "Tn-1WOeK0dY8" + }, + "outputs": [], + "source": [ + "# Here, adjust your prediction and response variables. 
Modify the code below to satisfy your needs.\n", + "# Current setting is very basic: Apply all variables in the building_df.\n", + "col = building_df.columns\n", + "predictors = list(col[1:22])\n", + "response = \"type\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6zGujLDrGMWJ" + }, + "outputs": [], + "source": [ + "### OPTIONAL ###\n", + "# The col defining the urban space type should have 'type' name. So, if not, rename it here.\n", + "sample_area = sample_area.rename(columns={\"class\": \"type\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "executionInfo": { + "elapsed": 91882, + "status": "ok", + "timestamp": 1652081629044, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "HcEecyh9R3DP" + }, + "outputs": [], + "source": [ + "### Generate a training data by intersecting 'building_df' and 'sample_area'\n", + "# Set urban class default as 'unknown'\n", + "\n", + "source_df = building_df.copy()\n", + "\n", + "source_df[\"type\"] = \"unknown\"\n", + "\n", + "# Create an empty DF for append\n", + "training_data = pd.DataFrame()\n", + "\n", + "# 'training_data' is now our official 'training data' for the ML model.\n", + "for index, row in sample_area.iterrows():\n", + " x = row.geometry\n", + " y = row.type\n", + "\n", + " df_temp = source_df[source_df.intersects(x)].copy()\n", + " df_temp[\"type\"] = y\n", + "\n", + " training_data = training_data.append(df_temp)\n", + "\n", + "training_data[\"type\"] = training_data[\"type\"].map(class_map)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "82ku5ND2SOWr" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "# **h2o Pipeline**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nvN08ZHeTSmd" + }, + "source": [ + "Initialize h2o" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 575 + }, + "executionInfo": { + "elapsed": 15589, + "status": "ok", + "timestamp": 1652081655900, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "ekizevAsSxFQ", + "outputId": "79207c6e-d35a-4b71-bd9e-e42de8ed0218" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.\n", + "Attempting to start a local H2O server...\n", + " Java Version: openjdk version \"11.0.15\" 2022-04-19; OpenJDK Runtime Environment (build 11.0.15+10-Ubuntu-0ubuntu0.18.04.1); OpenJDK 64-Bit Server VM (build 11.0.15+10-Ubuntu-0ubuntu0.18.04.1, mixed mode, sharing)\n", + " Starting server from /content/drive/MyDrive/Colab Notebooks/modules/h2o/backend/bin/h2o.jar\n", + " Ice root: /tmp/tmps3fyhcp5\n", + " JVM stdout: /tmp/tmps3fyhcp5/h2o_unknownUser_started_from_python.out\n", + " JVM stderr: /tmp/tmps3fyhcp5/h2o_unknownUser_started_from_python.err\n", + " Server is running at http://127.0.0.1:54321\n", + "Connecting to H2O server at http://127.0.0.1:54321 ... successful.\n", + "Warning: Your H2O cluster version is too old (3 months and 13 days)!Please download and install the latest version from http://h2o.ai/download/\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
H2O_cluster_uptime:04 secs
H2O_cluster_timezone:Etc/UTC
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.36.0.2
H2O_cluster_version_age:3 months and 13 days !!!
H2O_cluster_name:H2O_from_python_unknownUser_b0cajz
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:3.172 Gb
H2O_cluster_total_cores:2
H2O_cluster_allowed_cores:2
H2O_cluster_status:locked, healthy
H2O_connection_url:http://127.0.0.1:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
H2O_API_Extensions:Amazon S3, XGBoost, Algos, Infogram, AutoML, Core V3, TargetEncoder, Core V4
Python_version:3.7.13 final
" + ], + "text/plain": [ + "-------------------------- ----------------------------------------------------------------------------\n", + "H2O_cluster_uptime: 04 secs\n", + "H2O_cluster_timezone: Etc/UTC\n", + "H2O_data_parsing_timezone: UTC\n", + "H2O_cluster_version: 3.36.0.2\n", + "H2O_cluster_version_age: 3 months and 13 days !!!\n", + "H2O_cluster_name: H2O_from_python_unknownUser_b0cajz\n", + "H2O_cluster_total_nodes: 1\n", + "H2O_cluster_free_memory: 3.172 Gb\n", + "H2O_cluster_total_cores: 2\n", + "H2O_cluster_allowed_cores: 2\n", + "H2O_cluster_status: locked, healthy\n", + "H2O_connection_url: http://127.0.0.1:54321\n", + "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", + "H2O_internal_security: False\n", + "H2O_API_Extensions: Amazon S3, XGBoost, Algos, Infogram, AutoML, Core V3, TargetEncoder, Core V4\n", + "Python_version: 3.7.13 final\n", + "-------------------------- ----------------------------------------------------------------------------" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "h2o.init()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ICa4GXDWTY1o" + }, + "source": [ + "Data prep for the h2o pipeline, autoML model selection.\n", + "The best-performed model will be saved in the directory specified by 'pth.'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 2286285, + "status": "ok", + "timestamp": 1652083949200, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "74Z9NlvxSUBm", + "outputId": "ebb4410d-e186-4f06-a6d8-38bb4a581833" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%\n", + "** Model validation with 'valid' hf **\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n", + "\n", + "ModelMetricsMultinomialGLM: stackedensemble\n", + "** Reported on test data. **\n", + "\n", + "MSE: 0.01759988910980335\n", + "RMSE: 0.13266457368040405\n", + "\n" + ] + } + ], + "source": [ + "# Convert the training data to an h2o frame.\n", + "# NOTE that this process will be inefficien if the original data has many NaNs.\n", + "hf = H2OFrame(training_data)\n", + "\n", + "\n", + "# This block of code is fairly h2o standard. It trains 20 models on this data,\n", + "# limiting the runtime to 1 hour. 
At the end of an hour or training 20 models,\n", + "# whichever is first, it returns a DataFrame of predictions as preds, ordered by the quality of their predictions.\n", + "\n", + "# Split 'hf' into a taraining frame and validation frame.\n", + "train, valid = hf.split_frame(ratios=[0.8], seed=10)\n", + "\n", + "# Identify predictors and response\n", + "x = predictors\n", + "y = response\n", + "\n", + "## For binary classification, response should be a factor\n", + "train[y] = train[y].asfactor()\n", + "valid[y] = valid[y].asfactor()\n", + "\n", + "# Run AutoML for 20 base models (limited to 1 hour max runtime by default)\n", + "aml = H2OAutoML(max_models=20, seed=1)\n", + "aml.train(x=x, y=y, training_frame=train)\n", + "\n", + "# View the AutoML Leaderboard\n", + "lb = aml.leaderboard\n", + "\n", + "\n", + "# Print all rows instead of default (10 rows)\n", + "lb.head(rows=lb.nrows)\n", + "\n", + "print(\"** Model validation with 'valid' hf **\")\n", + "preds = aml.leader.predict(valid)\n", + "\n", + "# Here, we print out the performance of our top performing model.\n", + "res = aml.leader.model_performance(valid)\n", + "\n", + "print(res)\n", + "\n", + "# We save the model down to its own save location.\n", + "model_path = h2o.save_model(model=aml.leader, path=pth, force=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pnH6Xw8QT5ee" + }, + "source": [ + "### **Supervised ML classification based on the selected model**\n", + "h2o struggled to generate predictions for more than 100,000 rows at a time.Thus, we split the original DataFrame into 100,000 row chunks, run the predictions on the h2o version of the frame, then send these to file." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 334981, + "status": "ok", + "timestamp": 1652084321443, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "HuWvDj9YT2dY", + "outputId": "99160640-2aaa-4242-aaf3-db2347dd4db8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Processing Chunk No. 1 ----> row 0–100000\n", + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n", + "Processing Chunk No. 2 ----> row 100000–200000\n", + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n", + "Processing Chunk No. 3 ----> row 200000–300000\n", + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n", + "Processing Chunk No. 4 ----> row 300000–400000\n", + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n", + "Processing Chunk No. 5 ----> row 400000–500000\n", + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n", + "Processing Chunk No. 
5 ----> row 500000 till the end\n", + "Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%\n", + "stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%\n" + ] + } + ], + "source": [ + "max_row_size = 100000\n", + "\n", + "chunk_num = int(len(building_df) / max_row_size)\n", + "chunk_mod = len(building_df) % max_row_size\n", + "\n", + "building_df[\"type\"] = \"unknown\"\n", + "\n", + "\n", + "def MLpred(df):\n", + " df_input = df[predictors]\n", + " # Extract predictor cols only (specified by the 'predictors' LIST)\n", + " hf_temp = H2OFrame(df_input)\n", + "\n", + " preds_temp = aml.leader.predict(hf_temp)\n", + " pred_df_temp = preds_temp.as_data_frame()\n", + "\n", + " # add 'PID' to 'pred_df_temp' so that it will be merged to the original 'df.'\n", + " df.reset_index(inplace=True)\n", + " pred_df_temp[\"PID\"] = df.PID\n", + "\n", + " ans = pd.merge(df, pred_df_temp, on=\"PID\")\n", + "\n", + " return ans\n", + "\n", + "\n", + "# Create an empty DF for append\n", + "prediction_df = pd.DataFrame()\n", + "\n", + "# If the total number of building footprints is smaller than 100,000:\n", + "if len(building_df) < 100000:\n", + " # Prediction process here\n", + " pred_x = MLpred(building_df)\n", + " prediction_df = prediction_df.append(pred_x)\n", + "\n", + "else:\n", + " for i in range(0, chunk_num):\n", + " if i == 0:\n", + " print(\"Processing Chunk No. {} ----> row 0–{}\".format(i + 1, max_row_size))\n", + " df_temp2 = building_df[0:max_row_size].copy()\n", + "\n", + " # Prediction process here\n", + " pred_x = MLpred(df_temp2)\n", + "\n", + " prediction_df = prediction_df.append(pred_x)\n", + "\n", + " else:\n", + " start = i * max_row_size\n", + " stop = (i * max_row_size) + max_row_size\n", + " print(\"Processing Chunk No. {} ----> row {}–{}\".format(i + 1, start, stop))\n", + " df_temp2 = building_df[start:stop].copy()\n", + "\n", + " # Prediction process here\n", + " pred_x = MLpred(df_temp2)\n", + "\n", + " prediction_df = prediction_df.append(pred_x)\n", + "\n", + " if chunk_mod > 0:\n", + " start = chunk_num * max_row_size\n", + " print(\"Processing Chunk No. 
{} ----> row {} till the end\".format(i + 1, start))\n", + " df_temp2 = building_df[start:].copy()\n", + "\n", + " # Prediction process here\n", + " pred_x = MLpred(df_temp2)\n", + "\n", + " prediction_df = prediction_df.append(pred_x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FqwpmSsrUIY_" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "### **Post-analytical process**\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 715775, + "status": "ok", + "timestamp": 1652085093365, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "wuP096anUHzu", + "outputId": "dd0cb167-5cb1-432a-d4b3-468e2590ca74" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Exporting reulst to shapefile...\n" + ] + } + ], + "source": [ + "### Exporting\n", + "print(\"Exporting reulst to shapefile...\")\n", + "output_path = pth + \"\\prediction_result.shp\"\n", + "prediction_df.to_file(output_path, driver=\"ESRI Shapefile\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 4234, + "status": "ok", + "timestamp": 1649758011062, + "user": { + "displayName": "立石英吾", + "userId": "17884307298980796766" + }, + "user_tz": -540 + }, + "id": "3uhE9cHGH-DI", + "outputId": "33c0d7ab-5ffc-4e70-a59b-a3c8c2cd005e" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: H2ODeprecationWarning: Deprecated, use ``h2o.cluster().shutdown()``.\n", + " \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Are you sure you want to shutdown the H2O instance running at http://127.0.0.1:54321 (Y/N)? 
Y\n", + "H2O session _sid_95c4 closed.\n" + ] + } + ], + "source": [ + "### Refreshing H2O cluster (if necessary)\n", + "h2o.shutdown(prompt=True)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyPDUwW/OThJXd73gqgjAayU", + "collapsed_sections": [], + "mount_file_id": "1qBfdsFv3exlWCp9Tdm8STEdKPypVpEvO", + "name": "STEP2.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py index 4cea4cc..96ea775 100644 --- a/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py +++ b/notebooks/Implementations/Slum_Mapping/slumML/STEP2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- -import os, sys, time +import os +import time import pandas as pd import geopandas as gpd import h2o @@ -14,22 +15,20 @@ ### Initial file setting -------------------------------------------------------------------- pth = os.getcwd() -building_file = '/Niamey_data/buildings_altered.shp' -sample_file = '/Niamey_data/Niamey_sample_data.shp' +building_file = "/Niamey_data/buildings_altered.shp" +sample_file = "/Niamey_data/Niamey_sample_data.shp" # Read a processed Building Footprint layer building_df = gpd.read_file(pth + building_file) -building_df = building_df.to_crs({'init':'epsg:4326'}) +building_df = building_df.to_crs({"init": "epsg:4326"}) # Read a Sample Area layer sample_area = gpd.read_file(pth + sample_file) -sample_area = sample_area.to_crs({'init':'epsg:4326'}) +sample_area = sample_area.to_crs({"init": "epsg:4326"}) # Urban classes to be used in the sample layer and for classification # Assign unique integer for each class by yourself here. -class_map = {'middle income':1,'informal':2,'formal':3, 'commercial':4} - - +class_map = {"middle income": 1, "informal": 2, "formal": 3, "commercial": 4} ### Variable prep here ---------------------------------------------------------------------- @@ -38,9 +37,7 @@ # Current setting is very basic: Apply all variables in the building_df. col = building_df.columns predictors = list(col[1:21]) -response = 'type' - - +response = "type" ### Generate a training data by intersecting 'building_df' and 'sample_area'----------------- @@ -48,24 +45,22 @@ source_df = building_df.copy() -source_df['type'] = 'unknown' +source_df["type"] = "unknown" # Create an empty DF for append training_data = pd.DataFrame() -# 'training_data' is now our official 'training data' for the ML model. +# 'training_data' is now our official 'training data' for the ML model. for index, row in sample_area.iterrows(): x = row.geometry y = row.type - + df_temp = source_df[source_df.intersects(x)].copy() - df_temp['type'] = y - - training_data = training_data.append(df_temp) - -training_data['type'] = training_data['type'].map(class_map) + df_temp["type"] = y + training_data = training_data.append(df_temp) +training_data["type"] = training_data["type"].map(class_map) ### Model training here --------------------------------------------------------------------- @@ -81,7 +76,7 @@ # whichever is first, it returns a DataFrame of predictions as preds, ordered by the quality of their predictions. # Split 'hf' into a taraining frame and validation frame. 
-train, valid = hf.split_frame(ratios = [.8], seed = 10) +train, valid = hf.split_frame(ratios=[0.8], seed=10) # Identify predictors and response x = predictors @@ -92,10 +87,8 @@ valid[y] = valid[y].asfactor() # Run AutoML for 20 base models (limited to 1 hour max runtime by default) -aml = H2OAutoML(max_models = 20, seed =1) -aml.train(x = x, - y = y, - training_frame = train) +aml = H2OAutoML(max_models=20, seed=1) +aml.train(x=x, y=y, training_frame=train) # View the AutoML Leaderboard lb = aml.leaderboard @@ -113,11 +106,7 @@ print(res) # We save the model down to its own save location. -model_path = h2o.save_model(model = aml.leader, - path = pth, - force = True) - - +model_path = h2o.save_model(model=aml.leader, path=pth, force=True) ### Model fitting here ---------------------------------------------------------------------- @@ -131,74 +120,70 @@ chunk_num = int(len(building_df) / max_row_size) chunk_mod = len(building_df) % max_row_size -building_df['type'] = 'unknown' +building_df["type"] = "unknown" def MLpred(df): df_input = df[predictors] # Extract predictor cols only (specified by the 'predictors' LIST) hf_temp = H2OFrame(df_input) - + preds_temp = aml.leader.predict(hf_temp) pred_df_temp = preds_temp.as_data_frame() - + # add 'PID' to 'pred_df_temp' so that it will be merged to the original 'df.' - df.reset_index(inplace = True) - pred_df_temp['PID'] = df.PID - - ans = pd.merge(df, pred_df_temp, on = "PID") - - return(ans) - + df.reset_index(inplace=True) + pred_df_temp["PID"] = df.PID + + ans = pd.merge(df, pred_df_temp, on="PID") + + return ans + # Create an empty DF for append prediction_df = pd.DataFrame() for i in range(0, chunk_num): if i == 0: - print("Processing Chunk No. {} ----> row 0–{}".format(i+1, max_row_size)) + print("Processing Chunk No. {} ----> row 0–{}".format(i + 1, max_row_size)) df_temp2 = building_df[0:max_row_size].copy() - + # Prediction process here pred_x = MLpred(df_temp2) - + prediction_df = prediction_df.append(pred_x) - + else: - start = (i * max_row_size) - stop = (i * max_row_size) + max_row_size - print("Processing Chunk No. {} ----> row {}–{}".format(i+1, start, stop)) + start = i * max_row_size + stop = (i * max_row_size) + max_row_size + print("Processing Chunk No. {} ----> row {}–{}".format(i + 1, start, stop)) df_temp2 = building_df[start:stop].copy() # Prediction process here pred_x = MLpred(df_temp2) - - prediction_df = prediction_df.append(pred_x) - + + prediction_df = prediction_df.append(pred_x) + if chunk_mod > 0: - start = (chunk_num * max_row_size) - print("Processing Chunk No. {} ----> row {} till the end".format(i+1, start)) + start = chunk_num * max_row_size + print("Processing Chunk No. 
{} ----> row {} till the end".format(i + 1, start)) df_temp2 = building_df[start:].copy() - + # Prediction process here pred_x = MLpred(df_temp2) prediction_df = prediction_df.append(pred_x) - - ### Exporting ------------------------------------------- -print('Exporting reulst to shapefile...') -output_path = pth + '\prediction_result.shp' -prediction_df.to_file(output_path, driver = 'ESRI Shapefile') - - +print("Exporting reulst to shapefile...") +output_path = pth + "\prediction_result.shp" +prediction_df.to_file(output_path, driver="ESRI Shapefile") ### Refreshing H2O cluster (if necessary) ------------------------------------------- elapsed_time = (time.time() - start_time) / 60 -print ("elapsed_time:{0}".format(elapsed_time) + "[Min]") +print("elapsed_time:{0}".format(elapsed_time) + "[Min]") h2o.cluster().shutdown(prompt=True) diff --git a/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb b/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb index 57c42d9..41dfcd2 100644 --- a/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb +++ b/notebooks/Implementations/URB_DECAT_B_ExploringGHSSMODcode.ipynb @@ -26,24 +26,23 @@ } ], "source": [ - "import sys, os, importlib\n", + "import sys\n", + "import os\n", + "import importlib\n", "import rasterio\n", "import rasterio.warp\n", "\n", - "import pandas as pd\n", "import geopandas as gpd\n", "import numpy as np\n", "\n", "from rasterio import features\n", - "from matplotlib import pyplot\n", - "from shapely.geometry import MultiPolygon, Polygon\n", + "from shapely.geometry import Polygon\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../GOST_Urban\")\n", "import src.UrbanRaster as urban\n", - "import src.urban_helper as helper\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.append(\"../../gostrocks/src\")\n", "\n", "import GOSTRocks.rasterMisc as rMisc\n", @@ -57,7 +56,7 @@ "outputs": [], "source": [ "global_bounds_adm1 = \"/home/public/Data/GLOBAL/ADMIN/Admin1_Polys.shp\"\n", - "iso3 = 'GHA'\n", + "iso3 = \"GHA\"\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA\" % iso3\n", "pop_layer = os.path.join(output_folder, \"FINAL_STANDARD\", \"%s_GHS.tif\" % iso3)\n", "temp_pop_file = os.path.join(output_folder, \"GHS_TEMP.tif\")\n", @@ -72,20 +71,22 @@ "outputs": [], "source": [ "inG1 = gpd.read_file(global_bounds_adm1)\n", - "inD1 = inG1.loc[inG1['ISO3'] == iso3]\n", - "inD1 = inD1.to_crs({'init':'epsg:4326'})\n", + "inD1 = inG1.loc[inG1[\"ISO3\"] == iso3]\n", + "inD1 = inD1.to_crs({\"init\": \"epsg:4326\"})\n", "if not os.path.exists(adm1_file):\n", " inD1.to_file(adm1_file)\n", "\n", "if not os.path.exists(temp_pop_file):\n", " inR = rasterio.open(pop_layer)\n", - " selD = inD1.loc[inD1['WB_ADM1_NA'] == \"Central\"]\n", + " selD = inD1.loc[inD1[\"WB_ADM1_NA\"] == \"Central\"]\n", " rMisc.clipRaster(inR, selD, temp_pop_file)\n", - " \n", + "\n", "national_ghs_1k = os.path.join(output_folder, \"GHS_POP_1k.tif\")\n", "if not os.path.exists(national_ghs_1k):\n", - " inR = rasterio.open('/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif')\n", - " inD = gpd.read_file(os.path.join(output_folder, \"FINAL_STANDARD\", 'GHA_ADMIN.shp'))\n", + " inR = rasterio.open(\n", + " \"/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif\"\n", + " )\n", + " inD = gpd.read_file(os.path.join(output_folder, \"FINAL_STANDARD\", \"GHA_ADMIN.shp\"))\n", " if inD.crs != 
inR.crs:\n", " inD = inD.to_crs(inR.crs)\n", " rMisc.clipRaster(inR, inD, national_ghs_1k)" @@ -136,7 +137,9 @@ "source": [ "importlib.reload(urban)\n", "xx = urban.urbanGriddedPop(pop_layer)\n", - "res = xx.calculateDegurba(urbDens=21, hdDens=(15 * 7), minPopThresh=0.5 * 7, out_raster=deg_file, verbose=True)" + "res = xx.calculateDegurba(\n", + " urbDens=21, hdDens=(15 * 7), minPopThresh=0.5 * 7, out_raster=deg_file, verbose=True\n", + ")" ] }, { @@ -165,7 +168,9 @@ "importlib.reload(urban)\n", "deg_file = national_ghs_1k.replace(\".tif\", \"DEGURBA.tif\")\n", "xx = urban.urbanGriddedPop(national_ghs_1k)\n", - "res = xx.calculateDegurba(urbDens=300, hdDens=1500, minPopThresh=50, out_raster=deg_file, verbose=True)" + "res = xx.calculateDegurba(\n", + " urbDens=300, hdDens=1500, minPopThresh=50, out_raster=deg_file, verbose=True\n", + ")" ] }, { @@ -194,10 +199,14 @@ ], "source": [ "importlib.reload(urban)\n", - "temp_pop_file = os.path.join(output_folder, \"GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0_22_3.tif\")\n", + "temp_pop_file = os.path.join(\n", + " output_folder, \"GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0_22_3.tif\"\n", + ")\n", "deg_file = temp_pop_file.replace(\".tif\", \"DEGURBA.tif\")\n", - "xx = urban.urbanGriddedPop(temp_pop_file) #(pop_layer)\n", - "res = xx.calculateDegurba(urbDens=300, hdDens=1500, minPopThresh=50, out_raster=deg_file, verbose=True)" + "xx = urban.urbanGriddedPop(temp_pop_file) # (pop_layer)\n", + "res = xx.calculateDegurba(\n", + " urbDens=300, hdDens=1500, minPopThresh=50, out_raster=deg_file, verbose=True\n", + ")" ] }, { @@ -206,7 +215,6 @@ "metadata": {}, "outputs": [], "source": [ - "import rasterio.features as features\n", "features.shapes?" ] }, @@ -226,7 +234,6 @@ "source": [ "import os\n", "import matplotlib.pyplot as plt\n", - "from matplotlib.patches import Patch\n", "from matplotlib.colors import ListedColormap\n", "import matplotlib.colors as colors" ] @@ -237,10 +244,12 @@ "metadata": {}, "outputs": [], "source": [ - "cmap = ListedColormap([\"lightgreen\", \"green\", \"darkgreen\", \"yellow\", 'brown', 'black', 'red'])\n", + "cmap = ListedColormap(\n", + " [\"lightgreen\", \"green\", \"darkgreen\", \"yellow\", \"brown\", \"black\", \"red\"]\n", + ")\n", "norm = colors.BoundaryNorm([10, 11, 12, 13, 21, 22, 23, 30], 7)\n", "fig, ax = plt.subplots(figsize=(10, 5))\n", - "ax.imshow(hd[350:450,500:650], cmap=cmap, norm=norm)\n", + "ax.imshow(hd[350:450, 500:650], cmap=cmap, norm=norm)\n", "plt.show()" ] }, @@ -251,7 +260,7 @@ "outputs": [], "source": [ "fig, ax = plt.subplots(figsize=(10, 5))\n", - "ax.imshow(urb[350:450,500:650], cmap=cmap, norm=norm)#, extent=[350, 450, 650, 500])\n", + "ax.imshow(urb[350:450, 500:650], cmap=cmap, norm=norm) # , extent=[350, 450, 650, 500])\n", "plt.show()" ] }, @@ -278,20 +287,20 @@ "metadata": {}, "outputs": [], "source": [ - "import geojson, json\n", + "import geojson\n", + "import json\n", "\n", - "from shapely.geometry import shape, Polygon\n", - "from rasterio import features\n", + "from shapely.geometry import shape\n", "from rasterio.features import rasterize\n", "\n", - "urbDens=3 * 7\n", - "hdDens=15 * 7\n", - "urbThresh=5000\n", - "hdThresh=50000\n", - "minPopThresh=0.5 * 7\n", + "urbDens = 3 * 7\n", + "hdDens = 15 * 7\n", + "urbThresh = 5000\n", + "hdThresh = 50000\n", + "minPopThresh = 0.5 * 7\n", "out_raster = deg_file\n", - "print_message=''\n", - "verbose=False" + "print_message = \"\"\n", + "verbose = False" ] }, { @@ -304,27 +313,32 @@ "popRaster = xx.inR\n", "data = popRaster.read()\n", 
"urban_raster = data * 0\n", - "final_raster = data[0,:,:] * 0 + 11\n", + "final_raster = data[0, :, :] * 0 + 11\n", "\n", "urban_raster[np.where(data > hdDens)] = 30\n", "\n", "idx = 0\n", "allFeatures = []\n", - "#Analyze the high density shapes\n", + "# Analyze the high density shapes\n", "for cShape, value in features.shapes(urban_raster, transform=popRaster.transform):\n", " if idx % 1000 == 0 and verbose:\n", " tPrint(\"%s: Creating Shape %s\" % (print_message, idx))\n", " idx = idx + 1\n", " if value > 0:\n", - " if value == 30: # only smooth the HD density shapes\n", + " if value == 30: # only smooth the HD density shapes\n", " origShape = cShape\n", " xx = shape(cShape)\n", " xx = Polygon(xx.exterior)\n", " cShape = xx.__geo_interface__\n", - " #If the shape is urban, claculate total pop \n", - " mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform)\n", + " # If the shape is urban, claculate total pop\n", + " mask = rasterize(\n", + " [(cShape, 0)],\n", + " out_shape=data[0, :, :].shape,\n", + " fill=1,\n", + " transform=popRaster.transform,\n", + " )\n", " inData = np.ma.array(data=data, mask=mask.astype(bool))\n", - " pop = np.nansum(inData) \n", + " pop = np.nansum(inData)\n", "\n", " val = 0\n", " if pop > 5000:\n", @@ -333,8 +347,8 @@ " if pop > 50000:\n", " val = 30\n", "\n", - " #Burn value into the final raster\n", - " mask = (mask^1) * val \n", + " # Burn value into the final raster\n", + " mask = (mask ^ 1) * val\n", " yy = np.dstack([final_raster, mask])\n", " final_raster = np.amax(yy, axis=2)\n", " allFeatures.append([idx, pop, val, shape(geojson.loads(json.dumps(cShape)))])\n", @@ -348,7 +362,7 @@ "outputs": [], "source": [ "unique_elements, counts_elements = np.unique(HD_raster, return_counts=True)\n", - "print(np.asarray((unique_elements, counts_elements)))\n" + "print(np.asarray((unique_elements, counts_elements)))" ] }, { @@ -371,9 +385,9 @@ "metadata": {}, "outputs": [], "source": [ - "final_raster = data[0,:,:] * 0\n", + "final_raster = data[0, :, :] * 0\n", "yy = np.dstack([final_raster, mask])\n", - "final_raster = np.amax(yy, axis=2) " + "final_raster = np.amax(yy, axis=2)" ] }, { @@ -382,7 +396,9 @@ "metadata": {}, "outputs": [], "source": [ - "mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform)" + "mask = rasterize(\n", + " [(cShape, 0)], out_shape=data[0, :, :].shape, fill=1, transform=popRaster.transform\n", + ")" ] }, { @@ -401,7 +417,7 @@ "metadata": {}, "outputs": [], "source": [ - "tempR.meta\n" + "tempR.meta" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb b/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb index 58c94c4..545df73 100644 --- a/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb +++ b/notebooks/Implementations/URB_SEAU1_B_A_Ka_ExtractDataUrban.ipynb @@ -27,29 +27,25 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, importlib, shutil\n", - "import requests\n", - "import rasterio, elevation, richdem\n", + "import sys\n", + "import os\n", + "import importlib\n", + "import rasterio\n", "import rasterio.warp\n", - "from rasterio import features\n", "\n", - "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "\n", - "from shapely.geometry import MultiPolygon, Polygon, box\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.append(\"../../../gostrocks/src\")\n", "\n", "import GOSTRocks.rasterMisc as rMisc\n", "from 
GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../\")\n", "import src.UrbanRaster as urban\n", - "import src.urban_helper as helper\n", - "\n" + "import src.urban_helper as helper" ] }, { @@ -77,8 +73,9 @@ "importlib.reload(helper)\n", "importlib.reload(rMisc)\n", "\n", + "\n", "def calculate_urban(iso3, inG, inG2, pop_files, ea_file, km=True, small=True):\n", - " global_landcover = \"/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif\"\n", + " global_landcover = \"/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif\"\n", " global_ghspop = \"/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif\"\n", " global_ghspop_1k = \"/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif\"\n", " global_ghbuilt = \"/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif\"\n", @@ -87,21 +84,34 @@ " ghsl_vrt = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n", "\n", " output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - " inD = inG.loc[inG['ISO3'] == iso3]\n", - " inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500))\n", - " inD = inD.to_crs({'init':'epsg:4326'})\n", - " \n", - " inD2 = inG2.loc[inG2['ISO3'] == iso3]\n", - " inD2 = inD2.to_crs({'init':'epsg:4326'}) \n", - " \n", + " inD = inG.loc[inG[\"ISO3\"] == iso3]\n", + " inD[\"geometry\"] = inD[\"geometry\"].apply(lambda x: x.buffer(500))\n", + " inD = inD.to_crs({\"init\": \"epsg:4326\"})\n", + "\n", + " inD2 = inG2.loc[inG2[\"ISO3\"] == iso3]\n", + " inD2 = inD2.to_crs({\"init\": \"epsg:4326\"})\n", + "\n", " ### Process 1km data\n", " if km:\n", - " xx = helper.urban_country(iso3, output_folder, inD, pop_files,\n", - " final_folder=\"FINAL_STANDARD_1KM\", ghspop_suffix=\"1k\")\n", - " adm2_res = os.path.join(xx.final_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\") \n", - " ea_res = os.path.join(xx.final_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", + " xx = helper.urban_country(\n", + " iso3,\n", + " output_folder,\n", + " inD,\n", + " pop_files,\n", + " final_folder=\"FINAL_STANDARD_1KM\",\n", + " ghspop_suffix=\"1k\",\n", + " )\n", + " adm2_res = os.path.join(xx.final_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")\n", + " ea_res = os.path.join(xx.final_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", " tPrint(\"***** Extracting Global Layers %s\" % iso3)\n", - " xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod)\n", + " xx.extract_layers(\n", + " global_landcover,\n", + " global_ghspop,\n", + " global_ghspop_1k,\n", + " global_ghbuilt,\n", + " ghsl_vrt,\n", + " ghs_smod,\n", + " )\n", " tPrint(\"***** Downloading and processing elevation %s\" % iso3)\n", " xx.process_dem(global_dem=global_dem_1k)\n", " tPrint(\"***** Standardizing rasters\")\n", @@ -110,12 +120,19 @@ " xx.calculate_urban()\n", " tPrint(\"***** Evaluating Data\")\n", " xx.evaluateOutput()\n", - " \n", - " ### Process 250m data \n", + "\n", + " ### Process 250m data\n", " if small:\n", " xx = helper.urban_country(iso3, output_folder, inD, pop_files)\n", " tPrint(\"***** Extracting Global Layers %s\" % iso3)\n", - " xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod)\n", + " xx.extract_layers(\n", + " global_landcover,\n", + " global_ghspop,\n", + " 
global_ghspop_1k,\n", + " global_ghbuilt,\n", + " ghsl_vrt,\n", + " ghs_smod,\n", + " )\n", " tPrint(\"***** Downloading and processing elevation %s\" % iso3)\n", " xx.process_dem(global_dem=global_dem_1k)\n", " tPrint(\"***** Standardizing rasters\")\n", @@ -125,14 +142,22 @@ " xx.evaluateOutput()\n", " tPrint(\"***** Calculating Zonal admin2\")\n", " if os.path.exists(ea_file):\n", - " if not os.path.exists(os.path.join(output_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")):\n", + " if not os.path.exists(\n", + " os.path.join(output_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")\n", + " ):\n", " zonal_adm2 = xx.pop_zonal_admin(inD2)\n", - " zonal_adm2.to_csv(os.path.join(output_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\"))\n", + " zonal_adm2.to_csv(\n", + " os.path.join(output_folder, \"URBAN_ADMIN2_STATS_COMPILED.csv\")\n", + " )\n", " tPrint(\"***** Calculating Zonal communes\")\n", - " if not os.path.exists(os.path.join(output_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")):\n", + " if not os.path.exists(\n", + " os.path.join(output_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", + " ):\n", " inEA = gpd.read_file(ea_file)\n", " zonal_ea = xx.pop_zonal_admin(inEA)\n", - " zonal_ea.to_csv(os.path.join(output_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\"))" + " zonal_ea.to_csv(\n", + " os.path.join(output_folder, \"URBAN_COMMUNE_STATS_COMPILED.csv\")\n", + " )" ] }, { @@ -146,10 +171,12 @@ "local_path = \"/home/public/Data/COUNTRY/{country}/WORLDPOP/\".format(country=iso3)\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_2018_un = os.path.join(local_path, \"%s_ppp_2016_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()]]\n", + "pop_files = [\n", + " [pop_2015_un, \"%s_upo15.tif\" % iso3.lower()],\n", + " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()],\n", + "]\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = ''\n", + "ea_file = \"\"\n", "\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" ] @@ -220,13 +247,17 @@ "importlib.reload(helper)\n", "# Process EGY\n", "iso3 = \"EGY\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(\n", + " country=iso3\n", + ")\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_2018_un = os.path.join(local_path, \"%s_ppp_2013_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()]]\n", + "pop_files = [\n", + " [pop_2015_un, \"%s_upo15.tif\" % iso3.lower()],\n", + " [pop_2018_un, \"%s_upo16.tif\" % iso3.lower()],\n", + "]\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = ''\n", + "ea_file = \"\"\n", "\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" ] @@ -260,11 +291,13 @@ "source": [ "# Process COL\n", "iso3 = \"COL\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(\n", + " country=iso3\n", + ")\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % 
iso3.lower()]]\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = ''\n", + "ea_file = \"\"\n", "\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" ] @@ -280,18 +313,22 @@ "importlib.reload(helper)\n", "# Process GHA\n", "iso3 = \"GHA\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(\n", + " country=iso3\n", + ")\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_2018_un = os.path.join(local_path, \"%s_ppp_2017_UNadj.tif\" % iso3.lower())\n", "pop_2015_con = os.path.join(local_path, \"ppp_prj_2015_%s_UNadj.tif\" % iso3)\n", "pop_2018_con = os.path.join(local_path, \"ppp_prj_2017_%s_UNadj.tif\" % iso3)\n", "\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo17.tif\" % iso3.lower()], \n", - " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", - " [pop_2018_con, \"%s_cpo17.tif\" % iso3.lower()]]\n", + "pop_files = [\n", + " [pop_2015_un, \"%s_upo15.tif\" % iso3.lower()],\n", + " [pop_2018_un, \"%s_upo17.tif\" % iso3.lower()],\n", + " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()],\n", + " [pop_2018_con, \"%s_cpo17.tif\" % iso3.lower()],\n", + "]\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')\n", + "ea_file = os.path.join(output_folder, \"FINAL_EAS.shp\")\n", "\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runsmall, km=runLarge)" ] @@ -307,13 +344,17 @@ "importlib.reload(helper)\n", "# Process BGD\n", "iso3 = \"BGD\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", - "ea_file = os.path.join(output_folder, 'mauza11_reprojected.shp')\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(\n", + " country=iso3\n", + ")\n", + "ea_file = os.path.join(output_folder, \"mauza11_reprojected.shp\")\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", "\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()]]\n", + "pop_files = [\n", + " [pop_2015_un, \"%s_upo15.tif\" % iso3.lower()],\n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()],\n", + "]\n", "\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" @@ -329,19 +370,25 @@ "source": [ "# Process TZA\n", "iso3 = \"TZA\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018\".format(country=iso3)\n", + "local_path = (\n", + " \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/TZA_2015_2018\".format(\n", + " country=iso3\n", + " )\n", + ")\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", "pop_2015_con = os.path.join(local_path, \"ppp_prj_2015_%s_UNadj.tif\" % iso3)\n", "pop_2018_con = os.path.join(local_path, \"ppp_prj_2018_%s_UNadj.tif\" % iso3)\n", "\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()], 
\n", - " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", - " [pop_2018_con, \"%s_cpo18.tif\" % iso3.lower()]]\n", + "pop_files = [\n", + " [pop_2015_un, \"%s_upo15.tif\" % iso3.lower()],\n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()],\n", + " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()],\n", + " [pop_2018_con, \"%s_cpo18.tif\" % iso3.lower()],\n", + "]\n", "\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'FINAL_EAS.shp')\n", + "ea_file = os.path.join(output_folder, \"FINAL_EAS.shp\")\n", "\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" ] @@ -356,15 +403,19 @@ "source": [ "# Process VNM\n", "iso3 = \"VNM\"\n", - "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(country=iso3)\n", + "local_path = \"/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/\".format(\n", + " country=iso3\n", + ")\n", "pop_2015_un = os.path.join(local_path, \"%s_ppp_2015_UNadj.tif\" % iso3.lower())\n", "pop_2018_un = os.path.join(local_path, \"%s_ppp_2018_UNadj.tif\" % iso3.lower())\n", - "pop_files = [[pop_2015_un, \"%s_upo15.tif\" % iso3.lower()], \n", - " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()], \n", - " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()], \n", - " [pop_2018_con, \"%s_cpo18.tif\" % iso3.lower()]]\n", + "pop_files = [\n", + " [pop_2015_un, \"%s_upo15.tif\" % iso3.lower()],\n", + " [pop_2018_un, \"%s_upo18.tif\" % iso3.lower()],\n", + " [pop_2015_con, \"%s_cpo15.tif\" % iso3.lower()],\n", + " [pop_2018_con, \"%s_cpo18.tif\" % iso3.lower()],\n", + "]\n", "output_folder = \"/home/wb411133/temp/%s_URBAN_DATA_new_naming\" % iso3\n", - "ea_file = os.path.join(output_folder, 'Commune shapefiles', 'VN_communes2008.shp')\n", + "ea_file = os.path.join(output_folder, \"Commune shapefiles\", \"VN_communes2008.shp\")\n", "\n", "calculate_urban(iso3, inG, inG2, pop_files, ea_file, small=runSmall, km=runLarge)" ] @@ -411,10 +462,13 @@ "outputs": [], "source": [ "importlib.reload(urban)\n", - "pop_file = '/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k1k_gpo.tif'\n", + "pop_file = (\n", + " \"/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k1k_gpo.tif\"\n", + ")\n", "urban_shp = urban.urbanGriddedPop(pop_file)\n", - "shps = urban_shp.calculateUrban(raster=out_file, densVal=1500, totalPopThresh=50000,\n", - " smooth=True, queen=True)" + "shps = urban_shp.calculateUrban(\n", + " raster=out_file, densVal=1500, totalPopThresh=50000, smooth=True, queen=True\n", + ")" ] }, { @@ -423,10 +477,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Comparing complete GHS-SMOD classification with \n", + "# Comparing complete GHS-SMOD classification with\n", "ghs_smod = \"/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_POP2015_GLOBE_R2019A_54009_1K_V2_0.tif\"\n", - "in_bounds = '/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd_adm.shp'\n", - "out_file = '/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_smod.tif'\n", + "in_bounds = (\n", + " \"/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd_adm.shp\"\n", + ")\n", + "out_file = (\n", + " \"/home/wb411133/temp/BGD_URBAN_DATA_new_naming/FINAL_STANDARD_1KM/bgd1k_smod.tif\"\n", + ")\n", "\n", "inR = rasterio.open(ghs_smod)\n", "inB = gpd.read_file(in_bounds)\n", @@ -452,13 +510,13 @@ "metadata": {}, "outputs": [], "source": [ - "smod_vals = [10,11,12,13,21,22,23,30]\n", + "smod_vals = [10, 11, 12, 13, 
21, 22, 23, 30]\n", "total_pop = pop.sum()\n", "for val in smod_vals:\n", " cur_smod = (smod == val).astype(int)\n", " cur_pop = pop * cur_smod\n", " total_curpop = cur_pop.sum()\n", - " print(f'{val}: {(total_curpop.sum()/total_pop*100)}')" + " print(f\"{val}: {(total_curpop.sum()/total_pop*100)}\")" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb index ea9583c..034dd7f 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/Create_Mosaick_Datasets.ipynb @@ -23,36 +23,30 @@ } ], "source": [ - "import sys, os, importlib, shutil, pathlib, datetime, math\n", - "import json, urllib, boto3\n", + "import sys\n", + "import os\n", + "import boto3\n", "import rasterio\n", "\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "import numpy as np\n", "\n", "from rasterio.merge import merge\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", "import GOSTRocks.rasterMisc as rMisc\n", - "import GOSTRocks.metadataMisc as meta\n", "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", - "import GOST_Urban.urban_helper as helper\n", "\n", - "#Import local functions\n", - "import novelUrbanization as nu\n", + "# Import local functions\n", "from novelUrbanization import *\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "\n", - "s3client = boto3.client('s3')" + "s3client = boto3.client(\"s3\")" ] }, { @@ -73,31 +67,34 @@ "bucket = \"wbg-geography01\"\n", "prefix = \"URBANIZATION/MR_Novel_Poverty/\"\n", "\n", - "# Loop through the S3 bucket and get all the keys for files that are .tif \n", + "# Loop through the S3 bucket and get all the keys for files that are .tif\n", "more_results = True\n", "loops = 0\n", "urban_tiff = {}\n", "\n", "while more_results:\n", " if loops > 0:\n", - " objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=token)\n", + " objects = s3client.list_objects_v2(\n", + " Bucket=bucket, Prefix=prefix, ContinuationToken=token\n", + " )\n", " else:\n", " objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix)\n", - " more_results = objects['IsTruncated']\n", + " more_results = objects[\"IsTruncated\"]\n", " if more_results:\n", - " token = objects['NextContinuationToken']\n", + " token = objects[\"NextContinuationToken\"]\n", " loops += 1\n", - " for res in objects['Contents']:\n", - " if (\"FINAL_STANDARD_1KM\" in res['Key']) and \\\n", - " (res['Key'].endswith(\"urban.tif\") or res['Key'].endswith(\"urban_hd.tif\")):\n", - " cur_pop = os.path.basename(res['Key']).split(\"_\")[1]\n", - " cur_type = os.path.basename(res['Key']).split(\"_\")[-1].replace(\".tif\", \"\")\n", - " cur_lyr = f'{cur_pop}_{cur_type}'\n", - " cur_path = os.path.join(\"s3://\", bucket, res['Key'])\n", + " for res in objects[\"Contents\"]:\n", + " if (\"FINAL_STANDARD_1KM\" in res[\"Key\"]) and (\n", + " res[\"Key\"].endswith(\"urban.tif\") or res[\"Key\"].endswith(\"urban_hd.tif\")\n", + " ):\n", + " cur_pop = os.path.basename(res[\"Key\"]).split(\"_\")[1]\n", + " cur_type = os.path.basename(res[\"Key\"]).split(\"_\")[-1].replace(\".tif\", \"\")\n", + " cur_lyr = 
f\"{cur_pop}_{cur_type}\"\n", + " cur_path = os.path.join(\"s3://\", bucket, res[\"Key\"])\n", " try:\n", " urban_tiff[cur_lyr].append(cur_path)\n", " except:\n", - " urban_tiff[cur_lyr] = [cur_path] " + " urban_tiff[cur_lyr] = [cur_path]" ] }, { @@ -138,9 +135,10 @@ "\n", "for cur_lbl, rasters in urban_tiff.items():\n", " tPrint(cur_lbl)\n", - " out_file = os.path.join(out_folder, f'{cur_lbl}_DoU_Africa_mosaic.tif')\n", - " curD, profile = rMisc.merge_rasters(rasters, merge_method='max', boolean_gt_0=True,\n", - " out_file=out_file)" + " out_file = os.path.join(out_folder, f\"{cur_lbl}_DoU_Africa_mosaic.tif\")\n", + " curD, profile = rMisc.merge_rasters(\n", + " rasters, merge_method=\"max\", boolean_gt_0=True, out_file=out_file\n", + " )" ] }, { @@ -157,12 +155,16 @@ "# Create a new raster file with the merged data\n", "metadata = opened_tiffs[0].meta.copy()\n", "metadata.update(\n", - " {\"height\":merged.shape[1],\n", - " \"width\":merged.shape[2],\n", - " \"transform\":out_transform,\n", - " 'dtype':'uint8'}\n", + " {\n", + " \"height\": merged.shape[1],\n", + " \"width\": merged.shape[2],\n", + " \"transform\": out_transform,\n", + " \"dtype\": \"uint8\",\n", + " }\n", ")\n", - "with rasterio.open(f'/home/wb411133/temp/{sel_pop}_combo_urban_hd.tif', 'w', **metadata) as dst:\n", + "with rasterio.open(\n", + " f\"/home/wb411133/temp/{sel_pop}_combo_urban_hd.tif\", \"w\", **metadata\n", + ") as dst:\n", " dst.write(merged)" ] }, @@ -184,31 +186,33 @@ "bucket = \"wbg-geography01\"\n", "prefix = \"URBANIZATION/MR_Novel_Poverty/AAPPC/Delineations/\"\n", "\n", - "# Loop through the S3 bucket and get all the keys for files that are .tif \n", + "# Loop through the S3 bucket and get all the keys for files that are .tif\n", "more_results = True\n", "loops = 0\n", "db_tiffs = {}\n", "\n", "while more_results:\n", " if loops > 0:\n", - " objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=token)\n", + " objects = s3client.list_objects_v2(\n", + " Bucket=bucket, Prefix=prefix, ContinuationToken=token\n", + " )\n", " else:\n", " objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix)\n", - " more_results = objects['IsTruncated']\n", + " more_results = objects[\"IsTruncated\"]\n", " if more_results:\n", - " token = objects['NextContinuationToken']\n", + " token = objects[\"NextContinuationToken\"]\n", " loops += 1\n", - " for res in objects['Contents']:\n", - " if (\"1k\" in res['Key']): \n", - " cur_path = os.path.join(\"s3://\", bucket, res['Key'])\n", + " for res in objects[\"Contents\"]:\n", + " if \"1k\" in res[\"Key\"]:\n", + " cur_path = os.path.join(\"s3://\", bucket, res[\"Key\"])\n", " cur_type = os.path.basename(cur_path).split(\"_\")[2][:2]\n", - " if cur_type in ['cc', 'co', 'ur']:\n", - " cur_pop = os.path.basename(cur_path).split(\"_\")[1].split('d')[0]\n", - " cur_label = f'{cur_pop}_{cur_type}'\n", + " if cur_type in [\"cc\", \"co\", \"ur\"]:\n", + " cur_pop = os.path.basename(cur_path).split(\"_\")[1].split(\"d\")[0]\n", + " cur_label = f\"{cur_pop}_{cur_type}\"\n", " try:\n", " db_tiffs[cur_label].append(cur_path)\n", " except:\n", - " db_tiffs[cur_label] = [cur_path] " + " db_tiffs[cur_label] = [cur_path]" ] }, { @@ -249,9 +253,10 @@ "\n", "for cur_lbl, rasters in urban_tiff.items():\n", " tPrint(cur_lbl)\n", - " out_file = os.path.join(out_folder, f'{cur_lbl}_DB_Africa_mosaic.tif')\n", - " curD, profile = rMisc.merge_rasters(rasters, merge_method='max', boolean_gt_0=True,\n", - " out_file=out_file)" + " out_file = os.path.join(out_folder, 
f\"{cur_lbl}_DB_Africa_mosaic.tif\")\n", + " curD, profile = rMisc.merge_rasters(\n", + " rasters, merge_method=\"max\", boolean_gt_0=True, out_file=out_file\n", + " )" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/GHSL_Standardize_To_Country.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/GHSL_Standardize_To_Country.ipynb index 04d2897..f818cba 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/GHSL_Standardize_To_Country.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/GHSL_Standardize_To_Country.ipynb @@ -15,34 +15,25 @@ } ], "source": [ - "import sys, os, importlib, shutil, pathlib, datetime\n", - "import requests\n", - "import rasterio, elevation, richdem\n", + "import sys\n", + "import os\n", + "import rasterio\n", "import rasterio.warp\n", - "from rasterio import features\n", - "from datetime import datetime\n", "\n", - "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "\n", - "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", "import GOSTRocks.rasterMisc as rMisc\n", "import GOSTRocks.mapMisc as mapMisc\n", - "import GOSTRocks.metadataMisc as meta\n", "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", - "import GOST_Urban.urban_helper as helper\n", "\n", - "#Import local functions\n", - "import novelUrbanization as nu\n", + "# Import local functions\n", "from novelUrbanization import *\n", "\n", "%load_ext autoreload\n", @@ -61,7 +52,9 @@ "runSmall = True\n", "runLarge = True\n", "\n", - "gha_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/GHA_URBAN_DATA_new_naming\"\n", + "gha_folder = (\n", + " \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/GHA_URBAN_DATA_new_naming\"\n", + ")\n", "ghsl_data = \"/home/public/Data/GLOBAL/GHSL/Built/GHS_BUILT_S_E{year}_GLOBE_R2023A_54009_100_V1_0.tif\"\n", "template_data = os.path.join(gha_folder, \"FINAL_STANDARD\", \"gha_cpo20.tif\")\n", "out_ghsl = os.path.join(gha_folder, \"ghsl_{year}_250m.tif\")" @@ -74,8 +67,7 @@ "outputs": [], "source": [ "inG = gpd.read_file(global_bounds)\n", - "inG2 = gpd.read_file(global_bounds_adm2)\n", - "\n" + "inG2 = gpd.read_file(global_bounds_adm2)" ] }, { @@ -121,10 +113,14 @@ "for yr in [2020, 2015, 2010, 2005, 2000, 1995, 1990]:\n", " cur_ghsl = rasterio.open(ghsl_data.format(year=yr))\n", " tPrint(yr)\n", - " \n", + "\n", " # need to scale ghsl data to 250m data\n", - " res = rMisc.standardizeInputRasters(cur_ghsl, in_template, inR1_outFile=out_ghsl.format(year=yr), resampling_type=\"sum\")\n", - " " + " res = rMisc.standardizeInputRasters(\n", + " cur_ghsl,\n", + " in_template,\n", + " inR1_outFile=out_ghsl.format(year=yr),\n", + " resampling_type=\"sum\",\n", + " )" ] }, { @@ -156,7 +152,9 @@ "source": [ "yr = 2020\n", "curD = rasterio.open(out_ghsl.format(year=yr))\n", - "mapMisc.static_map_raster(curD, figsize=(20,20), thresh=[50,100,1000,2000,5000,10000,25000,50000])" + "mapMisc.static_map_raster(\n", + " curD, figsize=(20, 20), thresh=[50, 100, 1000, 2000, 5000, 10000, 25000, 50000]\n", + ")" ] }, { @@ -188,7 +186,9 @@ "source": [ "yr = 2000\n", "curD = rasterio.open(out_ghsl.format(year=yr))\n", - "mapMisc.static_map_raster(curD, figsize=(20,20), 
thresh=[50,100,1000,2000,5000,10000,25000,50000])" + "mapMisc.static_map_raster(\n", + " curD, figsize=(20, 20), thresh=[50, 100, 1000, 2000, 5000, 10000, 25000, 50000]\n", + ")" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb index 3d959ea..ed5ef70 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/MAP_Urbanization.ipynb @@ -13,33 +13,24 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, importlib, shutil, pathlib, datetime, multiprocessing\n", - "import requests\n", + "import sys\n", + "import os\n", + "import multiprocessing\n", "\n", - "import rasterio, elevation, richdem\n", - "import rasterio.warp\n", "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "\n", - "from rasterio import features\n", - "from datetime import datetime\n", - "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", "from osgeo import gdal\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", - "import GOSTRocks.rasterMisc as rMisc\n", "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", - "import GOST_Urban.urban_helper as helper\n", "\n", - "#Import local functions\n", - "import novelUrbanization as nu\n", + "# Import local functions\n", "from novelUrbanization import *\n", "\n", "%load_ext autoreload\n", @@ -61,9 +52,9 @@ ], "source": [ "base_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data\"\n", - "dou_pop = 'gpo'\n", - "db_pop = 'cpo15'\n", - "aapc_folder = os.path.join(base_folder, \"AAPPC\", 'Delineations')\n", + "dou_pop = \"gpo\"\n", + "db_pop = \"cpo15\"\n", + "aapc_folder = os.path.join(base_folder, \"AAPPC\", \"Delineations\")\n", "agg_folder = f\"/home/wb411133/data/Projects/MR_Novel_Urbanization/Aggregate_Data/DOU{dou_pop}_DB{db_pop}\"\n", "if not os.path.exists(agg_folder):\n", " os.makedirs(agg_folder)\n", @@ -96,14 +87,14 @@ ], "source": [ "# Generate comparison of DOU gpo and DB cpo15\n", - "iso3 = 'AGO'\n", - "country_folder = os.path.join(base_folder, f'{iso3}_URBAN_DATA_new_naming')\n", + "iso3 = \"AGO\"\n", + "country_folder = os.path.join(base_folder, f\"{iso3}_URBAN_DATA_new_naming\")\n", "urb = urban_data(iso3, country_folder, aapc_folder)\n", "comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop], debug=True)\n", "if comboRes:\n", " if not os.path.exists(agg_folder):\n", " os.makedirs(agg_folder)\n", - " urb.write_results(comboRes, agg_folder, dbhd = 'co')\n", + " urb.write_results(comboRes, agg_folder, dbhd=\"co\")\n", "tPrint(iso3)" ] }, @@ -114,9 +105,9 @@ "outputs": [], "source": [ "def multiP(iso3):\n", - " dou_pop = 'gpo'\n", - " db_pop = 'cpo15'\n", - " country_folder = os.path.join(base_folder, f'{iso3}_URBAN_DATA_new_naming')\n", + " dou_pop = \"gpo\"\n", + " db_pop = \"cpo15\"\n", + " country_folder = os.path.join(base_folder, f\"{iso3}_URBAN_DATA_new_naming\")\n", " urb = urban_data(iso3, country_folder, aapc_folder)\n", " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop], debug=True)\n", " if comboRes:\n", @@ -160,11 +151,15 @@ "metadata": {}, "outputs": [], "source": [ - "vrt_options = gdal.BuildVRTOptions(resampleAlg='cubic', 
addAlpha=True)\n", - "pop_layer = 'cpo'\n", - "my_vrt = gdal.BuildVRT(f'DOU{dou_pop}_DB{db_pop}_sum.vrt', sum_files, options=vrt_options)\n", + "vrt_options = gdal.BuildVRTOptions(resampleAlg=\"cubic\", addAlpha=True)\n", + "pop_layer = \"cpo\"\n", + "my_vrt = gdal.BuildVRT(\n", + " f\"DOU{dou_pop}_DB{db_pop}_sum.vrt\", sum_files, options=vrt_options\n", + ")\n", "my_vrt = None\n", - "my_vrt = gdal.BuildVRT(f'DOU{dou_pop}_DB{db_pop}_binary.vrt', bin_files, options=vrt_options)\n", + "my_vrt = gdal.BuildVRT(\n", + " f\"DOU{dou_pop}_DB{db_pop}_binary.vrt\", bin_files, options=vrt_options\n", + ")\n", "my_vrt = None" ] }, @@ -181,8 +176,8 @@ "metadata": {}, "outputs": [], "source": [ - "iso3 = 'BGD'\n", - "country_folder = os.path.join(base_folder, f'{iso3}_URBAN_DATA_new_naming')\n", + "iso3 = \"BGD\"\n", + "country_folder = os.path.join(base_folder, f\"{iso3}_URBAN_DATA_new_naming\")\n", "urb = urban_data(iso3, country_folder, aapc_folder)\n", "comboRes = urb.generate_combo_layer(pop_type=pop_layer, debug=True)\n", "res = urb.jaccard_index(pop_type=pop_layer)\n", @@ -216,27 +211,28 @@ ], "source": [ "def jaccardP(iso3, debug=False):\n", - " country_folder = os.path.join(base_folder, f'{iso3}_URBAN_DATA_new_naming')\n", + " country_folder = os.path.join(base_folder, f\"{iso3}_URBAN_DATA_new_naming\")\n", " urb = urban_data(iso3, country_folder, aapc_folder)\n", " if not debug:\n", " try:\n", - " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop]) \n", - " res = urb.jaccard_index(pop_type=[dou_pop, db_pop]) \n", + " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop])\n", + " res = urb.jaccard_index(pop_type=[dou_pop, db_pop])\n", " except:\n", - " res = {'urb_jaccard': -1, 'hd_jaccard': -1}\n", + " res = {\"urb_jaccard\": -1, \"hd_jaccard\": -1}\n", " else:\n", - " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop]) \n", - " res = urb.jaccard_index(pop_type=[dou_pop, db_pop]) \n", - " tPrint(f'{iso3}: {res}')\n", - " return({iso3:res})\n", + " comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop])\n", + " res = urb.jaccard_index(pop_type=[dou_pop, db_pop])\n", + " tPrint(f\"{iso3}: {res}\")\n", + " return {iso3: res}\n", "\n", - "jaccardP('NAM', True)\n", "\n", - "'''\n", + "jaccardP(\"NAM\", True)\n", + "\n", + "\"\"\"\n", "with multiprocessing.Pool(len(processed_iso3)) as mp:\n", " all_jaccard = mp.map(jaccardP, processed_iso3)\n", "\n", - "'''" + "\"\"\"" ] }, { @@ -633,7 +629,7 @@ " cISO = list(cntry.keys())[0]\n", " res[cISO] = cntry[cISO]\n", "res = pd.DataFrame(res).transpose()\n", - "res = res.sort_values('urb_jaccard', ascending=False)\n", + "res = res.sort_values(\"urb_jaccard\", ascending=False)\n", "res" ] }, @@ -645,7 +641,7 @@ "source": [ "# Attach geometry to the res\n", "admin0_polys = gpd.read_file(\"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\")\n", - "res['ISO3'] = res.index" + "res[\"ISO3\"] = res.index" ] }, { @@ -656,10 +652,10 @@ }, "outputs": [], "source": [ - "temp_res = res.loc[res['hd_jaccard'] > 0]\n", + "temp_res = res.loc[res[\"hd_jaccard\"] > 0]\n", "temp_res = temp_res.merge(admin0_polys, on=\"ISO3\")\n", "temp_res = gpd.GeoDataFrame(temp_res, geometry=\"geometry\", crs=4326)\n", - "temp_res.to_file(f'{agg_folder}_national_jaccard.shp')" + "temp_res.to_file(f\"{agg_folder}_national_jaccard.shp\")" ] }, { @@ -668,7 +664,7 @@ "metadata": {}, "outputs": [], "source": [ - "temp_res['ISO3'].values" + "temp_res[\"ISO3\"].values" ] }, { @@ -678,9 +674,9 @@ "outputs": [], "source": [ "# Create subset of GHS UCDB\n", - 
"ucdb_file = '/home/public/Data/GLOBAL/URBAN/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg'\n", + "ucdb_file = \"/home/public/Data/GLOBAL/URBAN/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg\"\n", "ucdb = gpd.read_file(ucdb_file)\n", - "ucdb = ucdb.loc[ucdb['CTR_MN_ISO'].isin(temp_res['ISO3'].values)]" + "ucdb = ucdb.loc[ucdb[\"CTR_MN_ISO\"].isin(temp_res[\"ISO3\"].values)]" ] }, { @@ -701,9 +697,9 @@ "all_grps = []\n", "cnt_size = 2\n", "for idx, grp in ucdb.groupby(\"CTR_MN_ISO\"):\n", - " grp = grp.sort_values('P15', ascending=False)\n", + " grp = grp.sort_values(\"P15\", ascending=False)\n", " if grp.shape[0] > cnt_size:\n", - " all_grps.append(grp.iloc[0:cnt_size,:])\n", + " all_grps.append(grp.iloc[0:cnt_size, :])\n", " else:\n", " all_grps.append(grp)" ] @@ -724,7 +720,7 @@ "metadata": {}, "outputs": [], "source": [ - "sel_res.to_file(f'{agg_folder}_select_cities.geojson', driver=\"GeoJSON\")" + "sel_res.to_file(f\"{agg_folder}_select_cities.geojson\", driver=\"GeoJSON\")" ] }, { @@ -733,7 +729,7 @@ "metadata": {}, "outputs": [], "source": [ - "f'{agg_folder}_select_cities.geojson'" + "f\"{agg_folder}_select_cities.geojson\"" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md index aab035d..dcefbac 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/README.md @@ -4,10 +4,10 @@ The code herein support the extraction of the data and calculation of urban exte ## [Degree of Urbanization](https://ghsl.jrc.ec.europa.eu/degurbaOverview.php) The European Commission developed a globally consistent, people-centric definition of urban areas. The basic approach is to apply a threshold to population grids on both the minimum population density, and then on the minimum total population of the resulting settlements. 
While the team at the EC continues to advance and iterate on their methodology, we rely on the original definitions of urban they produced: -| Urban area | Min Pop Density | Min Settlement Pop | -| --- | --- | --- | -| Urban areas | 300 people/km2 | 5000 people | -| High density urban areas | 1500 people/km2 | 50000 people | +| Urban area | Min Pop Density | Min Settlement Pop | +| --- | --- | --- | +| Urban areas | 300 people/km2 | 5000 people | +| High density urban areas | 1500 people/km2 | 50000 people | ## [Bellefon (2021)](https://www.sciencedirect.com/science/article/pii/S0094119019301032) @@ -20,13 +20,6 @@ This method eschews the absolute density thresholds of the EC methodology and in | Urban area | label | Definition | | --- | --- | --- | -| Urban areas | __ur__ | contiguous pixels for which the density is above the 95th percentile of the counterfactual | +| Urban areas | __ur__ | contiguous pixels for which the density is above the 95th percentile of the counterfactual | | Cores | __co__ | contiguous pixels within urban areas that are above the 95th percentile of the counterfactual within the urban core | -| Cities | __cc__ | urban areas that have a core | - - - - - - - +| Cities | __cc__ | urban areas that have a core | diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb index 0578068..c3cbf12 100755 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/URB_SEAU1_B_A_Ka_NovelUrbanizaton.ipynb @@ -43,32 +43,31 @@ } ], "source": [ - "import sys, os, importlib, shutil, pathlib, datetime, math\n", - "import requests\n", - "import rasterio, elevation, richdem\n", + "import sys\n", + "import os\n", + "import importlib\n", + "import shutil\n", + "import pathlib\n", + "import datetime\n", + "import math\n", + "import rasterio\n", "import rasterio.warp\n", - "from rasterio import features\n", - "from datetime import datetime\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", "import numpy as np\n", "\n", - "from shapely.geometry import MultiPolygon, Polygon, box, Point\n", + "from shapely.geometry import Point\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", - "import GOSTRocks.rasterMisc as rMisc\n", "import GOSTRocks.metadataMisc as meta\n", - "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../../src\")\n", - "import GOST_Urban.UrbanRaster as urban\n", - "import GOST_Urban.urban_helper as helper\n", "\n", - "#Import local functions\n", + "# Import local functions\n", "import novelUrbanization as nu\n", "from novelUrbanization import *\n", "\n", @@ -111,9 +110,9 @@ "ea_files = []\n", "for root, dirs, files in os.walk(in_folder):\n", " for x in files:\n", - " if ((x.endswith(\".csv\")) and (not \"URBAN\" in x)):\n", + " if (x.endswith(\".csv\")) and (\"URBAN\" not in x):\n", " ea_files.append(os.path.join(root, x))\n", - " \n", + "\n", "ea_files" ] }, @@ -136,38 +135,42 @@ "source": [ "def try_float(x):\n", " try:\n", - " return(float(x))\n", + " return float(x)\n", " except:\n", - " return(None)\n", + " return None\n", + "\n", "\n", - "def read_geog(file, lat_column, lon_column, crs='epsg:4326', write_out=True):\n", + "def read_geog(file, 
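The README section above gives the Degree of Urbanization thresholds (300 people/km2 with a 5,000-person settlement minimum for urban areas; 1,500 people/km2 with a 50,000-person minimum for high-density urban areas). In these notebooks the thresholds are applied by GOSTurban's `urbanGriddedPop.calculateUrban`; purely as an illustration of what a density-plus-settlement-size rule involves, here is a small self-contained sketch on an assumed 1 km population grid (the array, cell size, and helper name are made up and are not the package's implementation):

```python
import numpy as np
from scipy import ndimage

# Assumed ~1 km grid, so a cell's population equals its density in people/km2.
pop = np.array(
    [
        [0, 100, 400, 2000],
        [50, 350, 1800, 2200],
        [0, 310, 500, 1600],
        [0, 0, 200, 300],
    ],
    dtype=float,
)


def flag_urban(pop, min_density, min_settlement_pop, queen=True):
    """Boolean mask of contiguous dense settlements meeting both thresholds."""
    dense = pop >= min_density
    structure = np.ones((3, 3)) if queen else None  # queen vs rook contiguity
    labels, n = ndimage.label(dense, structure=structure)
    totals = ndimage.sum(pop, labels, index=np.arange(1, n + 1))
    keep = [i + 1 for i, t in enumerate(totals) if t >= min_settlement_pop]
    return np.isin(labels, keep)


urban = flag_urban(pop, 300, 5000)  # urban areas
urban_hd = flag_urban(pop, 1500, 50000)  # high-density urban areas
print(urban.astype(int))
```

The `calculateUrban` calls used elsewhere in these notebooks also pass `smooth=True` and operate on standardized, projected rasters; the sketch skips both.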
lat_column, lon_column, crs=\"epsg:4326\", write_out=True):\n", " print(os.path.basename(file))\n", " out_file = file.replace(\".csv\", \".geojson\")\n", " inD = pd.read_csv(file)\n", - " \n", + "\n", " print(inD.shape)\n", " inD[lat_column] = inD[lat_column].apply(try_float)\n", - " inD[lon_column] = inD[lon_column].apply(try_float) \n", + " inD[lon_column] = inD[lon_column].apply(try_float)\n", " inD = inD.loc[~(inD[lat_column].isna() | inD[lon_column].isna())]\n", " print(inD.shape)\n", - " \n", - " inD_geom = inD.apply(lambda x: Point(float(x[lon_column]), float(x[lat_column])), axis=1)\n", + "\n", + " inD_geom = inD.apply(\n", + " lambda x: Point(float(x[lon_column]), float(x[lat_column])), axis=1\n", + " )\n", " inD = gpd.GeoDataFrame(inD, geometry=inD_geom, crs=crs)\n", - " \n", + "\n", " if write_out:\n", - " inD.to_file(out_file, driver=\"GeoJSON\") \n", - " return(inD)\n", - "\n", - "#res = read_geog(ea_files[0], \"latdd_corrected\", \"londd_corrected\")\n", - "#res = read_geog(ea_files[1], \"lat\", \"lon\")\n", - "#res = read_geog(ea_files[2], \"latitude\", \"longitude\")\n", - "#res = read_geog(ea_files[3], \"latitude\", \"longitude\")\n", - "#res = read_geog(ea_files[4], \"lat_mean\", \"long_mean\")\n", - "#res = read_geog(ea_files[5], \"latdd_corrected\", \"londd_corrected\")\n", - "#res = read_geog(ea_files[6], \"latdd_corrected\", \"londd_corrected\")\n", - "#res = read_geog(ea_files[7], \"lat_modified\",\"lon_modified\")\n", - "#res = read_geog(ea_files[8], \"lat_corrected\", \"lon_corrected\")\n", - "#res = read_geog(ea_files[9], \"lat_corrected\", \"lon_corrected\")\n", + " inD.to_file(out_file, driver=\"GeoJSON\")\n", + " return inD\n", + "\n", + "\n", + "# res = read_geog(ea_files[0], \"latdd_corrected\", \"londd_corrected\")\n", + "# res = read_geog(ea_files[1], \"lat\", \"lon\")\n", + "# res = read_geog(ea_files[2], \"latitude\", \"longitude\")\n", + "# res = read_geog(ea_files[3], \"latitude\", \"longitude\")\n", + "# res = read_geog(ea_files[4], \"lat_mean\", \"long_mean\")\n", + "# res = read_geog(ea_files[5], \"latdd_corrected\", \"londd_corrected\")\n", + "# res = read_geog(ea_files[6], \"latdd_corrected\", \"londd_corrected\")\n", + "# res = read_geog(ea_files[7], \"lat_modified\",\"lon_modified\")\n", + "# res = read_geog(ea_files[8], \"lat_corrected\", \"lon_corrected\")\n", + "# res = read_geog(ea_files[9], \"lat_corrected\", \"lon_corrected\")\n", "res = read_geog(ea_files[-1], \"latDD_corrected\", \"lonDD_corrected\")" ] }, @@ -190,19 +193,24 @@ "iso3 = \"COG\"\n", "local_path = \"/home/public/Data/COUNTRY/{country}/WORLDPOP/\".format(country=iso3)\n", "constrained_WP_folder = \"/home/public/Data/GLOBAL/Population/RF_SSA_2015-2020\"\n", - "worldPop_2015 = \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt\"\n", - "global_ghspop = \"/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif\" \n", - "c_WP_15 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif'\n", - "c_WP_20 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif'\n", + "worldPop_2015 = (\n", + " \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt\"\n", + ")\n", + "global_ghspop = \"/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif\"\n", + "c_WP_15 = f\"{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif\"\n", + "c_WP_20 = f\"{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif\"\n", "custom_pop = 
\"/home/public/Data/COUNTRY/COG/Population/COG_population_202309271640.tif\"\n", "\n", - "pop_files = [[worldPop_2015, f'{iso3.lower()}_upo15.tif']] \n", - "pop_files.append([global_ghspop, f'{iso3.lower()}_gpo.tif']) \n", - "pop_files.append([c_WP_15, f'{iso3.lower()}_cpo15.tif'])\n", - "pop_files.append([c_WP_20, f'{iso3.lower()}_cpo20.tif'])\n", - "pop_files.append([custom_pop, f'{iso3.lower()}_cpo20_WB.tif'])\n", + "pop_files = [[worldPop_2015, f\"{iso3.lower()}_upo15.tif\"]]\n", + "pop_files.append([global_ghspop, f\"{iso3.lower()}_gpo.tif\"])\n", + "pop_files.append([c_WP_15, f\"{iso3.lower()}_cpo15.tif\"])\n", + "pop_files.append([c_WP_20, f\"{iso3.lower()}_cpo20.tif\"])\n", + "pop_files.append([custom_pop, f\"{iso3.lower()}_cpo20_WB.tif\"])\n", "\n", - "output_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\" % iso3\n", + "output_folder = (\n", + " \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming\"\n", + " % iso3\n", + ")\n", "ea_file = \"/home/public/Data/COUNTRY/COG/Population/ZD_CONGO_CLIP_FIXED.shp\"\n", "db_folder = os.path.join(output_folder, \"DB_Results\", \"SentWB\", \"delineations\")" ] @@ -256,9 +264,15 @@ "source": [ "importlib.reload(nu)\n", "# Calculate urban definitions\n", - "nu.calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge)\n", - "pp_urban = nu.calc_pp_urban(db_folder, \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder)\n", - "pd.DataFrame(pp_urban.drop(['geometry'], axis=1)).to_csv(os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation_admin3.csv\"))" + "nu.calculate_urban(\n", + " iso3, inG, inG2, pop_files, ea_file, output_folder, small=runSmall, km=runLarge\n", + ")\n", + "pp_urban = nu.calc_pp_urban(\n", + " db_folder, \"%s_gpo.tif\" % iso3.lower(), ea_file, output_folder\n", + ")\n", + "pd.DataFrame(pp_urban.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(output_folder, f\"{iso3}_DB_UrbanPopulation_admin3.csv\")\n", + ")" ] }, { @@ -271,20 +285,22 @@ "source": [ "# Calculate Point-based statistics\n", "input_file = os.path.join(output_folder, \"HBS_GPS.csv\")\n", - "pop_tiffs = [\"eth_gpo.tif\", \"eth_upo15.tif\", 'eth_upo16.tif']\n", + "pop_tiffs = [\"eth_gpo.tif\", \"eth_upo15.tif\", \"eth_upo16.tif\"]\n", "all_tiffs = []\n", "base_folder = os.path.join(output_folder, \"FINAL_STANDARD\")\n", "base_folder_1km = os.path.join(output_folder, \"FINAL_STANDARD_1KM\")\n", "for pFile in pop_tiffs:\n", " all_tiffs.append(os.path.join(base_folder, pFile))\n", - " all_tiffs.append(os.path.join(base_folder_1km, pFile.replace(\"eth\", \"eth1k\"))) \n", + " all_tiffs.append(os.path.join(base_folder_1km, pFile.replace(\"eth\", \"eth1k\")))\n", "\n", "# Read in ETH HH locations, clean\n", "inD = pd.read_csv(input_file)\n", - "inD = inD.loc[~inD['latDD_corrected'].isnull()]\n", - "inD = inD.loc[~inD['lonDD_corrected'].isnull()]\n", - "geoms = [Point(row['lonDD_corrected'], row['latDD_corrected']) for idx, row in inD.iterrows()]\n", - "inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={'init':'epsg:4326'})\n", + "inD = inD.loc[~inD[\"latDD_corrected\"].isnull()]\n", + "inD = inD.loc[~inD[\"lonDD_corrected\"].isnull()]\n", + "geoms = [\n", + " Point(row[\"lonDD_corrected\"], row[\"latDD_corrected\"]) for idx, row in inD.iterrows()\n", + "]\n", + "inD = gpd.GeoDataFrame(inD, geometry=geoms, crs={\"init\": \"epsg:4326\"})\n", "# Calculate point urbanization for degree of urbanization\n", "out_file = os.path.join(output_folder, 
f\"{iso3}_DoU_Urban.csv\")\n", "nu.point_urban_summaries(inD, all_tiffs, out_file)\n", @@ -303,17 +319,19 @@ "source": [ "# Run zonal stats\n", "constrained_WP_folder = \"/home/public/Data/GLOBAL/Population/RF_SSA_2015-2020\"\n", - "worldPop_2015 = \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt\"\n", - "global_ghspop = \"/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif\" \n", - "c_WP_15 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif'\n", - "c_WP_20 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif'\n", + "worldPop_2015 = (\n", + " \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt\"\n", + ")\n", + "global_ghspop = \"/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif\"\n", + "c_WP_15 = f\"{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif\"\n", + "c_WP_20 = f\"{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif\"\n", "\n", - "pop_files = [[worldPop_2015, f'{iso3.lower()}_upo15.tif']] \n", - "pop_files.append([global_ghspop, f'{iso3.lower()}_gpo.tif']) \n", - "pop_files.append([c_WP_15, f'{iso3.lower()}_cpo15.tif'])\n", - "pop_files.append([c_WP_20, f'{iso3.lower()}_cpo20.tif'])\n", - " \n", - "nu.run_zonal(iso3, output_folder, inG, pop_files, ea_file, '')" + "pop_files = [[worldPop_2015, f\"{iso3.lower()}_upo15.tif\"]]\n", + "pop_files.append([global_ghspop, f\"{iso3.lower()}_gpo.tif\"])\n", + "pop_files.append([c_WP_15, f\"{iso3.lower()}_cpo15.tif\"])\n", + "pop_files.append([c_WP_20, f\"{iso3.lower()}_cpo20.tif\"])\n", + "\n", + "nu.run_zonal(iso3, output_folder, inG, pop_files, ea_file, \"\")" ] }, { @@ -331,26 +349,35 @@ }, "outputs": [], "source": [ - "countries = {'AGO':'angola','BGD':'bangladesh','EGY':'egypt','ETH':'ethiopia',\n", - " 'GHA':'ghana','TZA':'tanzania','VNM':'vietnam'}\n", + "countries = {\n", + " \"AGO\": \"angola\",\n", + " \"BGD\": \"bangladesh\",\n", + " \"EGY\": \"egypt\",\n", + " \"ETH\": \"ethiopia\",\n", + " \"GHA\": \"ghana\",\n", + " \"TZA\": \"tanzania\",\n", + " \"VNM\": \"vietnam\",\n", + "}\n", "for iso3 in countries.keys():\n", " out_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_Data\"\n", - " data_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming/\" % iso3\n", + " data_folder = (\n", + " \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/%s_URBAN_DATA_new_naming/\"\n", + " % iso3\n", + " )\n", " dou_folder = os.path.join(data_folder, \"FINAL_STANDARD\")\n", - " db_folder = os.path.join(data_folder, countries[iso3])\n", - " \n", - " dou_urban = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban.tif')\n", - " dou_urban_hd = os.path.join(dou_folder, f'{iso3.lower()}_upo15_urban_hd.tif')\n", - " \n", + " db_folder = os.path.join(data_folder, countries[iso3])\n", + "\n", + " dou_urban = os.path.join(dou_folder, f\"{iso3.lower()}_upo15_urban.tif\")\n", + " dou_urban_hd = os.path.join(dou_folder, f\"{iso3.lower()}_upo15_urban_hd.tif\")\n", + "\n", " db_urban_cc = os.path.join(db_folder, f\"{iso3.lower()}_upo15d20b2000_cc.tif\")\n", " db_urban_co = os.path.join(db_folder, f\"{iso3.lower()}_upo15d20b2000_co.tif\")\n", " db_urban_ur = os.path.join(db_folder, f\"{iso3.lower()}_upo15d20b2000_ur.tif\")\n", - " \n", + "\n", " for uFile in [dou_urban, dou_urban_hd, db_urban_cc, db_urban_co, db_urban_ur]:\n", - " print(f'{iso3}: {os.path.exists(uFile)}')\n", + " print(f\"{iso3}: {os.path.exists(uFile)}\")\n", " out_file = 
os.path.join(out_folder, os.path.basename(uFile))\n", - " shutil.copy(uFile, out_file)\n", - " " + " shutil.copy(uFile, out_file)" ] }, { @@ -373,19 +400,23 @@ "out_folder = os.path.join(in_folder, \"URBAN_ZONAL_RESULTS_EAs\")\n", "if not os.path.exists(out_folder):\n", " os.makedirs(out_folder)\n", - " \n", + "\n", "for root, dirs, files in os.walk(in_folder):\n", - " if \"URBAN_DATA_new_naming\" in root: \n", - " country = os.path.basename(root).split(\"_\")[0] \n", + " if \"URBAN_DATA_new_naming\" in root:\n", + " country = os.path.basename(root).split(\"_\")[0]\n", " if country in nu.EA_DEFS.keys():\n", " for f in files:\n", - " if (\"EA_PP_URBAN_Updated\" in f) | (\"EA_WB_URBAN_\" in f) | (\"HH_GPS\" in f):\n", - " fName = pathlib.Path(os.path.join(root, f)) \n", + " if (\n", + " (\"EA_PP_URBAN_Updated\" in f)\n", + " | (\"EA_WB_URBAN_\" in f)\n", + " | (\"HH_GPS\" in f)\n", + " ):\n", + " fName = pathlib.Path(os.path.join(root, f))\n", " date = datetime.fromtimestamp(fName.stat().st_mtime)\n", - " if datetime(2021,6,1) < date:\n", - " print(f'{country}: {f} - {date}') \n", + " if datetime(2021, 6, 1) < date:\n", + " print(f\"{country}: {f} - {date}\")\n", " else:\n", - " print(f'***OLD: {country}: {f} - {date}') \n", + " print(f\"***OLD: {country}: {f} - {date}\")\n", " shutil.copy(os.path.join(root, f), os.path.join(out_folder, f))" ] }, @@ -401,19 +432,23 @@ "out_folder = os.path.join(in_folder, \"URBAN_ZONAL_RESULTS\")\n", "if not os.path.exists(out_folder):\n", " os.makedirs(out_folder)\n", - " \n", + "\n", "for root, dirs, files in os.walk(in_folder):\n", - " if \"URBAN_DATA_new_naming\" in root: \n", - " country = os.path.basename(root).split(\"_\")[0] \n", + " if \"URBAN_DATA_new_naming\" in root:\n", + " country = os.path.basename(root).split(\"_\")[0]\n", " if country in nu.EA_DEFS.keys():\n", " for f in files:\n", - " if (\"EA_PP_URBAN_Updated\" in f) | (\"EA_WB_URBAN_\" in f) | (\"HH_GPS\" in f):\n", - " fName = pathlib.Path(os.path.join(root, f)) \n", + " if (\n", + " (\"EA_PP_URBAN_Updated\" in f)\n", + " | (\"EA_WB_URBAN_\" in f)\n", + " | (\"HH_GPS\" in f)\n", + " ):\n", + " fName = pathlib.Path(os.path.join(root, f))\n", " date = datetime.fromtimestamp(fName.stat().st_mtime)\n", - " if datetime(2021,6,1) < date:\n", - " print(f'{country}: {f} - {date}') \n", + " if datetime(2021, 6, 1) < date:\n", + " print(f\"{country}: {f} - {date}\")\n", " else:\n", - " print(f'***OLD: {country}: {f} - {date}') \n", + " print(f\"***OLD: {country}: {f} - {date}\")\n", " shutil.copy(os.path.join(root, f), os.path.join(out_folder, f))" ] }, @@ -423,7 +458,7 @@ "metadata": {}, "outputs": [], "source": [ - "datetime(2021,6,1)" + "datetime(2021, 6, 1)" ] }, { @@ -436,12 +471,12 @@ "source": [ "# Delete all zonal stats\n", "for root, dirs, files in os.walk(in_folder):\n", - " if \"URBAN_DATA_new_naming\" in root: \n", - " country = os.path.basename(root).split(\"_\")[0] \n", + " if \"URBAN_DATA_new_naming\" in root:\n", + " country = os.path.basename(root).split(\"_\")[0]\n", " if country in nu.EA_DEFS.keys():\n", " for f in files:\n", " if (\"URBAN_COMMUNE_STATS\" in f) | (\"URBAN_ADMIN2\" in f):\n", - " print(f'{country}: {f}')\n", + " print(f\"{country}: {f}\")\n", " os.remove(os.path.join(root, f))" ] }, @@ -471,13 +506,13 @@ "source": [ "base_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/{ISO3}_URBAN_DATA_new_naming\"\n", "country_name = \"Angola\"\n", - "iso3 = 'AGO'\n", + "iso3 = \"AGO\"\n", "in_folder = base_folder.format(ISO3=iso3)\n", - "out_dir = 
os.path.join(in_folder, 'metadata')\n", + "out_dir = os.path.join(in_folder, \"metadata\")\n", "\n", "make_meta = meta.metadata_gost(in_folder, out_dir)\n", "layers = make_meta.get_layers()\n", - "metadata = make_meta.generate_metadata()\n" + "metadata = make_meta.generate_metadata()" ] }, { @@ -486,7 +521,9 @@ "metadata": {}, "outputs": [], "source": [ - "layer_info['layer_name'] = [p.replace(\"lso\", iso3.lower()) for p in layer_info['layer_name']]" + "layer_info[\"layer_name\"] = [\n", + " p.replace(\"lso\", iso3.lower()) for p in layer_info[\"layer_name\"]\n", + "]" ] }, { @@ -497,7 +534,17 @@ }, "outputs": [], "source": [ - "sel_info = layer_info.loc[:,['layer_name', 'layer_label','description','source_name','source_url','data_process_summary']]\n", + "sel_info = layer_info.loc[\n", + " :,\n", + " [\n", + " \"layer_name\",\n", + " \"layer_label\",\n", + " \"description\",\n", + " \"source_name\",\n", + " \"source_url\",\n", + " \"data_process_summary\",\n", + " ],\n", + "]\n", "sel_info" ] }, @@ -507,9 +554,20 @@ "metadata": {}, "outputs": [], "source": [ - "final_meta = metadata['metadata']\n", - "final_meta = final_meta.loc[:,~final_meta.columns.isin(['layer_label','description','source_name','source_url','data_process_summary'])]\n", - "final_meta.merge(sel_info, on='layer_name')" + "final_meta = metadata[\"metadata\"]\n", + "final_meta = final_meta.loc[\n", + " :,\n", + " ~final_meta.columns.isin(\n", + " [\n", + " \"layer_label\",\n", + " \"description\",\n", + " \"source_name\",\n", + " \"source_url\",\n", + " \"data_process_summary\",\n", + " ]\n", + " ),\n", + "]\n", + "final_meta.merge(sel_info, on=\"layer_name\")" ] }, { @@ -518,17 +576,19 @@ "metadata": {}, "outputs": [], "source": [ - "make_meta.write_metadata(os.path.join(out_dir, f\"{iso3}_novel_urbanization_metadata.xlsx\"), \n", - " layer_metadata = final_meta, field_metadata = metadata['fields'],\n", - " dataset_id = dataset_info.Definition[0].format(ISO3=iso3, Country=country_name),\n", - " dataset_title = dataset_info.Definition[1].format(ISO3=iso3, Country=country_name),\n", - " country = dataset_info.Definition[2].format(ISO3=iso3, Country=country_name),\n", - " abstract = dataset_info.Definition[3].format(ISO3=iso3, Country=country_name),\n", - " purpose = dataset_info.Definition[4].format(ISO3=iso3, Country=country_name),\n", - " creation_date = datetime.today().strftime('%Y-%m-%d'),\n", - " release_date = datetime.today().strftime('%Y-%m-%d'),\n", - " owner = dataset_info.Definition[7].format(ISO3=iso3, Country=country_name),\n", - " email = dataset_info.Definition[8].format(ISO3=iso3, Country=country_name),\n", + "make_meta.write_metadata(\n", + " os.path.join(out_dir, f\"{iso3}_novel_urbanization_metadata.xlsx\"),\n", + " layer_metadata=final_meta,\n", + " field_metadata=metadata[\"fields\"],\n", + " dataset_id=dataset_info.Definition[0].format(ISO3=iso3, Country=country_name),\n", + " dataset_title=dataset_info.Definition[1].format(ISO3=iso3, Country=country_name),\n", + " country=dataset_info.Definition[2].format(ISO3=iso3, Country=country_name),\n", + " abstract=dataset_info.Definition[3].format(ISO3=iso3, Country=country_name),\n", + " purpose=dataset_info.Definition[4].format(ISO3=iso3, Country=country_name),\n", + " creation_date=datetime.today().strftime(\"%Y-%m-%d\"),\n", + " release_date=datetime.today().strftime(\"%Y-%m-%d\"),\n", + " owner=dataset_info.Definition[7].format(ISO3=iso3, Country=country_name),\n", + " email=dataset_info.Definition[8].format(ISO3=iso3, Country=country_name),\n", ")" ] 
}, @@ -560,9 +620,16 @@ " for d in dirs:\n", " if (d == \"FINAL_STANDARD\") or (d == \"FINAL_STANDARD_1KM\"):\n", " cur_dir = os.path.join(root, d)\n", - " print(\"zip -r {out_file} {infolder}\".format(\n", - " out_file = \"%s_%s.zip\" % (cur_dir.split(\"/\")[-2].split(\"_\")[0], cur_dir.split(\"_\")[-1]),\n", - " infolder = os.path.join(os.path.basename(os.path.dirname(cur_dir)), os.path.basename(cur_dir))))" + " print(\n", + " \"zip -r {out_file} {infolder}\".format(\n", + " out_file=\"%s_%s.zip\"\n", + " % (cur_dir.split(\"/\")[-2].split(\"_\")[0], cur_dir.split(\"_\")[-1]),\n", + " infolder=os.path.join(\n", + " os.path.basename(os.path.dirname(cur_dir)),\n", + " os.path.basename(cur_dir),\n", + " ),\n", + " )\n", + " )" ] }, { @@ -579,12 +646,12 @@ "outputs": [], "source": [ "# there is an error in scaling a new population dataset; testing out why\n", - "pop_raster = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/COG_URBAN_DATA_new_naming/cog_cpo20_WB.tif\"\n", - "template_raster = '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/COG_URBAN_DATA_new_naming/FINAL_STANDARD/cog_gpo.tif'\n", + "pop_raster = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/COG_URBAN_DATA_new_naming/cog_cpo20_WB.tif\"\n", + "template_raster = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/COG_URBAN_DATA_new_naming/FINAL_STANDARD/cog_gpo.tif\"\n", "\n", "in_raster = rasterio.open(pop_raster)\n", "in_r = in_raster.read()\n", - "in_r[in_r == in_raster.meta['nodata']] = 0\n", + "in_r[in_r == in_raster.meta[\"nodata\"]] = 0\n", "\n", "ghs_R = rasterio.open(template_raster)\n", "out_array = np.zeros(ghs_R.shape)" @@ -607,7 +674,7 @@ } ], "source": [ - "in_r[0,0,0] == in_raster.meta['nodata']" + "in_r[0, 0, 0] == in_raster.meta[\"nodata\"]" ] }, { @@ -627,7 +694,7 @@ } ], "source": [ - "in_r[0,0,0].__class__" + "in_r[0, 0, 0].__class__" ] }, { @@ -653,7 +720,7 @@ } ], "source": [ - "temp_nodata = type(in_r[0,0,0])(in_raster.meta['nodata'])\n", + "temp_nodata = type(in_r[0, 0, 0])(in_raster.meta[\"nodata\"])\n", "in_r == temp_nodata" ] }, @@ -680,7 +747,7 @@ } ], "source": [ - "in_r == in_raster.meta['nodata']" + "in_r == in_raster.meta[\"nodata\"]" ] }, { @@ -689,15 +756,20 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "#in_r[in_r < 0] = 0\n", + "# in_r[in_r < 0] = 0\n", "rSample = rasterio.warp.Resampling.bilinear\n", - "rasterio.warp.reproject(in_r, out_array, \n", - " src_transform=in_raster.meta['transform'], dst_transform=ghs_R.meta['transform'],\n", - " src_crs = in_raster.crs, dst_crs = ghs_R.crs,\n", - " src_nodata = in_raster.meta['nodata'], dst_nodata = ghs_R.meta['nodata'],\n", - " resampling = rSample)\n", - "out_array[out_array == ghs_R.meta['nodata']] = 0.\n" + "rasterio.warp.reproject(\n", + " in_r,\n", + " out_array,\n", + " src_transform=in_raster.meta[\"transform\"],\n", + " dst_transform=ghs_R.meta[\"transform\"],\n", + " src_crs=in_raster.crs,\n", + " dst_crs=ghs_R.crs,\n", + " src_nodata=in_raster.meta[\"nodata\"],\n", + " dst_nodata=ghs_R.meta[\"nodata\"],\n", + " resampling=rSample,\n", + ")\n", + "out_array[out_array == ghs_R.meta[\"nodata\"]] = 0.0" ] }, { @@ -707,7 +779,7 @@ "outputs": [], "source": [ "out_array_sum = out_array.sum()\n", - "original_sum = in_r.sum()\n" + "original_sum = in_r.sum()" ] }, { @@ -729,7 +801,7 @@ "total_ratio = original_sum / out_array_sum\n", "\n", "out_array = out_array * total_ratio\n", - "out_array[out_array < 0] = ghs_R.meta['nodata']" + "out_array[out_array < 0] = ghs_R.meta[\"nodata\"]" ] }, { @@ -823,7 
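The cells above track down why rescaling this population raster misbehaves: comparing the float32 array to `in_raster.meta["nodata"]`, a Python float, can match nothing when the metadata value is not exactly representable in float32, so nodata cells survive into the reprojection and ratio adjustment. Casting the nodata value to the array's dtype before comparing, as the notebook ends up doing, or comparing with a tolerance avoids this; a minimal illustration with a made-up nodata value:

```python
import numpy as np

nodata = -3.4e38  # nodata as it might be stored in raster metadata (float64)
arr = np.full((2, 2), nodata, dtype=np.float32)

print((arr == nodata).any())              # False: float32 values != float64 literal
print((arr == np.float32(nodata)).any())  # True: compare in the array's dtype
print(np.isclose(arr, nodata).any())      # True: tolerance-based comparison
```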
+895,7 @@ } ], "source": [ - "in_r == float(in_raster.meta['nodata'])" + "in_r == float(in_raster.meta[\"nodata\"])" ] }, { @@ -843,7 +915,7 @@ } ], "source": [ - "in_raster.meta['nodata'].__class__" + "in_raster.meta[\"nodata\"].__class__" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/WBGAPI_Extract_urbanization_GDP.ipynb b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/WBGAPI_Extract_urbanization_GDP.ipynb index 160f004..bba019c 100644 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/WBGAPI_Extract_urbanization_GDP.ipynb +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/WBGAPI_Extract_urbanization_GDP.ipynb @@ -6,13 +6,12 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, json\n", - "import rasterio\n", + "import os\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", "\n", - "import wbgapi as wb # https://blogs.worldbank.org/opendata/introducing-wbgapi-new-python-package-accessing-world-bank-data" + "import wbgapi as wb # https://blogs.worldbank.org/opendata/introducing-wbgapi-new-python-package-accessing-world-bank-data" ] }, { @@ -971,7 +970,7 @@ "# Define input data\n", "in_admin = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", "inA = gpd.read_file(in_admin)\n", - "ssa = inA.loc[inA['Region'] == 'Sub-Saharan Africa']\n", + "ssa = inA.loc[inA[\"Region\"] == \"Sub-Saharan Africa\"]\n", "ssa" ] }, @@ -1061,7 +1060,7 @@ ], "source": [ "# Identify indicators related to GDP\n", - "wb.series.info(q='gdp')" + "wb.series.info(q=\"gdp\")" ] }, { @@ -1131,7 +1130,7 @@ } ], "source": [ - "wb.series.info(q='urban')" + "wb.series.info(q=\"urban\")" ] }, { @@ -1190,10 +1189,10 @@ ], "source": [ "# These are the datasets we are interested in extracting\n", - "selected_indicators = ['NY.GDP.MKTP.CD','SP.URB.TOTL','SP.URB.TOTL.IN.ZS']\n", - "urb_data = wb.data.DataFrame(selected_indicators, economy=ssa['ISO3'].values, mrnev=1)\n", - "urb_data = urb_data.reset_index()#.drop(['index'], axis=1)\n", - "urb_data.loc[urb_data['economy'] == 'SYC']" + "selected_indicators = [\"NY.GDP.MKTP.CD\", \"SP.URB.TOTL\", \"SP.URB.TOTL.IN.ZS\"]\n", + "urb_data = wb.data.DataFrame(selected_indicators, economy=ssa[\"ISO3\"].values, mrnev=1)\n", + "urb_data = urb_data.reset_index() # .drop(['index'], axis=1)\n", + "urb_data.loc[urb_data[\"economy\"] == \"SYC\"]" ] }, { @@ -1224,8 +1223,8 @@ }, "outputs": [], "source": [ - "urb_data.columns = ['ISO3','GDP','UrubPop','UrbPercent']\n", - "urb_data.to_csv('SSA_countries_GDP_Urbanization.csv')" + "urb_data.columns = [\"ISO3\", \"GDP\", \"UrubPop\", \"UrbPercent\"]\n", + "urb_data.to_csv(\"SSA_countries_GDP_Urbanization.csv\")" ] }, { @@ -1283,7 +1282,7 @@ } ], "source": [ - "urb_data.loc[urb_data['economy'] == 'SYC']" + "urb_data.loc[urb_data[\"economy\"] == \"SYC\"]" ] }, { @@ -1308,24 +1307,29 @@ "metadata": {}, "outputs": [], "source": [ - "pp_folder = '/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_SUMMARIES'\n", + "pp_folder = \"/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_SUMMARIES\"\n", "excel_files = [x for x in os.listdir(pp_folder) if x.endswith(\"xlsx\")]\n", "for excel_file in excel_files:\n", " curD = pd.read_excel(os.path.join(pp_folder, excel_file))\n", " cols = list(curD.columns)\n", - " cols[0] = 'ISO3'\n", - " cols[1] = 'country'\n", - " cols[2] = 'Urban_Type'\n", + " cols[0] = \"ISO3\"\n", + " cols[1] = \"country\"\n", + " cols[2] = \"Urban_Type\"\n", " curD.columns = cols\n", - " curD['ISO3'] = curD['ISO3'].apply(lambda x: 
x.upper())\n", - " curG = pd.merge(ssa, curD, on='ISO3')\n", - " curG = pd.merge(curG, urb_data, on='ISO3')\n", - " curG = gpd.GeoDataFrame(curG, geometry='geometry', crs=4326)\n", - " curG.to_file(os.path.join(pp_folder, excel_file.replace(\".xlsx\", '.geojson')), driver=\"GeoJSON\")\n", + " curD[\"ISO3\"] = curD[\"ISO3\"].apply(lambda x: x.upper())\n", + " curG = pd.merge(ssa, curD, on=\"ISO3\")\n", + " curG = pd.merge(curG, urb_data, on=\"ISO3\")\n", + " curG = gpd.GeoDataFrame(curG, geometry=\"geometry\", crs=4326)\n", + " curG.to_file(\n", + " os.path.join(pp_folder, excel_file.replace(\".xlsx\", \".geojson\")),\n", + " driver=\"GeoJSON\",\n", + " )\n", " # Create point file as well\n", - " curG['geometry'] = curG['geometry'].apply(lambda x: x.centroid)\n", - " curG.to_file(os.path.join(pp_folder, excel_file.replace(\".xlsx\", '_CENTROID.geojson')), driver=\"GeoJSON\")\n", - " " + " curG[\"geometry\"] = curG[\"geometry\"].apply(lambda x: x.centroid)\n", + " curG.to_file(\n", + " os.path.join(pp_folder, excel_file.replace(\".xlsx\", \"_CENTROID.geojson\")),\n", + " driver=\"GeoJSON\",\n", + " )" ] }, { @@ -1334,7 +1338,9 @@ "metadata": {}, "outputs": [], "source": [ - "pd.DataFrame(curG).drop(['geometry'], axis=1).to_csv(os.path.join(pp_folder, excel_file.replace(\".xlsx\", '_joined.csv')))" + "pd.DataFrame(curG).drop([\"geometry\"], axis=1).to_csv(\n", + " os.path.join(pp_folder, excel_file.replace(\".xlsx\", \"_joined.csv\"))\n", + ")" ] }, { diff --git a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py index 0cccb14..0d59808 100755 --- a/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py +++ b/notebooks/Implementations/URB_SEAU1_NovelUrbanization/novelUrbanization.py @@ -1,72 +1,77 @@ -import sys, os, importlib, shutil, multiprocessing -import requests -import rasterio, elevation, richdem +import sys +import os +import importlib +import multiprocessing +import rasterio import rasterio.warp -from rasterio import features import pandas as pd import geopandas as gpd import numpy as np -from shapely.geometry import MultiPolygon, Polygon, box, Point -#Import raster helpers +# Import raster helpers import GOSTRocks.rasterMisc as rMisc from GOSTRocks.misc import tPrint -#Import GOST urban functions +# Import GOST urban functions sys.path.append("../../../src") -import GOST_Urban.UrbanRaster as urban import GOST_Urban.urban_helper as helper importlib.reload(helper) importlib.reload(rMisc) + class urban_data(object): def __init__(self, iso3, base_folder, aapc_folder): - ''' Summarize completed urbanization layers; combine into single output - ''' + """Summarize completed urbanization layers; combine into single output""" self.iso3 = iso3 self.in_folder = base_folder self.aapc_folder = aapc_folder - self.dou_urban_files, self.db_urban_files = self.get_urban_layers() - + self.dou_urban_files, self.db_urban_files = self.get_urban_layers() + def get_urban_layers(self): - ''' get a list of all urban deleniations - + """get a list of all urban deleniations + INPUT aapc_folder [string] - folder containing dartboard deleniations - + RETURNS [list of strings] - ''' + """ db_urban_files = [] for root, dirs, files in os.walk(self.in_folder): for f in files: - if "urban" in f and f.endswith('tif'): + if "urban" in f and f.endswith("tif"): db_urban_files.append(os.path.join(root, f)) - + dou_urban_files = [] for root, dirs, files in os.walk(self.aapc_folder): for f in files: - if 
self.iso3.lower() in f and f.endswith('tif'): - if f[-6:-4] in ['co','cc','ur']: + if self.iso3.lower() in f and f.endswith("tif"): + if f[-6:-4] in ["co", "cc", "ur"]: dou_urban_files.append(os.path.join(root, f)) db_urban_files.sort() - dou_urban_files.sort() - return([db_urban_files, dou_urban_files]) - - def jaccard_index(self, pop_type='gpo', res='', - dou_urb = "_urban.tif", dou_hd = "_hd.tif", - db_urb = "_ur.tif", db_hd = "_co.tif"): - ''' Calculate the Jaccard index comparing urban and then hd urban layers - https://www.statisticshowto.com/jaccard-index/ - ''' + dou_urban_files.sort() + return [db_urban_files, dou_urban_files] + + def jaccard_index( + self, + pop_type="gpo", + res="", + dou_urb="_urban.tif", + dou_hd="_hd.tif", + db_urb="_ur.tif", + db_hd="_co.tif", + ): + """Calculate the Jaccard index comparing urban and then hd urban layers + https://www.statisticshowto.com/jaccard-index/ + """ if pop_type.__class__ == str: sel_rasters = self.get_rasters(pop_type, pop_type, res) else: sel_rasters = self.get_rasters(pop_type[0], pop_type[1], res) - for f in sel_rasters: + for f in sel_rasters: if f.endswith(dou_urb): dou_urb_file = f if f.endswith(dou_hd): @@ -75,7 +80,7 @@ def jaccard_index(self, pop_type='gpo', res='', db_urb_file = f if f.endswith(db_hd): db_hd_file = f - + # open all data files dou_urb_r = rasterio.open(dou_urb_file) dou_urb_d = dou_urb_r.read() @@ -87,57 +92,56 @@ def jaccard_index(self, pop_type='gpo', res='', db_hd_r = rasterio.open(db_hd_file) db_hd_d = db_hd_r.read() db_hd_d = (db_hd_d > 0) * 1 - + def calculate_jaccard(inD1, inD2): - #Calculate urban jaccard + # Calculate urban jaccard jaccardD = inD1 + inD2 xx = np.unique(jaccardD, return_counts=True) outDict = {} for itemIdx in range(0, len(xx[0])): outDict[xx[0][itemIdx]] = xx[1][itemIdx] jIdx = outDict[2] / float(outDict[2] + outDict[1]) - return(jIdx) - + return jIdx + urb_jaccard = calculate_jaccard(dou_urb_d, db_urb_d) hd_jaccard = calculate_jaccard(dou_hd_d, db_hd_d) - return({'urb_jaccard': urb_jaccard, 'hd_jaccard':hd_jaccard}) - - def get_rasters(self, pop_type_dou='gpo', pop_type_db='gpo', res=''): - ''' filter rasters based on pop_type and resolution - ''' + return {"urb_jaccard": urb_jaccard, "hd_jaccard": hd_jaccard} + + def get_rasters(self, pop_type_dou="gpo", pop_type_db="gpo", res=""): + """filter rasters based on pop_type and resolution""" sel_rasters = [] for f in self.dou_urban_files: if pop_type_dou in f: - if res == '': - if not '1k' in f: + if res == "": + if "1k" not in f: sel_rasters.append(f) elif res in f: sel_rasters.append(f) - + for f in self.db_urban_files: if pop_type_db in f: - if res == '': - if not '1k' in f: + if res == "": + if "1k" not in f: sel_rasters.append(f) elif res in f: sel_rasters.append(f) - return(sel_rasters) - - def generate_combo_layer(self, pop_type='gpo', res='', debug=False): - ''' open urban rasters and combine into a single dataset - - INPUT + return sel_rasters + + def generate_combo_layer(self, pop_type="gpo", res="", debug=False): + """open urban rasters and combine into a single dataset + + INPUT pop_type [string or tuple of strings] - ''' + """ if pop_type.__class__ == str: sel_rasters = self.get_rasters(pop_type, pop_type, res) else: sel_rasters = self.get_rasters(pop_type[0], pop_type[1], res) - + if debug: for p in sel_rasters: print(p) - + if len(sel_rasters) > 0: # Open all the ratser files and covert to pixel-level summary numbers idx = 0 @@ -153,79 +157,90 @@ def generate_combo_layer(self, pop_type='gpo', res='', debug=False): else: 
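`calculate_jaccard` above derives the Jaccard index from the pixel-wise sum of the two binary urban masks: cells equal to 2 mark agreement on urban, cells equal to 1 mark disagreement, and the index is the count of 2s divided by the count of 1s plus 2s. The same intersection-over-union can be written directly; a tiny worked example on made-up masks:

```python
import numpy as np

# Made-up binary urban masks (1 = urban, 0 = not urban)
dou = np.array([[1, 1, 0], [0, 1, 0], [0, 0, 0]])
db = np.array([[1, 0, 0], [0, 1, 1], [0, 0, 0]])

both = dou + db
intersection = (both == 2).sum()  # urban in both masks -> 2 cells
union = (both >= 1).sum()         # urban in either mask -> 4 cells
print(intersection / union)       # 0.5, same as count(2) / (count(1) + count(2))
```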
sumFinal = sumFinal + sumD binFinal = binFinal + binD - idx +=1 + idx += 1 pro = curR.profile - pro.update(dtype='int32') - res = { 'sumD':sumFinal, 'profile':pro, - 'binD':binFinal} - return(res) + pro.update(dtype="int32") + res = {"sumD": sumFinal, "profile": pro, "binD": binFinal} + return res else: - return(None) - - def write_results(self, res, out_folder, reclass_bin=True, dbhd = 'co'): - ''' Write the results from the function generate_combo_layer to file - + return None + + def write_results(self, res, out_folder, reclass_bin=True, dbhd="co"): + """Write the results from the function generate_combo_layer to file + INPUT res [dictionary] - results from function generate_combo_layer out_folder [string] - path to directory to create output tif files - [optional] reclass_bin [boolean: default True] - reclassify the binary map product into + [optional] reclass_bin [boolean: default True] - reclassify the binary map product into 4 classes: agree urban, agree rural, disagree on urban class, disagree on rurality - ''' + """ out_sum_file = os.path.join(out_folder, f"{self.iso3}_urban_sum_{dbhd}.tif") out_bin_file = os.path.join(out_folder, f"{self.iso3}_urban_binary_{dbhd}.tif") - + if reclass_bin: # DB UR, DB CO, DB CC, DOU HD, DOU UR convert_dict_dbcc = { - 0:0, - 1:1, # Disagree rural DOU is urban - 10:1, - 11:1, - 10000:2, # Disagree rural DB is urban - 10001:3, # Agree urban - 10010:4, # Disagree class - 10011:4, - 10100:1, - 10101:5, - 10110:6, # Agree High density urban - 10111:6, - 11100:2, - 11101:5, - 11110:6, - 11111:6 - } + 0: 0, + 1: 1, # Disagree rural DOU is urban + 10: 1, + 11: 1, + 10000: 2, # Disagree rural DB is urban + 10001: 3, # Agree urban + 10010: 4, # Disagree class + 10011: 4, + 10100: 1, + 10101: 5, + 10110: 6, # Agree High density urban + 10111: 6, + 11100: 2, + 11101: 5, + 11110: 6, + 11111: 6, + } convert_dict_dbco = { - 0:0, - 1:1, - 10:1, - 11:1, - 10000:2, - 10001:3, - 10010:4, - 10011:4, - 10100:1, - 10101:3, - 10110:4, - 10111:4, - 11100:2, - 11101:5, - 11110:6, - 11111:6 - } - if dbhd == 'co': + 0: 0, + 1: 1, + 10: 1, + 11: 1, + 10000: 2, + 10001: 3, + 10010: 4, + 10011: 4, + 10100: 1, + 10101: 3, + 10110: 4, + 10111: 4, + 11100: 2, + 11101: 5, + 11110: 6, + 11111: 6, + } + if dbhd == "co": sel_dict = convert_dict_dbco else: sel_dict = convert_dict_dbcc - res['binD'] = np.vectorize(sel_dict.get)(res['binD']) + res["binD"] = np.vectorize(sel_dict.get)(res["binD"]) if not os.path.exists(out_folder): os.makedirs(out_folder) - with rasterio.open(out_sum_file, 'w', **res['profile']) as outSum: - outSum.write(res['sumD']) - with rasterio.open(out_bin_file, 'w', **res['profile']) as outBin: - outBin.write(res['binD']) - -def calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, small=True, include_ghsl_h20=True, evaluate=False): - global_landcover = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif" + with rasterio.open(out_sum_file, "w", **res["profile"]) as outSum: + outSum.write(res["sumD"]) + with rasterio.open(out_bin_file, "w", **res["profile"]) as outBin: + outBin.write(res["binD"]) + + +def calculate_urban( + iso3, + inG, + inG2, + pop_files, + ea_file, + output_folder, + km=True, + small=True, + include_ghsl_h20=True, + evaluate=False, +): + global_landcover = "/home/public/Data/GLOBAL/LANDCOVER/GLOBCOVER/2015/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif" global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif" global_ghspop_1k = 
"/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif" global_ghbuilt = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_1K_BUILT/GHS_BUILT_LDS2014_GLOBE_R2018A_54009_1K_V1_0.tif" @@ -233,26 +248,47 @@ def calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, ghs_smod = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_SMOD/GHS_SMOD_E2020_GLOBE_R2022A_54009_1000_V1_0.tif" ghsl_vrt = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt" - admin2_250_stats = os.path.join(output_folder, f"{iso3}_URBAN_ADMIN2_STATS_COMPILED.csv") - commune_250_stats = os.path.join(output_folder, f"{iso3}_URBAN_COMMUNE_STATS_COMPILED.csv") - admin2_1k_stats = os.path.join(output_folder, f"{iso3}_URBAN_ADMIN2_STATS_COMPILED_1k.csv") - commune_1k_stats = os.path.join(output_folder, f"{iso3}_URBAN_COMMUNE_STATS_COMPILED_1k.csv") - - inD = inG.loc[inG['ISO3'] == iso3] - inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500)) - inD = inD.to_crs('epsg:4326') - - inD2 = inG2.loc[inG2['ISO3'] == iso3] - inD2 = inD2.to_crs('epsg:4326') - + admin2_250_stats = os.path.join( + output_folder, f"{iso3}_URBAN_ADMIN2_STATS_COMPILED.csv" + ) + commune_250_stats = os.path.join( + output_folder, f"{iso3}_URBAN_COMMUNE_STATS_COMPILED.csv" + ) + admin2_1k_stats = os.path.join( + output_folder, f"{iso3}_URBAN_ADMIN2_STATS_COMPILED_1k.csv" + ) + commune_1k_stats = os.path.join( + output_folder, f"{iso3}_URBAN_COMMUNE_STATS_COMPILED_1k.csv" + ) + + inD = inG.loc[inG["ISO3"] == iso3] + inD["geometry"] = inD["geometry"].apply(lambda x: x.buffer(500)) + inD = inD.to_crs("epsg:4326") + + inD2 = inG2.loc[inG2["ISO3"] == iso3] + inD2 = inD2.to_crs("epsg:4326") + ### Process 1km data if km: - xx = helper.urban_country(iso3, output_folder, inD, pop_files, - final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1k") - adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv") - ea_res = os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv") + xx = helper.urban_country( + iso3, + output_folder, + inD, + pop_files, + final_folder="FINAL_STANDARD_1KM", + ghspop_suffix="1k", + ) + adm2_res = os.path.join(xx.final_folder, "URBAN_ADMIN2_STATS_COMPILED.csv") + ea_res = os.path.join(xx.final_folder, "URBAN_COMMUNE_STATS_COMPILED.csv") tPrint(f"{iso3} ***1k Extracting Global Layers") - xx.extract_layers(global_landcover, global_ghspop, global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod) + xx.extract_layers( + global_landcover, + global_ghspop, + global_ghspop_1k, + global_ghbuilt, + ghsl_vrt, + ghs_smod, + ) tPrint(f"{iso3} ***1k Downloading and processing elevation") xx.process_dem(global_dem=global_dem_1k) tPrint(f"{iso3} ***1k Standardizing rasters") @@ -263,20 +299,27 @@ def calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, if not os.path.exists(admin2_1k_stats): zonal_adm2 = xx.pop_zonal_admin(inD2) zonal_adm2.to_csv(admin2_1k_stats) - tPrint(f"{iso3} ***1k Calculating Zonal communes") + tPrint(f"{iso3} ***1k Calculating Zonal communes") if os.path.exists(ea_file): inEA = gpd.read_file(ea_file) zonal_ea = xx.pop_zonal_admin(inEA) zonal_ea.to_csv(commune_1k_stats) if evaluate: tPrint(f"{iso3} ***1k Evaluating Data") - xx.evaluateOutput(admin2_1k_stats, commune_1k_stats) - - ### Process 250m data + xx.evaluateOutput(admin2_1k_stats, commune_1k_stats) + + ### Process 250m data if small: xx = helper.urban_country(iso3, output_folder, inD, pop_files) tPrint(f"{iso3} ***** Extracting Global Layers %s" % iso3) - xx.extract_layers(global_landcover, global_ghspop, 
global_ghspop_1k, global_ghbuilt, ghsl_vrt, ghs_smod) + xx.extract_layers( + global_landcover, + global_ghspop, + global_ghspop_1k, + global_ghbuilt, + ghsl_vrt, + ghs_smod, + ) tPrint(f"{iso3} ***** Downloading and processing elevation %s" % iso3) xx.process_dem(global_dem=global_dem_1k) tPrint(f"{iso3} ***** Standardizing rasters") @@ -287,43 +330,47 @@ def calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, if not os.path.exists(admin2_250_stats): zonal_adm2 = xx.pop_zonal_admin(inD2) zonal_adm2.to_csv(admin2_250_stats) - tPrint(f"{iso3} ***** Calculating Zonal communes") - if os.path.exists(ea_file): + tPrint(f"{iso3} ***** Calculating Zonal communes") + if os.path.exists(ea_file): inEA = gpd.read_file(ea_file) zonal_ea = xx.pop_zonal_admin(inEA) zonal_ea.to_csv(commune_250_stats) if evaluate: tPrint(f"{iso3} ***** Evaluating Data") xx.evaluateOutput(admin2_250_stats, commune_250_stats) - - -def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3=''): - ''' Summarize urbanization from Pierre-Philippe's Dartboard methodology - + + +def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3=""): + """Summarize urbanization from Pierre-Philippe's Dartboard methodology + INPUT input_folder [string path] - location of dartboard urbanization - default_pop_file [string path] - default pop filename to use for urban population calculations + default_pop_file [string path] - default pop filename to use for urban population calculations admin_layer [string path] - zones used to summarize population RETURN [geopandas dataframe] - contains total population and urban population for each shape - ''' - urban_layers = [os.path.join(in_folder, x) for x in os.listdir(in_folder) if x[-4:] == ".tif"] - if iso3 != '': + """ + urban_layers = [ + os.path.join(in_folder, x) for x in os.listdir(in_folder) if x[-4:] == ".tif" + ] + if iso3 != "": urban_layers = [x for x in urban_layers if iso3.lower() in x] - + cur_layer = urban_layers[0] inD = gpd.read_file(admin_layer) - default_pop_1k = default_pop_file.replace(default_pop_file[:3], "%s1k" % default_pop_file[:3]) + default_pop_1k = default_pop_file.replace( + default_pop_file[:3], "%s1k" % default_pop_file[:3] + ) for cur_layer in urban_layers: - #tPrint(cur_layer) - #Open and read in urban data + # tPrint(cur_layer) + # Open and read in urban data urban_r = rasterio.open(cur_layer) urban_data = urban_r.read() - urban_data = (urban_data > 0).astype(urban_r.meta['dtype']) - #Extract population data - urban_layer = os.path.basename(cur_layer) + urban_data = (urban_data > 0).astype(urban_r.meta["dtype"]) + # Extract population data + urban_layer = os.path.basename(cur_layer) default_pop = default_pop_file - + if "1k" in urban_layer: default_pop = default_pop_1k pop_layer = os.path.basename(cur_layer)[:11] @@ -331,9 +378,9 @@ def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3= else: pop_layer = os.path.basename(cur_layer)[:9] pop_folder = os.path.join(output_folder, "FINAL_STANDARD") - pop_file = os.path.join(pop_folder,"%s.tif" % pop_layer) - - if not os.path.exists(pop_file): + pop_file = os.path.join(pop_folder, "%s.tif" % pop_layer) + + if not os.path.exists(pop_file): if "1k" in urban_layer: default_pop = default_pop_1k pop_layer = os.path.basename(cur_layer)[:9] @@ -341,62 +388,66 @@ def calc_pp_urban(in_folder, default_pop_file, admin_layer, output_folder, iso3= else: pop_layer = os.path.basename(cur_layer)[:7] pop_folder = os.path.join(output_folder, 
"FINAL_STANDARD") - pop_file = os.path.join(pop_folder,"%s.tif" % pop_layer) - + pop_file = os.path.join(pop_folder, "%s.tif" % pop_layer) + pop_r = rasterio.open(pop_file) pop_data = pop_r.read() pop_data = pop_data * urban_data meta = urban_r.meta.copy() - meta.update(dtype = pop_data.dtype) - - #Calculate total population + meta.update(dtype=pop_data.dtype) + + # Calculate total population total_pop_field = os.path.basename(pop_file).replace(".tif", "") - if not total_pop_field in inD.columns: + if total_pop_field not in inD.columns: res = rMisc.zonalStats(inD, pop_r, reProj=True, minVal=0) - res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) - inD[total_pop_field] = res['SUM'] - - #Calculate urban population + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + inD[total_pop_field] = res["SUM"] + + # Calculate urban population with rMisc.create_rasterio_inmemory(meta, pop_data) as pop_r: res = rMisc.zonalStats(inD, pop_r, reProj=True, minVal=0) - res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) - - inD[os.path.basename(cur_layer).replace(".tif","")] = res['SUM'] - return(inD) + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + + inD[os.path.basename(cur_layer).replace(".tif", "")] = res["SUM"] + return inD + def check_no_data(in_folder): - ''' loop through all the tif files in the FINAL folders and calculate the number of no-data cells - ''' + """loop through all the tif files in the FINAL folders and calculate the number of no-data cells""" for root, dirs, files in os.walk(in_folder): if "FINAL" in root: for f in files: - if (not "NO_DATA" in f) and (not 'urban' in f): + if ("NO_DATA" not in f) and ("urban" not in f): if f[-4:] == ".tif": cur_file = os.path.join(root, f) curR = rasterio.open(cur_file) curD = curR.read() print(f'{f}: {(curD == curR.meta["nodata"]).sum()}') - + + def pp_point_urban_summaries(inD, urban_tiffs, out_file): - ''' summarize urbanization for point locations (inD) for each urban definition file (urban_tiffs) - ''' + """summarize urbanization for point locations (inD) for each urban definition file (urban_tiffs)""" for pFile in urban_tiffs: if pFile.endswith(".tif"): try: rFile = rasterio.open(pFile) if inD.crs != rFile.crs: inD = inD.to_crs(rFile.crs) - geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()] + geoms = [ + (row["geometry"].x, row["geometry"].y) + for idx, row in inD.iterrows() + ] urb_res = rFile.sample(geoms) - inD[os.path.basename(pFile).replace(".tif","")] = [x[0] for x in list(urb_res)] + inD[os.path.basename(pFile).replace(".tif", "")] = [ + x[0] for x in list(urb_res) + ] except: pass - pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(out_file) + pd.DataFrame(inD.drop(["geometry"], axis=1)).to_csv(out_file) + - def point_urban_summaries(inD, pop_tiffs, out_file): - ''' summarize urbanization for point locations (inD) for each population file (pop_tiffs) - ''' + """summarize urbanization for point locations (inD) for each population file (pop_tiffs)""" for pFile in pop_tiffs: urb_file = pFile.replace(".tif", "_urban.tif") hd_file = pFile.replace(".tif", "_urban_hd.tif") @@ -404,151 +455,248 @@ def point_urban_summaries(inD, pop_tiffs, out_file): for curFile in [urb_file, hd_file]: try: inUrb = rasterio.open(curFile) - if inD.crs!= inUrb.crs: + if inD.crs != inUrb.crs: inD = inD.to_crs(inUrb.crs) - geoms = [(row['geometry'].x, row['geometry'].y) for idx, row in inD.iterrows()] + geoms = [ + (row["geometry"].x, row["geometry"].y) + for idx, row in inD.iterrows() 
+ ] urb_res = inUrb.sample(geoms) - inD[os.path.basename(curFile).replace(".tif","")] = [x[0] for x in list(urb_res)] + inD[os.path.basename(curFile).replace(".tif", "")] = [ + x[0] for x in list(urb_res) + ] except: pass - pd.DataFrame(inD.drop(['geometry'], axis=1)).to_csv(out_file) - + pd.DataFrame(inD.drop(["geometry"], axis=1)).to_csv(out_file) + + def run_country(iso3): - local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format(country=iso3) + local_path = "/home/public/Data/COUNTRY/{country}/POPULATION/WORLDPOP/".format( + country=iso3 + ) + def run_zonal(iso3, output_folder, inG, pop_files, ea_file, pt_file): - ''' Summarize zonal statistics for urbanization numbers against polygons and points for both WB and PP urban calculations - ''' - tPrint(f'Starting zonal calculations {iso3}') - pp_deleniations_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations" - - inD = inG.loc[inG['ISO3'] == iso3].copy() - inD['geometry'] = inD['geometry'].apply(lambda x: x.buffer(500)) - inD = inD.to_crs('epsg:4326') - + """Summarize zonal statistics for urbanization numbers against polygons and points for both WB and PP urban calculations""" + tPrint(f"Starting zonal calculations {iso3}") + pp_deleniations_folder = ( + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations" + ) + + inD = inG.loc[inG["ISO3"] == iso3].copy() + inD["geometry"] = inD["geometry"].apply(lambda x: x.buffer(500)) + inD = inD.to_crs("epsg:4326") + # Run zonal stats on WB stats using ea boundary out_ea_zonal = os.path.join(output_folder, f"{iso3}_EA_WB_URBAN_1K.csv") - if os.path.exists(ea_file): #not os.path.exists(out_ea_zonal) & - xx = helper.urban_country(iso3, output_folder, inD, pop_files, final_folder="FINAL_STANDARD_1KM", ghspop_suffix="1k") + if os.path.exists(ea_file): # not os.path.exists(out_ea_zonal) & + xx = helper.urban_country( + iso3, + output_folder, + inD, + pop_files, + final_folder="FINAL_STANDARD_1KM", + ghspop_suffix="1k", + ) zonal_ea = xx.pop_zonal_admin(gpd.read_file(ea_file)) zonal_ea.to_csv(out_ea_zonal) out_ea_zonal = os.path.join(output_folder, f"{iso3}_EA_WB_URBAN_250.csv") - xx = helper.urban_country(iso3, output_folder, inD, pop_files, final_folder="FINAL_STANDARD", ghspop_suffix="") + xx = helper.urban_country( + iso3, + output_folder, + inD, + pop_files, + final_folder="FINAL_STANDARD", + ghspop_suffix="", + ) zonal_ea = xx.pop_zonal_admin(gpd.read_file(ea_file)) zonal_ea.to_csv(out_ea_zonal) - + # Run zonal stats on pp urban using ea boundary out_ea_pp_zonal = os.path.join(output_folder, f"{iso3}_EA_PP_URBAN_Updated.csv") - if (os.path.exists(ea_file)): # & not os.path.exists(out_ea_pp_zonal): - pp_zonal_ea = calc_pp_urban(pp_deleniations_folder, pop_files[0][1], ea_file, output_folder, iso3) - if 'geometry' in pp_zonal_ea.columns: - pp_zonal_ea = pp_zonal_ea.drop(['geometry'], axis=1) + if os.path.exists(ea_file): # & not os.path.exists(out_ea_pp_zonal): + pp_zonal_ea = calc_pp_urban( + pp_deleniations_folder, pop_files[0][1], ea_file, output_folder, iso3 + ) + if "geometry" in pp_zonal_ea.columns: + pp_zonal_ea = pp_zonal_ea.drop(["geometry"], axis=1) pp_zonal_ea.to_csv(out_ea_pp_zonal) - + wb_out_file = os.path.join(output_folder, f"{iso3}_HH_GPS_WB_URBAN.csv") - pp_out_file = os.path.join(output_folder, f"{iso3}_HH_GPS_PP_URBAN.csv") + pp_out_file = os.path.join(output_folder, f"{iso3}_HH_GPS_PP_URBAN.csv") print(pt_file) - if os.path.exists(pt_file): # and not os.path.exists(wb_out_file): - cur_pt = 
gpd.read_file(pt_file) + if os.path.exists(pt_file): # and not os.path.exists(wb_out_file): + cur_pt = gpd.read_file(pt_file) all_tiffs = [] base_folder = os.path.join(output_folder, "FINAL_STANDARD") base_folder_1km = os.path.join(output_folder, "FINAL_STANDARD_1KM") for file_defs in pop_files: pFile = file_defs[1] all_tiffs.append(os.path.join(base_folder, pFile)) - all_tiffs.append(os.path.join(base_folder_1km, pFile.replace(f"{iso3.lower()}", f"{iso3.lower()}1k"))) + all_tiffs.append( + os.path.join( + base_folder_1km, + pFile.replace(f"{iso3.lower()}", f"{iso3.lower()}1k"), + ) + ) point_urban_summaries(cur_pt, all_tiffs, wb_out_file) - - if os.path.exists(pt_file): # and not os.path.exists(pp_out_file): + + if os.path.exists(pt_file): # and not os.path.exists(pp_out_file): # Get list of urban tiffs from PP - urban_tiffs = [os.path.join(pp_deleniations_folder, x) for x in os.listdir(pp_deleniations_folder) if iso3.lower() in x] + urban_tiffs = [ + os.path.join(pp_deleniations_folder, x) + for x in os.listdir(pp_deleniations_folder) + if iso3.lower() in x + ] pp_point_urban_summaries(cur_pt, urban_tiffs, pp_out_file) - tPrint(f'Completed zonal calculations {iso3}') - - -EA_DEFS = { # Define ea files per iso3 + tPrint(f"Completed zonal calculations {iso3}") + + +EA_DEFS = { # Define ea files per iso3 # iso3 : folder, polygon file, point file - "BFA": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/BurkinaFaso/","bfa_admbnda_adm3_igb_20200323.shp", "bfa.geojson"], - "TCD": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Chad", "tcd_a_admbnd_adm3_ocha.shp", "ChadFinal.geojson"], - "GIN": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Guinea/","gin_admbnda_adm3_ocha.shp", "GINFinal.geojson"], - "GNB": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Guinea Bissau/","gnb_admbnda_adm2_1m_salb_20210609.shp", "GNBFinal.geojson"], - "GAB": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Gabon", "CANTONS_region.shp", "gabon_gps.geojson"], - "LSO": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Lesotho/","lso_admbnda_adm2_FAO_MLGCA_2019.shp", "lesotho_list.geojson"], - "MWI": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Malawi", "mwi_admbnda_adm3_nso_20181016.shp", "checkedcoord_malawi.geojson"], - "MLI": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Mali", "mli_admbnda_adm3_1m_dnct_20190802.shp", "MaliFinal.geojson"], - "MRT": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Mauritania", "MAU_edit.shp", "mauritania.geojson"], - "NER": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Niger", "NER_adm03_feb2018.shp", "NigerFinal.geojson"], - "SEN": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Senegal/","sen_admbnda_adm3_1m_gov_ocha_20190426.shp", "senegal.geojson"], - #"UGA": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Uganda/uganda_parishes_cleaned_attached", "uganda_parishes_cleaned_attached.shp", ""], - "UGA": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Uganda/GeoBoundaries", "geoBoundaries-UGA-ADM2.shp", ""], - 'CIV': ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/CIV/", "civ_admbnda_adm1_cntig_ocha_itos_20180706.shp", "civ.geojson"], - 'AGO': ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Angola/", "bairros.shp", ""], - 'ETH': 
["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Ethiopia/", "Ethiopia_pti_admin3.shp", "HBS_GPS.geojson"], + "BFA": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/BurkinaFaso/", + "bfa_admbnda_adm3_igb_20200323.shp", + "bfa.geojson", + ], + "TCD": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Chad", + "tcd_a_admbnd_adm3_ocha.shp", + "ChadFinal.geojson", + ], + "GIN": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Guinea/", + "gin_admbnda_adm3_ocha.shp", + "GINFinal.geojson", + ], + "GNB": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Guinea Bissau/", + "gnb_admbnda_adm2_1m_salb_20210609.shp", + "GNBFinal.geojson", + ], + "GAB": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Gabon", + "CANTONS_region.shp", + "gabon_gps.geojson", + ], + "LSO": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Lesotho/", + "lso_admbnda_adm2_FAO_MLGCA_2019.shp", + "lesotho_list.geojson", + ], + "MWI": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Malawi", + "mwi_admbnda_adm3_nso_20181016.shp", + "checkedcoord_malawi.geojson", + ], + "MLI": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Mali", + "mli_admbnda_adm3_1m_dnct_20190802.shp", + "MaliFinal.geojson", + ], + "MRT": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Mauritania", + "MAU_edit.shp", + "mauritania.geojson", + ], + "NER": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Niger", + "NER_adm03_feb2018.shp", + "NigerFinal.geojson", + ], + "SEN": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Senegal/", + "sen_admbnda_adm3_1m_gov_ocha_20190426.shp", + "senegal.geojson", + ], + # "UGA": ["/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Uganda/uganda_parishes_cleaned_attached", "uganda_parishes_cleaned_attached.shp", ""], + "UGA": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Uganda/GeoBoundaries", + "geoBoundaries-UGA-ADM2.shp", + "", + ], + "CIV": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/CIV/", + "civ_admbnda_adm1_cntig_ocha_itos_20180706.shp", + "civ.geojson", + ], + "AGO": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Angola/", + "bairros.shp", + "", + ], + "ETH": [ + "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/EA_Files/Ethiopia/", + "Ethiopia_pti_admin3.shp", + "HBS_GPS.geojson", + ], } - + if __name__ == "__main__": print("STARTING") - global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp" - global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp" + global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp" + global_bounds_adm2 = "/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp" global_ghspop = "/home/public/Data/GLOBAL/Population/GHS/250/GHS_POP_E2015_GLOBE_R2019A_54009_250_V1_0.tif" global_ghspop_1k = "/home/public/Data/GLOBAL/Population/GHS/GHS_POP_E2015_GLOBE_R2019A_54009_1K_V1_0.tif" - worldPop_2015 = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt" + worldPop_2015 = ( + "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2015/worldPop_2015.vrt" + ) constrained_WP_folder = "/home/public/Data/GLOBAL/Population/RF_SSA_2015-2020" - out_base = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data" - + out_base = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data" + inG = gpd.read_file(global_bounds) inG2 = 
gpd.read_file(global_bounds_adm2) runSmall = True runLarge = True - - focal_countries = inG.loc[inG['Region'] == 'Sub-Saharan Africa'].sort_values(['ISO3'])['ISO3'].values - nCores = min(len(focal_countries), round(multiprocessing.cpu_count() * .8)) + + focal_countries = ( + inG.loc[inG["Region"] == "Sub-Saharan Africa"] + .sort_values(["ISO3"])["ISO3"] + .values + ) + nCores = min(len(focal_countries), round(multiprocessing.cpu_count() * 0.8)) all_commands = [] zonal_commands = [] - for iso3 in ['MRT']: #EA_DEFS.keys(): #focal_countries: #: # + for iso3 in ["MRT"]: # EA_DEFS.keys(): #focal_countries: #: # tPrint(iso3) try: cur_def = EA_DEFS[iso3] ea_file = os.path.join(cur_def[0], cur_def[1]) pt_file = os.path.join(cur_def[0], cur_def[2]) except: - ea_file = '' - pt_file = '' - + ea_file = "" + pt_file = "" + output_folder = os.path.join(out_base, "%s_URBAN_DATA_new_naming" % iso3) - pop_files = [[worldPop_2015, f'{iso3.lower()}_upo15.tif']] + pop_files = [[worldPop_2015, f"{iso3.lower()}_upo15.tif"]] # Identify the constrained WorldPop layer - c_WP_15 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif' - c_WP_20 = f'{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif' + c_WP_15 = f"{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2015.tif" + c_WP_20 = f"{constrained_WP_folder}/{iso3}/ppp_{iso3}_const_2020.tif" if os.path.exists(c_WP_15): - pop_files.append([c_WP_15, f'{iso3.lower()}_cpo15.tif']) + pop_files.append([c_WP_15, f"{iso3.lower()}_cpo15.tif"]) else: - print(f'***** Could not locate constrained WorldPop 2015 for {iso3}') + print(f"***** Could not locate constrained WorldPop 2015 for {iso3}") if os.path.exists(c_WP_20): - pop_files.append([c_WP_20, f'{iso3.lower()}_cpo20.tif']) + pop_files.append([c_WP_20, f"{iso3.lower()}_cpo20.tif"]) else: - print(f'***** Could not locate constrained WorldPop 2020 for {iso3}') - - ''' + print(f"***** Could not locate constrained WorldPop 2020 for {iso3}") + + """ try: run_zonal(iso3, output_folder, inG, pop_files, ea_file, pt_file) #calculate_urban(iso3, inG, inG2, pop_files, ea_file, output_folder, km=True, small=True) except: - print(f"Error with {iso3}") - ''' + print(f"Error with {iso3}") + """ cur_args = [iso3, inG, inG2, pop_files, ea_file, output_folder] all_commands.append(cur_args) - - pop_files.append([global_ghspop, f'{iso3.lower()}_gpo.tif']) + + pop_files.append([global_ghspop, f"{iso3.lower()}_gpo.tif"]) zonal_args = [iso3, output_folder, inG, pop_files, ea_file, pt_file] zonal_commands.append(zonal_args) - + with multiprocessing.Pool(nCores) as pool: - #pool.starmap(calculate_urban, all_commands) + # pool.starmap(calculate_urban, all_commands) pool.starmap(run_zonal, zonal_commands) - - - - \ No newline at end of file diff --git a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Data Preparation.ipynb b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Data Preparation.ipynb index f58d791..23e1c9f 100644 --- a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Data Preparation.ipynb +++ b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Data Preparation.ipynb @@ -24,12 +24,12 @@ } ], "source": [ - "import sys, os, importlib, requests\n", - "import rasterio, geojson\n", + "import sys\n", + "import os\n", + "import rasterio\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "import skimage.graph as graph\n", "\n", "from shapely.geometry import box, Point\n", @@ -56,7 +56,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define input variables 
\n", + "# Define input variables\n", "in_folder = \"/home/wb411133/projects/URB_SURDR_ZAF\"\n", "ntl_folder = os.path.join(in_folder, \"NTL_data\")\n", "ghsl_folder = os.path.join(in_folder, \"GHSL_data\")\n", @@ -64,26 +64,41 @@ "ma_folder = os.path.join(in_folder, \"market_access\")\n", "infra_folder = os.path.join(in_folder, \"Infra\")\n", "protected_areas_folder = os.path.join(in_folder, \"Protected_Areas\")\n", - "for f in [in_folder, ntl_folder, ghsl_folder, ma_folder, infra_folder, protected_areas_folder]:\n", + "for f in [\n", + " in_folder,\n", + " ntl_folder,\n", + " ghsl_folder,\n", + " ma_folder,\n", + " infra_folder,\n", + " protected_areas_folder,\n", + "]:\n", " if not os.path.exists(f):\n", " os.makedirs(f)\n", - " \n", + "\n", "# Define global variables\n", - "global_bounds = \"/home/public/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM0.shp\"\n", + "global_bounds = (\n", + " \"/home/public/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM0.shp\"\n", + ")\n", "ghs_folder = \"/home/public/Data/GLOBAL/GHSL\"\n", "ghs_built_folder = os.path.join(ghs_folder, \"Built\")\n", "ghs_built_files = [x for x in os.listdir(ghs_built_folder) if x.endswith(\".tif\")]\n", - "ghs_smod_file = os.path.join(ghs_folder, \"SMOD\", \"GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif\")\n", - "ghs_ucdb = os.path.join(ghs_folder, \"GHS_UCBD_R2019A\", \"GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg\")\n", + "ghs_smod_file = os.path.join(\n", + " ghs_folder, \"SMOD\", \"GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif\"\n", + ")\n", + "ghs_ucdb = os.path.join(\n", + " ghs_folder, \"GHS_UCBD_R2019A\", \"GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg\"\n", + ")\n", "global_friction = \"/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff\"\n", - "global_airports_file = os.path.join(infra_folder, \"airport_volume_airport_locations.csv\")\n", + "global_airports_file = os.path.join(\n", + " infra_folder, \"airport_volume_airport_locations.csv\"\n", + ")\n", "\n", "# Define local variables\n", - "admin0_file = os.path.join(in_folder, 'ZAF_select_adm0.shp')\n", + "admin0_file = os.path.join(in_folder, \"ZAF_select_adm0.shp\")\n", "admin3_file = os.path.join(in_folder, \"ADMIN\", \"admin3_geoBounds_FINAL.shp\")\n", "ghsl_thresh = 0.1\n", "local_ghsl_file = os.path.join(in_folder, f\"ghsl_combined_{int(ghsl_thresh*100)}.tif\")\n", - "urban_raster = os.path.join(urban_folder, \"zaf1k_cpo20_urban.tif\")\n", + "urban_raster = os.path.join(urban_folder, \"zaf1k_cpo20_urban.tif\")\n", "urban_raster_pop = os.path.join(urban_folder, \"zaf1k_cpo20.tif\")\n", "urban_extents_file = os.path.join(urban_folder, \"cpo20_urban_extents.shp\")\n", "local_ghs_smod_file = os.path.join(in_folder, \"GHS_SMOD_2020.tif\")\n", @@ -105,16 +120,18 @@ "ntl_files = dataMisc.aws_search_ntl()\n", "admin0_bounds = gpd.read_file(global_bounds)\n", "if not os.path.exists(admin0_file):\n", - " zaf_bounds = admin0_bounds.loc[admin0_bounds['WB_A3'] == 'ZAF']\n", + " zaf_bounds = admin0_bounds.loc[admin0_bounds[\"WB_A3\"] == \"ZAF\"]\n", " zaf_bounds.to_file(admin0_file)\n", "else:\n", " zaf_bounds = gpd.read_file(admin0_file)\n", - "neighbours = admin0_bounds.loc[admin0_bounds.intersects(zaf_bounds.unary_union.buffer(0.1))]\n", + "neighbours = admin0_bounds.loc[\n", + " admin0_bounds.intersects(zaf_bounds.unary_union.buffer(0.1))\n", + "]\n", "#\n", "admin1_bounds = dataMisc.get_geoboundaries(\"ZAF\", \"ADM1\")\n", "admin2_bounds = dataMisc.get_geoboundaries(\"ZAF\", \"ADM2\")\n", "admin3_bounds = 
dataMisc.get_geoboundaries(\"ZAF\", \"ADM3\")\n", - "focal_state = admin1_bounds.loc[admin1_bounds['shapeName'] == 'Mpumalanga']" + "focal_state = admin1_bounds.loc[admin1_bounds[\"shapeName\"] == \"Mpumalanga\"]" ] }, { @@ -143,11 +160,13 @@ "source": [ "# Clip out nighttime lights annual images\n", "# Mpumalanga\n", - "ntlMisc.generate_annual_composites(focal_state.unary_union, out_folder=os.path.join(ntl_folder, \"Mpumalanga\"))\n", + "ntlMisc.generate_annual_composites(\n", + " focal_state.unary_union, out_folder=os.path.join(ntl_folder, \"Mpumalanga\")\n", + ")\n", "# ZAF\n", - "#ntlMisc.generate_annual_composites(zaf_bounds.unary_union, out_folder=os.path.join(ntl_folder, \"ZAF\"))\n", + "# ntlMisc.generate_annual_composites(zaf_bounds.unary_union, out_folder=os.path.join(ntl_folder, \"ZAF\"))\n", "# Neighbours\n", - "#ntlMisc.generate_annual_composites(neighbours.unary_union, out_folder=os.path.join(ntl_folder, \"Neighbours\"))\n" + "# ntlMisc.generate_annual_composites(neighbours.unary_union, out_folder=os.path.join(ntl_folder, \"Neighbours\"))" ] }, { @@ -160,13 +179,26 @@ "for cur_raster_file in ghs_built_files:\n", " out_file = os.path.join(ghsl_folder, os.path.basename(cur_raster_file))\n", " if not os.path.exists(out_file):\n", - " rMisc.clipRaster(rasterio.open(os.path.join(ghs_built_folder, cur_raster_file)), zaf_bounds, out_file, crop=False)\n", + " rMisc.clipRaster(\n", + " rasterio.open(os.path.join(ghs_built_folder, cur_raster_file)),\n", + " zaf_bounds,\n", + " out_file,\n", + " crop=False,\n", + " )\n", " tPrint(out_file)\n", - " \n", + "\n", "# Combine GHSL layers into single file\n", - "ghsl_files = sorted([os.path.join(ghsl_folder, x) for x in os.listdir(ghsl_folder) if x.endswith(\".tif\")])\n", + "ghsl_files = sorted(\n", + " [\n", + " os.path.join(ghsl_folder, x)\n", + " for x in os.listdir(ghsl_folder)\n", + " if x.endswith(\".tif\")\n", + " ]\n", + ")\n", "if not os.path.exists(local_ghsl_file):\n", - " ghsl_res = ghslMisc.combine_ghsl_annual(ghsl_files, built_thresh=ghsl_thresh, out_file=out_file)" + " ghsl_res = ghslMisc.combine_ghsl_annual(\n", + " ghsl_files, built_thresh=ghsl_thresh, out_file=out_file\n", + " )" ] }, { @@ -177,7 +209,9 @@ "source": [ "# clip out GHS-SMOD data\n", "if not os.path.exists(local_ghs_smod_file):\n", - " rMisc.clipRaster(rasterio.open(ghs_smod_file), neighbours, local_ghs_smod_file, crop=False)" + " rMisc.clipRaster(\n", + " rasterio.open(ghs_smod_file), neighbours, local_ghs_smod_file, crop=False\n", + " )" ] }, { @@ -189,12 +223,12 @@ "# Convert urban centres from the constrained world_pop 2020 dataset to vector\n", "if not os.path.exists(urban_extents_file):\n", " urban_extents = rMisc.vectorize_raster(rasterio.open(urban_raster), bad_vals=[0])\n", - " urban_extents['geometry'] = urban_extents['geometry'].apply(lambda x: x.buffer(0))\n", + " urban_extents[\"geometry\"] = urban_extents[\"geometry\"].apply(lambda x: x.buffer(0))\n", "\n", - " #Attribute with population\n", + " # Attribute with population\n", " res = rMisc.zonalStats(urban_extents, urban_raster_pop, minVal=0)\n", - " res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])\n", - " urban_extents['Pop2020'] = res['SUM']\n", + " res = pd.DataFrame(res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])\n", + " urban_extents[\"Pop2020\"] = res[\"SUM\"]\n", " urban_extents.to_file(urban_extents_file)" ] }, @@ -208,7 +242,7 @@ "if not os.path.exists(major_urban_extents):\n", " all_extents = gpd.read_file(ghs_ucdb)\n", " sel_extents = 
all_extents.loc[all_extents.intersects(box(*neighbours.total_bounds))]\n", - " sel_extents.to_file(major_urban_extents) " + " sel_extents.to_file(major_urban_extents)" ] }, { @@ -235,11 +269,13 @@ "source": [ "if not os.path.exists(local_airports):\n", " inA = pd.read_csv(global_airports_file)\n", - " inA_geom = [Point(x) for x in zip(inA['Airport1Longitude'], inA['Airport1Latitude'])]\n", + " inA_geom = [\n", + " Point(x) for x in zip(inA[\"Airport1Longitude\"], inA[\"Airport1Latitude\"])\n", + " ]\n", " inA = gpd.GeoDataFrame(inA, geometry=inA_geom, crs=4326)\n", " selA = inA.loc[inA.intersects(neighbours.unary_union)]\n", " selA.to_file(local_airports, driver=\"GeoJSON\")\n", - " \n", + "\n", "\"\"\"headers = {'Accept': 'application/json'}\n", "ddh_international_airports = \"https://wiki.worldbank.org/pages/viewpage.action?spaceKey=GEOS&title=Guide+to+procurement+of+satellite+imagery+and+derived+products\"\n", "ddh_r = requests.get(ddh_international_airports, headers=headers)\n", @@ -260,10 +296,12 @@ "outputs": [], "source": [ "if not os.path.exists(local_friction_file):\n", - " rMisc.clipRaster(rasterio.open(global_friction), neighbours, local_friction_file, crop=False)\n", + " rMisc.clipRaster(\n", + " rasterio.open(global_friction), neighbours, local_friction_file, crop=False\n", + " )\n", "\n", "friction = rasterio.open(local_friction_file)\n", - "frictionD = friction.read()[0,:,:] * 1000\n", + "frictionD = friction.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] }, @@ -277,9 +315,9 @@ "tt_major_cities = os.path.join(ma_folder, \"tt_major_cities.tif\")\n", "if not os.path.exists(tt_major_cities):\n", " dests = gpd.read_file(major_urban_extents)\n", - " dests['geometry'] = dests['geometry'].apply(lambda x: x.centroid)\n", + " dests[\"geometry\"] = dests[\"geometry\"].apply(lambda x: x.centroid)\n", " travel_costs, traceback = ma.calculate_travel_time(friction, mcp, dests)\n", - " with rasterio.open(tt_major_cities, 'w', **friction.profile.copy()) as out_tt:\n", + " with rasterio.open(tt_major_cities, \"w\", **friction.profile.copy()) as out_tt:\n", " out_tt.write_band(1, travel_costs)" ] }, @@ -294,7 +332,7 @@ "if not os.path.exists(tt_airports):\n", " airports = gpd.read_file(local_airports)\n", " travel_costs, traceback = ma.calculate_travel_time(friction, mcp, airports)\n", - " with rasterio.open(tt_airports, 'w', **friction.profile.copy()) as out_tt:\n", + " with rasterio.open(tt_airports, \"w\", **friction.profile.copy()) as out_tt:\n", " out_tt.write_band(1, travel_costs)" ] }, @@ -309,7 +347,7 @@ "if not os.path.exists(tt_ports):\n", " ports = gpd.read_file(local_ports)\n", " travel_costs, traceback = ma.calculate_travel_time(friction, mcp, ports)\n", - " with rasterio.open(tt_ports, 'w', **friction.profile.copy()) as out_tt:\n", + " with rasterio.open(tt_ports, \"w\", **friction.profile.copy()) as out_tt:\n", " out_tt.write_band(1, travel_costs)" ] }, @@ -324,8 +362,8 @@ "if not os.path.exists(tt_ports):\n", " ports = gpd.read_file(tourist_locations)\n", " travel_costs, traceback = ma.calculate_travel_time(friction, mcp, ports)\n", - " with rasterio.open(tt_ports, 'w', **friction.profile.copy()) as out_tt:\n", - " out_tt.write_band(1, travel_costs)\n" + " with rasterio.open(tt_ports, \"w\", **friction.profile.copy()) as out_tt:\n", + " out_tt.write_band(1, travel_costs)" ] }, { @@ -338,9 +376,9 @@ "tt_ports = os.path.join(ma_folder, \"tt_protected.tif\")\n", "if not os.path.exists(tt_ports):\n", " ports = gpd.read_file(protected_areas)\n", - " 
ports['geometry'] = ports['geometry'].apply(lambda x: x.centroid)\n", + " ports[\"geometry\"] = ports[\"geometry\"].apply(lambda x: x.centroid)\n", " travel_costs, traceback = ma.calculate_travel_time(friction, mcp, ports)\n", - " with rasterio.open(tt_ports, 'w', **friction.profile.copy()) as out_tt:\n", + " with rasterio.open(tt_ports, \"w\", **friction.profile.copy()) as out_tt:\n", " out_tt.write_band(1, travel_costs)" ] }, @@ -351,7 +389,7 @@ "outputs": [], "source": [ "dests = gpd.read_file(admin3_file)\n", - "dests['geometry'] = dests['geometry'].apply(lambda x: x.centroid)\n", + "dests[\"geometry\"] = dests[\"geometry\"].apply(lambda x: x.centroid)\n", "\n", "od_res = ma.calculate_od_matrix(friction, mcp, dests)\n", "final_od = pd.DataFrame(od_res)\n", @@ -367,7 +405,7 @@ "# calculate total population and nighttime lights brightness for each admin3\n", "dests = gpd.read_file(admin3_file)\n", "pop_res = rMisc.zonalStats(dests, urban_raster_pop, minVal=0, reProj=True)\n", - "pop_res = pd.DataFrame(pop_res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])\n", + "pop_res = pd.DataFrame(pop_res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])\n", "pop_res" ] }, @@ -379,7 +417,7 @@ "source": [ "ntl_raster = os.path.join(ntl_folder, \"Neighbours\", \"VIIRS_2022_annual.tif\")\n", "ntl_res = rMisc.zonalStats(dests, ntl_raster, minVal=1, reProj=True)\n", - "ntl_res = pd.DataFrame(ntl_res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])" + "ntl_res = pd.DataFrame(ntl_res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])" ] }, { @@ -388,10 +426,15 @@ "metadata": {}, "outputs": [], "source": [ - "# Map random \n", - "dests['Pop'] = pop_res['SUM']\n", - "dests['NTL'] = pop_res['SUM']\n", - "mapMisc.static_map_vector(dests, \"Pop\", legend_loc=\"upper left\", thresh=[0,50000, 100000, 250000, 500000, 200000000])" + "# Map random\n", + "dests[\"Pop\"] = pop_res[\"SUM\"]\n", + "dests[\"NTL\"] = pop_res[\"SUM\"]\n", + "mapMisc.static_map_vector(\n", + " dests,\n", + " \"Pop\",\n", + " legend_loc=\"upper left\",\n", + " thresh=[0, 50000, 100000, 250000, 500000, 200000000],\n", + ")" ] }, { @@ -407,15 +450,17 @@ "metadata": {}, "outputs": [], "source": [ - "municipalities = os.path.join(in_folder, \"MiningCommunities\", \"MainPlaces\", \"MP_SA_2011.shp\")\n", + "municipalities = os.path.join(\n", + " in_folder, \"MiningCommunities\", \"MainPlaces\", \"MP_SA_2011.shp\"\n", + ")\n", "inM = gpd.read_file(municipalities)\n", - "inM['geometry'] = inM['geometry'].apply(lambda x: x.centroid)\n", + "inM[\"geometry\"] = inM[\"geometry\"].apply(lambda x: x.centroid)\n", "\n", "destinations = gpd.read_file(protected_areas)\n", "destinations = destinations.to_crs(22293)\n", - "destinations['area_km'] = destinations['geometry'].apply(lambda x: x.area/1000000)\n", + "destinations[\"area_km\"] = destinations[\"geometry\"].apply(lambda x: x.area / 1000000)\n", "destinations = destinations.to_crs(4326)\n", - "destinations['geometry'] = destinations['geometry'].apply(lambda x: x.centroid)" + "destinations[\"geometry\"] = destinations[\"geometry\"].apply(lambda x: x.centroid)" ] }, { @@ -427,7 +472,7 @@ "# Calculate travel time\n", "popR = rasterio.open(urban_raster_pop)\n", "ttr = rasterio.open(local_friction_file)\n", - "frictionD = ttr.read()[0,:,:] * 1000\n", + "frictionD = ttr.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] }, @@ -2170,9 +2215,11 @@ "metadata": {}, "outputs": [], "source": [ - "municipalities = os.path.join(in_folder, \"MiningCommunities\", \"MainPlaces\", \"MP_SA_2011.shp\")\n", + "municipalities = 
os.path.join(\n", + " in_folder, \"MiningCommunities\", \"MainPlaces\", \"MP_SA_2011.shp\"\n", + ")\n", "inM = gpd.read_file(municipalities)\n", - "inM['geometry'] = inM['geometry'].apply(lambda x: x.centroid)" + "inM[\"geometry\"] = inM[\"geometry\"].apply(lambda x: x.centroid)" ] }, { @@ -2184,7 +2231,7 @@ "# Calculate travel time\n", "popR = rasterio.open(urban_raster_pop)\n", "ttr = rasterio.open(local_friction_file)\n", - "frictionD = ttr.read()[0,:,:] * 1000\n", + "frictionD = ttr.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] }, @@ -2260,7 +2307,7 @@ "metadata": {}, "outputs": [], "source": [ - "weights = pd.read_csv(os.path.join(in_folder, \"ZONAL_RES\", 'named_places_zonal.csv'))\n", + "weights = pd.read_csv(os.path.join(in_folder, \"ZONAL_RES\", \"named_places_zonal.csv\"))\n", "weights.head()" ] }, @@ -2298,7 +2345,7 @@ "metadata": {}, "outputs": [], "source": [ - "pop_gravity = ma.calculate_gravity(xx, weights['POP'].values, weights['POP'].values)\n", + "pop_gravity = ma.calculate_gravity(xx, weights[\"POP\"].values, weights[\"POP\"].values)\n", "pop_gravity.head()" ] }, @@ -2308,7 +2355,9 @@ "metadata": {}, "outputs": [], "source": [ - "ntl_gravity = ma.calculate_gravity(xx, weights['NTL_2023'].values, weights['NTL_2023'].values)\n", + "ntl_gravity = ma.calculate_gravity(\n", + " xx, weights[\"NTL_2023\"].values, weights[\"NTL_2023\"].values\n", + ")\n", "ntl_gravity.head()" ] }, @@ -2327,15 +2376,15 @@ "metadata": {}, "outputs": [], "source": [ - "def create_ma_geometry(ma_df, out_file, xx_inM, ma_col=[\"d_0.001\"], driver='GeoJSON'):\n", + "def create_ma_geometry(ma_df, out_file, xx_inM, ma_col=[\"d_0.001\"], driver=\"GeoJSON\"):\n", " # create output geospatial market access data\n", " simple_geog = ma_df.copy()\n", - " simple_geog = simple_geog.loc[:,ma_col]\n", - " simple_geog['geometry'] = xx_inM['geometry'].values\n", - " simple_geog = gpd.GeoDataFrame(simple_geog, geometry='geometry', crs= xx_inM.crs)\n", - " pd.DataFrame(simple_geog.drop(['geometry'], axis=1)).to_csv(f'{out_file}.csv')\n", + " simple_geog = simple_geog.loc[:, ma_col]\n", + " simple_geog[\"geometry\"] = xx_inM[\"geometry\"].values\n", + " simple_geog = gpd.GeoDataFrame(simple_geog, geometry=\"geometry\", crs=xx_inM.crs)\n", + " pd.DataFrame(simple_geog.drop([\"geometry\"], axis=1)).to_csv(f\"{out_file}.csv\")\n", " simple_geog.to_file(out_file, driver=driver)\n", - " return(simple_geog)" + " return simple_geog" ] }, { @@ -2353,9 +2402,18 @@ "metadata": {}, "outputs": [], "source": [ - "create_ma_geometry(simple_gravity, os.path.join(ma_folder, 'simple_ma.shp'), inM, driver='ESRI Shapefile')\n", - "create_ma_geometry(pop_gravity, os.path.join(ma_folder, 'pop_ma.shp'), inM, driver='ESRI Shapefile')\n", - "create_ma_geometry(ntl_gravity, os.path.join(ma_folder, 'ntl_ma.shp'), inM, driver='ESRI Shapefile')" + "create_ma_geometry(\n", + " simple_gravity,\n", + " os.path.join(ma_folder, \"simple_ma.shp\"),\n", + " inM,\n", + " driver=\"ESRI Shapefile\",\n", + ")\n", + "create_ma_geometry(\n", + " pop_gravity, os.path.join(ma_folder, \"pop_ma.shp\"), inM, driver=\"ESRI Shapefile\"\n", + ")\n", + "create_ma_geometry(\n", + " ntl_gravity, os.path.join(ma_folder, \"ntl_ma.shp\"), inM, driver=\"ESRI Shapefile\"\n", + ")" ] }, { @@ -2373,7 +2431,7 @@ "metadata": {}, "outputs": [], "source": [ - "weights.loc[weights['MP_CODE_st'] == 798020]" + "weights.loc[weights[\"MP_CODE_st\"] == 798020]" ] }, { diff --git a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md 
b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md index f8e7335..5d9d788 100644 --- a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md +++ b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/README.md @@ -1,2 +1,2 @@ # Just energy transition -tbd \ No newline at end of file +tbd diff --git a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb index c0bb243..ad15def 100644 --- a/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb +++ b/notebooks/Implementations/URB_SURDR_ZAF_Energy_Transition/Zonal_statistics.ipynb @@ -42,23 +42,20 @@ } ], "source": [ - "import sys, os, importlib, requests\n", - "import rasterio, geojson\n", + "import sys\n", + "import os\n", + "import importlib\n", + "import rasterio\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "import skimage.graph as graph\n", "\n", - "from shapely.geometry import box, Point\n", "\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", "\n", "import GOSTRocks.dataMisc as dataMisc\n", - "import GOSTRocks.ntlMisc as ntlMisc\n", - "import GOSTRocks.ghslMisc as ghslMisc\n", "import GOSTRocks.rasterMisc as rMisc\n", - "import GOSTRocks.mapMisc as mapMisc\n", "from GOSTRocks.misc import tPrint\n", "\n", "sys.path.insert(0, \"/home/wb411133/Code/GOSTNets_Raster/src\")\n", @@ -75,7 +72,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Define input variables \n", + "# Define input variables\n", "in_folder = \"/home/wb411133/projects/URB_SURDR_ZAF\"\n", "ntl_folder = os.path.join(in_folder, \"NTL_data\")\n", "ghsl_folder = os.path.join(in_folder, \"GHSL_data\")\n", @@ -86,23 +83,33 @@ "zonal_res_folder = os.path.join(in_folder, \"ZONAL_RES\")\n", "protected_areas_folder = os.path.join(in_folder, \"Protected_Areas\")\n", "reference_folder = os.path.join(in_folder, \"Reference\")\n", - "for f in [in_folder, ntl_folder, ghsl_folder, ma_folder, infra_folder, zonal_res_folder, protected_areas_folder]:\n", + "for f in [\n", + " in_folder,\n", + " ntl_folder,\n", + " ghsl_folder,\n", + " ma_folder,\n", + " infra_folder,\n", + " zonal_res_folder,\n", + " protected_areas_folder,\n", + "]:\n", " if not os.path.exists(f):\n", " os.makedirs(f)\n", - " \n", + "\n", "# Define local variables\n", - "admin0_file = os.path.join(in_folder, 'ZAF_select_adm0.shp')\n", + "admin0_file = os.path.join(in_folder, \"ZAF_select_adm0.shp\")\n", "admin1_file = os.path.join(in_folder, \"admin1_geoBounds.shp\")\n", "admin3_file = os.path.join(in_folder, \"ADMIN\", \"admin3_geoBounds_FINAL.shp\")\n", "ghsl_thresh = 0.1\n", "local_ghsl_file = os.path.join(in_folder, f\"ghsl_combined_{int(ghsl_thresh*100)}.tif\")\n", - "high_res_pop = '/home/public/Data/GLOBAL/Population/RF_SSA_2015-2020/ZAF/ppp_ZAF_const_2020.tif'\n", - "urban_raster = os.path.join(urban_folder, \"zaf_cpo20_urban.tif\")\n", + "high_res_pop = (\n", + " \"/home/public/Data/GLOBAL/Population/RF_SSA_2015-2020/ZAF/ppp_ZAF_const_2020.tif\"\n", + ")\n", + "urban_raster = os.path.join(urban_folder, \"zaf_cpo20_urban.tif\")\n", "urban_raster_pop = os.path.join(urban_folder, \"zaf_cpo20.tif\")\n", "urban_extents_file = os.path.join(urban_folder, \"cpo20_urban_extents.shp\")\n", "local_ghs_smod_file = os.path.join(in_folder, \"GHS_SMOD_2020.tif\")\n", "major_urban_extents = os.path.join(in_folder, \"major_cities_UCDB2019.shp\")\n", - "zaf_capitals = os.path.join(in_folder, 
'ZAF_provincial_capitals.kml')\n", + "zaf_capitals = os.path.join(in_folder, \"ZAF_provincial_capitals.kml\")\n", "local_friction_file = os.path.join(ma_folder, \"friction_2020.tif\")\n", "local_airports = os.path.join(reference_folder, \"Major_Airports.shp\")\n", "local_ports = os.path.join(reference_folder, \"Ports.shp\")\n", @@ -114,7 +121,7 @@ "municipalities = os.path.join(zaf_folder, \"MainPlaces\", \"MP_SA_2011.shp\")\n", "muni_id = \"MP_CODE_st\"\n", "\n", - "proj_epsg = 22293 #https://epsg.io/22293" + "proj_epsg = 22293 # https://epsg.io/22293" ] }, { @@ -137,16 +144,20 @@ "source": [ "# Zonal stats on nighttime lights\n", "regional_ntl_folder = os.path.join(ntl_folder, \"Neighbours\")\n", - "ntl_files = [os.path.join(regional_ntl_folder, x) for x in os.listdir(regional_ntl_folder)]\n", + "ntl_files = [\n", + " os.path.join(regional_ntl_folder, x) for x in os.listdir(regional_ntl_folder)\n", + "]\n", "\n", "inM_ntl_res = inM.copy()\n", "for ntl_file in ntl_files:\n", " year = ntl_file.split(\"_\")[-2]\n", " tPrint(year)\n", " res = rMisc.zonalStats(inM, ntl_file, minVal=0.1)\n", - " res = pd.DataFrame(res, columns = ['SUM', 'MIN', 'MAX', 'MEAN'])\n", - " inM_ntl_res[f'NTL_{year}'] = res['SUM']\n", - "pd.DataFrame(inM_ntl_res.drop(['geometry'], axis=1)).to_csv(os.path.join(zonal_res_folder, \"NTL_Zonal_res.csv\")) " + " res = pd.DataFrame(res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])\n", + " inM_ntl_res[f\"NTL_{year}\"] = res[\"SUM\"]\n", + "pd.DataFrame(inM_ntl_res.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(zonal_res_folder, \"NTL_Zonal_res.csv\")\n", + ")" ] }, { @@ -160,10 +171,12 @@ "inM_pop_res = inM.copy()\n", "\n", "pop_res = rMisc.zonalStats(inM, high_res_pop, minVal=0, reProj=True)\n", - "pop_res = pd.DataFrame(pop_res, columns = ['SUM', 'MIN', 'MAX', 'MEAN'])\n", - "inM_pop_res['POP'] = pop_res['SUM']\n", + "pop_res = pd.DataFrame(pop_res, columns=[\"SUM\", \"MIN\", \"MAX\", \"MEAN\"])\n", + "inM_pop_res[\"POP\"] = pop_res[\"SUM\"]\n", "\n", - "pd.DataFrame(inM_pop_res.drop(['geometry'], axis=1)).to_csv(os.path.join(zonal_res_folder, \"Pop_res.csv\")) " + "pd.DataFrame(inM_pop_res.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(zonal_res_folder, \"Pop_res.csv\")\n", + ")" ] }, { @@ -174,15 +187,21 @@ "outputs": [], "source": [ "# GHSL _summary\n", - "ghsl_res = rMisc.zonalStats(inM, local_ghsl_file, rastType='C', unqVals=list(range(1975, 2031, 5)), reProj=True)\n", - "ghsl_res = pd.DataFrame(ghsl_res, columns = [f'c_{x}' for x in list(range(1975, 2031, 5))])\n", + "ghsl_res = rMisc.zonalStats(\n", + " inM, local_ghsl_file, rastType=\"C\", unqVals=list(range(1975, 2031, 5)), reProj=True\n", + ")\n", + "ghsl_res = pd.DataFrame(\n", + " ghsl_res, columns=[f\"c_{x}\" for x in list(range(1975, 2031, 5))]\n", + ")\n", "ghsl_res = ghsl_res.cumsum(axis=1)\n", "ghsl_area = ghsl_res.copy()\n", "# GHSL conversion to area (km2)\n", "for col in ghsl_area.columns:\n", - " ghsl_area[col] = ghsl_area[col] * (100*100) / 1000000\n", - "ghsl_area['AREA_KM'] = inM['ALBERS_ARE']\n", - "ghsl_area['per_built_2020'] = ghsl_area.apply(lambda x: x['c_2020']/x['AREA_KM'], axis=1)\n", + " ghsl_area[col] = ghsl_area[col] * (100 * 100) / 1000000\n", + "ghsl_area[\"AREA_KM\"] = inM[\"ALBERS_ARE\"]\n", + "ghsl_area[\"per_built_2020\"] = ghsl_area.apply(\n", + " lambda x: x[\"c_2020\"] / x[\"AREA_KM\"], axis=1\n", + ")\n", "ghsl_area[muni_id] = inM[muni_id]\n", "ghsl_area.to_csv(os.path.join(zonal_res_folder, \"Muni_GHSL_res.csv\"))" ] @@ -197,7 +216,7 @@ "# Calculate 
travel time\n", "popR = rasterio.open(urban_raster_pop)\n", "ttr = rasterio.open(local_friction_file)\n", - "frictionD = ttr.read()[0,:,:] * 1000\n", + "frictionD = ttr.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] }, @@ -210,11 +229,11 @@ "source": [ "# Calculate travel time to largest city within province\n", "in_cities = gpd.read_file(major_urban_extents)\n", - "zaf_adm1 = dataMisc.get_geoboundaries('ZAF', 'ADM1')\n", - "in_cities['geometry'] = in_cities['geometry'].apply(lambda x: x.centroid)\n", - "in_cities = in_cities.loc[:,['ID_HDC_G0','CTR_MN_NM','UC_NM_MN','P15','geometry']]\n", + "zaf_adm1 = dataMisc.get_geoboundaries(\"ZAF\", \"ADM1\")\n", + "in_cities[\"geometry\"] = in_cities[\"geometry\"].apply(lambda x: x.centroid)\n", + "in_cities = in_cities.loc[:, [\"ID_HDC_G0\", \"CTR_MN_NM\", \"UC_NM_MN\", \"P15\", \"geometry\"]]\n", "zaf_adm1 = gpd.read_file(admin1_file)\n", - "in_cities = gpd.sjoin(in_cities, zaf_adm1)\n" + "in_cities = gpd.sjoin(in_cities, zaf_adm1)" ] }, { @@ -228,13 +247,13 @@ "source": [ "##### Ports and airports\n", "ports = gpd.read_file(local_ports)\n", - "zaf_ports = ports.loc[ports['COUNTRY'] == 'ZA']\n", - "foreign_ports = ports.loc[ports['COUNTRY'] != 'ZA']\n", - "maputo_port = foreign_ports.loc[foreign_ports['PORT_NAME'] == 'MAPUTO']\n", + "zaf_ports = ports.loc[ports[\"COUNTRY\"] == \"ZA\"]\n", + "foreign_ports = ports.loc[ports[\"COUNTRY\"] != \"ZA\"]\n", + "maputo_port = foreign_ports.loc[foreign_ports[\"PORT_NAME\"] == \"MAPUTO\"]\n", "\n", "airports = gpd.read_file(local_airports)\n", - "zaf_airports = airports.loc[airports['soc'] == 'ZAF']\n", - "foreign_airports = airports.loc[airports['soc'] != 'ZAF']" + "zaf_airports = airports.loc[airports[\"soc\"] == \"ZAF\"]\n", + "foreign_airports = airports.loc[airports[\"soc\"] != \"ZAF\"]" ] }, { @@ -247,16 +266,19 @@ "outputs": [], "source": [ "# Largest 5 cities\n", - "largest_5_cities = in_cities.sort_values('P15', ascending=False)[:5]\n", + "largest_5_cities = in_cities.sort_values(\"P15\", ascending=False)[:5]\n", + "\n", + "\n", "# Largest city in each province\n", "def get_largest(x):\n", - " return(x.sort_values('P15', ascending=False).iloc[0])\n", - "provincial_largest = in_cities.groupby('shapeName').apply(get_largest)\n", + " return x.sort_values(\"P15\", ascending=False).iloc[0]\n", + "\n", + "\n", + "provincial_largest = in_cities.groupby(\"shapeName\").apply(get_largest)\n", "provincial_largest.crs = in_cities.crs\n", "# Read in KML of provincial capitals\n", - "import fiona\n", - "gpd.io.file.fiona.drvsupport.supported_drivers['LIBKML'] = 'rw'\n", - "prov_capitals = gpd.read_file(zaf_capitals).loc[:,['Name','geometry']]" + "gpd.io.file.fiona.drvsupport.supported_drivers[\"LIBKML\"] = \"rw\"\n", + "prov_capitals = gpd.read_file(zaf_capitals).loc[:, [\"Name\", \"geometry\"]]" ] }, { @@ -269,7 +291,7 @@ "# Plants and mines\n", "plants = gpd.read_file(plants_file)\n", "mines = gpd.read_file(mines_file)\n", - "mines = mines.loc[mines['Commodity'] != 'coal']\n", + "mines = mines.loc[mines[\"Commodity\"] != \"coal\"]\n", "mines = mines.loc[~mines.geometry.isna()]" ] }, @@ -376,35 +398,37 @@ "# Calculate travel time\n", "popR = rasterio.open(urban_raster_pop)\n", "ttr = rasterio.open(local_friction_file)\n", - "frictionD = ttr.read()[0,:,:] * 1000\n", + "frictionD = ttr.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)\n", "\n", - "inN, profile = rMisc.standardizeInputRasters(popR, ttr, resampling_type='sum')\n", + "inN, profile = 
rMisc.standardizeInputRasters(popR, ttr, resampling_type=\"sum\")\n", "with rMisc.create_rasterio_inmemory(profile, inN) as pop_temp:\n", " for dest in [\n", - " #[gpd.read_file(protected_areas), 'tt_protected_areas'],\n", - " #[gpd.read_file(tourist_locations), 'tt_kruger'],\n", - " #[gpd.read_file(major_urban_extents), 'tt_cities'],\n", - " #[largest_5_cities, 'tt_largest_5_cities'],\n", - " #[provincial_largest, 'tt_prov_largest'],\n", - " #[prov_capitals, 'tt_prov_capital'],\n", - " [zaf_ports, 'tt_zaf_ports'],\n", - " [foreign_ports, 'tt_foreign_ports'],\n", - " [maputo_port, 'tt_maputo_ports'],\n", - " [zaf_airports, 'tt_zaf_airports'],\n", - " [mines, 'tt_mines_noncoal'],\n", - " [plants, 'tt_plants'],\n", - " #[foreign_airports, 'tt_foreign_airports']\n", - " ]: \n", - " out_file = os.path.join(zonal_res_folder, f'{dest[1]}_tt.csv')\n", + " # [gpd.read_file(protected_areas), 'tt_protected_areas'],\n", + " # [gpd.read_file(tourist_locations), 'tt_kruger'],\n", + " # [gpd.read_file(major_urban_extents), 'tt_cities'],\n", + " # [largest_5_cities, 'tt_largest_5_cities'],\n", + " # [provincial_largest, 'tt_prov_largest'],\n", + " # [prov_capitals, 'tt_prov_capital'],\n", + " [zaf_ports, \"tt_zaf_ports\"],\n", + " [foreign_ports, \"tt_foreign_ports\"],\n", + " [maputo_port, \"tt_maputo_ports\"],\n", + " [zaf_airports, \"tt_zaf_airports\"],\n", + " [mines, \"tt_mines_noncoal\"],\n", + " [plants, \"tt_plants\"],\n", + " # [foreign_airports, 'tt_foreign_airports']\n", + " ]:\n", + " out_file = os.path.join(zonal_res_folder, f\"{dest[1]}_tt.csv\")\n", " tPrint(out_file)\n", " if not os.path.exists(out_file):\n", " dests = dest[0]\n", - " if not dests.geom_type.iloc[0] == 'Point':\n", - " dests['geometry'] = dests['geometry'].apply(lambda x: x.centroid)\n", + " if not dests.geom_type.iloc[0] == \"Point\":\n", + " dests[\"geometry\"] = dests[\"geometry\"].apply(lambda x: x.centroid)\n", " suffix = os.path.basename(out_file[:-4])\n", - " res = ma.summarize_travel_time_populations(pop_temp, ttr, dests, mcp, inM, col_suffix=suffix, calc_small=True)\n", - " pd.DataFrame(res.drop(['geometry'], axis=1)).to_csv(out_file) " + " res = ma.summarize_travel_time_populations(\n", + " pop_temp, ttr, dests, mcp, inM, col_suffix=suffix, calc_small=True\n", + " )\n", + " pd.DataFrame(res.drop([\"geometry\"], axis=1)).to_csv(out_file)" ] }, { @@ -423,7 +447,7 @@ "outputs": [], "source": [ "baseD = gpd.read_file(municipalities)\n", - "baseD.index = baseD[muni_id].astype('int64')" + "baseD.index = baseD[muni_id].astype(\"int64\")" ] }, { @@ -435,11 +459,11 @@ "source": [ "def join_data(in_file_name, baseD):\n", " ntl_data = pd.read_csv(in_file_name, index_col=0)\n", - " ntl_data.index = ntl_data[muni_id].astype('int64')\n", - " cols_to_use = ntl_data.columns.difference(baseD.drop('geometry', axis=1).columns)\n", + " ntl_data.index = ntl_data[muni_id].astype(\"int64\")\n", + " cols_to_use = ntl_data.columns.difference(baseD.drop(\"geometry\", axis=1).columns)\n", " ntl_data = ntl_data[cols_to_use]\n", " baseD = pd.merge(baseD, ntl_data, left_index=True, right_index=True)\n", - " return(baseD)" + " return baseD" ] }, { @@ -464,11 +488,10 @@ } ], "source": [ - "for in_file in os.listdir(zonal_res_folder): \n", + "for in_file in os.listdir(zonal_res_folder):\n", " if in_file.endswith(\".csv\"):\n", " print(in_file)\n", - " baseD = join_data(os.path.join(zonal_res_folder, in_file), baseD)\n", - " " + " baseD = join_data(os.path.join(zonal_res_folder, in_file), baseD)" ] }, { @@ -478,7 +501,9 @@ "metadata": {}, 
"outputs": [], "source": [ - "baseD.drop(\"MP_CODE_st\", axis=1).reset_index().to_file(os.path.join(zonal_res_folder, \"named_places_zonal.geojson\"), driver=\"GeoJSON\")" + "baseD.drop(\"MP_CODE_st\", axis=1).reset_index().to_file(\n", + " os.path.join(zonal_res_folder, \"named_places_zonal.geojson\"), driver=\"GeoJSON\"\n", + ")" ] }, { @@ -488,7 +513,9 @@ "metadata": {}, "outputs": [], "source": [ - "pd.DataFrame(baseD.drop(['geometry'], axis=1)).to_csv(os.path.join(zonal_res_folder, \"named_places_zonal.csv\"))" + "pd.DataFrame(baseD.drop([\"geometry\"], axis=1)).to_csv(\n", + " os.path.join(zonal_res_folder, \"named_places_zonal.csv\")\n", + ")" ] }, { @@ -506,7 +533,7 @@ "metadata": {}, "outputs": [], "source": [ - "out_varun_folder = os.path.join(in_folder, 'SP_VARUN', 'RESULTS')" + "out_varun_folder = os.path.join(in_folder, \"SP_VARUN\", \"RESULTS\")" ] }, { @@ -517,10 +544,10 @@ "outputs": [], "source": [ "### Read in origins\n", - "sp_varun_file = os.path.join(in_folder, 'SP_VARUN', 'SP_SA_2011.shp')\n", + "sp_varun_file = os.path.join(in_folder, \"SP_VARUN\", \"SP_SA_2011.shp\")\n", "in_sp = gpd.read_file(sp_varun_file)\n", - "in_sp.crs=4326\n", - "in_sp['geometry'] = in_sp['geometry'].apply(lambda x: x.centroid)\n", + "in_sp.crs = 4326\n", + "in_sp[\"geometry\"] = in_sp[\"geometry\"].apply(lambda x: x.centroid)\n", "# inM = gpd.read_file(municipalities)\n", "# selM = inM.loc[inM['PR_NAME'] == 'Mpumalanga'].copy()\n", "# selM['geometry'] = selM['geometry'].apply(lambda x: x.centroid)" @@ -537,8 +564,8 @@ "source": [ "### Read in destinations\n", "in_cities = gpd.read_file(major_urban_extents)\n", - "largest_5_cities = in_cities.sort_values('P15', ascending=False)[:5]\n", - "largest_5_cities['geometry'] = largest_5_cities['geometry'].apply(lambda x: x.centroid)\n", + "largest_5_cities = in_cities.sort_values(\"P15\", ascending=False)[:5]\n", + "largest_5_cities[\"geometry\"] = largest_5_cities[\"geometry\"].apply(lambda x: x.centroid)\n", "largest_5_cities" ] }, @@ -552,7 +579,7 @@ "# Calculate travel time\n", "popR = rasterio.open(urban_raster_pop)\n", "ttr = rasterio.open(local_friction_file)\n", - "frictionD = ttr.read()[0,:,:] * 1000\n", + "frictionD = ttr.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)" ] }, @@ -585,7 +612,7 @@ "source": [ "importlib.reload(ma)\n", "# Calculate for Gauteng\n", - "sel_sp = in_sp.loc[in_sp['PR_NAME'] == 'Gauteng'].copy()\n", + "sel_sp = in_sp.loc[in_sp[\"PR_NAME\"] == \"Gauteng\"].copy()\n", "od = ma.calculate_od_matrix(ttr, mcp, sel_sp, sel_sp)\n", "xx = pd.DataFrame(od)\n", "xx.columns = sel_sp.SP_CODE_st\n", @@ -604,7 +631,7 @@ "source": [ "importlib.reload(ma)\n", "# Calculate for City of Cape Town\n", - "sel_sp = in_sp.loc[in_sp['DC_NAME'] == 'City of Cape Town'].copy()\n", + "sel_sp = in_sp.loc[in_sp[\"DC_NAME\"] == \"City of Cape Town\"].copy()\n", "od = ma.calculate_od_matrix(ttr, mcp, sel_sp, sel_sp)\n", "xx = pd.DataFrame(od)\n", "xx.columns = sel_sp.SP_CODE_st\n", @@ -623,7 +650,7 @@ "source": [ "importlib.reload(ma)\n", "# Calculate for City of eTh\n", - "sel_sp = in_sp.loc[in_sp['DC_NAME'] == 'eThekwini'].copy()\n", + "sel_sp = in_sp.loc[in_sp[\"DC_NAME\"] == \"eThekwini\"].copy()\n", "od = ma.calculate_od_matrix(ttr, mcp, sel_sp, sel_sp)\n", "xx = pd.DataFrame(od)\n", "xx.columns = sel_sp.SP_CODE_st\n", @@ -640,7 +667,7 @@ "source": [ "importlib.reload(ma)\n", "# Calculate for City of eTh\n", - "sel_sp = in_sp.loc[in_sp['DC_NAME'] == 'Nelson Mandela Bay'].copy()\n", + "sel_sp = in_sp.loc[in_sp[\"DC_NAME\"] == 
\"Nelson Mandela Bay\"].copy()\n", "od = ma.calculate_od_matrix(ttr, mcp, sel_sp, sel_sp)\n", "xx = pd.DataFrame(od)\n", "xx.columns = sel_sp.SP_CODE_st\n", @@ -657,7 +684,7 @@ "source": [ "importlib.reload(ma)\n", "# Calculate for City of eTh\n", - "sel_sp = in_sp.loc[in_sp['DC_NAME'] == 'Buffalo City'].copy()\n", + "sel_sp = in_sp.loc[in_sp[\"DC_NAME\"] == \"Buffalo City\"].copy()\n", "od = ma.calculate_od_matrix(ttr, mcp, sel_sp, sel_sp)\n", "xx = pd.DataFrame(od)\n", "xx.columns = sel_sp.SP_CODE_st\n", @@ -684,10 +711,10 @@ "# Calculate travel time\n", "popR = rasterio.open(urban_raster_pop)\n", "ttr = rasterio.open(local_friction_file)\n", - "frictionD = ttr.read()[0,:,:] * 1000\n", + "frictionD = ttr.read()[0, :, :] * 1000\n", "mcp = graph.MCP_Geometric(frictionD)\n", "\n", - "inN, profile = rMisc.standardizeInputRasters(popR, ttr, resampling_type='sum')" + "inN, profile = rMisc.standardizeInputRasters(popR, ttr, resampling_type=\"sum\")" ] }, { @@ -707,7 +734,9 @@ ], "source": [ "with rMisc.create_rasterio_inmemory(profile, inN) as pop_temp:\n", - " res = ma.summarize_travel_time_populations(pop_temp, ttr, zaf_airports, mcp, inM, calc_small=True)" + " res = ma.summarize_travel_time_populations(\n", + " pop_temp, ttr, zaf_airports, mcp, inM, calc_small=True\n", + " )" ] }, { @@ -1092,7 +1121,7 @@ } ], "source": [ - "res.loc[res['total_pop'] == 0.]" + "res.loc[res[\"total_pop\"] == 0.0]" ] }, { @@ -1111,7 +1140,7 @@ "outputs": [], "source": [ "# Investigating 1s in traveltime results\n", - "tt_file = os.path.join(zonal_res_folder, f'tt_cities_tt.csv')\n", + "tt_file = os.path.join(zonal_res_folder, \"tt_cities_tt.csv\")\n", "inT = pd.read_csv(tt_file, index_col=0)\n", "inT.head()" ] @@ -1123,7 +1152,7 @@ "metadata": {}, "outputs": [], "source": [ - "badT = inT.loc[inT['tt_pop_w_tt_cities_tt'] == 1.]\n", + "badT = inT.loc[inT[\"tt_pop_w_tt_cities_tt\"] == 1.0]\n", "badT" ] }, @@ -1135,7 +1164,7 @@ "outputs": [], "source": [ "inM_proj = inM.to_crs(proj_epsg)\n", - "inM_proj['area_km2'] = inM_proj['geometry'].apply(lambda x: x.area/1000000)" + "inM_proj[\"area_km2\"] = inM_proj[\"geometry\"].apply(lambda x: x.area / 1000000)" ] }, { @@ -1145,7 +1174,7 @@ "metadata": {}, "outputs": [], "source": [ - "inM_proj.loc[badT.index].sort_values(['area_km2'])\n" + "inM_proj.loc[badT.index].sort_values([\"area_km2\"])" ] }, { diff --git a/notebooks/Implementations/WSF/wsfdata.py b/notebooks/Implementations/WSF/wsfdata.py index a03dd07..968f0dc 100755 --- a/notebooks/Implementations/WSF/wsfdata.py +++ b/notebooks/Implementations/WSF/wsfdata.py @@ -1,17 +1,15 @@ -import os, sys - import rasterio import pandas as pd -import geopandas as gpd import numpy as np + class wsf_dataset(object): def __init__(self, imageList): - ''' Create object organizning and analyzing WSF data. - + """Create object organizning and analyzing WSF data. 
+ INPUT imageList [array of strings] - list of paths to input images - ''' + """ for img in imageList: if "AW3D30.tif" in img: self.heightImg = img @@ -21,10 +19,10 @@ def __init__(self, imageList): self.evolution = img if "WSFevolution_IDCscore.tif" in img: self.evolution_idc = img - - def analyze_idc(self, outFile='', badThreshold=2): - ''' Analyze the IDC (quality) image - + + def analyze_idc(self, outFile="", badThreshold=2): + """Analyze the IDC (quality) image + INPUT [optional] outfile [string] - path to create output file describing quality [optional] badThreshold [number] - values above this will be considered low quality @@ -32,45 +30,45 @@ def analyze_idc(self, outFile='', badThreshold=2): [numpy array] - 2band np array of same size as the input IDC image. Band 1 - Total number of years with bad data Band 2 - Most recent year of bad data - ''' + """ idc = rasterio.open(self.evolution_idc) idcD = idc.read() # Analyze the IDC dataset and write the results to file - outArray = np.zeros([2,idcD.shape[1], idcD.shape[2]]) + outArray = np.zeros([2, idcD.shape[1], idcD.shape[2]]) for rIdx in range(0, idcD.shape[2]): for cIdx in range(0, idcD.shape[1]): - curD = idcD[:,cIdx,rIdx] + curD = idcD[:, cIdx, rIdx] notGood = curD > badThreshold try: newestBadYear = max([i for i, x in enumerate(notGood) if x]) except: newestBadYear = 0 - outArray[0,cIdx,rIdx] = notGood.sum() - outArray[1,cIdx,rIdx] = newestBadYear - if outFile != '': + outArray[0, cIdx, rIdx] = notGood.sum() + outArray[1, cIdx, rIdx] = newestBadYear + if outFile != "": # Write the summary dataset to file ### BAND 1 - total number of bad years ### BAND 2 - most recent bad year outProfile = idc.profile.copy() outProfile.update(count=2) - with rasterio.open(outFile, 'w', **outProfile) as outData: - outData.write(outArray.astype(outProfile['dtype'])) - outData.set_band_description(1,"TotalBadYears") - outData.set_band_description(2,"MostRecentBad") + with rasterio.open(outFile, "w", **outProfile) as outData: + outData.write(outArray.astype(outProfile["dtype"])) + outData.set_band_description(1, "TotalBadYears") + outData.set_band_description(2, "MostRecentBad") self.quality_summary = outArray - return(outArray.astype(idc.profile['dtype'])) - - def correct_evolution_idc(self, outfile='', badThreshold=2): - ''' Correct the WSF evolution dataset based on quality flags. This is done by changing - the WSF built date if the quality flag is worse than badThreshold. If it is worse, + return outArray.astype(idc.profile["dtype"]) + + def correct_evolution_idc(self, outfile="", badThreshold=2): + """Correct the WSF evolution dataset based on quality flags. This is done by changing + the WSF built date if the quality flag is worse than badThreshold. If it is worse, the cell is assigned the next date in the WSF quality flag that is of acceptable quality. - + INPUT [optional] outfile [string] - path to create output file with corrected evolution dataset [optional] badThreshold [number] - values above this will be considered low quality RETURNS [numpy array] - np array of same size as the input evolution image. 
- ''' + """ inEvolution = rasterio.open(self.evolution) inIDC = rasterio.open(self.evolution_idc) inE = inEvolution.read() @@ -78,8 +76,8 @@ def correct_evolution_idc(self, outfile='', badThreshold=2): outArray = np.zeros(inE.shape) for rIdx in range(0, inE.shape[2]): for cIdx in range(0, inE.shape[1]): - curE = inE[0,cIdx, rIdx] - curD = inD[:,cIdx, rIdx] + curE = inE[0, cIdx, rIdx] + curD = inD[:, cIdx, rIdx] if curE > 0: qualityIdx = curE - 1985 if curD[qualityIdx] > badThreshold: @@ -88,24 +86,24 @@ def correct_evolution_idc(self, outfile='', badThreshold=2): if curD[xIdx + 1] <= badThreshold: break curE = 1985 + xIdx - inE[0,cIdx,rIdx] = curE - if outfile != '': + inE[0, cIdx, rIdx] = curE + if outfile != "": # Write the corrected evolution dataset to file - outProfile = inEvolution.profile.copy() - with rasterio.open(outfile, 'w', **outProfile) as outData: - outData.write(inE.astype(outProfile['dtype'])) - return(inE) - + outProfile = inEvolution.profile.copy() + with rasterio.open(outfile, "w", **outProfile) as outData: + outData.write(inE.astype(outProfile["dtype"])) + return inE + def generate_evolution_plot(self, dataset="normal"): - ''' generate a dataframe for matplotlib plotting - + """generate a dataframe for matplotlib plotting + INPUT - [optional] dataset [pandas dataframe] - provide a dataset to analyze, + [optional] dataset [pandas dataframe] - provide a dataset to analyze, if you don't want to read in the evolution dataset - + RETURNS [geopandas dataframe] - + EXAMPLE wsfD = wsfdata.wsf_dataset(images_list) basePlot = wsfD.generate_evolution_plot() @@ -113,11 +111,11 @@ def generate_evolution_plot(self, dataset="normal"): correctedRes = wsfD.correct_evolution_idc(badThreshold=3) correctedPlot = wsfD.generate_evolution_plot(dataset=correctedRes) basePlot['corrected'] = correctedPlot['cumBuilt'] - + basePlot.drop('built', axis=1).plot() - + basePlot['cumBuilt'].plot() - ''' + """ if dataset == "normal": evolution = rasterio.open(self.evolution) inD = evolution.read() @@ -125,39 +123,36 @@ def generate_evolution_plot(self, dataset="normal"): inD = dataset unique, counts = np.unique(inD, return_counts=True) res = pd.DataFrame(counts, unique).drop(0) - res.columns=['built'] - missingDates = [x for x in range(1985, 2015) if not x in res.index] + res.columns = ["built"] + missingDates = [x for x in range(1985, 2015) if x not in res.index] for x in missingDates: res.loc[x] = 0 res = res.sort_index() - res['cumBuilt'] = res['built'].cumsum() - return(res) - + res["cumBuilt"] = res["built"].cumsum() + return res + def summarize_idc(self, thresh): - ''' Summarize IDC by measuring what percentage of the built cells in every + """Summarize IDC by measuring what percentage of the built cells in every year are above the defined quality threshold - + INPUT thresh [number] - value from 1-6 defining the acceptable quality threshold, every value below or equal to that threshold (better than that value) are considered acceptable - + RETURNS [numpy array] - fraction of built cells that are of acceptable quality per year. 
HOPEFULLY the reutrning array should be 31 records long - ''' + """ idc = rasterio.open(self.evolution_idc).read() evolution = rasterio.open(self.evolution).read() - + evolution_mask = idc.copy() * 0 - evolution_mask[:,:,:] = evolution[0,:,:] == 0 - evolution_masked = np.ma.array(idc, mask=evolution_mask[np.newaxis,:,:]) - - totalCells = (evolution[0,:,:] > 0).sum() + evolution_mask[:, :, :] = evolution[0, :, :] == 0 + evolution_masked = np.ma.array(idc, mask=evolution_mask[np.newaxis, :, :]) + + totalCells = (evolution[0, :, :] > 0).sum() allRes = [] for idx in range(0, evolution_masked.shape[0]): - allRes.append((evolution_masked[idx,:,:] > thresh).sum() / totalCells) - - return(allRes) - - - \ No newline at end of file + allRes.append((evolution_masked[idx, :, :] > thresh).sum() / totalCells) + + return allRes diff --git a/notebooks/Tutorials/LEI_Example.ipynb b/notebooks/Tutorials/LEI_Example.ipynb index 03d9d00..2dcb56c 100644 --- a/notebooks/Tutorials/LEI_Example.ipynb +++ b/notebooks/Tutorials/LEI_Example.ipynb @@ -16,22 +16,19 @@ "metadata": {}, "outputs": [], "source": [ - "import os, sys, importlib\n", + "import os\n", + "import sys\n", + "import importlib\n", "import rasterio\n", "import rasterio.features\n", "\n", "import geopandas as gpd\n", "import pandas as pd\n", - "import numpy as np\n", "\n", - "from shapely.geometry import shape, GeometryCollection\n", - "from shapely.wkt import loads\n", - "from matplotlib import pyplot\n", - "from rasterio.plot import show, show_hist\n", - "sys.path.append('../../../gostrocks/src/')\n", + "sys.path.append(\"../../../gostrocks/src/\")\n", "import GOSTRocks.rasterMisc as rMisc\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../../src/GOST_Urban\")\n", "import LEI as lei\n", "\n", @@ -107,13 +104,12 @@ "if not os.path.exists(input_ghsl):\n", " # clip from global GHSL file\n", " ghsl_vrt = \"/home/public/Data/GLOBAL/GHSL/ghsl.vrt\"\n", - " aoi = os.path.join(input_folder, 'Grand_lome_dissolve.shp')\n", + " aoi = os.path.join(input_folder, \"Grand_lome_dissolve.shp\")\n", " in_ghsl = rasterio.open(ghsl_vrt)\n", " inA = gpd.read_file(aoi)\n", " if not inA.crs == in_ghsl.crs:\n", " inA = inA.to_crs(in_ghsl.crs)\n", - " rMisc.clipRaster(in_ghsl, inA, input_ghsl)\n", - " " + " rMisc.clipRaster(in_ghsl, inA, input_ghsl)" ] }, { @@ -168,9 +164,9 @@ "outputs": [], "source": [ "# This calculates the change from 1990 and 2000\n", - "lei_raw = lei.calculate_LEI(input_ghsl, old_list = [5,6], new_list=[4])\n", - "lei_90_00 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total'])\n", - "lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] \n", + "lei_raw = lei.calculate_LEI(input_ghsl, old_list=[5, 6], new_list=[4])\n", + "lei_90_00 = pd.DataFrame(lei_raw, columns=[\"geometry\", \"old\", \"total\"])\n", + "lei_90_00[\"LEI\"] = lei_90_00[\"old\"] / lei_90_00[\"total\"]\n", "lei_90_00.head()" ] }, @@ -181,9 +177,9 @@ "outputs": [], "source": [ "# This calculates the change from 2000 and 2014\n", - "lei_raw = lei.calculate_LEI(input_ghsl, old_list = [4,5,6], new_list=[3])\n", - "lei_00_14 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total'])\n", - "lei_00_14['LEI'] = lei_00_14['old'] / lei_00_14['total'] \n", + "lei_raw = lei.calculate_LEI(input_ghsl, old_list=[4, 5, 6], new_list=[3])\n", + "lei_00_14 = pd.DataFrame(lei_raw, columns=[\"geometry\", \"old\", \"total\"])\n", + "lei_00_14[\"LEI\"] = lei_00_14[\"old\"] / lei_00_14[\"total\"]\n", "lei_00_14.head()" ] }, @@ -194,8 +190,8 @@ 
"outputs": [], "source": [ "importlib.reload(lei)\n", - "#Calculate summaries of lei\n", - "lei.summarize_LEI(lei_90_00, leap_val=0.05, exp_val=0.75)/1000000" + "# Calculate summaries of lei\n", + "lei.summarize_LEI(lei_90_00, leap_val=0.05, exp_val=0.75) / 1000000" ] }, { @@ -204,8 +200,8 @@ "metadata": {}, "outputs": [], "source": [ - "#Calculate summaries of lei\n", - "lei.summarize_LEI(lei_00_14, leap_val=0.05, exp_val=0.75)/1000000" + "# Calculate summaries of lei\n", + "lei.summarize_LEI(lei_00_14, leap_val=0.05, exp_val=0.75) / 1000000" ] }, { @@ -245,18 +241,18 @@ "if not os.path.exists(input_WSF_proj):\n", " # clip from global GHSL file\n", " wsf = \"/home/public/Data/GLOBAL/WSF/Togo/Togo_WSF_evolution.tif\"\n", - " aoi = os.path.join(input_folder, 'Grand_lome_dissolve.shp')\n", + " aoi = os.path.join(input_folder, \"Grand_lome_dissolve.shp\")\n", " in_ghsl = rasterio.open(wsf)\n", " inA = gpd.read_file(aoi)\n", " if not inA.crs == in_ghsl.crs:\n", " inA = inA.to_crs(in_ghsl.crs)\n", " rMisc.clipRaster(in_ghsl, inA, input_WSF)\n", - " # WSF is stored in WGS84, making buffering and area calculations impossible. \n", + " # WSF is stored in WGS84, making buffering and area calculations impossible.\n", " # Instead we will standardize to the GHSL raster\n", " ghsl_raster = os.path.join(input_folder, \"GHSL.tif\")\n", " in_wsf = rasterio.open(input_WSF)\n", " in_ghsl = rasterio.open(ghsl_raster)\n", - " rMisc.standardizeInputRasters(in_wsf, in_ghsl, input_WSF_proj, 'C')" + " rMisc.standardizeInputRasters(in_wsf, in_ghsl, input_WSF_proj, \"C\")" ] }, { @@ -347,9 +343,11 @@ ], "source": [ "# This calculates the change from 1990 and 2000\n", - "lei_raw = lei.calculate_LEI(input_WSF_proj, old_list = list(range(1985,1991)), new_list=list(range(1991,2001)))\n", - "lei_90_00 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total'])\n", - "lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] \n", + "lei_raw = lei.calculate_LEI(\n", + " input_WSF_proj, old_list=list(range(1985, 1991)), new_list=list(range(1991, 2001))\n", + ")\n", + "lei_90_00 = pd.DataFrame(lei_raw, columns=[\"geometry\", \"old\", \"total\"])\n", + "lei_90_00[\"LEI\"] = lei_90_00[\"old\"] / lei_90_00[\"total\"]\n", "lei_90_00.head()" ] }, @@ -441,9 +439,11 @@ ], "source": [ "# This calculates the change from 2000 and 2015\n", - "lei_raw = lei.calculate_LEI(input_WSF_proj, old_list = list(range(1985,2001)), new_list=list(range(2001,2016)))\n", - "lei_00_14 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total'])\n", - "lei_00_14['LEI'] = lei_00_14['old'] / lei_00_14['total'] \n", + "lei_raw = lei.calculate_LEI(\n", + " input_WSF_proj, old_list=list(range(1985, 2001)), new_list=list(range(2001, 2016))\n", + ")\n", + "lei_00_14 = pd.DataFrame(lei_raw, columns=[\"geometry\", \"old\", \"total\"])\n", + "lei_00_14[\"LEI\"] = lei_00_14[\"old\"] / lei_00_14[\"total\"]\n", "lei_00_14.head()" ] }, @@ -468,8 +468,8 @@ } ], "source": [ - "#Calculate summaries of lei\n", - "lei.summarize_LEI(lei_90_00, leap_val=0.001, exp_val=0.5)/1000000" + "# Calculate summaries of lei\n", + "lei.summarize_LEI(lei_90_00, leap_val=0.001, exp_val=0.5) / 1000000" ] }, { @@ -493,8 +493,8 @@ } ], "source": [ - "#Calculate summaries of lei\n", - "lei.summarize_LEI(lei_00_14, leap_val=0.001, exp_val=0.5)/1000000" + "# Calculate summaries of lei\n", + "lei.summarize_LEI(lei_00_14, leap_val=0.001, exp_val=0.5) / 1000000" ] }, { diff --git a/notebooks/Tutorials/Untitled.ipynb b/notebooks/Tutorials/Untitled.ipynb index d024997..dda80c0 100644 --- 
a/notebooks/Tutorials/Untitled.ipynb +++ b/notebooks/Tutorials/Untitled.ipynb @@ -26,14 +26,10 @@ } ], "source": [ - "import sys, os, importlib\n", - "import rasterio\n", + "import sys\n", "\n", - "import geopandas as gpd\n", "\n", - "sys.path.append(\"../../../\")\n", - "\n", - "import src.GOST_Urban.UrbanRaster as urban" + "sys.path.append(\"../../../\")" ] }, { diff --git a/notebooks/Tutorials/UrbanAreas_tutorials.ipynb b/notebooks/Tutorials/UrbanAreas_tutorials.ipynb index 4898c05..6abbacd 100644 --- a/notebooks/Tutorials/UrbanAreas_tutorials.ipynb +++ b/notebooks/Tutorials/UrbanAreas_tutorials.ipynb @@ -13,12 +13,13 @@ "metadata": {}, "outputs": [], "source": [ - "import sys, os, importlib\n", + "import sys\n", + "import os\n", "import rasterio\n", "\n", "import geopandas as gpd\n", "\n", - "import GOST_Urban.UrbanRaster as urban\n" + "import GOST_Urban.UrbanRaster as urban" ] }, { @@ -46,15 +47,16 @@ "\n", "inAOI = gpd.read_file(aoi_file)\n", "\n", - "# Shouldn't need to execute this unless you change your AOI; \n", + "# Shouldn't need to execute this unless you change your AOI;\n", "# you will need to find a global population to extract from\n", "if not os.path.exists(pop_file):\n", " sys.path.append(\"../../../gostrocks/src\")\n", " import GOSTRocks.rasterMisc as rMisc\n", + "\n", " global_population = \"/path/to/global/pop_layer/ppp_2020_1km_Aggregated.tif\"\n", " inR = rasterio.open(global_population)\n", " rMisc.clipRaster(inR, inAOI, pop_file)\n", - " \n", + "\n", "inR = rasterio.open(pop_file)" ] }, @@ -66,7 +68,7 @@ "source": [ "# Initiate the urban calculator\n", "urban_calculator = urban.urbanGriddedPop(inR)\n", - "#urban_calculator.calculateUrban?" + "# urban_calculator.calculateUrban?" ] }, { @@ -76,9 +78,9 @@ "outputs": [], "source": [ "# Extract the urban extents (minimum density 300/km2, minimum total population 5000)\n", - "urban_extents = urban_calculator.calculateUrban(densVal=300, totalPopThresh=5000, \n", - " smooth=False, queen=False,\n", - " verbose=True)" + "urban_extents = urban_calculator.calculateUrban(\n", + " densVal=300, totalPopThresh=5000, smooth=False, queen=False, verbose=True\n", + ")" ] }, { @@ -111,9 +113,13 @@ ], "source": [ "# Extract the high density urban extents (minimum density 1500/km2, minimum total population 50000)\n", - "hd_urban_extents = urban_calculator.calculateUrban(densVal=1500, totalPopThresh=50000, \n", - " smooth=True, queen=True,# high density extents use queen's case contiguity, and \n", - " verbose=True) #High density extents have hole smoothing applied." + "hd_urban_extents = urban_calculator.calculateUrban(\n", + " densVal=1500,\n", + " totalPopThresh=50000,\n", + " smooth=True,\n", + " queen=True, # high density extents use queen's case contiguity, and\n", + " verbose=True,\n", + ") # High density extents have hole smoothing applied." 
] }, { diff --git a/notebooks/Tutorials/UrbanRural_extents_from_griddedPop.ipynb b/notebooks/Tutorials/UrbanRural_extents_from_griddedPop.ipynb index 666e9ee..3823e98 100644 --- a/notebooks/Tutorials/UrbanRural_extents_from_griddedPop.ipynb +++ b/notebooks/Tutorials/UrbanRural_extents_from_griddedPop.ipynb @@ -26,21 +26,23 @@ } ], "source": [ - "import sys, os, importlib, json\n", - "import rasterio, geojson\n", + "import os\n", + "import json\n", + "import rasterio\n", + "import geojson\n", "\n", "import pandas as pd\n", "import geopandas as gpd\n", "import numpy as np\n", "\n", - "from shapely.geometry import shape, Polygon\n", + "from shapely.geometry import shape\n", "\n", - "#Import raster helpers\n", - "#sys.path.append(\"../../../gostrocks/src\")\n", - "#import GOSTRocks.rasterMisc as rMisc\n", - "#from GOSTRocks.misc import tPrint\n", + "# Import raster helpers\n", + "# sys.path.append(\"../../../gostrocks/src\")\n", + "# import GOSTRocks.rasterMisc as rMisc\n", + "# from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "import GOST_Urban.UrbanRaster as urban" ] }, @@ -56,17 +58,19 @@ " os.makedirs(out_folder)\n", "pop_file = os.path.join(out_folder, \"WP_POP.tif\")\n", "urban_file = os.path.join(out_folder, \"URBAN.tif\")\n", - " \n", + "\n", "global_bounds = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", - "global_pop = \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + "global_pop = (\n", + " \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + ")\n", "\n", "inG = gpd.read_file(global_bounds)\n", - "selG = inG.loc[inG['ISO3'] == iso3]\n", + "selG = inG.loc[inG[\"ISO3\"] == iso3]\n", "inPop_raster = rasterio.open(global_pop)\n", "\n", "if selG.crs != inPop_raster.crs:\n", " selG = selG.to_crs(inPop_raster.crs)\n", - " \n", + "\n", "if not os.path.exists(pop_file):\n", " rMisc.clipRaster(inPop_raster, selG, pop_file)" ] @@ -121,7 +125,9 @@ "inD = inR.read()\n", "all_features = []\n", "# create vector results\n", - "for cShape, value in features.shapes(inD, transform=urban_raster.transform, connectivity=8):\n", + "for cShape, value in features.shapes(\n", + " inD, transform=urban_raster.transform, connectivity=8\n", + "):\n", " if value > 0:\n", " all_features.append([value, shape(geojson.loads(json.dumps(cShape)))])" ] @@ -132,10 +138,14 @@ "metadata": {}, "outputs": [], "source": [ - "geoms = gpd.GeoDataFrame(pd.DataFrame(all_features, columns=['VALUE','geometry']), geometry='geometry', crs=urban_raster.crs)\n", - "geoms['area'] = geoms['geometry'].apply(lambda x: x.area)\n", - "#Limit rural areas to those larger than 2 pixels\n", - "geoms = geoms.loc[geoms['area'] >= (urban_raster.res[0] * urban_raster.res[1]) * 2]" + "geoms = gpd.GeoDataFrame(\n", + " pd.DataFrame(all_features, columns=[\"VALUE\", \"geometry\"]),\n", + " geometry=\"geometry\",\n", + " crs=urban_raster.crs,\n", + ")\n", + "geoms[\"area\"] = geoms[\"geometry\"].apply(lambda x: x.area)\n", + "# Limit rural areas to those larger than 2 pixels\n", + "geoms = geoms.loc[geoms[\"area\"] >= (urban_raster.res[0] * urban_raster.res[1]) * 2]" ] }, { diff --git a/notebooks/URB_DECAT_ExtractByISO3.ipynb b/notebooks/URB_DECAT_ExtractByISO3.ipynb index aeba79d..3553398 100644 --- a/notebooks/URB_DECAT_ExtractByISO3.ipynb +++ b/notebooks/URB_DECAT_ExtractByISO3.ipynb @@ -24,24 +24,20 @@ } ], "source": [ - "import sys, os, importlib, json\n", - "import rasterio, geojson\n", + 
"import sys\n", + "import os\n", + "import rasterio\n", "\n", - "import pandas as pd\n", "import geopandas as gpd\n", - "import numpy as np\n", "\n", - "from shapely.geometry import shape, Polygon\n", "\n", - "#Import raster helpers\n", + "# Import raster helpers\n", "sys.path.append(\"../../gostrocks/src\")\n", "import GOSTRocks.rasterMisc as rMisc\n", - "from GOSTRocks.misc import tPrint\n", "\n", - "#Import GOST urban functions\n", + "# Import GOST urban functions\n", "sys.path.append(\"../\")\n", - "import src.UrbanRaster as urban\n", - "import src.urban_helper as helper\n" + "import src.UrbanRaster as urban" ] }, { @@ -58,17 +54,19 @@ "urban_ext = os.path.join(out_folder, \"URBAN_Extents.shp\")\n", "hd_urban_ext = os.path.join(out_folder, \"HD_URBAN_Extents.shp\")\n", "pop_file = os.path.join(out_folder, \"population_2020.tif\")\n", - " \n", + "\n", "global_bounds = \"/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp\"\n", - "global_pop = \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + "global_pop = (\n", + " \"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif\"\n", + ")\n", "\n", "inG = gpd.read_file(global_bounds)\n", - "selG = inG.loc[inG['ISO3'] == iso3]\n", + "selG = inG.loc[inG[\"ISO3\"] == iso3]\n", "inPop_raster = rasterio.open(global_pop)\n", "\n", "if selG.crs != inPop_raster.crs:\n", " selG = selG.to_crs(inPop_raster.crs)\n", - " \n", + "\n", "if not os.path.exists(pop_file):\n", " rMisc.clipRaster(inPop_raster, selG, pop_file)" ] @@ -91,9 +89,9 @@ "source": [ "curR = rasterio.open(pop_file)\n", "urban_calculator = urban.urbanGriddedPop(curR)\n", - "urban_extents = urban_calculator.calculateUrban(densVal=300, totalPopThresh=5000, \n", - " smooth=False, queen=False,\n", - " verbose=True)\n", + "urban_extents = urban_calculator.calculateUrban(\n", + " densVal=300, totalPopThresh=5000, smooth=False, queen=False, verbose=True\n", + ")\n", "urban_extents.to_file(urban_ext)" ] }, @@ -121,9 +119,9 @@ } ], "source": [ - "urban_extents = urban_calculator.calculateUrban(densVal=1500, totalPopThresh=50000, \n", - " smooth=True, queen=True,\n", - " verbose=True)\n", + "urban_extents = urban_calculator.calculateUrban(\n", + " densVal=1500, totalPopThresh=50000, smooth=True, queen=True, verbose=True\n", + ")\n", "urban_extents.to_file(hd_urban_ext)" ] }, @@ -208,14 +206,16 @@ "source": [ "extents = gpd.read_file(urban_ext)\n", "\n", - "extents['NAME'] = ''\n", - "extents['COUNTRY'] = ''\n", + "extents[\"NAME\"] = \"\"\n", + "extents[\"COUNTRY\"] = \"\"\n", "for idx, row in extents.iterrows():\n", - " cur_city = reverse_geocode.search([(row['geometry'].centroid.y, row['geometry'].centroid.x)])\n", - " extents.loc[idx, 'COUNTRY'] = cur_city[0]['country']\n", - " extents.loc[idx, 'NAME'] = cur_city[0]['city']\n", - " #print(idx)\n", - "extents.groupby('COUNTRY').sum()" + " cur_city = reverse_geocode.search(\n", + " [(row[\"geometry\"].centroid.y, row[\"geometry\"].centroid.x)]\n", + " )\n", + " extents.loc[idx, \"COUNTRY\"] = cur_city[0][\"country\"]\n", + " extents.loc[idx, \"NAME\"] = cur_city[0][\"city\"]\n", + " # print(idx)\n", + "extents.groupby(\"COUNTRY\").sum()" ] }, { diff --git a/notebooks/Untitled.ipynb b/notebooks/Untitled.ipynb index d791561..67d0c5b 100644 --- a/notebooks/Untitled.ipynb +++ b/notebooks/Untitled.ipynb @@ -27,19 +27,11 @@ } ], "source": [ - "import sys, os, importlib, json\n", - "import rasterio, geojson\n", + "import sys\n", "\n", - "import pandas as pd\n", - "import geopandas as gpd\n", 
- "import numpy as np\n", "\n", - "from shapely.geometry import shape, Polygon\n", - "\n", - "#Import raster helpers\n", - "sys.path.append(\"../../gostrocks/src\")\n", - "import GOSTRocks.rasterMisc as rMisc\n", - "from GOSTRocks.misc import tPrint" + "# Import raster helpers\n", + "sys.path.append(\"../../gostrocks/src\")" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 82beae1..644e22e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,5 +57,5 @@ skip = 'docs/_build,docs/references.bib,__pycache__,*.png,*.gz,*.whl' ignore-regex = '^\s*"image\/png":\s.*' ignore-words-list = "gost," -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "numpy" diff --git a/src/GOSTurban/LEI.py b/src/GOSTurban/LEI.py index 3a9509f..7d86c9e 100755 --- a/src/GOSTurban/LEI.py +++ b/src/GOSTurban/LEI.py @@ -1,76 +1,101 @@ -import os, sys, logging, multiprocessing +import multiprocessing -import geojson, rasterio +import rasterio import rasterio.features import pandas as pd import numpy as np from GOSTRocks.misc import tPrint -from shapely.geometry import shape, GeometryCollection +from shapely.geometry import shape from shapely.wkt import loads -def mp_lei(curRxx, transformxx, idx_xx, old_list=[4,5,6], new_list=[3], buffer_dist=300): - ''' calculate and summarize LEI for curRxx, designed for use in multiprocessing function ''' - curRes = calculate_LEI(curRxx, transform=transformxx, old_list=old_list, new_list=new_list, buffer_dist=buffer_dist) - xx = pd.DataFrame(curRes, columns=['geometry', 'old', 'total']) - xx['LEI'] = xx['old'] / xx['total'] + +def mp_lei( + curRxx, transformxx, idx_xx, old_list=[4, 5, 6], new_list=[3], buffer_dist=300 +): + """calculate and summarize LEI for curRxx, designed for use in multiprocessing function""" + curRes = calculate_LEI( + curRxx, + transform=transformxx, + old_list=old_list, + new_list=new_list, + buffer_dist=buffer_dist, + ) + xx = pd.DataFrame(curRes, columns=["geometry", "old", "total"]) + xx["LEI"] = xx["old"] / xx["total"] final = summarize_LEI(xx) - final['idx'] = idx_xx - return(final) + final["idx"] = idx_xx + return final + + +def lei_from_feature( + inD, + inR, + old_list=[4, 5, 6], + new_list=[3], + buffer_dist=300, + transform="", + nCores=0, + measure_crs=None, + idx_col=None, +): + """Calculate LEI for each feature in inD, leveraging multi-processing, based on the built area in inR -def lei_from_feature(inD, inR, old_list = [4,5,6], new_list=[3], buffer_dist=300, transform='', nCores=0, measure_crs=None, idx_col=None): - ''' Calculate LEI for each feature in inD, leveraging multi-processing, based on the built area in inR - INPUT inD [geopandas] inR [rasterio] [optional] nCores [int] - number of cores to use in multi-processing [optional] measure_crs [string] - string to convert all data to a CRS where distance and area measurements don't suck ie - "ESRI:54009" ... 
see calculate_LEI for remaining arguments - ''' + """ if inD.crs != inR.crs: inD = inD.to_crs(inR.crs) - - if not measure_crs is None: + + if measure_crs is not None: measureD = inD.to_crs(measure_crs) - + lei_results = {} # For grid cells, extract the GHSL and calculate in_vals = [] - tPrint('***** Preparing values for multiprocessing') + tPrint("***** Preparing values for multiprocessing") for idx, row in inD.iterrows(): if idx % 100 == 0: - tPrint(f'{idx} of {inD.shape[0]}: {len(in_vals)}') - ul = inR.index(*row['geometry'].bounds[0:2]) - lr = inR.index(*row['geometry'].bounds[2:4]) + tPrint(f"{idx} of {inD.shape[0]}: {len(in_vals)}") + ul = inR.index(*row["geometry"].bounds[0:2]) + lr = inR.index(*row["geometry"].bounds[2:4]) # read the subset of the data into a numpy array - window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1))) + window = ((float(lr[0]), float(ul[0] + 1)), (float(ul[1]), float(lr[1] + 1))) curR = inR.read(1, window=window) if (np.isin(curR, old_list).sum() > 2) & (np.isin(curR, new_list).sum() > 2): if measure_crs is None: - transform = rasterio.transform.from_bounds(*row['geometry'].bounds, curR.shape[0], curR.shape[1]) + transform = rasterio.transform.from_bounds( + *row["geometry"].bounds, curR.shape[0], curR.shape[1] + ) else: - transform = rasterio.transform.from_bounds(*measureD.loc[idx,'geometry'].bounds, curR.shape[0], curR.shape[1]) + transform = rasterio.transform.from_bounds( + *measureD.loc[idx, "geometry"].bounds, curR.shape[0], curR.shape[1] + ) cur_idx = idx if idx_col: cur_idx = row[idx_col] in_vals.append([curR, transform, cur_idx, old_list, new_list, buffer_dist]) - + if nCores == 0: nCores = multiprocessing.cpu_count() - tPrint('***** starting multiprocessing') + tPrint("***** starting multiprocessing") with multiprocessing.Pool(nCores) as pool: res = pool.starmap(mp_lei, in_vals) - - res = pd.DataFrame(res) + + res = pd.DataFrame(res) res = res.reset_index() - res.index = res['idx'] - #res.drop(['idx'], axis=1, inplace=True) - return(res) + res.index = res["idx"] + # res.drop(['idx'], axis=1, inplace=True) + return res + -def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=''): - """ Calculate Landscape Expansion Index (LEI) through comparison of categorical values in a single raster dataset. +def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=""): + """Calculate Landscape Expansion Index (LEI) through comparison of categorical values in a single raster dataset. :param inputGHSL: Path to a geotiff or a rasterio object, or a numpy array containing the categorical data used to calculate LEI @@ -83,7 +108,7 @@ def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=''): :type buffer_dist: int, optional :param transform: rasterio transformation object. Required if inputGHSL is a numpy array, defaults to '' :type transform: str, optional - :returns: individual vectors of new built areas with LEI results. Each item is a single new built feature with three columns: + :returns: individual vectors of new built areas with LEI results. Each item is a single new built feature with three columns: 1. geometry of the new built area feature 2. number of pixels in new built area donut from old built area 3. 
area of new built area buffer @@ -92,8 +117,8 @@ def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=''): # This calculates the LEI between 1990 and 2000 in the categorical GHSL lei_raw = calculate_LEI(input_ghsl, old_list = [5,6], new_list=[4]) lei_90_00 = pd.DataFrame(lei_raw, columns=['geometry', 'old', 'total']) - lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] - lei_90_00.head() + lei_90_00['LEI'] = lei_90_00['old'] / lei_90_00['total'] + lei_90_00.head() """ if isinstance(inputGHSL, str): inRaster = rasterio.open(inputGHSL).read() @@ -105,15 +130,17 @@ def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=''): inR = inputGHSL if len(inR.shape) == 2: inR = inR.reshape(1, inR.shape[0], inR.shape[1]) - newR = (np.isin(inR, new_list)).astype('int') - oldR = (np.isin(inR, old_list)).astype('int') + newR = (np.isin(inR, new_list)).astype("int") + oldR = (np.isin(inR, old_list)).astype("int") allVals = [] - for geom, value in rasterio.features.shapes(newR.astype('uint8'), transform=transform): + for geom, value in rasterio.features.shapes( + newR.astype("uint8"), transform=transform + ): if value == 1: # Convert the geom to a shape and buffer by 300 metres curShape = shape(geom) bufferArea = curShape.buffer(buffer_dist) - #Clip out the original shape to leave just the donut + # Clip out the original shape to leave just the donut try: donutArea = bufferArea.difference(curShape) except: @@ -122,19 +149,23 @@ def calculate_LEI(inputGHSL, old_list, new_list, buffer_dist=300, transform=''): donutArea = bufferArea.difference(curShape) # Rasterize donut shape shapes = [(donutArea, 1)] - burned = rasterio.features.rasterize(shapes=shapes, fill=0, - out_shape=(oldR.shape[1], oldR.shape[2]), - transform=transform) + burned = rasterio.features.rasterize( + shapes=shapes, + fill=0, + out_shape=(oldR.shape[1], oldR.shape[2]), + transform=transform, + ) # Multiply the new raster by the old urban data to get the total # amount of old area in the buffer around the new urban area - oldArea = (oldR[0,:,:] * burned).sum() + oldArea = (oldR[0, :, :] * burned).sum() totalArea = burned.sum() allVals.append([curShape, oldArea, totalArea]) - - return(allVals) - -def summarize_LEI(in_file, leap_val=0.05, exp_val=0.9): - """ Summarize the LEI results produced by self.calculate_LEI + + return allVals + + +def summarize_LEI(in_file, leap_val=0.05, exp_val=0.9): + """Summarize the LEI results produced by self.calculate_LEI :param in_file: LEI results generated from calculate_LEI above :type in_file: string path to csv file or pandas dataframe @@ -144,39 +175,40 @@ def summarize_LEI(in_file, leap_val=0.05, exp_val=0.9): :type exp_val: float, optional """ - ''' - - in_file [string path or datafrane]: - leap_val [float]: - exp_val [float]: - + """ + + in_file [string path or datafrane]: + leap_val [float]: + exp_val [float]: + returns [pandas groupby row] - + example - + for res_file in all_results_files: res = summarize_LEI(res_file) baseName = os.path.basename(os.path.dirname(res_file)) summarized_results[baseName] = res - + all_results = pd.DataFrame(summarized_results).transpose() - ''' + """ if isinstance(in_file, str): res = pd.read_csv(in_file) - res['area'] = res['geometry'].apply(lambda x: loads(x).area) + res["area"] = res["geometry"].apply(lambda x: loads(x).area) else: res = in_file - if not 'area' in res.columns: - res['area'] = res['geometry'].apply(lambda x: x.area) - + if "area" not in res.columns: + res["area"] = res["geometry"].apply(lambda x: 
x.area) + def calculate_LEI(val, leap_val, exp_val): if val <= leap_val: - return('Leapfrog') + return "Leapfrog" elif val < exp_val: - return('Expansion') + return "Expansion" else: - return('Infill') - res['class'] = res['LEI'].apply(lambda x: calculate_LEI(x, leap_val, exp_val)) - xx = res.groupby('class') - return(xx.sum()['area']) + return "Infill" + + res["class"] = res["LEI"].apply(lambda x: calculate_LEI(x, leap_val, exp_val)) + xx = res.groupby("class") + return xx.sum()["area"] diff --git a/src/GOSTurban/UrbanRaster.py b/src/GOSTurban/UrbanRaster.py index 2720f84..9a05029 100644 --- a/src/GOSTurban/UrbanRaster.py +++ b/src/GOSTurban/UrbanRaster.py @@ -1,11 +1,13 @@ -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- # Calculate urban areas from gridded population data # Benjamin P Stewart, April 2019 # Purpose is to create high density urban clusters and urban cluster above minimum # density and total population thresholds -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------------- -import os, sys, logging, geojson, json, time +import geojson +import json +import time import rasterio import geopandas as gpd @@ -19,61 +21,81 @@ from rasterio import features from rasterio.features import rasterize from shapely.geometry import shape, Polygon -from geopy.geocoders import Nominatim, GeoNames +from geopy.geocoders import Nominatim + +"""prints the time along with the message""" + -'''prints the time along with the message''' def tPrint(s): print("%s\t%s" % (time.strftime("%H:%M:%S"), s)) - + + def geocode_cities(urban_extents): - ''' Generate names for polygon urban extents - - :param urban_extents: geopandas dataframe of polygons to be named. Need to be in epsg:4326 - ''' + """Generate names for polygon urban extents + + :param urban_extents: geopandas dataframe of polygons to be named. Need to be in epsg:4326 + """ geolocator = Nominatim(user_agent="new_app") all_res = [] for idx, row in urban_extents.iterrows(): - res = geolocator.reverse(query = (row['geometry'].centroid.y, row['geometry'].centroid.x), language = "en", zoom = 10) + res = geolocator.reverse( + query=(row["geometry"].centroid.y, row["geometry"].centroid.x), + language="en", + zoom=10, + ) all_res.append(res) - - urban_extents['City'] = '' - urban_extents['State'] = '' - urban_extents['Country'] = '' + + urban_extents["City"] = "" + urban_extents["State"] = "" + urban_extents["Country"] = "" for idx, row in urban_extents.iterrows(): res = all_res[idx] try: - urban_extents.loc[idx,'City'] = res.raw['address']['city'] + urban_extents.loc[idx, "City"] = res.raw["address"]["city"] except: break try: - urban_extents.loc[idx,'State'] = res.raw['address']['state'] + urban_extents.loc[idx, "State"] = res.raw["address"]["state"] except: pass try: - urban_extents.loc[idx,'Country'] = res.raw['address']['country'] + urban_extents.loc[idx, "Country"] = res.raw["address"]["country"] except: pass - return(urban_extents) - - + return urban_extents + + class urbanGriddedPop(object): def __init__(self, inRaster): """ Create urban definitions using gridded population data. 
- - :param inRaster: string or rasterio object representing gridded population data + + :param inRaster: string or rasterio object representing gridded population data """ if type(inRaster) == str: self.inR = rasterio.open(inRaster) elif isinstance(inRaster, rasterio.DatasetReader): self.inR = inRaster else: - raise(ValueError("Input raster dataset must be a file path or a rasterio object")) - - def calculateDegurba(self, urbDens=300, hdDens=1500, urbThresh=5000, hdThresh=50000, minPopThresh=50, - out_raster = '', print_message='', verbose=False): - ''' Calculate complete DEGURBA classification based on gridded population data + raise ( + ValueError( + "Input raster dataset must be a file path or a rasterio object" + ) + ) + + def calculateDegurba( + self, + urbDens=300, + hdDens=1500, + urbThresh=5000, + hdThresh=50000, + minPopThresh=50, + out_raster="", + print_message="", + verbose=False, + ): + """Calculate complete DEGURBA classification based on gridded population data https://ghsl.jrc.ec.europa.eu/degurbaDefinitions.php CLASSES: (30) Urban centre - dens: 1500, totalpop: 50000, smoothed @@ -83,39 +105,42 @@ def calculateDegurba(self, urbDens=300, hdDens=1500, urbThresh=5000, hdThresh=50 (13) Rural, village - dens: >300, totalpop: >500, <5000 (12) Rural, dispersed, low density - dens: >50, (11) Rural, dispersed, low density - the rest that are populated - + :param urbDens: integer of the minimum density value to be counted as urban :param hdDens: integer of the minimum density value to be counted as high density :param urbThresh: integer minimum total settlement population to be considered urban - :param hdThresh: integer minimum total settlement population to be considered high density - ''' - + :param hdThresh: integer minimum total settlement population to be considered high density + """ + popRaster = self.inR data = popRaster.read() urban_raster = data * 0 - final_raster = data[0,:,:] * 0 + 11 - + final_raster = data[0, :, :] * 0 + 11 + urban_raster[np.where(data > hdDens)] = 30 idx = 0 urban_raster = urban_raster.astype("int16") allFeatures = [] - + if verbose: - tPrint(f'{print_message}: Smoothing Urban Clusters') + tPrint(f"{print_message}: Smoothing Urban Clusters") + # Smooth the HD urban clusters def modal(P): mode = stats.mode(P) - return(mode.mode[0]) + return mode.mode[0] + + smooth_urban = generic_filter(urban_raster[0, :, :], modal, (3, 3)) + yy = np.dstack([smooth_urban, urban_raster[0, :, :]]) + urban_raster[0, :, :] = np.amax(yy, axis=2) - smooth_urban = generic_filter(urban_raster[0,:,:], modal, (3,3)) - yy = np.dstack([smooth_urban, urban_raster[0,:,:]]) - urban_raster[0,:,:] = np.amax(yy, axis=2) - - #Analyze the high density shapes + # Analyze the high density shapes if verbose: - tPrint(f'{print_message}: extracting HD clusters') - - for cShape, value in features.shapes(urban_raster, transform=popRaster.transform): + tPrint(f"{print_message}: extracting HD clusters") + + for cShape, value in features.shapes( + urban_raster, transform=popRaster.transform + ): if idx % 1000 == 0 and verbose: tPrint("%s: Creating Shape %s" % (print_message, idx)) idx = idx + 1 @@ -125,10 +150,15 @@ def modal(P): xx = shape(cShape) xx = Polygon(xx.exterior) cShape = xx.__geo_interface__ - #If the shape is urban, claculate total pop - mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform) + # If the shape is urban, claculate total pop + mask = rasterize( + [(cShape, 0)], + out_shape=data[0, :, :].shape, + fill=1, + 
transform=popRaster.transform, + ) inData = np.ma.array(data=data, mask=mask.astype(bool)) - pop = np.nansum(inData) + pop = np.nansum(inData) val = 0 if pop > urbThresh: @@ -136,139 +166,180 @@ def modal(P): val = 23 if pop > hdThresh: val = 30 - - #Burn value into the final raster - mask = (mask^1) * val + + # Burn value into the final raster + mask = (mask ^ 1) * val yy = np.dstack([final_raster, mask]) final_raster = np.amax(yy, axis=2) - allFeatures.append([idx, pop, val, shape(geojson.loads(json.dumps(cShape)))]) - + allFeatures.append( + [idx, pop, val, shape(geojson.loads(json.dumps(cShape)))] + ) + HD_raster = final_raster - + urban_raster = data * 0 - final_raster = data[0,:,:] * 0 + 11 + final_raster = data[0, :, :] * 0 + 11 urban_raster[np.where(data > urbDens)] = 22 urban_raster = urban_raster.astype("int16") - #Analyze the high density shapes + # Analyze the high density shapes if verbose: - tPrint(f'{print_message}: extracting URBAN clusters') - - for cShape, value in features.shapes(urban_raster, transform=popRaster.transform, connectivity=8): + tPrint(f"{print_message}: extracting URBAN clusters") + + for cShape, value in features.shapes( + urban_raster, transform=popRaster.transform, connectivity=8 + ): if idx % 1000 == 0 and verbose: tPrint("%s: Creating Shape %s" % (print_message, idx)) idx = idx + 1 if value > 0: - #If the shape is urban, claculate total pop - mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform) + # If the shape is urban, claculate total pop + mask = rasterize( + [(cShape, 0)], + out_shape=data[0, :, :].shape, + fill=1, + transform=popRaster.transform, + ) inData = np.ma.array(data=data, mask=mask.astype(bool)) - pop = np.nansum(inData) + pop = np.nansum(inData) val = 0 if pop > 500: - val = 13 + val = 13 if pop > urbThresh: val = 21 - #Burn value into the final raster - mask = (mask^1) * val + # Burn value into the final raster + mask = (mask ^ 1) * val yy = np.dstack([final_raster, mask]) final_raster = np.amax(yy, axis=2) - allFeatures.append([idx, pop, val, shape(geojson.loads(json.dumps(cShape)))]) + allFeatures.append( + [idx, pop, val, shape(geojson.loads(json.dumps(cShape)))] + ) URB_raster = final_raster - - #Combine the urban layers + + # Combine the urban layers yy = np.dstack([HD_raster, URB_raster]) final_raster = np.amax(yy, axis=2) - final_raster[(final_raster == 11) & (data[0,:,:] > minPopThresh) & (data[0,:,:] < urbDens)] = 12 - + final_raster[ + (final_raster == 11) + & (data[0, :, :] > minPopThresh) + & (data[0, :, :] < urbDens) + ] = 12 + if verbose: - tPrint(f'{print_message}: performing distance calculations') - - #Identify the urban areas of class 22 by measuring distance to other features + tPrint(f"{print_message}: performing distance calculations") + + # Identify the urban areas of class 22 by measuring distance to other features feats = allFeatures - sel = pd.DataFrame(feats, columns=['ID','POP','CLASS','geometry']) + sel = pd.DataFrame(feats, columns=["ID", "POP", "CLASS", "geometry"]) sel = gpd.GeoDataFrame(sel, geometry="geometry", crs=self.inR.crs) - to_be = sel.loc[sel['CLASS'] == 21] - to_be = to_be.loc[to_be['POP'] < hdThresh] - distance = sel.loc[sel['CLASS'].isin([21,23])] - dist_shp = distance.sindex - + to_be = sel.loc[sel["CLASS"] == 21] + to_be = to_be.loc[to_be["POP"] < hdThresh] + distance = sel.loc[sel["CLASS"].isin([21, 23])] + dist_shp = distance.sindex + def calc_nearest(x, dist_gpd, dist_idx): xx = dist_gpd.iloc[list(dist_idx.nearest([x.centroid.x, 
x.centroid.y], 2))] - dists = xx['geometry'].apply(lambda y: y.distance(x)) + dists = xx["geometry"].apply(lambda y: y.distance(x)) try: - return(min(dists[dists > 0])) + return min(dists[dists > 0]) except: - return(0) - - to_be['dist'] = to_be['geometry'].apply(lambda x: calc_nearest(x, distance, dist_shp)) - features_22 = to_be.loc[to_be['dist'] > 3000] - - #Burn features into output raster + return 0 + + to_be["dist"] = to_be["geometry"].apply( + lambda x: calc_nearest(x, distance, dist_shp) + ) + features_22 = to_be.loc[to_be["dist"] > 3000] + + # Burn features into output raster cShape = features_22.unary_union.__geo_interface__ - mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform) - mask_vals = (mask^1) * 22 - + mask = rasterize( + [(cShape, 0)], + out_shape=data[0, :, :].shape, + fill=1, + transform=popRaster.transform, + ) + mask_vals = (mask ^ 1) * 22 + final_raster = (final_raster * mask) + mask_vals - + if len(out_raster) > 0: out_metadata = popRaster.meta.copy() - out_metadata['dtype'] = urban_raster.dtype - out_metadata['nodata'] = -999 - final_raster = final_raster.astype(out_metadata['dtype']) - with rasterio.open(out_raster, 'w', **out_metadata) as rOut: + out_metadata["dtype"] = urban_raster.dtype + out_metadata["nodata"] = -999 + final_raster = final_raster.astype(out_metadata["dtype"]) + with rasterio.open(out_raster, "w", **out_metadata) as rOut: rOut.write_band(1, final_raster) - - return({'raster':final_raster, 'shapes':allFeatures, 'HD':HD_raster, 'URB':URB_raster}) - - - - def calculateUrban(self, densVal=300, totalPopThresh=5000, smooth=False, verbose=False, queen=False, - raster='', raster_pop='', print_message=''): - ''' + + return { + "raster": final_raster, + "shapes": allFeatures, + "HD": HD_raster, + "URB": URB_raster, + } + + def calculateUrban( + self, + densVal=300, + totalPopThresh=5000, + smooth=False, + verbose=False, + queen=False, + raster="", + raster_pop="", + print_message="", + ): + """ Generate urban extents from gridded population data through the application of a minimum density threshold and a minimum total population threshold - + :param densVal: integer of the minimum density value to be counted as urban :param totalPopThresh: integer minimum total settlement population to ne considered urban - :param smooth: boolean to run a single modal smoothing function (this should be run when running + :param smooth: boolean to run a single modal smoothing function (this should be run when running on WorldPop as the increased resolution often leads to small holes and funny shapes :param verbose: boolean on what messages to receive :param queen: boolean to determine whether to dissolve final shape to connect queen's contiguity - :param raster: string path to create a boolean raster of urban and not. + :param raster: string path to create a boolean raster of urban and not. 
Empty string is the default and will create no raster :param raster_pop: string path to create a raster of the population layer only in the urban areas Empty string is the default and will create no raster :returns: GeoPandasDataFrame of the urban extents - ''' + """ popRaster = self.inR data = popRaster.read() urbanData = (data > densVal) * 1 - urbanData = urbanData.astype('int16') - + urbanData = urbanData.astype("int16") + if verbose: tPrint("%s: Read in urban data" % print_message) - idx = 0 + idx = 0 # create output array to store urban raster urban_raster = urbanData * 0 for cShape, value in features.shapes(urbanData, transform=popRaster.transform): if idx % 1000 == 0 and verbose: tPrint("%s: Creating Shape %s" % (print_message, idx)) - if value == 1: - #If the shape is urban, claculate total pop - mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform) + if value == 1: + # If the shape is urban, claculate total pop + mask = rasterize( + [(cShape, 0)], + out_shape=data[0, :, :].shape, + fill=1, + transform=popRaster.transform, + ) inData = np.ma.array(data=data, mask=mask.astype(bool)) - curPop = np.nansum(inData) - if curPop < 0: # when smoothed, sometimes the pop withh be < 0 because of no data + curPop = np.nansum(inData) + if ( + curPop < 0 + ): # when smoothed, sometimes the pop withh be < 0 because of no data inData = np.ma.array(data=inData, mask=(inData < 0).astype(bool)) - curPop = np.nansum(inData) - if curPop > totalPopThresh: - urban_raster += (mask^1) - + curPop = np.nansum(inData) + if curPop > totalPopThresh: + urban_raster += mask ^ 1 + idx = idx + 1 - + if smooth: - inD = urban_raster[0,:,:] + inD = urban_raster[0, :, :] total_urban_cells = inD.sum() current_cells = 0 cnt = 0 @@ -281,51 +352,60 @@ def calculateUrban(self, densVal=300, totalPopThresh=5000, smooth=False, verbose finalD = np.amax(stackD, axis=2) current_cells = finalD.sum() urban_res = finalD - urban_raster[0,:,:] = urban_res - + urban_raster[0, :, :] = urban_res + allFeatures = [] badFeatures = [] - for cShape, value in features.shapes(urban_raster, transform=popRaster.transform): + for cShape, value in features.shapes( + urban_raster, transform=popRaster.transform + ): if idx % 1000 == 0 and verbose: tPrint("%s: Creating Shape %s" % (print_message, idx)) - if value == 1: - #If the shape is urban, claculate total pop - mask = rasterize([(cShape, 0)], out_shape=data[0,:,:].shape,fill=1,transform=popRaster.transform) + if value == 1: + # If the shape is urban, claculate total pop + mask = rasterize( + [(cShape, 0)], + out_shape=data[0, :, :].shape, + fill=1, + transform=popRaster.transform, + ) inData = np.ma.array(data=data, mask=mask.astype(bool)) - curPop = np.nansum(inData) - if curPop < 0: # when smoothed, sometimes the pop withh be < 0 because of no data + curPop = np.nansum(inData) + if ( + curPop < 0 + ): # when smoothed, sometimes the pop withh be < 0 because of no data inData = np.ma.array(data=inData, mask=(inData < 0).astype(bool)) - curPop = np.nansum(inData) - if curPop > totalPopThresh: - allFeatures.append([idx, curPop, shape(geojson.loads(json.dumps(cShape)))]) + curPop = np.nansum(inData) + if curPop > totalPopThresh: + allFeatures.append( + [idx, curPop, shape(geojson.loads(json.dumps(cShape)))] + ) idx = idx + 1 - + if len(raster): out_metadata = popRaster.meta.copy() - out_metadata['dtype'] = urban_raster.dtype - out_metadata['nodata'] = 0 - with rasterio.open(raster, 'w', **out_metadata) as rOut: + out_metadata["dtype"] = urban_raster.dtype 
+ out_metadata["nodata"] = 0 + with rasterio.open(raster, "w", **out_metadata) as rOut: rOut.write(urban_raster) - + if len(raster_pop): out_metadata = popRaster.meta.copy() urban_pop = data * urban_raster - with rasterio.open(raster_pop, 'w', **out_metadata) as rOut: + with rasterio.open(raster_pop, "w", **out_metadata) as rOut: rOut.write(urban_pop) - - xx = pd.DataFrame(allFeatures, columns=['ID', 'Pop','geometry']) - xxGeom = gpd.GeoDataFrame(xx, geometry='geometry') + + xx = pd.DataFrame(allFeatures, columns=["ID", "Pop", "geometry"]) + xxGeom = gpd.GeoDataFrame(xx, geometry="geometry") xxGeom.crs = popRaster.crs - + if queen: - xxGeom['geometry'] = xxGeom.buffer((popRaster.res[0] / 2)) - s = xxGeom['geometry'] + xxGeom["geometry"] = xxGeom.buffer((popRaster.res[0] / 2)) + s = xxGeom["geometry"] overlap_matrix = s.apply(lambda x: s.intersects(x)).values.astype(int) n, ids = connected_components(overlap_matrix) - xxGeom['group'] = ids + xxGeom["group"] = ids xxGeom = xxGeom.dissolve(by="group", aggfunc="sum") - - return(xxGeom) - - \ No newline at end of file + + return xxGeom diff --git a/src/GOSTurban/country_helper.py b/src/GOSTurban/country_helper.py index c702727..a07e474 100755 --- a/src/GOSTurban/country_helper.py +++ b/src/GOSTurban/country_helper.py @@ -1,97 +1,114 @@ -import sys, os, importlib, shutil, multiprocessing -import requests -import rasterio, elevation, richdem +import sys +import os +import rasterio import rasterio.warp -from rasterio import features import pandas as pd import geopandas as gpd -import numpy as np -from shapely.geometry import MultiPolygon, Polygon, box, Point -#Import raster helpers +# Import raster helpers import GOSTRocks.rasterMisc as rMisc import GOSTRocks.ntlMisc as ntl from GOSTRocks.misc import tPrint -#Import GOST urban functions +# Import GOST urban functions sys.path.append("../../../src") import GOST_Urban.UrbanRaster as urban -import GOST_Urban.urban_helper as helper -class urban_country(): - ''' helper function to centralize urban calculations for a single country - ''' + +class urban_country: + """helper function to centralize urban calculations for a single country""" + def __init__(self, iso3, sel_country, cur_folder, inP): - ''' calculate urban extents for selected country and population raster - + """calculate urban extents for selected country and population raster + INPUT iso3 [string] - ISO 3 of selected country sel_country [geopandas dataframe] - selected country bounds cur_folder [string path] - path to output folder inP [rasterio read] - opened population raster dataset - ''' - self.iso3 = iso3 - self.sel_country = sel_country - self.cur_folder = cur_folder - self.urban_extents_file = os.path.join(cur_folder, f"{iso3}_urban_extents.geojson") - self.urban_extents_raster_file = os.path.join(cur_folder, f"{iso3}_urban_extents.tif") - self.urban_extents_hd_file = os.path.join(cur_folder, f"{iso3}_urban_extents_hd.geojson") - self.urban_extents_hd_raster_file = os.path.join(cur_folder, f"{iso3}_urban_extents_hd.tif") - - self.ghsl_folder = os.path.join(self.cur_folder, "GHSL_Rasters") - + """ + self.iso3 = iso3 + self.sel_country = sel_country + self.cur_folder = cur_folder + self.urban_extents_file = os.path.join( + cur_folder, f"{iso3}_urban_extents.geojson" + ) + self.urban_extents_raster_file = os.path.join( + cur_folder, f"{iso3}_urban_extents.tif" + ) + self.urban_extents_hd_file = os.path.join( + cur_folder, f"{iso3}_urban_extents_hd.geojson" + ) + self.urban_extents_hd_raster_file = os.path.join( + cur_folder, 
f"{iso3}_urban_extents_hd.tif" + ) + + self.ghsl_folder = os.path.join(self.cur_folder, "GHSL_Rasters") + # Define zonal summary files - self.urban_ntl = os.path.join(cur_folder, f'{iso3}_urban_ntl.csv') - self.urban_hd_ntl = os.path.join(cur_folder, f'{iso3}_hd_urban_ntl.csv') - - self.urban_ghsl = os.path.join(cur_folder, f'{iso3}_urban_ghsl.csv') - self.urban_hd_ghsl = os.path.join(cur_folder, f'{iso3}_urban_hd_ghsl.csv') - + self.urban_ntl = os.path.join(cur_folder, f"{iso3}_urban_ntl.csv") + self.urban_hd_ntl = os.path.join(cur_folder, f"{iso3}_hd_urban_ntl.csv") + + self.urban_ghsl = os.path.join(cur_folder, f"{iso3}_urban_ghsl.csv") + self.urban_hd_ghsl = os.path.join(cur_folder, f"{iso3}_urban_hd_ghsl.csv") + if type(inP) == str: inP = rasterio.open(inP) self.inP = inP - + def calculate_urban_extents(self, calculate_area=True, area_crs=3857): - ''' Run EC urban extent analysis - ''' + """Run EC urban extent analysis""" urban_calculator = urban.urbanGriddedPop(self.inP) if not os.path.exists(self.urban_extents_file): tPrint(f"Running urbanization for {self.iso3}") - urban_extents = urban_calculator.calculateUrban(densVal=300, totalPopThresh=5000, - smooth=False, queen=False, - verbose=True, raster=self.urban_extents_raster_file) + urban_extents = urban_calculator.calculateUrban( + densVal=300, + totalPopThresh=5000, + smooth=False, + queen=False, + verbose=True, + raster=self.urban_extents_raster_file, + ) # Calculate area of urban extents if calculate_area: urban_extents = urban_extents.to_crs(area_crs) urban_extents = urban_extents.to_crs(area_crs) - urban_extents['area_km'] = urban_extents['geometry'].apply(lambda x: x.area/1000000) - + urban_extents["area_km"] = urban_extents["geometry"].apply( + lambda x: x.area / 1000000 + ) + # Name urban extents if urban_extents.crs.to_epsg() != 4326: urban_extents = urban_extents.to_crs(4326) try: - urban_extents = urban.geocode_cities(urban_extents) + urban_extents = urban.geocode_cities(urban_extents) except: pass - + urban_extents.to_file(self.urban_extents_file, driver="GeoJSON") self.urban_extents = urban_extents else: self.urban_extents = gpd.read_file(self.urban_extents_file) - - - if not os.path.exists(self.urban_extents_hd_file): - urban_extents_hd = urban_calculator.calculateUrban(densVal=1500, totalPopThresh=50000, - smooth=True, queen=False, - verbose=True, raster=self.urban_extents_hd_raster_file) + + if not os.path.exists(self.urban_extents_hd_file): + urban_extents_hd = urban_calculator.calculateUrban( + densVal=1500, + totalPopThresh=50000, + smooth=True, + queen=False, + verbose=True, + raster=self.urban_extents_hd_raster_file, + ) # Calculate area of urban extents if calculate_area: urban_extents_hd = urban_extents_hd.to_crs(area_crs) urban_extents_hd = urban_extents_hd.to_crs(area_crs) - urban_extents_hd['area_km'] = urban_extents_hd['geometry'].apply(lambda x: x.area/1000000) - + urban_extents_hd["area_km"] = urban_extents_hd["geometry"].apply( + lambda x: x.area / 1000000 + ) + # Name urban extents if urban_extents_hd.crs.to_epsg() != 4326: urban_extents_hd = urban_extents_hd.to_crs(4326) @@ -103,21 +120,24 @@ def calculate_urban_extents(self, calculate_area=True, area_crs=3857): self.urban_extents_hd = urban_extents_hd else: self.urban_extents_hd = gpd.read_file(self.urban_extents_hd_file) - - def summarize_ntl(self, ntl_files = []): - ''' run zonal analysis on nighttime lights using urban extents - ''' - if (not os.path.exists(self.urban_ntl)) or (not os.path.exists(self.urban_hd_ntl)): + + def summarize_ntl(self, 
ntl_files=[]): + """run zonal analysis on nighttime lights using urban extents""" + if (not os.path.exists(self.urban_ntl)) or ( + not os.path.exists(self.urban_hd_ntl) + ): if len(ntl_files) == 0: - ntl_files = ntl.aws_search_ntl() + ntl_files = ntl.aws_search_ntl() for ntl_file in ntl_files: name = ntl_file.split("/")[-1].split("_")[2][:8] try: - inR = rasterio.open(ntl_file) + inR = rasterio.open(ntl_file) # tPrint("Processing %s" % name) - viirs_folder = os.path.join(self.cur_folder, 'VIIRS') - urban_res_file = os.path.join(viirs_folder, f'URBAN_{name}.csv') - urban_hd_res_file = os.path.join(viirs_folder, f'HD_URBAN_{name}.csv') + viirs_folder = os.path.join(self.cur_folder, "VIIRS") + urban_res_file = os.path.join(viirs_folder, f"URBAN_{name}.csv") + urban_hd_res_file = os.path.join( + viirs_folder, f"HD_URBAN_{name}.csv" + ) if not os.path.exists(viirs_folder): os.makedirs(viirs_folder) @@ -129,46 +149,57 @@ def summarize_ntl(self, ntl_files = []): self.calculate_urban_extents() urbanD = self.urban_extents urbanHD = self.urban_extents_hd - + # Urban Summary if not os.path.exists(urban_res_file): urban_res = rMisc.zonalStats(urbanD, inR, minVal=0.1) - col_names = [f'URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']] + col_names = [ + f"URBAN_{name}_{x}" for x in ["SUM", "MIN", "MAX", "MEAN"] + ] urban_df = pd.DataFrame(urban_res, columns=col_names) urban_df.to_csv(urban_res_file) # HD Urban Summary if not os.path.exists(urban_hd_res_file): hd_urban_res = rMisc.zonalStats(urbanHD, inR, minVal=0.1) - col_names = [f'HD_URBAN_{name}_{x}' for x in ['SUM','MIN','MAX','MEAN']] + col_names = [ + f"HD_URBAN_{name}_{x}" + for x in ["SUM", "MIN", "MAX", "MEAN"] + ] hd_urban_df = pd.DataFrame(hd_urban_res, columns=col_names) - hd_urban_df.to_csv(urban_hd_res_file) + hd_urban_df.to_csv(urban_hd_res_file) except: - tPrint(f'***********ERROR with {iso3} and {name}') - + tPrint(f"***********ERROR with {iso3} and {name}") + # Compile VIIRS results urb_files = [x for x in os.listdir(viirs_folder) if x.startswith("URBAN")] for x in urb_files: tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0) - urbanD[x[:-4]] = tempD.iloc[:,0] + urbanD[x[:-4]] = tempD.iloc[:, 0] - hd_urb_files = [x for x in os.listdir(viirs_folder) if x.startswith("HD_URBAN")] + hd_urb_files = [ + x for x in os.listdir(viirs_folder) if x.startswith("HD_URBAN") + ] for x in hd_urb_files: tempD = pd.read_csv(os.path.join(viirs_folder, x), index_col=0) - urbanHD[x[:-4]] = tempD.iloc[:,0] - - urbanD.drop(['geometry'], axis=1).to_csv(self.urban_ntl) - urbanHD.drop(['geometry'], axis=1).to_csv(self.urban_hd_ntl) - - def summarize_ghsl(self, ghsl_files, binary_calc=False, binary_thresh=1000, clip_raster=False): - ''' Summarize GHSL data - + urbanHD[x[:-4]] = tempD.iloc[:, 0] + + urbanD.drop(["geometry"], axis=1).to_csv(self.urban_ntl) + urbanHD.drop(["geometry"], axis=1).to_csv(self.urban_hd_ntl) + + def summarize_ghsl( + self, ghsl_files, binary_calc=False, binary_thresh=1000, clip_raster=False + ): + """Summarize GHSL data + INPUT ghsl_files [list of paths] - path to individual built area raster files [optional] binary_calc [binary, default=False] - if True, additionally calculate zonal stats on a binary built raster [optional] binary_thresh [int, default=1000] - if binary_calc is True, all cells above threshold will be considered built [optional] clip_raster [binary, default=False] - if True, clip the GHSL datasets for the calculations - ''' - if (not os.path.exists(self.urban_ghsl)) or (not 
os.path.exists(self.urban_hd_ghsl)): + """ + if (not os.path.exists(self.urban_ghsl)) or ( + not os.path.exists(self.urban_hd_ghsl) + ): try: urbanD = self.urban_extents urbanHD = self.urban_extents_hd @@ -176,8 +207,7 @@ def summarize_ghsl(self, ghsl_files, binary_calc=False, binary_thresh=1000, clip self.calculate_urban_extents() urbanD = self.urban_extents urbanHD = self.urban_extents_hd - - + for ghsl_file in ghsl_files: date = os.path.basename(ghsl_file).split("_")[3] tPrint(date) @@ -185,57 +215,64 @@ def summarize_ghsl(self, ghsl_files, binary_calc=False, binary_thresh=1000, clip if urbanD.crs != inR.crs: urbanD = urbanD.to_crs(inR.crs) urbanHD = urbanHD.to_crs(inR.crs) - + local_file = os.path.join(self.ghsl_folder, os.path.basename(ghsl_file)) if clip_raster: if not os.path.exists(self.ghsl_folder): os.makedirs(self.ghsl_folder) if not os.path.exists(local_file): rMisc.clipRaster(inR, self.sel_country, local_file) - + res_urban = rMisc.zonalStats(urbanD, inR, minVal=0) - res_urban = pd.DataFrame(res_urban, columns=["SUM","MIN","MAX","MEAN"]) - urbanD[f'ghsl_{date}'] = res_urban['SUM'] - + res_urban = pd.DataFrame( + res_urban, columns=["SUM", "MIN", "MAX", "MEAN"] + ) + urbanD[f"ghsl_{date}"] = res_urban["SUM"] + res_hd_urban = rMisc.zonalStats(urbanHD, inR, minVal=0) - res_hd_urban = pd.DataFrame(res_hd_urban, columns=["SUM","MIN","MAX","MEAN"]) - urbanHD[f'ghsl_{date}'] = res_hd_urban['SUM'] + res_hd_urban = pd.DataFrame( + res_hd_urban, columns=["SUM", "MIN", "MAX", "MEAN"] + ) + urbanHD[f"ghsl_{date}"] = res_hd_urban["SUM"] - if binary_calc: # run zonal stats on a binary built layer + if binary_calc: # run zonal stats on a binary built layer try: localR = rasterio.open(local_file) inD = localR.read() - inD[inD == localR.meta['nodata']] = 0 + inD[inD == localR.meta["nodata"]] = 0 except: - raise(ValueError("In order to calculate binary zonal, you need to clip out local ghsl data")) + raise ( + ValueError( + "In order to calculate binary zonal, you need to clip out local ghsl data" + ) + ) inD = inD > binary_thresh with rMisc.create_rasterio_inmemory(localR.profile, inD) as binaryR: res_urban = rMisc.zonalStats(urbanD, binaryR, minVal=0) - res_urban = pd.DataFrame(res_urban, columns=["SUM","MIN","MAX","MEAN"]) - urbanD[f'ghsl_binary_{date}'] = res_urban['SUM'] - + res_urban = pd.DataFrame( + res_urban, columns=["SUM", "MIN", "MAX", "MEAN"] + ) + urbanD[f"ghsl_binary_{date}"] = res_urban["SUM"] + res_hd_urban = rMisc.zonalStats(urbanHD, binaryR, minVal=0) - res_hd_urban = pd.DataFrame(res_hd_urban, columns=["SUM","MIN","MAX","MEAN"]) - urbanHD[f'ghsl_binary_{date}'] = res_hd_urban['SUM'] - + res_hd_urban = pd.DataFrame( + res_hd_urban, columns=["SUM", "MIN", "MAX", "MEAN"] + ) + urbanHD[f"ghsl_binary_{date}"] = res_hd_urban["SUM"] # Write results to file - pd.DataFrame(urbanD.drop(['geometry'], axis=1)).to_csv(self.urban_ghsl) - pd.DataFrame(urbanHD.drop(['geometry'], axis=1)).to_csv(self.urban_hd_ghsl) - + pd.DataFrame(urbanD.drop(["geometry"], axis=1)).to_csv(self.urban_ghsl) + pd.DataFrame(urbanHD.drop(["geometry"], axis=1)).to_csv(self.urban_hd_ghsl) + def delete_urban_data(self): - ''' delete urban extents - ''' - for cFile in [self.urban_extents_file, self.urban_extents_raster_file, - self.urban_extents_hd_file, self.urban_extents_hd_raster_file]: + """delete urban extents""" + for cFile in [ + self.urban_extents_file, + self.urban_extents_raster_file, + self.urban_extents_hd_file, + self.urban_extents_hd_raster_file, + ]: try: os.remove(cFile) except: pass - - - - - - - \ 
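A minimal usage sketch for the `country_helper.urban_country` class refactored above (the boundary file, folder, and GHSL file name are placeholders; assumes the package is importable as `GOSTurban` and that GOSTRocks is available):

    import geopandas as gpd
    from GOSTurban.country_helper import urban_country

    adm0 = gpd.read_file("tza_adm0.geojson")   # national boundary (placeholder)
    uc = urban_country("TZA", adm0, "/data/TZA", "/data/TZA/tza_gpo.tif")

    uc.calculate_urban_extents()   # EC thresholds: 300/km2 & 5,000 pop; 1,500/km2 & 50,000 pop
    uc.summarize_ntl()             # defaults to the VIIRS files returned by ntl.aws_search_ntl()
    uc.summarize_ghsl(
        ["GHS_BUILT_S_2020_GLOBE.tif"],   # year is parsed from the 4th "_"-separated token
        binary_calc=True, binary_thresh=1000,
        clip_raster=True,                 # binary_calc requires the locally clipped copy
    )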
No newline at end of file diff --git a/src/GOSTurban/urban_helper.py b/src/GOSTurban/urban_helper.py index cd8b73d..4b6dce2 100755 --- a/src/GOSTurban/urban_helper.py +++ b/src/GOSTurban/urban_helper.py @@ -1,15 +1,13 @@ -import sys, os, importlib, shutil, pyproj, json, requests, math -import rasterio, elevation, richdem +import sys +import os +import rasterio +import elevation +import richdem import rasterio.warp -from shapely.ops import transform -from shapely.geometry import box -from functools import partial from rasterio import features -from rasterio.mask import mask import pandas as pd -import geopandas as gpd import numpy as np sys.path.append("../") @@ -18,39 +16,41 @@ import GOSTRocks.rasterMisc as rMisc from GOSTRocks.misc import tPrint + class summarize_population(object): - ''' summarize population and urban populations for defined admin regions - ''' - def __init__(self, pop_layer, admin_layer, urban_layer='', hd_urban_layer='', temp_folder=''): - ''' Summarize population into urban and rural based on GOST_Urban.UrbanRaster.calculateUrban - + """summarize population and urban populations for defined admin regions""" + + def __init__( + self, pop_layer, admin_layer, urban_layer="", hd_urban_layer="", temp_folder="" + ): + """Summarize population into urban and rural based on GOST_Urban.UrbanRaster.calculateUrban + INPUT - pop_layer [string] - path - ''' + pop_layer [string] - path + """ self.pop_layer = pop_layer - + self.urban_layer = urban_layer - if self.urban_layer == '': + if self.urban_layer == "": self.urban_layer = pop_layer.replace(".tif", "_urban.tif") - + self.urban_hd_layer = hd_urban_layer - if self.urban_hd_layer == '': + if self.urban_hd_layer == "": self.urban_hd_layer = pop_layer.replace(".tif", "_urban_hd.tif") self.admin_layer = admin_layer - - #Open population layer + + # Open population layer self.in_pop = rasterio.open(self.pop_layer) if self.admin_layer.crs != self.in_pop.crs: self.admin_layer = self.admin_layer.to_crs(self.in_pop.crs) - - if temp_folder == '': + + if temp_folder == "": self.temp_folder = os.path.dirname(self.pop_layer) else: self.temp_folder = temp_folder - + def check_inputs(self): - ''' Ensure all layers exist - ''' + """Ensure all layers exist""" check_vals = {} good = True for lyr in [self.pop_layer, self.urban_layer, self.urban_hd_layer]: @@ -58,10 +58,10 @@ def check_inputs(self): if not check_vals[lyr]: good = False self.check_vals = check_vals - return(good) - - def calculate_zonal(self, out_name='', convert_urban_binary=False): - """ Run zonal statistics on input admin layers, population layers, and urban layers + return good + + def calculate_zonal(self, out_name="", convert_urban_binary=False): + """Run zonal statistics on input admin layers, population layers, and urban layers Args: out_name (str, optional): name to append to output populations columns. Defaults to ''. 
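As context for the hunks that follow, a minimal sketch of how `summarize_population` is driven (file names and the admin layer are placeholders; the urban and HD-urban rasters are assumed to have been produced already, e.g. by the `calculate_urban` method further below):

    import geopandas as gpd
    from GOSTurban.urban_helper import summarize_population

    pop_raster = "tza_upo18.tif"            # placeholder population raster
    admin = gpd.read_file("tza_adm2.shp")   # placeholder admin boundaries

    # urban/HD layers default to <pop>_urban.tif and <pop>_urban_hd.tif next to the population raster
    summarizer = summarize_population(pop_raster, admin)
    if summarizer.check_inputs():           # all three rasters must exist on disk
        stats = summarizer.calculate_zonal(out_name="tza")
        # columns: TOTALPOP_<pop>_SUM/MIN/MAX/MEAN plus tza_<urban layer>_SUM, etc.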
@@ -69,12 +69,19 @@ def calculate_zonal(self, out_name='', convert_urban_binary=False): """ inP = self.in_pop.read() - inA = self.admin_layer #gpd.read_file(self.admin_layer) - + inA = self.admin_layer # gpd.read_file(self.admin_layer) + res = rMisc.zonalStats(inA, self.in_pop, minVal=0) - final = pd.DataFrame(res, columns=["TOTALPOP_%s_%s" % (os.path.basename(self.pop_layer).replace(".tif", ""), x) for x in ['SUM', 'MIN', 'MAX', 'MEAN']]) - - for lyr in [self.urban_layer, self.urban_hd_layer]: + final = pd.DataFrame( + res, + columns=[ + "TOTALPOP_%s_%s" + % (os.path.basename(self.pop_layer).replace(".tif", ""), x) + for x in ["SUM", "MIN", "MAX", "MEAN"] + ], + ) + + for lyr in [self.urban_layer, self.urban_hd_layer]: name = os.path.basename(lyr).replace(".tif", "") in_urban = rasterio.open(lyr) inU = in_urban.read() @@ -82,53 +89,67 @@ def calculate_zonal(self, out_name='', convert_urban_binary=False): inU = (inU > 0) * 1 cur_pop = inP * inU out_file = os.path.join(self.temp_folder, "urban_pop.tif") - - with rasterio.open(out_file, 'w', **self.in_pop.meta) as out_urban: + + with rasterio.open(out_file, "w", **self.in_pop.meta) as out_urban: out_urban.write(cur_pop) - + res = rMisc.zonalStats(inA, out_file, minVal=0) - res = pd.DataFrame(res, columns=["%s_%s_%s" % (out_name, name, x) for x in ['SUM', 'MIN', 'MAX', 'MEAN']]) + res = pd.DataFrame( + res, + columns=[ + "%s_%s_%s" % (out_name, name, x) + for x in ["SUM", "MIN", "MAX", "MEAN"] + ], + ) try: final = final.join(res) except: final = res - return(final) + return final + class urban_country(object): - ''' Extract and summarize urbanization in selected country, based on novel urbanization work of Mark Roberts and Shohei Nakamura - ''' - - def __init__(self, iso3, output_folder, country_bounds, pop_files, final_folder = "", ghspop_suffix=""): - ''' Create object for managing input data for summarizing urban extents - + """Extract and summarize urbanization in selected country, based on novel urbanization work of Mark Roberts and Shohei Nakamura""" + + def __init__( + self, + iso3, + output_folder, + country_bounds, + pop_files, + final_folder="", + ghspop_suffix="", + ): + """Create object for managing input data for summarizing urban extents + INPUT :param: iso3 - string describing iso3 code :param: output_folder - string path to folder to hold results :param: country_bounds - geopandas dataframe of admin0 boundary - - + + NAMING CONVENTION To save this renaming step on my side, which can also induce mistakes, would be possible for you Ben to rename the files in your code directly? This would be also helpful for all other countries we have to do, and for the 1km*1km rasters. My conventions are pretty simple. All rasters starts with the three lettres of the country and then _ as you do, and then 3 lettres for the variable, and possibly two figures for the year. So for instance for Tanzania, this is: tza_ele tza_slo tza_wat for elevation, slope and water tza_gpo tza_gbu for GHS population and built-up - tza_upo15 and tza_upo18 for WorldPop population unconstrained + tza_upo15 and tza_upo18 for WorldPop population unconstrained tza_cpo15 and tza_cpo18 for WorldPop population constrained. Then for 1km*1km raster, names are the same except that the three lettres of the country's name are followed by 1k, ie tza1k_slo, tza1k_ele and so on. 
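Spelled out, the convention amounts to <iso3>[1k]_<three-letter variable>[<two-digit year>].tif; a purely illustrative helper (not part of this module) that reproduces it:

    def convention_name(iso3, var, year=None, one_km=False):
        # convention_name("tza", "upo", 18)          -> "tza_upo18.tif"
        # convention_name("tza", "slo", one_km=True) -> "tza1k_slo.tif"
        stem = f"{iso3.lower()}{'1k' if one_km else ''}_{var}"
        return f"{stem}{year if year is not None else ''}.tif"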
- ''' + """ self.iso3 = iso3 self.out_folder = output_folder self.suffix = ghspop_suffix - + if final_folder == "": - self.final_folder = os.path.join(self.out_folder, "FINAL_STANDARD") + self.final_folder = os.path.join(self.out_folder, "FINAL_STANDARD") else: - self.final_folder = os.path.join(self.out_folder, final_folder) + self.final_folder = os.path.join(self.out_folder, final_folder) if not os.path.exists(self.out_folder): os.makedirs(self.out_folder) if not os.path.exists(self.final_folder): os.makedirs(self.final_folder) - + self.dem_file = os.path.join(output_folder, "%s_ele.tif" % iso3.lower()) self.slope_file = os.path.join(output_folder, "%s_slo.tif" % iso3.lower()) self.desert_file = os.path.join(output_folder, "%s_des.tif" % iso3.lower()) @@ -139,87 +160,104 @@ def __init__(self, iso3, output_folder, country_bounds, pop_files, final_folder self.ghspop1k_file = os.path.join(output_folder, "%s1k_gpo.tif" % iso3.lower()) self.ghsbuilt_file = os.path.join(output_folder, "%s_gbu.tif" % iso3.lower()) self.ghssmod_file = os.path.join(output_folder, "%s_gsmod.tif" % iso3.lower()) - self.admin_file = os.path.join(output_folder, "%s_adm.tif" % iso3.lower()) - self.admin_shp = os.path.join(self.final_folder, "%s_adm.shp" % iso3.lower()) + self.admin_file = os.path.join(output_folder, "%s_adm.tif" % iso3.lower()) + self.admin_shp = os.path.join(self.final_folder, "%s_adm.shp" % iso3.lower()) self.pop_files = [] # Copy and rename the population files for fileDef in pop_files: out_pop_file = os.path.join(output_folder, fileDef[1]) self.pop_files.append(out_pop_file) if not os.path.exists(out_pop_file): - tPrint(f'Clipping {fileDef[0]}') - rMisc.clipRaster(rasterio.open(fileDef[0]), country_bounds, out_pop_file) - ''' + tPrint(f"Clipping {fileDef[0]}") + rMisc.clipRaster( + rasterio.open(fileDef[0]), country_bounds, out_pop_file + ) + """ if ghspop_suffix == '1k': if not self.ghspop1k_file in self.pop_files: - self.pop_files.append(self.ghspop1k_file) + self.pop_files.append(self.ghspop1k_file) else: - ''' - if not self.ghspop_file in self.pop_files: + """ + if self.ghspop_file not in self.pop_files: self.pop_files.append(self.ghspop_file) - + self.pop_files = list(set(self.pop_files)) # Write admin shapefile to output file self.inD = country_bounds if not os.path.exists(self.admin_shp): self.inD.to_file(self.admin_shp) - - def process_dem(self, global_dem=''): - ''' Download DEM from AWS, calculate slope - ''' + + def process_dem(self, global_dem=""): + """Download DEM from AWS, calculate slope""" # Download DEM - if not os.path.exists(self.dem_file) and global_dem == '': + if not os.path.exists(self.dem_file) and global_dem == "": tPrint("Downloading DEM") - elevation.clip(bounds=self.inD.total_bounds, max_download_tiles=90000, output=self.dem_file, product='SRTM3') + elevation.clip( + bounds=self.inD.total_bounds, + max_download_tiles=90000, + output=self.dem_file, + product="SRTM3", + ) - if not os.path.exists(self.dem_file) and not global_dem == '': + if not os.path.exists(self.dem_file) and not global_dem == "": tPrint("Downloading DEM") rMisc.clipRaster(rasterio.open(global_dem), self.inD, self.dem_file) - + # Calculate slope if not os.path.exists(self.slope_file) and os.path.exists(self.dem_file): tPrint("Calculating slope") in_dem = rasterio.open(self.dem_file) in_dem_data = in_dem.read() - beau = richdem.rdarray(in_dem_data[0,:,:], no_data=in_dem.meta['nodata']) - slope = richdem.TerrainAttribute(beau, attrib='slope_riserun') + beau = richdem.rdarray(in_dem_data[0, :, :], 
no_data=in_dem.meta["nodata"]) + slope = richdem.TerrainAttribute(beau, attrib="slope_riserun") meta = in_dem.meta.copy() - meta.update(dtype = slope.dtype) - with rasterio.open(self.slope_file, 'w', **meta) as outR: + meta.update(dtype=slope.dtype) + with rasterio.open(self.slope_file, "w", **meta) as outR: outR.write_band(1, slope) - - def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k, global_ghbuilt, global_ghsl, global_smod): - ''' extract global layers for current country - ''' + + def extract_layers( + self, + global_landcover, + global_ghspop, + global_ghspop1k, + global_ghbuilt, + global_ghsl, + global_smod, + ): + """extract global layers for current country""" # Extract desert from globcover if not os.path.exists(self.desert_file): tPrint("Extracting desert") if not os.path.exists(self.lc_file): - rMisc.clipRaster(rasterio.open(global_landcover), self.inD, self.lc_file) + rMisc.clipRaster( + rasterio.open(global_landcover), self.inD, self.lc_file + ) in_lc = rasterio.open(self.lc_file) inL = in_lc.read() lcmeta = in_lc.meta.copy() - tempL = (inL == 200).astype(lcmeta['dtype']) + tempL = (inL == 200).astype(lcmeta["dtype"]) lcmeta.update(nodata=255) - with rasterio.open(self.desert_file, 'w', **lcmeta) as out: + with rasterio.open(self.desert_file, "w", **lcmeta) as out: out.write(tempL) os.remove(self.lc_file) - + # Extract water from globcover if not os.path.exists(self.lc_file_h20): tPrint("Extracting water") if not os.path.exists(self.lc_file): - rMisc.clipRaster(rasterio.open(global_landcover), self.inD, self.lc_file) + rMisc.clipRaster( + rasterio.open(global_landcover), self.inD, self.lc_file + ) in_lc = rasterio.open(self.lc_file) inL = in_lc.read() lcmeta = in_lc.meta.copy() - tempL = (inL == 210).astype(lcmeta['dtype']) + tempL = (inL == 210).astype(lcmeta["dtype"]) lcmeta.update(nodata=255) - with rasterio.open(self.lc_file_h20, 'w', **lcmeta) as out: + with rasterio.open(self.lc_file_h20, "w", **lcmeta) as out: out.write(tempL) os.remove(self.lc_file) - + # Extract water from GHSL if not os.path.exists(self.ghsl_h20): tPrint("Extracting water from GHSL") @@ -231,100 +269,137 @@ def extract_layers(self, global_landcover, global_ghspop, global_ghspop1k, globa ul = inR.index(*tempD.total_bounds[0:2]) lr = inR.index(*tempD.total_bounds[2:4]) # read the subset of the data into a numpy array - window = ((float(lr[0]), float(ul[0]+1)), (float(ul[1]), float(lr[1]+1))) - data = inR.read(1, window=window, masked = False) + window = ( + (float(lr[0]), float(ul[0] + 1)), + (float(ul[1]), float(lr[1] + 1)), + ) + data = inR.read(1, window=window, masked=False) data = data == 1 b = tempD.total_bounds - new_transform = rasterio.transform.from_bounds(b[0], b[1], b[2], b[3], data.shape[1], data.shape[0]) + new_transform = rasterio.transform.from_bounds( + b[0], b[1], b[2], b[3], data.shape[1], data.shape[0] + ) meta = inR.meta.copy() - meta.update(driver='GTiff',width=data.shape[1], height=data.shape[0], transform=new_transform) - data = data.astype(meta['dtype']) - with rasterio.open(self.ghsl_h20, 'w', **meta) as outR: + meta.update( + driver="GTiff", + width=data.shape[1], + height=data.shape[0], + transform=new_transform, + ) + data = data.astype(meta["dtype"]) + with rasterio.open(self.ghsl_h20, "w", **meta) as outR: outR.write_band(1, data) - - #Extract GHS-Pop + # Extract GHS-Pop if not os.path.exists(self.ghspop_file): tPrint("Extracting GHS-POP") rMisc.clipRaster(rasterio.open(global_ghspop), self.inD, self.ghspop_file) - - #Extract GHS-Pop-1k + + # 
Extract GHS-Pop-1k if not os.path.exists(self.ghspop1k_file): tPrint("Extracting GHS-POP 1K: %s" % self.ghspop1k_file) - rMisc.clipRaster(rasterio.open(global_ghspop1k), self.inD, self.ghspop1k_file) + rMisc.clipRaster( + rasterio.open(global_ghspop1k), self.inD, self.ghspop1k_file + ) - #Extract GHS-Built + # Extract GHS-Built if not os.path.exists(self.ghsbuilt_file): tPrint("Clipping GHS-Built") - rMisc.clipRaster(rasterio.open(global_ghbuilt), self.inD, self.ghsbuilt_file) - - #Extract GHS-SMOD + rMisc.clipRaster( + rasterio.open(global_ghbuilt), self.inD, self.ghsbuilt_file + ) + + # Extract GHS-SMOD if not os.path.exists(self.ghssmod_file): tPrint("Clipping GHS-SMOD") rMisc.clipRaster(rasterio.open(global_smod), self.inD, self.ghssmod_file) - - #Rasterize admin boundaries + + # Rasterize admin boundaries if not os.path.exists(self.admin_file): tPrint("Rasterizing admin boundaries") xx = rasterio.open(self.ghspop_file) - res = xx.meta['transform'][0] + res = xx.meta["transform"][0] tempD = self.inD.to_crs(xx.crs) - shapes = ((row['geometry'], 1) for idx, row in tempD.iterrows()) - burned = features.rasterize(shapes=shapes, out_shape=xx.shape, fill=0, transform=xx.meta['transform'], dtype='int16') + shapes = ((row["geometry"], 1) for idx, row in tempD.iterrows()) + burned = features.rasterize( + shapes=shapes, + out_shape=xx.shape, + fill=0, + transform=xx.meta["transform"], + dtype="int16", + ) meta = xx.meta.copy() meta.update(dtype=burned.dtype) - with rasterio.open(self.admin_file, 'w', **meta) as outR: + with rasterio.open(self.admin_file, "w", **meta) as outR: outR.write_band(1, burned) - + def calculate_urban(self, urb_val=300, hd_urb_val=1500): - ''' Calculate urban and HD urban extents from population files - ''' + """Calculate urban and HD urban extents from population files""" # Calculate urban extents from population layers ghs_R = rasterio.open(self.ghspop_file) - for p_file in self.pop_files: - final_pop = os.path.join(self.final_folder, os.path.basename(p_file).replace(self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix))) + for p_file in self.pop_files: + final_pop = os.path.join( + self.final_folder, + os.path.basename(p_file).replace( + self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix) + ), + ) print(final_pop) if "1k1k" in final_pop: final_pop = final_pop.replace("1k1k", "1k") - final_urban = final_pop.replace(".tif", "_urban.tif") + final_urban = final_pop.replace(".tif", "_urban.tif") final_urban_hd = final_pop.replace(".tif", "_urban_hd.tif") urbanR = urban.urbanGriddedPop(final_pop) # Convert density values for urbanization from 1km resolution to current resolution in_raster = rasterio.open(final_pop) total_ratio = (in_raster.res[0] * in_raster.res[1]) / 1000000 if not os.path.exists(final_urban): - urban_shp = urbanR.calculateUrban(densVal= (urb_val * total_ratio), totalPopThresh=5000, raster=final_urban) + urban_shp = urbanR.calculateUrban( + densVal=(urb_val * total_ratio), + totalPopThresh=5000, + raster=final_urban, + ) if not os.path.exists(final_urban_hd): - cluster_shp = urbanR.calculateUrban(densVal=(hd_urb_val * total_ratio), totalPopThresh=50000, raster=final_urban_hd, smooth=True, queen=True) - + cluster_shp = urbanR.calculateUrban( + densVal=(hd_urb_val * total_ratio), + totalPopThresh=50000, + raster=final_urban_hd, + smooth=True, + queen=True, + ) + def pop_zonal_admin(self, admin_layer): - ''' calculate urban and rural - - :param: - admin_layer - ''' - for p_file in self.pop_files: - pop_file = os.path.join(self.final_folder, 
os.path.basename(p_file).replace(self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix))) + """calculate urban and rural + + :param: - admin_layer + """ + for p_file in self.pop_files: + pop_file = os.path.join( + self.final_folder, + os.path.basename(p_file).replace( + self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix) + ), + ) if "1k1k" in pop_file: pop_file = pop_file.replace("1k1k", "1k") yy = summarize_population(pop_file, admin_layer) if yy.check_inputs(): - res = yy.calculate_zonal(out_name='') + res = yy.calculate_zonal(out_name="") out_file = f"/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/LSO_URBAN_DATA_new_naming/LSO_{os.path.basename(p_file)}.csv" try: final = final.join(res) except: - final = res + final = res else: print("Error summarizing population for %s" % pop_file) admin_layer = admin_layer.reset_index() - final = final.filter(regex='_SUM') + final = final.filter(regex="_SUM") final = final.join(admin_layer) - final = final.drop(['geometry'], axis=1) - return(final) - + final = final.drop(["geometry"], axis=1) + return final + def compare_pop_rasters(self, verbose=True): - ''' read in and summarize population rasters - ''' + """read in and summarize population rasters""" all_res = [] for pFile in self.pop_files: inR = rasterio.open(pFile) @@ -333,64 +408,84 @@ def compare_pop_rasters(self, verbose=True): all_res.append([os.path.basename(pFile), inD.sum()]) if verbose: print(f"{os.path.basename(pFile)}: {inD.sum()}") - return(all_res) - - def standardize_rasters(self, include_ghsl_h20 = True): - ''' - ''' + return all_res + + def standardize_rasters(self, include_ghsl_h20=True): + """ """ ghs_R = rasterio.open(self.ghspop_file) pFile = self.ghspop_file if self.suffix == "1k": ghs_R = rasterio.open(self.ghspop1k_file) pFile = self.ghspop1k_file file_defs = [ - #file, type, scale values - [self.admin_file,'C',False], - [self.desert_file, 'C', False], - [self.lc_file_h20, 'C', False], - [self.slope_file, 'N', False], - [self.dem_file, 'N', False], - [self.ghssmod_file, 'N', False], - [self.ghsbuilt_file, 'N', False], - ] - + # file, type, scale values + [self.admin_file, "C", False], + [self.desert_file, "C", False], + [self.lc_file_h20, "C", False], + [self.slope_file, "N", False], + [self.dem_file, "N", False], + [self.ghssmod_file, "N", False], + [self.ghsbuilt_file, "N", False], + ] + if include_ghsl_h20: - file_defs.append([self.ghsl_h20, 'C', False]) - file_defs.append([self.ghsl_h20, 'N', False, os.path.join(self.final_folder, "%s%s_wat_p.tif" % (self.iso3.lower(), self.suffix))]) - - for cFile in self.pop_files: - file_defs.append([cFile, 'N', True]) - + file_defs.append([self.ghsl_h20, "C", False]) + file_defs.append( + [ + self.ghsl_h20, + "N", + False, + os.path.join( + self.final_folder, + "%s%s_wat_p.tif" % (self.iso3.lower(), self.suffix), + ), + ] + ) + + for cFile in self.pop_files: + file_defs.append([cFile, "N", True]) + for file_def in file_defs: try: out_file = file_def[3] except: - out_file = os.path.join(self.final_folder, os.path.basename(file_def[0]).replace(self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix))) + out_file = os.path.join( + self.final_folder, + os.path.basename(file_def[0]).replace( + self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix) + ), + ) if "1k1k" in out_file: out_file = out_file.replace("1k1k", "1k") if (file_def[0] == self.admin_file) and (os.path.exists(out_file)): os.remove(out_file) - out_array = np.zeros(ghs_R.shape) + out_array = np.zeros(ghs_R.shape) if not 
os.path.exists(out_file) and os.path.exists(file_def[0]): in_raster = rasterio.open(file_def[0]) in_r = in_raster.read() - temp_nodata = type(in_r[0,0,0])(in_raster.meta['nodata']) + temp_nodata = type(in_r[0, 0, 0])(in_raster.meta["nodata"]) in_r[in_r == temp_nodata] = 0 rSample = rasterio.warp.Resampling.sum - if file_def[1] == 'C': + if file_def[1] == "C": rSample = rasterio.warp.Resampling.nearest - rasterio.warp.reproject(in_r, out_array, - src_transform=in_raster.meta['transform'], dst_transform=ghs_R.meta['transform'], - src_crs = in_raster.crs, dst_crs = ghs_R.crs, - src_nodata = in_raster.meta['nodata'], dst_nodata = ghs_R.meta['nodata'], - resampling = rSample) - out_array[out_array == ghs_R.meta['nodata']] = 0. + rasterio.warp.reproject( + in_r, + out_array, + src_transform=in_raster.meta["transform"], + dst_transform=ghs_R.meta["transform"], + src_crs=in_raster.crs, + dst_crs=ghs_R.crs, + src_nodata=in_raster.meta["nodata"], + dst_nodata=ghs_R.meta["nodata"], + resampling=rSample, + ) + out_array[out_array == ghs_R.meta["nodata"]] = 0.0 # scale and project file to GHS pop if defined so - if (file_def[0] == self.admin_file): + if file_def[0] == self.admin_file: adminA = out_file in_a = out_array in_a_mask = in_a == 0 - + # If values are to be scaled based on area change, do it here if file_def[2]: out_array_sum = out_array.sum() @@ -398,61 +493,70 @@ def standardize_rasters(self, include_ghsl_h20 = True): total_ratio = original_sum / out_array_sum self.total_ratio = total_ratio out_array = out_array * total_ratio - out_array[out_array < 0] = ghs_R.meta['nodata'] - - + out_array[out_array < 0] = ghs_R.meta["nodata"] + # Set area outside national boundaries to nodata - out_array[in_a_mask] = ghs_R.meta['nodata'] + out_array[in_a_mask] = ghs_R.meta["nodata"] out_meta = ghs_R.meta.copy() - out_meta.update(nodata=ghs_R.meta['nodata']) - out_array = out_array.astype(out_meta['dtype']) - with rasterio.open(out_file, 'w', **out_meta) as outR: + out_meta.update(nodata=ghs_R.meta["nodata"]) + out_array = out_array.astype(out_meta["dtype"]) + with rasterio.open(out_file, "w", **out_meta) as outR: outR.write_band(1, out_array) # Write no data layers to file - out_no_data_file = os.path.join(self.final_folder, "NO_DATA_%s" % os.path.basename(file_def[0])) + out_no_data_file = os.path.join( + self.final_folder, "NO_DATA_%s" % os.path.basename(file_def[0]) + ) if not os.path.exists(out_no_data_file) and os.path.exists(file_def[0]): out_array = ghs_R.read() * 0 in_raster = rasterio.open(file_def[0]) in_r = in_raster.read() # create binary file defining no data area - in_r = (in_r == in_raster.meta['nodata']).astype(ghs_R.meta['dtype']) - rasterio.warp.reproject(in_r, out_array, - src_transform=in_raster.meta['transform'], dst_transform=ghs_R.meta['transform'], - src_crs = in_raster.crs, dst_crs = ghs_R.crs, - src_nodata = in_raster.meta['nodata'], dst_nodata = ghs_R.meta['nodata'], - resample = rasterio.warp.Resampling.nearest) + in_r = (in_r == in_raster.meta["nodata"]).astype(ghs_R.meta["dtype"]) + rasterio.warp.reproject( + in_r, + out_array, + src_transform=in_raster.meta["transform"], + dst_transform=ghs_R.meta["transform"], + src_crs=in_raster.crs, + dst_crs=ghs_R.crs, + src_nodata=in_raster.meta["nodata"], + dst_nodata=ghs_R.meta["nodata"], + resample=rasterio.warp.Resampling.nearest, + ) out_meta = ghs_R.meta.copy() - with rasterio.open(out_no_data_file, 'w', **out_meta) as outR: + with rasterio.open(out_no_data_file, "w", **out_meta) as outR: outR.write(out_array) - - #Apply admin mask 
to population file + + # Apply admin mask to population file gpo1R = rasterio.open(pFile) admR = rasterio.open(adminA) - + gpo1D = gpo1R.read() maskD = admR.read() - gpo1D[gpo1D == gpo1R.meta['nodata']] = 0 - gpo1D[maskD == admR.meta['nodata']] = gpo1R.meta['nodata'] + gpo1D[gpo1D == gpo1R.meta["nodata"]] = 0 + gpo1D[maskD == admR.meta["nodata"]] = gpo1R.meta["nodata"] out_file = os.path.join(self.final_folder, os.path.basename(pFile)) - with rasterio.open(out_file, 'w',**gpo1R.meta) as outR: + with rasterio.open(out_file, "w", **gpo1R.meta) as outR: outR.write(gpo1D) - + def evaluateOutput(self, admin_stats, commune_stats): - ''' + """ Check the outputs to determine if processing worked correctly - + 1. compare population totals between raw, 250m and 1km data 2. Calculate urbanization rate 3. Water mask a. calculate overlap between water classes b. calculate overlap between water and population c. calculate overlap between water and urban - + https://ghsl.jrc.ec.europa.eu/documents/cfs01/V3/CFS_Ghana.pdf - ''' - stats_file = os.path.join(self.out_folder, "DATA_EVALUATION_%s_%s.txt" % (self.iso3, self.suffix)) - with open(stats_file, 'w') as out_stats: + """ + stats_file = os.path.join( + self.out_folder, "DATA_EVALUATION_%s_%s.txt" % (self.iso3, self.suffix) + ) + with open(stats_file, "w") as out_stats: # Compare pop rasters pop_comparison = self.compare_pop_rasters(verbose=False) out_stats.write("***** Evaluate Total Population *****\n") @@ -466,19 +570,21 @@ def evaluateOutput(self, admin_stats, commune_stats): name = "WP_U_%s" % pop_file[-6:-4] if "cpo" in pop_file: name = "WP_C_%s" % pop_file[-6:-4] - + pop_file_base = os.path.basename(pop_file) if self.suffix == "1k": - pop_file_base = pop_file_base.replace(self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix)) + pop_file_base = pop_file_base.replace( + self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix) + ) if "1k1k" in pop_file_base: pop_file_base = pop_file_base.replace("1k1k", "1k") - + out_pop_file = os.path.join(self.final_folder, pop_file_base) urban_pop_file = out_pop_file.replace(".tif", "_urban.tif") hd_pop_file = out_pop_file.replace(".tif", "_urban_hd.tif") pop_file_defs.append([out_pop_file, urban_pop_file, hd_pop_file, name]) out_stats.write("***** Evaluate Urbanization *****\n") - for fileDef in pop_file_defs: + for fileDef in pop_file_defs: pFile = fileDef[0] urb_file = fileDef[1] hd_file = fileDef[2] @@ -488,23 +594,35 @@ def evaluateOutput(self, admin_stats, commune_stats): inPop = inPop * (inPop > 0) inUrb = rasterio.open(urb_file).read() inHd = rasterio.open(hd_file).read() - + tPop = inPop.sum() urbPop = (inPop * inUrb).sum() hdPop = (inPop * inHd).sum() - out_stats.write(f"{name}: TotalPop: {tPop.round(0)}, UrbanPop: {urbPop.round(0)}, HD Pop: {hdPop.round(0)}\n") - out_stats.write(f"{name}: {((urbPop/tPop) * 100).round(2)}% Urban; {((hdPop/tPop) * 100).round(2)}% HD Urban\n") + out_stats.write( + f"{name}: TotalPop: {tPop.round(0)}, UrbanPop: {urbPop.round(0)}, HD Pop: {hdPop.round(0)}\n" + ) + out_stats.write( + f"{name}: {((urbPop/tPop) * 100).round(2)}% Urban; {((hdPop/tPop) * 100).round(2)}% HD Urban\n" + ) except: print(f"Error processing {name}") print(fileDef) # Summarize population in SMOD classes - out_stats.write('***** Evaluate SMOD ******\n') - smod_vals = [10,11,12,13,21,22,23,30] - inSMOD = rasterio.open(os.path.join(self.final_folder, os.path.basename(self.ghssmod_file).replace("%s" % self.iso3.lower(), "%s%s" % (self.iso3.lower(), self.suffix)))) + out_stats.write("***** 
Evaluate SMOD ******\n") + smod_vals = [10, 11, 12, 13, 21, 22, 23, 30] + inSMOD = rasterio.open( + os.path.join( + self.final_folder, + os.path.basename(self.ghssmod_file).replace( + "%s" % self.iso3.lower(), + "%s%s" % (self.iso3.lower(), self.suffix), + ), + ) + ) smod = inSMOD.read() for pFile in self.pop_files: - if 'gpo' in pFile: + if "gpo" in pFile: inPop = rasterio.open(pFile) pop = inPop.read() pop[pop < 0] = 0 @@ -514,62 +632,80 @@ def evaluateOutput(self, admin_stats, commune_stats): cur_smod = (smod == val).astype(int) cur_pop = pop * cur_smod total_curpop = cur_pop.sum() - perUrban = (total_curpop.sum()/total_pop*100) + perUrban = total_curpop.sum() / total_pop * 100 if val > 20: total_per = total_per + perUrban - out_stats.write(f'{val}: {perUrban}\n') - out_stats.write(f'Total Urban: {total_per}\n') - - '''3. Water mask - ''' + out_stats.write(f"{val}: {perUrban}\n") + out_stats.write(f"Total Urban: {total_per}\n") + + """3. Water mask + """ out_stats.write("***** Evaluate Water Intersection *****\n") # a. calculate overlap between water classes - water_ghsl = os.path.join(self.final_folder, "%s%s_wat.tif" % (self.iso3.lower(), self.suffix)) - water_lc = os.path.join(self.final_folder, "%s%s_wat_lc.tif" % (self.iso3.lower(), self.suffix)) + water_ghsl = os.path.join( + self.final_folder, "%s%s_wat.tif" % (self.iso3.lower(), self.suffix) + ) + water_lc = os.path.join( + self.final_folder, "%s%s_wat_lc.tif" % (self.iso3.lower(), self.suffix) + ) inWG = rasterio.open(water_ghsl) wgData = inWG.read() - wgData[wgData == inWG.meta['nodata']] = 0 - inWLC= rasterio.open(water_lc) + wgData[wgData == inWG.meta["nodata"]] = 0 + inWLC = rasterio.open(water_lc) wlcData = inWLC.read() - wlcData[wlcData == inWLC.meta['nodata']] = 0 + wlcData[wlcData == inWLC.meta["nodata"]] = 0 combo = wgData + wlcData - out_stats.write(f"WATER: GHSL count: {wgData.sum()}; LC count: {wlcData.sum()}; overlap: {(combo == 2).sum()}\n") + out_stats.write( + f"WATER: GHSL count: {wgData.sum()}; LC count: {wlcData.sum()}; overlap: {(combo == 2).sum()}\n" + ) - # b. calculate overlap between water and population out_stats.write("***** Evaluate Water Population Overlap *****\n") for fileDef in pop_file_defs: pop_file = fileDef[0] urb_file = fileDef[1] - hd_file = fileDef[2] - name = fileDef[3] - + hd_file = fileDef[2] + name = fileDef[3] + cur_pop = rasterio.open(pop_file) curP = cur_pop.read() - curP[curP == cur_pop.meta['nodata']] = 0 - + curP[curP == cur_pop.meta["nodata"]] = 0 + urb = rasterio.open(urb_file).read() - hd = rasterio.open(hd_file).read() - + hd = rasterio.open(hd_file).read() + # c. 
calculate overlap between water and urban - out_stats.write(f"WATER {name} Population: TotalPop: {curP.sum().round()}, WaterPop GHSL: {(curP * wgData).sum().round()}, WaterPop LC: {(curP * wlcData).sum().round()}\n") - out_stats.write(f"WATER {name} Urban Cells: TotalUrban Cells: {urb.sum().round()}, WaterUrban GHSL: {(urb * wgData).sum()}, WaterUrb LC: {(urb * wlcData).sum()}\n") - out_stats.write(f"WATER {name} HD Cells: TotalPop: {hd.sum().round()}, WaterHD GHSL: {(hd * wgData).sum()}, WaterHD LC: {(hd * wlcData).sum()}\n") - - #Summarize zonal stats files + out_stats.write( + f"WATER {name} Population: TotalPop: {curP.sum().round()}, WaterPop GHSL: {(curP * wgData).sum().round()}, WaterPop LC: {(curP * wlcData).sum().round()}\n" + ) + out_stats.write( + f"WATER {name} Urban Cells: TotalUrban Cells: {urb.sum().round()}, WaterUrban GHSL: {(urb * wgData).sum()}, WaterUrb LC: {(urb * wlcData).sum()}\n" + ) + out_stats.write( + f"WATER {name} HD Cells: TotalPop: {hd.sum().round()}, WaterHD GHSL: {(hd * wgData).sum()}, WaterHD LC: {(hd * wlcData).sum()}\n" + ) + + # Summarize zonal stats files for sFile in [admin_stats, commune_stats]: if os.path.exists(sFile): tPrint(sFile) file_name = os.path.basename(sFile) inD = pd.read_csv(sFile, index_col=0) out_stats.write(f"***** Summarizing {file_name}\n") - bad_cols = ['index','OBJECTID','WB_ADM1_CO','WB_ADM0_CO','WB_ADM2_CO','Shape_Leng','Shape_Area'] + bad_cols = [ + "index", + "OBJECTID", + "WB_ADM1_CO", + "WB_ADM0_CO", + "WB_ADM2_CO", + "Shape_Leng", + "Shape_Area", + ] for col in inD.columns: - if not col in bad_cols: + if col not in bad_cols: curD = inD[col] try: curD_sum = curD.loc[curD > 0].sum() out_stats.write(f"{col}: {round(curD_sum)}\n") except: pass -
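Pulling the urban_helper.py pieces together, the `urban_country` workflow is normally driven in this order (every path and global raster below is a placeholder; note that `calculate_urban` rescales the per-km2 thresholds to per-cell values with cell_area_km2 = res_x * res_y / 1e6, so on a 250 m metric grid 300 people/km2 becomes 300 * 0.0625 = 18.75 people per cell):

    import geopandas as gpd
    from GOSTurban.urban_helper import urban_country

    adm0 = gpd.read_file("tza_adm0.shp")                          # admin-0 boundary (placeholder)
    pop_files = [("WorldPop/tza_ppp_2018.tif", "tza_upo18.tif")]  # (source raster, conventional name)

    # Placeholder paths to the global source rasters
    global_landcover = "globcover.tif"
    global_ghspop = "GHS_POP_250m.tif"
    global_ghspop1k = "GHS_POP_1km.tif"
    global_ghbuilt = "GHS_BUILT.tif"
    global_ghsl = "GHSL_water.tif"
    global_smod = "GHS_SMOD.tif"

    uc = urban_country("TZA", "/data/TZA", adm0, pop_files)
    uc.process_dem()                                   # SRTM via `elevation`, slope via richdem
    uc.extract_layers(global_landcover, global_ghspop, global_ghspop1k,
                      global_ghbuilt, global_ghsl, global_smod)
    uc.calculate_urban(urb_val=300, hd_urb_val=1500)   # EC thresholds, rescaled per cell
    uc.standardize_rasters()
    uc.evaluateOutput("tza_admin_stats.csv", "tza_commune_stats.csv")  # missing CSVs are skipped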