Skip to content

Commit

Permalink
feat: create_polygons destination can be a full s3 path (#15)
Browse files Browse the repository at this point in the history
* feat: create_polygons destination can be a full s3 path

* fix: remove unused import

* fix: s3 path does not need 's3://'
  • Loading branch information
paulfouquet authored Jun 26, 2022
1 parent fd60e53 commit d276baa
Showing 1 changed file with 19 additions and 13 deletions.
32 changes: 19 additions & 13 deletions scripts/create_polygons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
from collections import Counter
from urllib.parse import urlparse

import aws_helper as aws_helper
from linz_logger import get_log
from osgeo import gdal

import aws_helper as aws_helper

logger = get_log()

parser = argparse.ArgumentParser()
parser.add_argument('--uri', dest='uri', required=True)
parser.add_argument('--destination', dest='destination', required=True)
arguments = parser.parse_args()
uri = arguments.uri
dest_bucket = arguments.destination
destination = arguments.destination

# Split the s3 destination into a bucket name and a key prefix inside that bucket.
destination_bucket_name = aws_helper.bucket_name_from_path(destination)
# Strip only the leading scheme and the leading bucket name. Chained str.replace()
# calls would also delete any later "<bucket>/" inside the key prefix, and would
# leave the bucket name behind as the prefix when the destination is a bare bucket
# (or "s3://bucket") with no trailing slash.
destination_path = destination
if destination_path.startswith("s3://"):
    destination_path = destination_path[len("s3://"):]
# assumes bucket_name_from_path returns the bare bucket name (no scheme) — TODO confirm
if destination_path == destination_bucket_name or destination_path.startswith(f"{destination_bucket_name}/"):
    destination_path = destination_path[len(destination_bucket_name):]
# An empty prefix makes os.path.join(prefix, file_name) yield just the file name,
# i.e. the object lands at the bucket root.
destination_path = destination_path.lstrip("/")

def create_mask(file_path, mask_dst):
set_srs_command = f'gdal_edit.py -a_srs EPSG:2193 "{file_path}"'
Expand All @@ -42,19 +47,19 @@ def get_pixel_count(file_path):
return data_pixels_count

with tempfile.TemporaryDirectory() as tmp_dir:
file_name = os.path.basename(uri)
source_file_name = os.path.basename(uri)
# Download the file
if str(uri).startswith("s3://"):
uri_parse = urlparse(uri, allow_fragments=False)
bucket_name = uri_parse.netloc
bucket = aws_helper.get_bucket(bucket_name)
uri = os.path.join(tmp_dir, "temp.tif")
logger.debug("download_file", source=uri_parse.path[1:], bucket=bucket_name, dest=uri, fileName=file_name)
logger.debug("download_file", source=uri_parse.path[1:], bucket=bucket_name, destination=uri, sourceFileName=source_file_name)
bucket.download_file(uri_parse.path[1:], uri)


# Run create_mask=
logger.debug("create_mask", source=uri_parse.path[1:], bucket=bucket_name, dest=uri)
# Run create_mask
logger.debug("create_mask", source=uri_parse.path[1:], bucket=bucket_name, destination=uri)
mask_file = os.path.join(tmp_dir, "mask.tif")
create_mask(uri, mask_file)

Expand All @@ -64,17 +69,18 @@ def get_pixel_count(file_path):
# exclude extents if tif is all white or black
logger.debug(f"- data_px_count was zero in create_mask function for the tif {mask_file}")
else:
file_name_dst = os.path.splitext(file_name)[0] + ".geojson"
poly_dst = os.path.join(tmp_dir, file_name_dst)
polygonize_command = f'gdal_polygonize.py -q "{mask_file}" "{poly_dst}" -f GeoJSON'
destination_file_name = os.path.splitext(source_file_name)[0] + ".geojson"
temp_file_path = os.path.join(tmp_dir, destination_file_name)
polygonize_command = f'gdal_polygonize.py -q "{mask_file}" "{temp_file_path}" -f GeoJSON'
os.system(polygonize_command)

# Upload the GeoJSON polygon file
destination = aws_helper.get_bucket(dest_bucket)
logger.debug("upload_start", destinationBucket=dest_bucket, destinationFile=file_name_dst)
destination_bucket = aws_helper.get_bucket(destination_bucket_name)
destination_file_path = os.path.join(destination_path, destination_file_name)
logger.debug("upload_start", destinationBucket=destination_bucket_name, destinationFile=destination_file_path)
try:
destination.upload_file(poly_dst, file_name_dst)
destination_bucket.upload_file(temp_file_path, destination_file_path)
except Exception as e:
logger.debug("upload_error", err=e)
raise e
logger.debug("upload_end", destinationBucket=dest_bucket, destinationFile=file_name_dst)
logger.debug("upload_end", destinationBucket=destination_bucket_name, destinationFile=destination_file_path)

0 comments on commit d276baa

Please sign in to comment.