Skip to content

Commit

Permalink
Wagner added
Browse files Browse the repository at this point in the history
  • Loading branch information
bw4sz committed Sep 20, 2024
1 parent 60e8640 commit 6ddd4b6
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 2 deletions.
38 changes: 38 additions & 0 deletions data_prep/Firoze2023.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import json
import pandas as pd

def extract_bounding_boxes_from_labelme(folder_path):
    """Collect every annotated shape from the LabelMe JSON files in a folder.

    Each ``.json`` file in ``folder_path`` is parsed, and every entry of its
    ``shapes`` list is turned into a WKT ``POLYGON`` string built from the
    shape's ``points``. The first vertex is repeated at the end so the ring
    is closed.

    Args:
        folder_path: Directory holding the LabelMe ``.json`` files. The
            ``imagePath`` stored in each file is joined onto this directory.

    Returns:
        A pandas DataFrame with the columns ``image_path`` and ``geometry``
        (WKT string), one row per shape, with a unique 0..N-1 index. The
        frame is empty (but still has both columns) when the folder holds no
        JSON files or no shapes.
    """
    columns = ['image_path', 'geometry']
    rows = []

    # Sort so the row order does not depend on the filesystem listing order.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.json'):
            continue

        json_path = os.path.join(folder_path, filename)
        with open(json_path, 'r') as file:
            labelme_data = json.load(file)
        image_path = os.path.join(folder_path, labelme_data['imagePath'])

        for shape in labelme_data['shapes']:
            # Work on a copy so the parsed JSON is left untouched.
            points = list(shape['points'])
            if not points:
                # A shape without vertices has no geometry to export.
                continue
            # Add the first to the end to close the polygon
            points.append(points[0])
            coordinates = ', '.join('{} {}'.format(p[0], p[1]) for p in points)
            rows.append([image_path, 'POLYGON(({}))'.format(coordinates)])

    # Building one frame from all rows avoids pd.concat failing on an empty
    # list and gives every annotation a unique index.
    return pd.DataFrame(rows, columns=columns)

# Build the annotation table for the annotated forest folder and write it out
folder_path = '/orange/ewhite/DeepForest/Firoze2023/annotated_forest_dataset/annotated_real_forest'
df = extract_bounding_boxes_from_labelme(folder_path)

# Tag every row with the source publication and the single class label
df = df.assign(source="Firoze et al. 2023", label="Tree")

# Report how many annotations were found across how many distinct images
n_annotations = len(df)
n_images = len(df["image_path"].unique())
print("There are {} annotations in {} images".format(n_annotations, n_images))

df.to_csv("/orange/ewhite/DeepForest/Firoze2023/annotations.csv")

140 changes: 140 additions & 0 deletions data_prep/Wagner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import os
import geopandas as gpd
from shapely.geometry import box
from deepforest.utilities import read_file
import pandas as pd
import rasterio as rio
from rasterio.warp import calculate_default_transform, reproject, Resampling


# Define paths
geopackage_dir = "/orange/ewhite/DeepForest/Wagner_Australia/Crowns (manual)"
cropped_plots_dir = "/orange/ewhite/DeepForest/Wagner_Australia/Cropped plots"
output_csv = "/orange/ewhite/DeepForest/Wagner_Australia/annotations.csv"
three_band_dir = "/orange/ewhite/DeepForest/Wagner_Australia/three_band"

# Rasters and crowns are both projected into UTM S55
target_crs = "EPSG:32755"

# Padding in meters
# NOTE(review): nothing in this script applies the padding; the name is kept
# defined in case it is meant for a later cropping step.
padding = 10

# Initialize an empty list to store annotations
annotations = []

# The GeoTIFF/PNG writers below do not create missing directories.
os.makedirs(three_band_dir, exist_ok=True)

# Iterate over each geopackage in the directory (sorted for a stable row order)
for filename in sorted(os.listdir(geopackage_dir)):
    if not filename.endswith(".gpkg"):
        continue

    geopackage_path = os.path.join(geopackage_dir, filename)

    # Image names carry no dots, so drop them from the geopackage stem
    basename = os.path.splitext(filename)[0].replace(".", "")

    # Look for the corresponding image in the cropped plots folder
    image_path = os.path.join(cropped_plots_dir, basename + ".tif")
    if not os.path.exists(image_path):
        print(f"Image for {basename} not found in cropped plots folder.")
        continue

    three_band_path = os.path.join(three_band_dir, basename + "_three_band.tif")
    reprojected_path = os.path.join(three_band_dir, basename + "_three_band_utm.tif")
    png_path = os.path.join(three_band_dir, basename + "_three_band_utm.png")

    # Keep only the first three bands of the source image
    with rio.open(image_path) as src:
        bands = src.read([1, 2, 3])

        # Save the three-band image on the source grid
        with rio.open(
            three_band_path,
            'w',
            driver='GTiff',
            height=src.height,
            width=src.width,
            count=3,
            dtype=bands.dtype,
            crs=src.crs,
            transform=src.transform,
        ) as dst:
            dst.write(bands[0], 1)
            dst.write(bands[1], 2)
            dst.write(bands[2], 3)

    # Reproject the three-band image to UTM S55
    with rio.open(three_band_path) as src:
        transform, width, height = calculate_default_transform(
            src.crs, target_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': target_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        with rio.open(reprojected_path, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(
                    source=rio.band(src, i),
                    destination=rio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=target_crs,
                    resampling=Resampling.nearest)

    # The PNG has to share the pixel grid of the reprojected GeoTIFF, because
    # the annotations below are computed against that raster and then pointed
    # at the PNG. Read the pixels back from the reprojected file instead of
    # reusing the source-grid array.
    with rio.open(reprojected_path) as utm_src:
        utm_bands = utm_src.read()

    # Normalize the bands to 0-255. The stretch range comes from the source
    # pixels; values outside it (e.g. fill added by the reprojection) are
    # clipped.
    png_bands = []
    for i in range(bands.shape[0]):
        band_min = float(bands[i].min())
        band_max = float(bands[i].max())
        utm_band = utm_bands[i].astype('float32')
        if band_max == band_min:
            # Constant band: avoid dividing by zero, write it as all zeros
            stretched = utm_band * 0
        else:
            stretched = 255 * (utm_band - band_min) / (band_max - band_min)
        png_bands.append(stretched.clip(0, 255).astype('uint8'))

    # Save the normalized image as PNG
    with rio.open(
        png_path,
        'w',
        driver='PNG',
        height=height,
        width=width,
        count=3,
        dtype='uint8',
        crs=target_crs,
        transform=transform,
    ) as dst:
        for band_index, png_band in enumerate(png_bands, start=1):
            dst.write(png_band, band_index)

    # Process the annotations
    gdf = gpd.read_file(geopackage_path)
    gdf["label"] = "Tree"

    # project into utm S55
    gdf = gdf.to_crs(target_crs)
    gdf["image_path"] = reprojected_path
    # NOTE(review): read_file presumably converts the crowns into pixel
    # coordinates of the raster named in image_path - confirm against the
    # deepforest documentation.
    annotations_df = read_file(gdf, root_dir=three_band_dir)
    annotations_df['source'] = 'Wagner et al. 2023'

    # Point the annotations at the PNG copy of the reprojected image
    annotations_df["image_path"] = png_path

    # Append to the list of annotations
    annotations.append(annotations_df)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with a message that says what was actually missing.
if not annotations:
    raise ValueError(
        f"No geopackage in {geopackage_dir} had a matching image in "
        f"{cropped_plots_dir}; nothing to write.")

# Concatenate all annotations into a single DataFrame
all_annotations = gpd.GeoDataFrame(pd.concat(annotations, ignore_index=True))
# Save to CSV
all_annotations.to_csv(output_csv, index=False)
4 changes: 2 additions & 2 deletions data_prep/collect_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
"/orange/ewhite/DeepForest/Jansen_2023/pngs/annotations.csv",
"/orange/ewhite/DeepForest/Troles_Bamberg/coco2048/annotations/annotations.csv",
"/orange/ewhite/DeepForest/Cloutier2023/images/annotations.csv",
"/orange/ewhite/DeepForest/Firoze2023/annotations.csv"
"/orange/ewhite/DeepForest/Firoze2023/annotations.csv",
"/orange/ewhite/DeepForest/Wagner_Australia/annotations.csv"
]

# Current errors
Expand Down Expand Up @@ -183,7 +184,6 @@
shutil.copy("/orange/ewhite/DeepForest/MillionTrees/TreePolygons_v0.0/images/" + image, destination)

# Write examples from the mini datasets to the MillionTrees doc folder

mini_TreeBoxes_annotations.root_dir = "/orange/ewhite/DeepForest/MillionTrees/MiniTreeBoxes_v0.0/images/"
mini_TreePoints_annotations.root_dir = "/orange/ewhite/DeepForest/MillionTrees/MiniTreePoints_v0.0/images/"
mini_TreePolygons_annotations.root_dir = "/orange/ewhite/DeepForest/MillionTrees/MiniTreePolygons_v0.0/images/"
Expand Down
2 changes: 2 additions & 0 deletions docs/datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,5 @@ https://www.mdpi.com/2504-446X/7/3/155

Australia
https://www.mdpi.com/2072-4292/16/11/1935

![sample_image](public/Wagner_et_al._2023.png)
Binary file added docs/public/Wagner_et_al._2023.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 6ddd4b6

Please sign in to comment.