From 2cfc1ba4ea7f9cb2bd5c7e8707f8baa8f2c081e4 Mon Sep 17 00:00:00 2001 From: swinersha Date: Wed, 31 Jul 2024 13:44:46 +0000 Subject: [PATCH 01/19] feat: k_all pixels and interactive find pairs drafted --- .gitignore | 2 +- methods/matching/calculate_k.py | 29 +- .../cluster_find_pairs_interactive.py | 581 ++++++++++++++++++ 3 files changed, 599 insertions(+), 13 deletions(-) create mode 100644 methods/matching/cluster_find_pairs_interactive.py diff --git a/.gitignore b/.gitignore index eac8079..123285e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ .DS_Store *.npy data - +Figures # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py index dfecd05..dba2284 100644 --- a/methods/matching/calculate_k.py +++ b/methods/matching/calculate_k.py @@ -21,10 +21,13 @@ LARGE_PROJECT_PIXEL_DENSITY_PER_HECTARE = 0.05 # The '2 *' in this is because I'm just considering one axis, rather than area -PIXEL_SKIP_SMALL_PROJECT = \ - round((HECTARE_WIDTH_IN_METERS / (2 * SMALL_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS) -PIXEL_SKIP_LARGE_PROJECT = \ - round((HECTARE_WIDTH_IN_METERS / (2 * LARGE_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS) +# PIXEL_SKIP_SMALL_PROJECT = \ +# round((HECTARE_WIDTH_IN_METERS / (2 * SMALL_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS) +# PIXEL_SKIP_LARGE_PROJECT = \ +# round((HECTARE_WIDTH_IN_METERS / (2 * LARGE_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS) + +PIXEL_SKIP_SMALL_PROJECT = 1 +PIXEL_SKIP_LARGE_PROJECT = 1 MatchingCollection = namedtuple('MatchingCollection', ['boundary', 'lucs', 'cpcs', 'ecoregions', 'elevation', 'slope', 'access', 'countries']) @@ -66,10 +69,11 @@ def build_layer_collection( # ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once # as part of import of the ecoregions data - ecoregions = GroupLayer([ - RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in - glob.glob("*.tif", root_dir=ecoregions_directory_path) - ], name="ecoregions") + # ecoregions = GroupLayer([ + # RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in + # glob.glob("*.tif", root_dir=ecoregions_directory_path) + # ], name="ecoregions") + ecoregions = RasterLayer.layer_from_file(ecoregions_directory_path) elevation = GroupLayer([ RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in @@ -80,10 +84,11 @@ def build_layer_collection( glob.glob("slope*.tif", root_dir=slope_directory_path) ], name="slopes") - access = GroupLayer([ - RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in - glob.glob("*.tif", root_dir=access_directory_path) - ], name="access") + # access = GroupLayer([ + # RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in + # glob.glob("*.tif", root_dir=access_directory_path) + # ], name="access") + access = RasterLayer.layer_from_file(access_directory_path) countries = RasterLayer.layer_from_file(countries_raster_filename) diff --git a/methods/matching/cluster_find_pairs_interactive.py b/methods/matching/cluster_find_pairs_interactive.py new file mode 100644 index 0000000..9ac25fb --- /dev/null +++ b/methods/matching/cluster_find_pairs_interactive.py @@ -0,0 +1,581 @@ +# source myenv/bin/activate + +import pandas as pd +import numpy as np +from numba import njit +from sklearn.decomposition import PCA +from sklearn.cluster import KMeans +import matplotlib.pyplot as plt +import geopandas as gpd +import os +import time +import sys +import faiss + +# Read K and M +# Repeat 100 times - Ultimately we would like to remove this step +# PCA across K and M +# Divide into clusters - Check how much splitting into clusters speeds things up +# Sample K to find K_sub +# Sample M to find M_sub +# Split M into LUC combinations - How big is each set? +# For each cluster in K_sub +# Split cluster in K_sub into categorical combinations - How big is each set? +# For each categorical combination +# Sample from the equivalent cluster and categorical combindation in M_sub +# Find pairs for the categorical combinations in K_sub from the categorical combinations in M_sub +# RowBind categorical combination sets +# RowBind cluster sets +# Save Pairs + + +# NOTES +# 1. We might need to combine some categorical subsets because at least for comparing validity of the matches +# because if the subsets are too small the standardised mean differences can be very wrong +# 2. One option would be to combine clusters based on the proximity of the cluster centres. For the LUCs, we might +# combine groups that are in the same state when the project begins, even if the LUC history is not the same. +# 3. There is a question of how much supposed additionality is created by each categorical subset? If it is +# nothing, it might not matter. If it is substantive then it definitely does matter. + + + +# this function uses loops instead of numpy vector operations +@njit +def loop_match(m_pca, k_pca): + picked = np.ones((m_pca.shape[0],), dtype=np.bool_) + fast_matches = np.full(k_pca.shape[0], -1, dtype=np.int32) + for i in range(0, k_pca.shape[0]): + min_squared_diff_sum = np.inf + min_squared_diff_j = -1 + for j in range(0, m_pca.shape[0]): + if picked[j]: + squared_diff_sum = np.sum((m_pca[j, :] - k_pca[i, :])**2) + if squared_diff_sum < min_squared_diff_sum: + min_squared_diff_sum = squared_diff_sum + min_squared_diff_j = j + fast_matches[i] = min_squared_diff_j + picked[min_squared_diff_j] = False + return fast_matches + +### Now try with real numbers + +def to_int32(x): + # Normalize the data to the range 0 to 1 + min_val = np.min(x) + max_val = np.max(x) + normalized_data = (x - min_val) / (max_val - min_val) + + # Scale the normalized data to the range 0 to 255 for unsigned 8-bit integers + scaled_data = normalized_data * 255 + + # Convert to 32-bit integers (0 to 255) + int32_data = scaled_data.astype(np.int32) + + return int32_data + +def to_pca_int32(x): + # Perform PCA and convert to dataframe + pca = PCA(n_components=min(len(x), len(x.columns)), + whiten=False) # Centering and scaling done by default + pca_result = pca.fit_transform(x) + pca_df = pd.DataFrame(pca_result) + # Convert all columns to int8 + pca_32 = pca_df.apply(to_int32) + return pca_32 + + +def calculate_smd(group1, group2): + # Means + mean1, mean2 = np.mean(group1), np.mean(group2) + # Standard deviations + std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1) + # Sample sizes + n1, n2 = len(group1), len(group2) + # Pooled standard deviation + pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2)) + # Standardized mean difference + smd = (mean1 - mean2) / pooled_std + return smd, mean1, mean2, pooled_std + + +K_SUB_PROPORTION = 0.01 +M_SUB_PROPORTION = 0.1 +# Number of clusters +N_CLUSTERS = 9 +# Number of iterations for K means fitting +N_ITERATIONS = 100 +VERBOSE = True + +# Define the start year +t0 = 2012 # READ THIS IN +match_years = [t0-10, t0-5, t0] + +# Read in the data +boundary = gpd.read_file('/maps/aew85/projects/1201.geojson') + +k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k_all.parquet') +m_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/1201/matches.parquet') + +# concat m and k +km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), + m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) + +# Select columns (excluding 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt', and those starting with 'luc') +exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt'] +exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')] + +match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] + +# Extract only the continuous columns +continuous_columns = km_pixels.columns.difference(exclude_columns) +km_pixels_selected = km_pixels[continuous_columns] + +# PCA transform and conversion to 32 bit ints +km_pca = to_pca_int32(km_pixels_selected) + +# Looks good +km_pca.head() + +#------------------------------------------ + +# Initialize the KMeans object +kmeans = faiss.Kmeans(d=km_pca.shape[1], k=N_CLUSTERS, niter=N_ITERATIONS, verbose=True) +# Perform clustering +kmeans.train(km_pca) + +# Get cluster assignments +km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten() + +# Frequency distribution in each cluster +cluster_counts = pd.Series(km_pixels['cluster']).value_counts() +if VERBOSE: + print("Cluster counts:\n", cluster_counts) + + +# Convert to spatial (simple features) +km_pixels_sf = gpd.GeoDataFrame( + km_pixels, + geometry=gpd.points_from_xy(km_pixels['lng'], km_pixels['lat']), + crs="EPSG:4326" +) + + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Plot cluster centres + +# Get cluster centers +centroids = kmeans.centroids + +if VERBOSE: + # Plot the cluster centers + plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=100, marker='x', label='Cluster centers') + # Add cluster IDs as labels on the plot + for i, center in enumerate(centroids): + plt.text(center[0], center[1], str(i), color='red', fontsize=12, weight='bold') + + plt.title('K-means Clustering with Faiss') + plt.xlabel('PCA Component 1') + plt.ylabel('PCA Component 2') + plt.legend() + plt.show() + plt.savefig('Figures/cluster_centres_faiss_1.png') + plt.close() # Close the plot to free up memory + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Plot clusters as separate facets +if VERBOSE: + fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15)) + axes = axes.flatten() + + clusters = sorted(km_pixels_sf['cluster'].unique()) + + for i, cluster in enumerate(clusters): + ax = axes[i] + cluster_data = km_pixels_sf[km_pixels_sf['cluster'] == cluster] + cluster_data.plot(ax=ax, color='blue', markersize=0.2) + boundary.plot(ax=ax, edgecolor='black', facecolor='none') + ax.set_title(f'Cluster {cluster}') + ax.set_axis_off() + + # Turn off any unused subplots + for j in range(len(clusters), len(axes)): + fig.delaxes(axes[j]) + + plt.tight_layout() + plt.savefig('Figures/cluster_faiss_1_facet.png') + plt.close() + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +# Extract K and M pixels +k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt'] +m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl'] + +# Extract K and M PCA transforms +k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy() +m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy() + +k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION) +m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION) + +# Define indexs for the samples from K and M +k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False) +m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False) + +# Take random samples from K and M pixels +k_sub = k_pixels.iloc[k_random_indices] +m_sub = m_pixels.iloc[m_random_indices] + +# Take corresponding random samples from the PCA transformed K and M +k_sub_pca = k_pca[k_random_indices,:] +m_sub_pca = m_pca[m_random_indices,:] + +if VERBOSE: + # Handy code for displaying the number of counts in each unique category combination + # In K + k_combination_counts = k_sub.groupby(match_cats).size().reset_index(name='counts') + print("k_combination_counts") + print(k_combination_counts) + # In M + m_combination_counts = m_sub.groupby(match_cats).size().reset_index(name='counts') + print("m_combination_counts") + print(m_combination_counts) + + +# Identify the unique combinations of luc columns +k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats)) + +pairs_list = [] + +start_time = time.time() +for i in range(0, k_cat_combinations.shape[0]): + # i = 6 # ith element of the unique combinations of the luc time series in k + # for in range() + k_cat_comb = k_cat_combinations.iloc[i] + k_cat = k_sub[(k_sub[match_cats] == k_cat_comb).all(axis=1)] + k_cat_pca = k_sub_pca[(k_sub[match_cats] == k_cat_comb).all(axis=1)] + + # Find the subset in km_pixels that matches this combination + m_cat = m_sub[(m_sub[match_cats] == k_cat_comb).all(axis=1)] + m_cat_pca = m_sub_pca[(m_sub[match_cats] == k_cat_comb).all(axis=1)] + + if VERBOSE: + print('ksub_cat:' + str(k_cat.shape[0])) + print('msub_cat:' + str(m_cat.shape[0])) + + # If there is no suitable match for the pre-project luc time series + # Then it may be preferable to just take the luc state at t0 + # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)] + # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)] + # For if there are no matches return nothing + + if(m_cat.shape[0] < k_cat.shape[0] * 5): + print("M insufficient for matching. Set VERBOSE to True for more details.") + continue + + matches_index = loop_match(m_cat_pca, k_cat_pca) + m_cat_matches = m_cat.iloc[matches_index] + + # i = 0 + # matched = pd.concat([k_cat.iloc[i], m_cat.iloc[matches[i]]], axis=1, ignore_index=True) + # matched.columns = ['trt', 'ctrl'] + # matched + #Looks great! + columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope'] + # Calculate SMDs for the specified columns + smd_results = [] + for column in columns_to_compare: + smd, mean1, mean2, pooled_std = calculate_smd(k_cat[column], m_cat_matches[column]) + smd_results.append((column, smd, mean1, mean2, pooled_std)) + + # Convert the results to a DataFrame for better readability + smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) + + if VERBOSE: + # Print the results + print("categorical combination:") + print(k_cat_comb) + # Count how many items in 'column1' are not equal to the specified integer value + print("LUC flips in K:") + (k_cat['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum() + print("LUC flips in matches:") + (m_cat_matches['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum() + print("Standardized Mean Differences:") + print(smd_df) + + # Join the pairs into one dataframe: + k_cat = k_cat.reset_index(drop = True) + m_cat_matches = m_cat_matches.reset_index(drop = True) + pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1) + + # Append the resulting DataFrame to the list + pairs_list.append(pairs_df) + +# Combine all the DataFrames in the list into a single DataFrame +combined_pairs = pd.concat(pairs_list, ignore_index=True) + +end_time = time.time() +elapsed_time = end_time - start_time +if VERBOSE: + print(f"Elapsed time: {elapsed_time:.2f} seconds") + +columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope'] +# Calculate SMDs for the specified columns +smd_results = [] +for column in columns_to_compare: + smd, mean1, mean2, pooled_std = calculate_smd(combined_pairs['k_' + column], combined_pairs['s_' + column]) + smd_results.append((column, smd, mean1, mean2, pooled_std)) + +# Convert the results to a DataFrame for better readability +smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) +print(smd_df) + +smd_results = [] +for column in columns_to_compare: + smd, mean1, mean2, pooled_std = calculate_smd(k_pixels[column], m_pixels[column]) + smd_results.append((column, smd, mean1, mean2, pooled_std)) + +# Convert the results to a DataFrame for better readability +smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) +print(smd_df) + + + + + + + +def find_match_iteration( + k_parquet_filename: str, + m_parquet_filename: str, + start_year: int, + output_folder: str, + idx_and_seed: tuple[int, int] +) -> None: + logging.info("Find match iteration %d of %d", idx_and_seed[0] + 1, REPEAT_MATCH_FINDING) + rng = np.random.default_rng(idx_and_seed[1]) + + logging.info("Loading K from %s", k_parquet_filename) + k_pixels = pd.read_parquet(k_parquet_filename) + + logging.info("Loading M from %s", m_parquet_filename) + m_pixels = pd.read_parquet(m_parquet_filename) + + # concat m and k + km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), + m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) + + # Find the continuous columns + exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt'] + exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')] + continuous_columns = km_pixels.columns.difference(exclude_columns) + # Categorical columns + match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] + + logging.info("Starting PCA transformation of k and m union. km_pixels.shape: %a", {km_pixels.shape}) + # PCA transform and conversion to 32 bit ints for continuous only + km_pca = to_pca_int32(km_pixels[continuous_columns]) + logging.info("Done PCA transformation") + + # Extract K and M pixels - this might be unnecessary I just wanted to make sure + # K and M were in the same order here and in the PCA transform + k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt'] + m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl'] + # Extract K and M PCA transforms + k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy() + m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy() + + # Sample from K and M + k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION) + m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION) + # Define indexs for the samples from K and M + k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False) + m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False) + # Take random samples from K and M pixels + k_sub = k_pixels.iloc[k_random_indices] + m_sub = m_pixels.iloc[m_random_indices] + # Take corresponding random samples from the PCA transformed K and M + k_sub_pca = k_pca[k_random_indices,:] + m_sub_pca = m_pca[m_random_indices,:] + + logging.info("Samples taken from K and M. k_sub.shape: %a; m_sub.shape: %a", {k_sub.shape, m_sub.shape}) + + # Identify the unique combinations of luc columns + k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats)) + + pairs_list = [] + matchless_list = [] + + logging.info("Starting greedy matching... k_sub.shape: %s, m_sub.shape: %s", + k_sub.shape, m_sub.shape) + + start_time = time.time() + for i in range(0, k_cat_combinations.shape[0]): + # i = 6 # ith element of the unique combinations of the luc time series in k + # for in range() + k_cat_comb = k_cat_combinations.iloc[i] + k_cat = k_sub[(k_sub[match_cats] == k_cat_comb).all(axis=1)] + k_cat_pca = k_sub_pca[(k_sub[match_cats] == k_cat_comb).all(axis=1)] + + # Find the subset in km_pixels that matches this combination + m_cat = m_sub[(m_sub[match_cats] == k_cat_comb).all(axis=1)] + m_cat_pca = m_sub_pca[(m_sub[match_cats] == k_cat_comb).all(axis=1)] + + if VERBOSE: + print('ksub_cat:' + str(k_cat.shape[0])) + print('msub_cat:' + str(m_cat.shape[0])) + + # If there is no suitable match for the pre-project luc time series + # Then it may be preferable to just take the luc state at t0 + # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)] + # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)] + # For if there are no matches return nothing + + if(m_cat.shape[0] < k_cat.shape[0] * 5): + # print("M insufficient for matching. Set VERBOSE to True for more details.") + # Append the matchless DataFrame to the list + matchless_list.append(k_cat) + continue + + # Find the matches + matches_index = loop_match(m_cat_pca, k_cat_pca) + m_cat_matches = m_cat.iloc[matches_index] + + # i = 0 + # matched = pd.concat([k_cat.iloc[i], m_cat.iloc[matches[i]]], axis=1, ignore_index=True) + # matched.columns = ['trt', 'ctrl'] + # matched + #Looks great! + columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope'] + # Calculate SMDs for the specified columns + smd_results = [] + for column in columns_to_compare: + smd, mean1, mean2, pooled_std = calculate_smd(k_cat[column], m_cat_matches[column]) + smd_results.append((column, smd, mean1, mean2, pooled_std)) + + # Convert the results to a DataFrame for better readability + smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) + + if VERBOSE: + # Print the results + print("categorical combination:") + print(k_cat_comb) + # Count how many items in 'column1' are not equal to the specified integer value + print("LUC flips in K:") + (k_cat['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum() + print("LUC flips in matches:") + (m_cat_matches['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum() + print("Standardized Mean Differences:") + print(smd_df) + + # Join the pairs into one dataframe: + k_cat = k_cat.reset_index(drop = True) + m_cat_matches = m_cat_matches.reset_index(drop = True) + pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1) + + # Append the resulting DataFrame to the list + pairs_list.append(pairs_df) + + # Combine all the DataFrames in the list into a single DataFrame + pairs = pd.concat(pairs_list, ignore_index=True) + matchless = pd.concat(matchless_list, ignore_index=True) + + logging.info("Finished greedy matching... pairs.shape: %s, matchless.shape: %s", + pairs.shape, matchless.shape) + + logging.info("Starting storing matches...") + pairs.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet')) + matchless.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet')) + + logging.info("Finished find match iteration") + + +def find_pairs( + k_parquet_filename: str, + m_parquet_filename: str, + start_year: int, + seed: int, + output_folder: str, + processes_count: int +) -> None: + logging.info("Starting find pairs") + os.makedirs(output_folder, exist_ok=True) + + rng = np.random.default_rng(seed) + iteration_seeds = zip(range(REPEAT_MATCH_FINDING), rng.integers(0, 1000000, REPEAT_MATCH_FINDING)) + + with Pool(processes=processes_count) as pool: + pool.map( + partial( + find_match_iteration, + k_parquet_filename, + m_parquet_filename, + start_year, + output_folder + ), + iteration_seeds + ) + + +def main(): + # If you use the default multiprocess model then you risk deadlocks when logging (which we + # have hit). Spawn is the default on macOS, but not on Linux. + set_start_method("spawn") + + parser = argparse.ArgumentParser(description="Takes K and M and finds 100 sets of matches.") + parser.add_argument( + "--k", + type=str, + required=True, + dest="k_filename", + help="Parquet file containing pixels from K as generated by calculate_k.py" + ) + parser.add_argument( + "--m", + type=str, + required=True, + dest="m_filename", + help="Parquet file containing pixels from M as generated by build_m_table.py" + ) + parser.add_argument( + "--start_year", + type=int, + required=True, + dest="start_year", + help="Year project started." + ) + parser.add_argument( + "--seed", + type=int, + required=True, + dest="seed", + help="Random number seed, to ensure experiments are repeatable." + ) + parser.add_argument( + "--output", + type=str, + required=True, + dest="output_directory_path", + help="Directory into which output matches will be written. Will be created if it does not exist." + ) + parser.add_argument( + "-j", + type=int, + required=False, + default=round(cpu_count() / 2), + dest="processes_count", + help="Number of concurrent threads to use." + ) + args = parser.parse_args() + + find_pairs( + args.k_filename, + args.m_filename, + args.start_year, + args.seed, + args.output_directory_path, + args.processes_count + ) + +if __name__ == "__main__": + main() \ No newline at end of file From 088048eb0907685ec24ba6afc20cffe7ee71e2c7 Mon Sep 17 00:00:00 2001 From: swinersha Date: Wed, 31 Jul 2024 18:54:55 +0000 Subject: [PATCH 02/19] feat: fast find pairs experiment incorporated into executable --- methods/matching/find_pairs.py | 469 +++++++++++++++------------------ 1 file changed, 209 insertions(+), 260 deletions(-) diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py index 7f17782..62e9e9c 100644 --- a/methods/matching/find_pairs.py +++ b/methods/matching/find_pairs.py @@ -3,16 +3,35 @@ import logging from functools import partial from multiprocessing import Pool, cpu_count, set_start_method -from numba import jit # type: ignore +from numba import njit # type: ignore import numpy as np import pandas as pd +from sklearn.decomposition import PCA +import faiss from methods.common.luc import luc_matching_columns +# TO DO: +# 1. Rename columns to luc10, luc5 and luc0 to align with the pipeline + +# to delete: +# start_year = 2012 +# k_parquet_filename = '/maps/tws36/tmf_pipe_out/1201/k_all.parquet' +# m_parquet_filename = '/maps/aew85/tmf_pipe_out/1201/tom_pairs/matches.parquet' +# luc_match = True + REPEAT_MATCH_FINDING = 100 -DEFAULT_DISTANCE = 10000000.0 DEBUG = False +K_SUB_PROPORTION = 0.01 +M_SUB_PROPORTION = 0.1 +# Number of clusters +NUM_CLUSTERS = 9 +# Number of iterations for K means fitting +NUM_ITERATIONS = 100 +RELATIVE_MATCH_YEARS = [-10, -5, 0] + + DISTANCE_COLUMNS = [ "elevation", "slope", "access", "cpc0_u", "cpc0_d", @@ -23,297 +42,219 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") +# this function uses loops instead of numpy vector operations +@njit +def loop_match(m_pca, k_pca): + picked = np.ones((m_pca.shape[0],), dtype=np.bool_) + fast_matches = np.full(k_pca.shape[0], -1, dtype=np.int32) + for i in range(0, k_pca.shape[0]): + min_squared_diff_sum = np.inf + min_squared_diff_j = -1 + for j in range(0, m_pca.shape[0]): + if picked[j]: + squared_diff_sum = np.sum((m_pca[j, :] - k_pca[i, :])**2) + if squared_diff_sum < min_squared_diff_sum: + min_squared_diff_sum = squared_diff_sum + min_squared_diff_j = j + fast_matches[i] = min_squared_diff_j + picked[min_squared_diff_j] = False + return fast_matches + +### Now try with real numbers + +def to_int32(x): + # Normalize the data to the range 0 to 1 + min_val = np.min(x) + max_val = np.max(x) + normalized_data = (x - min_val) / (max_val - min_val) + # Scale the normalized data to the range 0 to 255 for unsigned 8-bit integers + scaled_data = normalized_data * 255 + # Convert to 32-bit integers (0 to 255) + int32_data = scaled_data.astype(np.int32) + return int32_data + +def to_pca_int32(x): + # Perform PCA and convert to dataframe + pca = PCA(n_components=min(len(x), len(x.columns)), + whiten=False) # Centering and scaling done by default + pca_result = pca.fit_transform(x) + pca_df = pd.DataFrame(pca_result) + # Convert all columns to int8 + pca_32 = pca_df.apply(to_int32) + return pca_32 + + +def calculate_smd(group1, group2): + # Means + mean1, mean2 = np.mean(group1), np.mean(group2) + # Standard deviations + std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1) + # Sample sizes + n1, n2 = len(group1), len(group2) + # Pooled standard deviation + pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2)) + # Standardized mean difference + smd = (mean1 - mean2) / pooled_std + return smd, mean1, mean2, pooled_std + def find_match_iteration( - k_parquet_filename: str, - m_parquet_filename: str, + km_pixels: pd.DataFrame, + km_pca: np.ndarray, start_year: int, + luc_match: bool, output_folder: str, idx_and_seed: tuple[int, int] ) -> None: logging.info("Find match iteration %d of %d", idx_and_seed[0] + 1, REPEAT_MATCH_FINDING) rng = np.random.default_rng(idx_and_seed[1]) - - logging.info("Loading K from %s", k_parquet_filename) - - # Methodology 6.5.7: For a 10% sample of K - k_set = pd.read_parquet(k_parquet_filename) - k_subset = k_set.sample( - frac=0.1, - random_state=rng - ).reset_index() - - logging.info("Loading M from %s", m_parquet_filename) - m_set = pd.read_parquet(m_parquet_filename) - - # get the column ids for DISTANCE_COLUMNS - thresholds_for_columns = np.array([ - 200.0, # Elev - 2.5, # Slope - 10.0, # Access - 0.1, # CPCs - 0.1, # CPCs - 0.1, # CPCs - 0.1, # CPCs - 0.1, # CPCs - 0.1, # CPCs - ]) - - logging.info("Preparing s_set...") - - m_dist_thresholded_df = m_set[DISTANCE_COLUMNS] / thresholds_for_columns - k_subset_dist_thresholded_df = k_subset[DISTANCE_COLUMNS] / thresholds_for_columns - - # convert to float32 numpy arrays and make them contiguous for numba to vectorise - m_dist_thresholded = np.ascontiguousarray(m_dist_thresholded_df, dtype=np.float32) - k_subset_dist_thresholded = np.ascontiguousarray(k_subset_dist_thresholded_df, dtype=np.float32) - - # LUC columns are all named with the year in, so calculate the column names - # for the years we are intested in - luc0, luc5, luc10 = luc_matching_columns(start_year) - # As well as all the LUC columns for later use - luc_columns = [x for x in m_set.columns if x.startswith('luc')] - - hard_match_columns = ['country', 'ecoregion', luc10, luc5, luc0] - assert len(hard_match_columns) == HARD_COLUMN_COUNT - - # similar to the above, make the hard match columns contiguous float32 numpy arrays - m_dist_hard = np.ascontiguousarray(m_set[hard_match_columns].to_numpy()).astype(np.int32) - k_subset_dist_hard = np.ascontiguousarray(k_subset[hard_match_columns].to_numpy()).astype(np.int32) - - # Methodology 6.5.5: S should be 10 times the size of K, in order to achieve this for every - # pixel in the subsample (which is 10% the size of K) we select 100 pixels. - required = 100 - - logging.info("Running make_s_set_mask... required: %d", required) - starting_positions = rng.integers(0, int(m_dist_thresholded.shape[0]), int(k_subset_dist_thresholded.shape[0])) - s_set_mask_true, no_potentials = make_s_set_mask( - m_dist_thresholded, - k_subset_dist_thresholded, - m_dist_hard, - k_subset_dist_hard, - starting_positions, - required + + match_years = [start_year + year for year in RELATIVE_MATCH_YEARS] + # The categorical columns: + if luc_match: + match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] + else: + match_cats = ["ecoregion", "country", "cluster"] + + # Extract K and M pixels + k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt'] + m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl'] + # Extract K and M PCA transforms + k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy() + m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy() + # Draw subsamples + # Methodology 6.5.7: Needs to be updated !!! + k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION) + m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION) + # Define indexs for the samples from K and M + k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False) + m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False) + # Take random samples from K and M pixels + k_sub = k_pixels.iloc[k_random_indices] + m_sub = m_pixels.iloc[m_random_indices] + # Take corresponding random samples from the PCA transformed K and M + k_sub_pca = k_pca[k_random_indices,:] + m_sub_pca = m_pca[m_random_indices,:] + + logging.info("Starting greedy matching... k_sub.shape: %s, m_sub.shape: %s", + k_sub.shape, m_sub.shape) + + pairs, matchless = greedy_match( + k_sub, + m_sub, + k_sub_pca, + m_sub_pca, + match_cats ) - - logging.info("Done make_s_set_mask. s_set_mask.shape: %a", {s_set_mask_true.shape}) - - s_set = m_set[s_set_mask_true] - potentials = np.invert(no_potentials) - - k_subset = k_subset[potentials] - logging.info("Finished preparing s_set. shape: %a", {s_set.shape}) - - # Notes: - # 1. Not all pixels may have matches - results = [] - matchless = [] - - s_set_for_cov = s_set[DISTANCE_COLUMNS] - logging.info("Calculating covariance...") - covarience = np.cov(s_set_for_cov, rowvar=False) - logging.info("Calculating inverse covariance...") - invconv = np.linalg.inv(covarience).astype(np.float32) - - # Match columns are luc10, luc5, luc0, "country" and "ecoregion" - s_set_match = s_set[hard_match_columns + DISTANCE_COLUMNS].to_numpy(dtype=np.float32) - # this is required so numba can vectorise the loop in greedy_match - s_set_match = np.ascontiguousarray(s_set_match) - - # Now we do the same thing for k_subset - k_subset_match = k_subset[hard_match_columns + DISTANCE_COLUMNS].to_numpy(dtype=np.float32) - # this is required so numba can vectorise the loop in greedy_match - k_subset_match = np.ascontiguousarray(k_subset_match) - - logging.info("Starting greedy matching... k_subset_match.shape: %s, s_set_match.shape: %s", - k_subset_match.shape, s_set_match.shape) - - add_results, k_idx_matchless = greedy_match( - k_subset_match, - s_set_match, - invconv - ) - + + # Combine all the pairs DataFrames in the list into a single DataFrame + combined_pairs = pd.concat(pairs, ignore_index=True) + # Combine all the matchess DataFrames in the list into a single DataFrame + combined_matchless = pd.concat(matchless, ignore_index=True) logging.info("Finished greedy matching...") - + logging.info("Starting storing matches...") - - for result in add_results: - (k_idx, s_idx) = result - k_row = k_subset.iloc[k_idx] - match = s_set.iloc[s_idx] - - if DEBUG: - for hard_match_column in hard_match_columns: - if k_row[hard_match_column] != match[hard_match_column]: - print(k_row) - print(match) - raise ValueError("Hard match inconsistency") - - results.append( - [k_row.lat, k_row.lng] + [k_row[x] for x in luc_columns + DISTANCE_COLUMNS] + \ - [match.lat, match.lng] + [match[x] for x in luc_columns + DISTANCE_COLUMNS] - ) - - logging.info("Finished storing matches...") - - for k_idx in k_idx_matchless: - k_row = k_subset.iloc[k_idx] - matchless.append(k_row) - - columns = ['k_lat', 'k_lng'] + \ - [f'k_{x}' for x in luc_columns + DISTANCE_COLUMNS] + \ - ['s_lat', 's_lng'] + \ - [f's_{x}' for x in luc_columns + DISTANCE_COLUMNS] - - results_df = pd.DataFrame(results, columns=columns) - results_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet')) - - matchless_df = pd.DataFrame(matchless, columns=k_set.columns) - matchless_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet')) - + combined_pairs_df = pd.DataFrame(combined_pairs) + combined_pairs_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet')) + + combined_matchless_df = pd.DataFrame(combined_matchless) + combined_matchless_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet')) + logging.info("Finished find match iteration") -@jit(nopython=True, fastmath=True, error_model="numpy") -def make_s_set_mask( - m_dist_thresholded: np.ndarray, - k_subset_dist_thresholded: np.ndarray, - m_dist_hard: np.ndarray, - k_subset_dist_hard: np.ndarray, - starting_positions: np.ndarray, - required: int -): - m_size = m_dist_thresholded.shape[0] - k_size = k_subset_dist_thresholded.shape[0] - - s_include = np.zeros(m_size, dtype=np.bool_) - k_miss = np.zeros(k_size, dtype=np.bool_) - - for k in range(k_size): - matches = 0 - k_row = k_subset_dist_thresholded[k, :] - k_hard = k_subset_dist_hard[k] - - for index in range(m_size): - m_index = (index + starting_positions[k]) % m_size - - m_row = m_dist_thresholded[m_index, :] - m_hard = m_dist_hard[m_index] - - should_include = True - - # check that every element of m_hard matches k_hard - hard_equals = True - for j in range(m_hard.shape[0]): - if m_hard[j] != k_hard[j]: - hard_equals = False - - if not hard_equals: - should_include = False - else: - for j in range(m_row.shape[0]): - if abs(m_row[j] - k_row[j]) > 1.0: - should_include = False - - if should_include: - s_include[m_index] = True - matches += 1 - - # Don't find any more M's - if matches == required: - break - - k_miss[k] = matches == 0 - - return s_include, k_miss - -# Function which returns a boolean array indicating whether all values in a row are true -@jit(nopython=True, fastmath=True, error_model="numpy") -def rows_all_true(rows: np.ndarray): - # Don't use np.all because not supported by numba - - # Create an array of booleans for rows in s still available - all_true = np.ones((rows.shape[0],), dtype=np.bool_) - for row_idx in range(rows.shape[0]): - for col_idx in range(rows.shape[1]): - if not rows[row_idx, col_idx]: - all_true[row_idx] = False - break - - return all_true - - -@jit(nopython=True, fastmath=True, error_model="numpy") def greedy_match( - k_subset: np.ndarray, - s_set: np.ndarray, - invcov: np.ndarray + k_sub: pd.DataFrame, + m_sub: pd.DataFrame, + k_sub_pca: np.ndarray, + m_sub_pca: np.ndarray, + match_cats: list ): - # Create an array of booleans for rows in s still available - s_available = np.ones((s_set.shape[0],), dtype=np.bool_) - total_available = s_set.shape[0] - - results = [] + # Identify the unique combinations of categorical columns + k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats)) + + # Not all pixels may have matches + pairs = [] matchless = [] - - s_tmp = np.zeros((s_set.shape[0],), dtype=np.float32) - - for k_idx in range(k_subset.shape[0]): - k_row = k_subset[k_idx, :] - - hard_matches = rows_all_true(s_set[:, :HARD_COLUMN_COUNT] == k_row[:HARD_COLUMN_COUNT]) & s_available - hard_matches = hard_matches.reshape( - -1, - ) - - if total_available > 0: - # Now calculate the distance between the k_row and all the hard matches we haven't already matched - s_tmp[hard_matches] = batch_mahalanobis_squared( - s_set[hard_matches, HARD_COLUMN_COUNT:], k_row[HARD_COLUMN_COUNT:], invcov - ) - # Find the index of the minimum distance in s_tmp[hard_matches] but map it back to the index in s_set - if np.any(hard_matches): - min_dist_idx = np.argmin(s_tmp[hard_matches]) - min_dist_idx = np.arange(s_tmp.shape[0])[hard_matches][min_dist_idx] - - results.append((k_idx, min_dist_idx)) - s_available[min_dist_idx] = False - total_available -= 1 - else: - matchless.append(k_idx) - - return (results, matchless) - -# optimised batch implementation of mahalanobis distance that returns a distance per row -@jit(nopython=True, fastmath=True, error_model="numpy") -def batch_mahalanobis_squared(rows, vector, invcov): - # calculate the difference between the vector and each row (broadcasted) - diff = rows - vector - # calculate the distance for each row in one batch - dists = (np.dot(diff, invcov) * diff).sum(axis=1) - return dists - + + for i in range(0, k_cat_combinations.shape[0]): + # i = 6 # ith element of the unique combinations of the luc time series in k + # for in range() + k_cat_comb = k_cat_combinations.iloc[i] + k_cat_index = k_sub[match_cats] == k_cat_comb + k_cat = k_sub[(k_cat_index).all(axis=1)] + k_cat_pca = k_sub_pca[(k_cat_index).all(axis=1)] + + # Find the subset in km_pixels that matches this combination + m_cat_index = m_sub[match_cats] == k_cat_comb + m_cat = m_sub[(m_cat_index).all(axis=1)] + m_cat_pca = m_sub_pca[(m_cat_index).all(axis=1)] + + # If there is no suitable match for the pre-project luc time series + # Then it may be preferable to just take the luc state at t0 + # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)] + # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)] + # For now if there are no matches return nothing + + if(m_cat.shape[0] < k_cat.shape[0] * 5): + matchless.append(k_cat) + continue + + matches_index = loop_match(m_cat_pca, k_cat_pca) + m_cat_matches = m_cat.iloc[matches_index] + + # Join the pairs into one dataframe: + k_cat = k_cat.reset_index(drop = True) + m_cat_matches = m_cat_matches.reset_index(drop = True) + pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1) + # Append the resulting DataFrame to the list + pairs.append(pairs_df) + + return (pairs, matchless) def find_pairs( k_parquet_filename: str, m_parquet_filename: str, start_year: int, + luc_match: bool, seed: int, output_folder: str, processes_count: int ) -> None: + logging.info("Loading K from %s", k_parquet_filename) + k_pixels = pd.read_parquet(k_parquet_filename) + logging.info("Loading M from %s", m_parquet_filename) + m_pixels = pd.read_parquet(m_parquet_filename) + # concat m and k + km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), + m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) + + # Extract only the continuous columns + km_pixels_distance = km_pixels[DISTANCE_COLUMNS] + # PCA transform and conversion to 32 bit ints + logging.info("Transforming continuous variables to PCA space") + km_pca = to_pca_int32(km_pixels_distance) + # Find clusters using Kmeans + logging.info("Starting cluster assignment using kmeans") + # Initialize the KMeans object + kmeans = faiss.Kmeans(d=km_pca.shape[1], k=NUM_CLUSTERS, niter=NUM_ITERATIONS, verbose=True) + # Perform clustering + kmeans.train(km_pca) + # Get cluster assignments + km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten() + logging.info("Starting find pairs") os.makedirs(output_folder, exist_ok=True) - + rng = np.random.default_rng(seed) iteration_seeds = zip(range(REPEAT_MATCH_FINDING), rng.integers(0, 1000000, REPEAT_MATCH_FINDING)) - + with Pool(processes=processes_count) as pool: pool.map( partial( find_match_iteration, - k_parquet_filename, - m_parquet_filename, + km_pixels, + km_pca, start_year, + luc_match, output_folder ), iteration_seeds @@ -346,6 +287,13 @@ def main(): dest="start_year", help="Year project started." ) + parser.add_argument( + "--luc_match", + type=bool, + required=True, + dest="luc_match", + help="Boolean determines whether matching should include LUCs." + ) parser.add_argument( "--seed", type=int, @@ -374,6 +322,7 @@ def main(): args.k_filename, args.m_filename, args.start_year, + args.luc_match, args.seed, args.output_directory_path, args.processes_count From f1d44eee7ab1f09df4d29d0a3be66eef278ca684 Mon Sep 17 00:00:00 2001 From: Abigail E Williams Date: Thu, 18 Jul 2024 13:21:11 +0000 Subject: [PATCH 03/19] Added wrapper script --- README.md | 3 + scripts/tmfpython.sh | 227 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100755 scripts/tmfpython.sh diff --git a/README.md b/README.md index f02d35b..027f896 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,9 @@ The code is broken into three main sections contained in the to generate the outputs from the methodology like the equivalent permanence and the additionality per year. +Additionally, there is a wrapper script called `tmfpython.sh` in the `scripts/` folder. +This contains all of the pipeline commands and uses the `tmfpython3` magic wrapper. + ## Bugs Should you find any bugs or issues with the code then please do open an issue on diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh new file mode 100755 index 0000000..beb8178 --- /dev/null +++ b/scripts/tmfpython.sh @@ -0,0 +1,227 @@ +#!/bin/bash + +#run with command: scripts/tmfpython.sh -p 1113 -t 2010 ... +#p: project ID +#t: year of project start (t0) +#e: evaluation year (default: 2022) +#r: whether to run an ex-post evaluation and knit the results in an R notebook (true/false, default: false). +#a: whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false). + +#NB running evaluations requires the evaluations code + +# Check which branch is currently checked out +current_branch=$(git rev-parse --abbrev-ref HEAD) + +set -e + +############ DEFAULTS ############### + +input_dir="/maps/aew85/projects" +output_dir="/maps/aew85/tmf_pipe_out" +eval_year=2022 +ex_post=false +ex_ante=false + +##################################### + +function display_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " -p Project name" + echo " -t Start year" + echo " -e Evaluation year" + echo " -r Knit ex post evaluation? (true/false)" + echo " -a Knit ex ante evaluation? (true/false)" + echo " -h Display this help message" + echo + echo "Example:" + echo " $0 -p 'gola' -t 2012 -e 2021 -r true -a true" +} + +# Parse arguments +while getopts "p:t:e:r:a:h" flag +do + case "${flag}" in + p) proj=${OPTARG};; + t) t0=${OPTARG};; + e) eval_year=${OPTARG};; + r) ex_post=${OPTARG};; + a) ex_ante=${OPTARG};; + h) display_help; exit 0;; + *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;; + esac +done + +echo "Project: $proj" +echo "t0: $t0" +echo "Evaluation year: $eval_year" +echo "Ex-post evaluation: $ex_post" +echo "Ex-ante evaluation: $ex_ante" + +if [ $# -eq 0 ]; then + display_help + exit 1 +fi + +# Make project output directory +mkdir -p "${output_dir}/${proj}" +echo "--Folder created.--" + +#Make buffer +tmfpython3 -m methods.inputs.generate_boundary --project "${input_dir}/${proj}.geojson" --output "${output_dir}/${proj}/buffer.geojson" +echo "--Buffer created.--" + +#Make leakage area +tmfpython3 -m methods.inputs.generate_leakage --project "${input_dir}/${proj}.geojson" --output "${output_dir}/${proj}/leakage.geojson" +echo "--Leakage created.--" + +# Get GEDI data +tmfpython3 -m methods.inputs.locate_gedi_data "${output_dir}/${proj}/buffer.geojson" /maps/4C/gedi/granule/info/ +tmfpython3 -m methods.inputs.download_gedi_data /maps/4C/gedi/granule/info/* /maps/4C/gedi/granule/ +tmfpython3 -m methods.inputs.filter_gedi_data --buffer "${output_dir}/${proj}/buffer.geojson" \ + --granules /maps/4C/gedi/granule/ \ + --output "${output_dir}/${proj}/gedi.geojson" +tmfpython3 -m methods.inputs.generate_carbon_density --jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ + --gedi "${output_dir}/${proj}/gedi.geojson" \ + --output "${output_dir}/${proj}/carbon-density.csv" + +echo "--GEDI data obtained.--" + +#Generate list of overlapping countries +tmfpython3 -m methods.inputs.generate_country_list \ +--leakage "${output_dir}/${proj}/leakage.geojson" \ +--countries /maps/4C/osm_boundaries.geojson \ +--output "${output_dir}/${proj}/country-list.json" +echo "--Country list created.--" + +#Generate matching area +tmfpython3 -m methods.inputs.generate_matching_area --project "${input_dir}/${proj}.geojson" \ +--countrycodes "${output_dir}/${proj}/country-list.json" \ +--countries /maps/4C/osm_boundaries.geojson \ +--ecoregions /maps/4C/ecoregions/ecoregions.geojson \ +--projects /maps/mwd24/tmf-data/projects \ +--output "${output_dir}/${proj}/matching-area.geojson" +echo "--Matching area created.--" + +#Download SRTM data +tmfpython3 -m methods.inputs.download_srtm_data --project "${input_dir}/${proj}.geojson" \ +--matching "${output_dir}/${proj}/matching-area.geojson" \ +--zips "${output_dir}/srtm/zip" \ +--tifs "${output_dir}/srtm/tif" +echo "--SRTM downloaded.--" + +#Generate slopes +tmfpython3 -m methods.inputs.generate_slope --input "${output_dir}/srtm/tif" --output "${output_dir}/slopes" +echo "--Slope created.--" + +#Rescale to JRC tiles +tmfpython3 -m methods.inputs.rescale_tiles_to_jrc --jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ +--tiles "${output_dir}/srtm/tif" \ +--output "${output_dir}/rescaled-elevation" +tmfpython3 -m methods.inputs.rescale_tiles_to_jrc \ +--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ +--tiles "${output_dir}/slopes" \ +--output "${output_dir}/rescaled-slopes" +echo "--JRC rescaled.--" + +#Create country raster +tmfpython3 -m methods.inputs.generate_country_raster --jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ +--matching "${output_dir}/${proj}/matching-area.geojson" \ +--countries /maps/4C/osm_boundaries.geojson \ +--output "${output_dir}/${proj}/countries.tif" +echo "--Country raster created.--" + +#Matching: calculate set K +tmfpython3 -m methods.matching.calculate_k \ +--project "${input_dir}/${proj}.geojson" \ +--start_year "$t0" \ +--evaluation_year "$eval_year" \ +--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ +--cpc /maps/rhm31/fine_circular_coverage/forecol_complete/ \ +--ecoregions /maps/4C/ecoregions/ \ +--elevation "${output_dir}/rescaled-elevation" \ +--slope "${output_dir}/rescaled-slopes" \ +--access /maps/4C/access \ +--countries-raster "${output_dir}/${proj}/countries.tif" \ +--output "${output_dir}/${proj}/k.parquet" +echo "--Set K created.--" + +#Matching: calculate set M +tmfpython3 -m methods.matching.find_potential_matches \ +--k "${output_dir}/${proj}/k.parquet" \ +--matching "${output_dir}/${proj}/matching-area.geojson" \ +--start_year "$t0" \ +--evaluation_year "$eval_year" \ +--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ +--cpc /maps/rhm31/fine_circular_coverage/forecol_complete/ \ +--ecoregions /maps/4C/ecoregions/ \ +--elevation "${output_dir}/rescaled-elevation" \ +--slope "${output_dir}/rescaled-slopes" \ +--access /maps/4C/access \ +--countries-raster "${output_dir}/${proj}/countries.tif" \ +--output "${output_dir}/${proj}/matches" +tmfpython3 -m methods.matching.build_m_raster \ +--rasters_directory "${output_dir}/${proj}/matches" \ +--output "${output_dir}/${proj}/matches.tif" \ +-j 20 +tmfpython3 -m methods.matching.build_m_table \ +--raster "${output_dir}/${proj}/matches.tif" \ +--matching "${output_dir}/${proj}/matching-area.geojson" \ +--start_year "$t0" \ +--evaluation_year "$eval_year" \ +--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \ +--cpc /maps/rhm31/fine_circular_coverage/forecol_complete/ \ +--ecoregions /maps/4C/ecoregions/ \ +--elevation "${output_dir}/rescaled-elevation" \ +--slope "${output_dir}/rescaled-slopes" \ +--access /maps/4C/access \ +--countries-raster "${output_dir}/${proj}/countries.tif" \ +--output "${output_dir}/${proj}/matches.parquet" +echo "--Set M created.--" + +#Matching: find pairs +tmfpython3 -m methods.matching.find_pairs \ + --k "${output_dir}/${proj}/k.parquet" \ + --m "${output_dir}/${proj}/matches.parquet" \ + --start_year "$t0" \ + --output "${output_dir}/${proj}/pairs" \ + --seed 42 \ + -j 1 + echo "--Pairs matched.--" + +#Calculate additionality +if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then + tmfpython3 -m methods.outputs.calculate_additionality \ + --project "${input_dir}/${proj}.geojson" \ + --project_start "$t0" \ + --evaluation_year "$eval_year" \ + --density "${output_dir}/${proj}/carbon-density.csv" \ + --matches "${output_dir}/${proj}/pairs" \ + --output "${output_dir}/${proj}/additionality.csv" \ + --stopping "${output_dir}/${proj}/stopping.csv" + echo "--Additionality and stopping criteria calculated.--" + else + tmfpython3 -m methods.outputs.calculate_additionality \ + --project "${input_dir}/${proj}.geojson" \ + --project_start "$t0" \ + --evaluation_year "$eval_year" \ + --density "${output_dir}/${proj}/carbon-density.csv" \ + --matches "${output_dir}/${proj}/pairs" \ + --output "${output_dir}/${proj}/additionality.csv" + echo "--Additionality calculated.--" +fi + +# Run ex post evaluation +if [ "$ex_post" == "true" ]; then +evaluations_dir="~/evaluations" +ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html" +Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))" +fi + +# Run ex-ante evaluation +if [ "$ex_ante" == "true" ]; then +evaluations_dir="~/evaluations" +ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" +Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))" +fi \ No newline at end of file From 4a7abcec0f6af5838065e0650142db6e537929c8 Mon Sep 17 00:00:00 2001 From: swinersha Date: Wed, 7 Aug 2024 13:34:41 +0000 Subject: [PATCH 04/19] fix: sppeds up find pairs by sampling M to the size of K --- .../cluster_find_pairs_interactive.py | 303 +++--------------- methods/matching/find_pairs.py | 54 ++-- 2 files changed, 80 insertions(+), 277 deletions(-) diff --git a/methods/matching/cluster_find_pairs_interactive.py b/methods/matching/cluster_find_pairs_interactive.py index 9ac25fb..215722b 100644 --- a/methods/matching/cluster_find_pairs_interactive.py +++ b/methods/matching/cluster_find_pairs_interactive.py @@ -7,6 +7,7 @@ from sklearn.cluster import KMeans import matplotlib.pyplot as plt import geopandas as gpd +from pyproj import Proj, transform import os import time import sys @@ -108,14 +109,53 @@ def calculate_smd(group1, group2): # Define the start year t0 = 2012 # READ THIS IN -match_years = [t0-10, t0-5, t0] # Read in the data boundary = gpd.read_file('/maps/aew85/projects/1201.geojson') -k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k_all.parquet') +k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k.parquet') +# k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k_all.parquet') m_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/1201/matches.parquet') + +t0 = 2018 +boundary = gpd.read_file('/maps/aew85/projects/ona.geojson') +k_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/fastfp_test_ona/k.parquet') +m_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/fastfp_test_ona/matches.parquet') + +if(m_pixels.shape[0] > (k_pixels.shape[0])): + m_sub_size = int(k_pixels.shape[0]) # First down sample M as it is ~230 million points + m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False) + m_pixels = m_pixels.iloc[m_random_indices] + +# # Calculate the central coordinates (centroid) +# central_lat = m_pixels['lat'].mean() +# central_lon = m_pixels['lng'].mean() +# aeqd_proj = f"+proj=aeqd +lat_0={central_lat} +lon_0={central_lon} +datum=WGS84" + +# # Convert the DataFrame to a GeoDataFrame +# m_gdf = gpd.GeoDataFrame(m_pixels, geometry=gpd.points_from_xy(m_pixels.lng, m_pixels.lat)) +# # Set the original CRS to WGS84 (EPSG:4326) +# m_gdf.set_crs(epsg=4326, inplace=True) + +# # Transform the GeoDataFrame to the AEQD projection +# m_gdf_aeqd = m_gdf.to_crs(aeqd_proj) + +# # Extract the transformed coordinates +# gdf_aeqd['aeqd_x'] = gdf_aeqd.geometry.x +# gdf_aeqd['aeqd_y'] = gdf_aeqd.geometry.y + +# # Define the grid resolution in meters +# grid_resolution_m = 5000 # 5 km + +# # Calculate grid cell indices +# gdf_aeqd['grid_x'] = (gdf_aeqd['aeqd_x'] // grid_resolution_m).astype(int) +# gdf_aeqd['grid_y'] = (gdf_aeqd['aeqd_y'] // grid_resolution_m).astype(int) + +# # Print the first few rows to verify +# print(gdf_aeqd.head()) + + # concat m and k km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) @@ -124,8 +164,6 @@ def calculate_smd(group1, group2): exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt'] exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')] -match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] - # Extract only the continuous columns continuous_columns = km_pixels.columns.difference(exclude_columns) km_pixels_selected = km_pixels[continuous_columns] @@ -178,7 +216,7 @@ def calculate_smd(group1, group2): plt.ylabel('PCA Component 2') plt.legend() plt.show() - plt.savefig('Figures/cluster_centres_faiss_1.png') + plt.savefig('Figures/ona_cluster_centres_faiss_1.png') plt.close() # Close the plot to free up memory #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -202,11 +240,14 @@ def calculate_smd(group1, group2): fig.delaxes(axes[j]) plt.tight_layout() - plt.savefig('Figures/cluster_faiss_1_facet.png') + plt.savefig('Figures/Ona_cluster_faiss_1_facet.png') plt.close() #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +match_years = [t0-10, t0-5, t0] +match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] + # Extract K and M pixels k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt'] m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl'] @@ -216,7 +257,7 @@ def calculate_smd(group1, group2): m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy() k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION) -m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION) +m_sub_size = int(m_pixels.shape[0] * 1) # Define indexs for the samples from K and M k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False) @@ -330,252 +371,4 @@ def calculate_smd(group1, group2): smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) print(smd_df) -smd_results = [] -for column in columns_to_compare: - smd, mean1, mean2, pooled_std = calculate_smd(k_pixels[column], m_pixels[column]) - smd_results.append((column, smd, mean1, mean2, pooled_std)) -# Convert the results to a DataFrame for better readability -smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) -print(smd_df) - - - - - - - -def find_match_iteration( - k_parquet_filename: str, - m_parquet_filename: str, - start_year: int, - output_folder: str, - idx_and_seed: tuple[int, int] -) -> None: - logging.info("Find match iteration %d of %d", idx_and_seed[0] + 1, REPEAT_MATCH_FINDING) - rng = np.random.default_rng(idx_and_seed[1]) - - logging.info("Loading K from %s", k_parquet_filename) - k_pixels = pd.read_parquet(k_parquet_filename) - - logging.info("Loading M from %s", m_parquet_filename) - m_pixels = pd.read_parquet(m_parquet_filename) - - # concat m and k - km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), - m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) - - # Find the continuous columns - exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt'] - exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')] - continuous_columns = km_pixels.columns.difference(exclude_columns) - # Categorical columns - match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] - - logging.info("Starting PCA transformation of k and m union. km_pixels.shape: %a", {km_pixels.shape}) - # PCA transform and conversion to 32 bit ints for continuous only - km_pca = to_pca_int32(km_pixels[continuous_columns]) - logging.info("Done PCA transformation") - - # Extract K and M pixels - this might be unnecessary I just wanted to make sure - # K and M were in the same order here and in the PCA transform - k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt'] - m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl'] - # Extract K and M PCA transforms - k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy() - m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy() - - # Sample from K and M - k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION) - m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION) - # Define indexs for the samples from K and M - k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False) - m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False) - # Take random samples from K and M pixels - k_sub = k_pixels.iloc[k_random_indices] - m_sub = m_pixels.iloc[m_random_indices] - # Take corresponding random samples from the PCA transformed K and M - k_sub_pca = k_pca[k_random_indices,:] - m_sub_pca = m_pca[m_random_indices,:] - - logging.info("Samples taken from K and M. k_sub.shape: %a; m_sub.shape: %a", {k_sub.shape, m_sub.shape}) - - # Identify the unique combinations of luc columns - k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats)) - - pairs_list = [] - matchless_list = [] - - logging.info("Starting greedy matching... k_sub.shape: %s, m_sub.shape: %s", - k_sub.shape, m_sub.shape) - - start_time = time.time() - for i in range(0, k_cat_combinations.shape[0]): - # i = 6 # ith element of the unique combinations of the luc time series in k - # for in range() - k_cat_comb = k_cat_combinations.iloc[i] - k_cat = k_sub[(k_sub[match_cats] == k_cat_comb).all(axis=1)] - k_cat_pca = k_sub_pca[(k_sub[match_cats] == k_cat_comb).all(axis=1)] - - # Find the subset in km_pixels that matches this combination - m_cat = m_sub[(m_sub[match_cats] == k_cat_comb).all(axis=1)] - m_cat_pca = m_sub_pca[(m_sub[match_cats] == k_cat_comb).all(axis=1)] - - if VERBOSE: - print('ksub_cat:' + str(k_cat.shape[0])) - print('msub_cat:' + str(m_cat.shape[0])) - - # If there is no suitable match for the pre-project luc time series - # Then it may be preferable to just take the luc state at t0 - # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)] - # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)] - # For if there are no matches return nothing - - if(m_cat.shape[0] < k_cat.shape[0] * 5): - # print("M insufficient for matching. Set VERBOSE to True for more details.") - # Append the matchless DataFrame to the list - matchless_list.append(k_cat) - continue - - # Find the matches - matches_index = loop_match(m_cat_pca, k_cat_pca) - m_cat_matches = m_cat.iloc[matches_index] - - # i = 0 - # matched = pd.concat([k_cat.iloc[i], m_cat.iloc[matches[i]]], axis=1, ignore_index=True) - # matched.columns = ['trt', 'ctrl'] - # matched - #Looks great! - columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope'] - # Calculate SMDs for the specified columns - smd_results = [] - for column in columns_to_compare: - smd, mean1, mean2, pooled_std = calculate_smd(k_cat[column], m_cat_matches[column]) - smd_results.append((column, smd, mean1, mean2, pooled_std)) - - # Convert the results to a DataFrame for better readability - smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std']) - - if VERBOSE: - # Print the results - print("categorical combination:") - print(k_cat_comb) - # Count how many items in 'column1' are not equal to the specified integer value - print("LUC flips in K:") - (k_cat['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum() - print("LUC flips in matches:") - (m_cat_matches['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum() - print("Standardized Mean Differences:") - print(smd_df) - - # Join the pairs into one dataframe: - k_cat = k_cat.reset_index(drop = True) - m_cat_matches = m_cat_matches.reset_index(drop = True) - pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1) - - # Append the resulting DataFrame to the list - pairs_list.append(pairs_df) - - # Combine all the DataFrames in the list into a single DataFrame - pairs = pd.concat(pairs_list, ignore_index=True) - matchless = pd.concat(matchless_list, ignore_index=True) - - logging.info("Finished greedy matching... pairs.shape: %s, matchless.shape: %s", - pairs.shape, matchless.shape) - - logging.info("Starting storing matches...") - pairs.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet')) - matchless.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet')) - - logging.info("Finished find match iteration") - - -def find_pairs( - k_parquet_filename: str, - m_parquet_filename: str, - start_year: int, - seed: int, - output_folder: str, - processes_count: int -) -> None: - logging.info("Starting find pairs") - os.makedirs(output_folder, exist_ok=True) - - rng = np.random.default_rng(seed) - iteration_seeds = zip(range(REPEAT_MATCH_FINDING), rng.integers(0, 1000000, REPEAT_MATCH_FINDING)) - - with Pool(processes=processes_count) as pool: - pool.map( - partial( - find_match_iteration, - k_parquet_filename, - m_parquet_filename, - start_year, - output_folder - ), - iteration_seeds - ) - - -def main(): - # If you use the default multiprocess model then you risk deadlocks when logging (which we - # have hit). Spawn is the default on macOS, but not on Linux. - set_start_method("spawn") - - parser = argparse.ArgumentParser(description="Takes K and M and finds 100 sets of matches.") - parser.add_argument( - "--k", - type=str, - required=True, - dest="k_filename", - help="Parquet file containing pixels from K as generated by calculate_k.py" - ) - parser.add_argument( - "--m", - type=str, - required=True, - dest="m_filename", - help="Parquet file containing pixels from M as generated by build_m_table.py" - ) - parser.add_argument( - "--start_year", - type=int, - required=True, - dest="start_year", - help="Year project started." - ) - parser.add_argument( - "--seed", - type=int, - required=True, - dest="seed", - help="Random number seed, to ensure experiments are repeatable." - ) - parser.add_argument( - "--output", - type=str, - required=True, - dest="output_directory_path", - help="Directory into which output matches will be written. Will be created if it does not exist." - ) - parser.add_argument( - "-j", - type=int, - required=False, - default=round(cpu_count() / 2), - dest="processes_count", - help="Number of concurrent threads to use." - ) - args = parser.parse_args() - - find_pairs( - args.k_filename, - args.m_filename, - args.start_year, - args.seed, - args.output_directory_path, - args.processes_count - ) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py index 62e9e9c..e253014 100644 --- a/methods/matching/find_pairs.py +++ b/methods/matching/find_pairs.py @@ -20,11 +20,15 @@ # m_parquet_filename = '/maps/aew85/tmf_pipe_out/1201/tom_pairs/matches.parquet' # luc_match = True +t0 = 2018 +k_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/k.parquet' +m_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/matches.parquet' + REPEAT_MATCH_FINDING = 100 DEBUG = False K_SUB_PROPORTION = 0.01 -M_SUB_PROPORTION = 0.1 +M_SUB_PROPORTION = 1 # Number of clusters NUM_CLUSTERS = 9 # Number of iterations for K means fitting @@ -98,8 +102,8 @@ def calculate_smd(group1, group2): return smd, mean1, mean2, pooled_std def find_match_iteration( - km_pixels: pd.DataFrame, - km_pca: np.ndarray, + k_pixels: pd.DataFrame, + m_pixels: pd.DataFrame, start_year: int, luc_match: bool, output_folder: str, @@ -115,6 +119,29 @@ def find_match_iteration( else: match_cats = ["ecoregion", "country", "cluster"] + if(m_pixels.shape[0] > (k_pixels.shape[0])): + m_sub_size = int(k_pixels.shape[0]) # First down sample M as it is ~230 million points + m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False) + m_pixels = m_pixels.iloc[m_random_indices] + + # concat m and k + km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), + m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) + + # Extract only the continuous columns + km_pixels_distance = km_pixels[DISTANCE_COLUMNS] + # PCA transform and conversion to 32 bit ints + logging.info("Transforming continuous variables to PCA space") + km_pca = to_pca_int32(km_pixels_distance) + # Find clusters using Kmeans + logging.info("Starting cluster assignment using kmeans") + # Initialize the KMeans object + kmeans = faiss.Kmeans(d=km_pca.shape[1], k=NUM_CLUSTERS, niter=NUM_ITERATIONS, verbose=True) + # Perform clustering + kmeans.train(km_pca) + # Get cluster assignments + km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten() + # Extract K and M pixels k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt'] m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl'] @@ -223,23 +250,6 @@ def find_pairs( k_pixels = pd.read_parquet(k_parquet_filename) logging.info("Loading M from %s", m_parquet_filename) m_pixels = pd.read_parquet(m_parquet_filename) - # concat m and k - km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), - m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) - - # Extract only the continuous columns - km_pixels_distance = km_pixels[DISTANCE_COLUMNS] - # PCA transform and conversion to 32 bit ints - logging.info("Transforming continuous variables to PCA space") - km_pca = to_pca_int32(km_pixels_distance) - # Find clusters using Kmeans - logging.info("Starting cluster assignment using kmeans") - # Initialize the KMeans object - kmeans = faiss.Kmeans(d=km_pca.shape[1], k=NUM_CLUSTERS, niter=NUM_ITERATIONS, verbose=True) - # Perform clustering - kmeans.train(km_pca) - # Get cluster assignments - km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten() logging.info("Starting find pairs") os.makedirs(output_folder, exist_ok=True) @@ -251,8 +261,8 @@ def find_pairs( pool.map( partial( find_match_iteration, - km_pixels, - km_pca, + k_pixels, + m_pixels, start_year, luc_match, output_folder From 81377a6217247dd8635223a4cac2bb66785c50de Mon Sep 17 00:00:00 2001 From: Abby Williams <149403544+abbyevewilliams@users.noreply.github.com> Date: Wed, 7 Aug 2024 14:42:37 +0100 Subject: [PATCH 05/19] Update calculate_k.py Removed the problematic lines for ecoregion and access --- methods/matching/calculate_k.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py index dba2284..60ad146 100644 --- a/methods/matching/calculate_k.py +++ b/methods/matching/calculate_k.py @@ -73,7 +73,6 @@ def build_layer_collection( # RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in # glob.glob("*.tif", root_dir=ecoregions_directory_path) # ], name="ecoregions") - ecoregions = RasterLayer.layer_from_file(ecoregions_directory_path) elevation = GroupLayer([ RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in @@ -88,7 +87,6 @@ def build_layer_collection( # RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in # glob.glob("*.tif", root_dir=access_directory_path) # ], name="access") - access = RasterLayer.layer_from_file(access_directory_path) countries = RasterLayer.layer_from_file(countries_raster_filename) From 2bec9d97789b45c077481591c0c2465d9fee7b66 Mon Sep 17 00:00:00 2001 From: Abby Williams <149403544+abbyevewilliams@users.noreply.github.com> Date: Thu, 8 Aug 2024 12:46:30 +0100 Subject: [PATCH 06/19] Update calculate_k.py Uncommented lines needed for ecoregion and access --- methods/matching/calculate_k.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py index 60ad146..a5fa156 100644 --- a/methods/matching/calculate_k.py +++ b/methods/matching/calculate_k.py @@ -69,10 +69,10 @@ def build_layer_collection( # ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once # as part of import of the ecoregions data - # ecoregions = GroupLayer([ - # RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in - # glob.glob("*.tif", root_dir=ecoregions_directory_path) - # ], name="ecoregions") + ecoregions = GroupLayer([ + RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in + glob.glob("*.tif", root_dir=ecoregions_directory_path) + ], name="ecoregions") elevation = GroupLayer([ RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in @@ -83,10 +83,10 @@ def build_layer_collection( glob.glob("slope*.tif", root_dir=slope_directory_path) ], name="slopes") - # access = GroupLayer([ - # RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in - # glob.glob("*.tif", root_dir=access_directory_path) - # ], name="access") + access = GroupLayer([ + RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in + glob.glob("*.tif", root_dir=access_directory_path) + ], name="access") countries = RasterLayer.layer_from_file(countries_raster_filename) From bc64c053cc3c73eeb01c8dab1ab4900c2660079f Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Mon, 12 Aug 2024 11:45:53 +0000 Subject: [PATCH 07/19] First attempt at ex ante offset --- methods/matching/build_m_table.py | 6 +++--- methods/matching/calculate_k.py | 18 ++++++++-------- methods/matching/find_pairs.py | 24 +++++++++++++++++++--- methods/matching/find_potential_matches.py | 4 ++-- scripts/tmfpython.sh | 14 ++++++++++++- 5 files changed, 47 insertions(+), 19 deletions(-) diff --git a/methods/matching/build_m_table.py b/methods/matching/build_m_table.py index 66a27e2..ddf24e6 100644 --- a/methods/matching/build_m_table.py +++ b/methods/matching/build_m_table.py @@ -28,8 +28,8 @@ def build_m_table( matching_collection = build_layer_collection( merged_raster.pixel_scale, merged_raster.projection, - list(luc_range(start_year, evaluation_year)), - [start_year, start_year - 5, start_year - 10], + list(luc_range(start_year-10, start_year)), + [start_year-10, start_year - 15, start_year - 20], matching_zone_filename, jrc_directory_path, cpc_directory_path, @@ -44,7 +44,7 @@ def build_m_table( assert matching_collection.boundary.area == merged_raster.area results = [] - luc_columns = [f'luc_{year}' for year in luc_range(start_year, evaluation_year)] + luc_columns = [f'luc_{year}' for year in luc_range(start_year-10, start_year)] cpc_columns = ['cpc0_u', 'cpc0_d', 'cpc5_u', 'cpc5_d', 'cpc10_u', 'cpc10_d'] columns = ['lat', 'lng', 'ecoregion', 'elevation', 'slope', 'access', 'country'] + luc_columns + cpc_columns diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py index dba2284..a5fa156 100644 --- a/methods/matching/calculate_k.py +++ b/methods/matching/calculate_k.py @@ -69,11 +69,10 @@ def build_layer_collection( # ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once # as part of import of the ecoregions data - # ecoregions = GroupLayer([ - # RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in - # glob.glob("*.tif", root_dir=ecoregions_directory_path) - # ], name="ecoregions") - ecoregions = RasterLayer.layer_from_file(ecoregions_directory_path) + ecoregions = GroupLayer([ + RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in + glob.glob("*.tif", root_dir=ecoregions_directory_path) + ], name="ecoregions") elevation = GroupLayer([ RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in @@ -84,11 +83,10 @@ def build_layer_collection( glob.glob("slope*.tif", root_dir=slope_directory_path) ], name="slopes") - # access = GroupLayer([ - # RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in - # glob.glob("*.tif", root_dir=access_directory_path) - # ], name="access") - access = RasterLayer.layer_from_file(access_directory_path) + access = GroupLayer([ + RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in + glob.glob("*.tif", root_dir=access_directory_path) + ], name="access") countries = RasterLayer.layer_from_file(countries_raster_filename) diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py index 62e9e9c..24c48c5 100644 --- a/methods/matching/find_pairs.py +++ b/methods/matching/find_pairs.py @@ -111,7 +111,7 @@ def find_match_iteration( match_years = [start_year + year for year in RELATIVE_MATCH_YEARS] # The categorical columns: if luc_match: - match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years] + match_cats = ["ecoregion", "country", "cluster"] + ["luc_-10", "luc_-5", "luc_0"] else: match_cats = ["ecoregion", "country", "cluster"] @@ -210,6 +210,17 @@ def greedy_match( return (pairs, matchless) +def rename_luc_columns(df, start_year): + + # Define the range of years based on the central start_year + years = range(start_year - 10, start_year + 11) # Adjust the range as needed + new_column_names = {f'luc_{year}': f'luc_{year - start_year}' for year in years} + + # Rename columns based on the new column names mapping + renamed_df = df.rename(columns=new_column_names) + + return renamed_df + def find_pairs( k_parquet_filename: str, m_parquet_filename: str, @@ -223,9 +234,16 @@ def find_pairs( k_pixels = pd.read_parquet(k_parquet_filename) logging.info("Loading M from %s", m_parquet_filename) m_pixels = pd.read_parquet(m_parquet_filename) + + # rename columns of each + k_pixels_renamed = rename_luc_columns(k_pixels, start_year) + m_pixels_renamed = rename_luc_columns(m_pixels, start_year-10) + # concat m and k - km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))), - m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True) + km_pixels = pd.concat([k_pixels_renamed.assign(trt='trt', ID=range(0, len(k_pixels))), + m_pixels_renamed.assign(trt='ctrl', + ID=range(0, len(m_pixels)))], + ignore_index=True) # Extract only the continuous columns km_pixels_distance = km_pixels[DISTANCE_COLUMNS] diff --git a/methods/matching/find_potential_matches.py b/methods/matching/find_potential_matches.py index f69865a..51ae8bf 100644 --- a/methods/matching/find_potential_matches.py +++ b/methods/matching/find_potential_matches.py @@ -119,8 +119,8 @@ def worker( matching_collection = build_layer_collection( example_jrc_layer.pixel_scale, example_jrc_layer.projection, - [start_year, start_year - 5, start_year - 10], - [start_year, start_year - 5, start_year - 10], + [start_year - 10, start_year - 15, start_year - 20], # create time offset in matching set + [start_year - 10, start_year - 15, start_year - 20], # create time offset in matching set matching_zone_filename, jrc_directory_path, cpc_directory_path, diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index beb8178..44e23d8 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -181,14 +181,26 @@ tmfpython3 -m methods.matching.build_m_table \ echo "--Set M created.--" #Matching: find pairs +if [ "$current_branch" == "tws_cluster_find_pairs" ] -o [ "$current_branch" == "aew85_cluster_find_pairs" ]; then tmfpython3 -m methods.matching.find_pairs \ --k "${output_dir}/${proj}/k.parquet" \ --m "${output_dir}/${proj}/matches.parquet" \ --start_year "$t0" \ + --luc_match False \ --output "${output_dir}/${proj}/pairs" \ --seed 42 \ -j 1 echo "--Pairs matched.--" + else + tmfpython3 -m methods.matching.find_pairs \ + --k "${output_dir}/${proj}/k.parquet" \ + --m "${output_dir}/${proj}/matches.parquet" \ + --start_year "$t0" \ + --output "${output_dir}/${proj}/pairs" \ + --seed 42 \ + -j 1 + echo "--Pairs matched.--" +fi #Calculate additionality if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then @@ -223,5 +235,5 @@ fi if [ "$ex_ante" == "true" ]; then evaluations_dir="~/evaluations" ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" -Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))" +Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))" fi \ No newline at end of file From 91e6e2f31c7de02d6436aec1f39564efe51cacf1 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Mon, 12 Aug 2024 12:56:41 +0000 Subject: [PATCH 08/19] Removed manual inputs from tws_cluster_find_pairs --- methods/matching/find_pairs.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py index 0ae02c9..ee8d824 100644 --- a/methods/matching/find_pairs.py +++ b/methods/matching/find_pairs.py @@ -11,19 +11,6 @@ from methods.common.luc import luc_matching_columns -# TO DO: -# 1. Rename columns to luc10, luc5 and luc0 to align with the pipeline - -# to delete: -# start_year = 2012 -# k_parquet_filename = '/maps/tws36/tmf_pipe_out/1201/k_all.parquet' -# m_parquet_filename = '/maps/aew85/tmf_pipe_out/1201/tom_pairs/matches.parquet' -# luc_match = True - -t0 = 2018 -k_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/k.parquet' -m_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/matches.parquet' - REPEAT_MATCH_FINDING = 100 DEBUG = False From 6c7e5524fbd67515c0f2f09395f20782b4db0ee3 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Mon, 12 Aug 2024 13:14:34 +0000 Subject: [PATCH 09/19] Update tmfpython.sh --- scripts/tmfpython.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index 44e23d8..44d233f 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -186,7 +186,7 @@ tmfpython3 -m methods.matching.find_pairs \ --k "${output_dir}/${proj}/k.parquet" \ --m "${output_dir}/${proj}/matches.parquet" \ --start_year "$t0" \ - --luc_match False \ + --luc_match True \ --output "${output_dir}/${proj}/pairs" \ --seed 42 \ -j 1 @@ -213,6 +213,7 @@ if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then --output "${output_dir}/${proj}/additionality.csv" \ --stopping "${output_dir}/${proj}/stopping.csv" echo "--Additionality and stopping criteria calculated.--" + else if [ "$ex_ante" == "true" ]; then else tmfpython3 -m methods.outputs.calculate_additionality \ --project "${input_dir}/${proj}.geojson" \ @@ -226,14 +227,12 @@ fi # Run ex post evaluation if [ "$ex_post" == "true" ]; then -evaluations_dir="~/evaluations" ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html" Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))" fi # Run ex-ante evaluation if [ "$ex_ante" == "true" ]; then -evaluations_dir="~/evaluations" ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))" fi \ No newline at end of file From 154bef5846e67baa0a1e87c04667677bb777474c Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Wed, 14 Aug 2024 10:42:36 +0000 Subject: [PATCH 10/19] Update find_pairs.py --- methods/matching/find_pairs.py | 20 +++++++++++--- scripts/tmfpython.sh | 48 +++++++--------------------------- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py index ee8d824..668736b 100644 --- a/methods/matching/find_pairs.py +++ b/methods/matching/find_pairs.py @@ -88,10 +88,11 @@ def calculate_smd(group1, group2): smd = (mean1 - mean2) / pooled_std return smd, mean1, mean2, pooled_std -def rename_luc_columns(df, start_year): +def rename_luc_columns(df, start_year, eval_year): # Define the range of years based on the central start_year - years = range(start_year - 10, start_year + 11) # Adjust the range as needed + no_years_post = (eval_year - start_year) + 1 + years = range(start_year - 10, start_year + no_years_post) # Adjust the range as needed new_column_names = {f'luc_{year}': f'luc_{year - start_year}' for year in years} # Rename columns based on the new column names mapping @@ -103,6 +104,7 @@ def find_match_iteration( k_pixels: pd.DataFrame, m_pixels: pd.DataFrame, start_year: int, + eval_year: int, luc_match: bool, output_folder: str, idx_and_seed: tuple[int, int] @@ -123,8 +125,8 @@ def find_match_iteration( m_pixels = m_pixels.iloc[m_random_indices] # rename columns of each - k_pixels_renamed = rename_luc_columns(k_pixels, start_year) - m_pixels_renamed = rename_luc_columns(m_pixels, start_year-10) + k_pixels_renamed = rename_luc_columns(k_pixels, start_year, eval_year) + m_pixels_renamed = rename_luc_columns(m_pixels, start_year-10, eval_year) # concat m and k km_pixels = pd.concat([k_pixels_renamed.assign(trt='trt', ID=range(0, len(k_pixels))), @@ -245,6 +247,7 @@ def find_pairs( k_parquet_filename: str, m_parquet_filename: str, start_year: int, + eval_year: int, luc_match: bool, seed: int, output_folder: str, @@ -268,6 +271,7 @@ def find_pairs( k_pixels, m_pixels, start_year, + eval_year, luc_match, output_folder ), @@ -301,6 +305,13 @@ def main(): dest="start_year", help="Year project started." ) + parser.add_argument( + "--eval_year", + type=int, + required=True, + dest="eval_year", + help="Year of evaluation." + ) parser.add_argument( "--luc_match", type=bool, @@ -336,6 +347,7 @@ def main(): args.k_filename, args.m_filename, args.start_year, + args.eval_year, args.luc_match, args.seed, args.output_directory_path, diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index beb8178..1201087 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -182,46 +182,18 @@ echo "--Set M created.--" #Matching: find pairs tmfpython3 -m methods.matching.find_pairs \ - --k "${output_dir}/${proj}/k.parquet" \ - --m "${output_dir}/${proj}/matches.parquet" \ - --start_year "$t0" \ - --output "${output_dir}/${proj}/pairs" \ - --seed 42 \ - -j 1 - echo "--Pairs matched.--" - -#Calculate additionality -if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then - tmfpython3 -m methods.outputs.calculate_additionality \ - --project "${input_dir}/${proj}.geojson" \ - --project_start "$t0" \ - --evaluation_year "$eval_year" \ - --density "${output_dir}/${proj}/carbon-density.csv" \ - --matches "${output_dir}/${proj}/pairs" \ - --output "${output_dir}/${proj}/additionality.csv" \ - --stopping "${output_dir}/${proj}/stopping.csv" - echo "--Additionality and stopping criteria calculated.--" - else - tmfpython3 -m methods.outputs.calculate_additionality \ - --project "${input_dir}/${proj}.geojson" \ - --project_start "$t0" \ - --evaluation_year "$eval_year" \ - --density "${output_dir}/${proj}/carbon-density.csv" \ - --matches "${output_dir}/${proj}/pairs" \ - --output "${output_dir}/${proj}/additionality.csv" - echo "--Additionality calculated.--" -fi - -# Run ex post evaluation -if [ "$ex_post" == "true" ]; then -evaluations_dir="~/evaluations" -ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html" -Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))" -fi +--k "${output_dir}/${proj}/k.parquet" \ +--m "${output_dir}/${proj}/matches.parquet" \ +--start_year "$t0" \ +--eval_year "$eval_year" \ +--luc_match True \ +--output "${output_dir}/${proj}/pairs" \ +--seed 42 \ +-j 1 +echo "--Pairs matched.--" # Run ex-ante evaluation if [ "$ex_ante" == "true" ]; then -evaluations_dir="~/evaluations" ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" -Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))" +Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))" fi \ No newline at end of file From 9a960d9af98c1c28b76612578d8389c0f00375a5 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Thu, 15 Aug 2024 13:53:32 +0000 Subject: [PATCH 11/19] Update tmfpython.sh --- scripts/tmfpython.sh | 47 ++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index 1201087..4cdd570 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -1,16 +1,17 @@ #!/bin/bash -#run with command: scripts/tmfpython.sh -p 1113 -t 2010 ... -#p: project ID +#run with command: scripts/tmfpython.sh -i 'maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ... +#i: input dir - directory containing project shapefiles +#o: output dir - directory containing pipeline outputs +#p: project name/ID - must match name of shapefile #t: year of project start (t0) #e: evaluation year (default: 2022) -#r: whether to run an ex-post evaluation and knit the results in an R notebook (true/false, default: false). -#a: whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false). +#v: verbose - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false). #NB running evaluations requires the evaluations code # Check which branch is currently checked out -current_branch=$(git rev-parse --abbrev-ref HEAD) +#current_branch=$(git rev-parse --abbrev-ref HEAD) set -e @@ -19,8 +20,7 @@ set -e input_dir="/maps/aew85/projects" output_dir="/maps/aew85/tmf_pipe_out" eval_year=2022 -ex_post=false -ex_ante=false +verbose=true ##################################### @@ -28,43 +28,45 @@ function display_help() { echo "Usage: $0 [options]" echo echo "Options:" - echo " -p Project name" + echo " -i Input directory" + echo " -o Output directory" + echo " -p Project name" echo " -t Start year" echo " -e Evaluation year" - echo " -r Knit ex post evaluation? (true/false)" - echo " -a Knit ex ante evaluation? (true/false)" + echo " -v Knit ex ante evaluation as .Rmd? (true/false)" echo " -h Display this help message" - echo echo "Example:" - echo " $0 -p 'gola' -t 2012 -e 2021 -r true -a true" + echo " $0 -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out -p 1201 -t 2012" } # Parse arguments -while getopts "p:t:e:r:a:h" flag +while getopts "i:o:p:t:e:v:h" flag do case "${flag}" in + i) input_dir=${OPTARG};; + o) output_dir=${OPTARG};; p) proj=${OPTARG};; t) t0=${OPTARG};; e) eval_year=${OPTARG};; - r) ex_post=${OPTARG};; - a) ex_ante=${OPTARG};; + r) verbose=${OPTARG};; h) display_help; exit 0;; *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;; esac done +echo "Input directory: $input_dir" +echo "Output directory: $output_dir" echo "Project: $proj" echo "t0: $t0" echo "Evaluation year: $eval_year" -echo "Ex-post evaluation: $ex_post" -echo "Ex-ante evaluation: $ex_ante" +echo "Ex-ante evaluation: $verbose" if [ $# -eq 0 ]; then display_help exit 1 fi -# Make project output directory +# Make project output folder mkdir -p "${output_dir}/${proj}" echo "--Folder created.--" @@ -181,7 +183,8 @@ tmfpython3 -m methods.matching.build_m_table \ echo "--Set M created.--" #Matching: find pairs -tmfpython3 -m methods.matching.find_pairs \ +. ./venv/bin/activate +python3 -m methods.matching.find_pairs \ --k "${output_dir}/${proj}/k.parquet" \ --m "${output_dir}/${proj}/matches.parquet" \ --start_year "$t0" \ @@ -191,9 +194,11 @@ tmfpython3 -m methods.matching.find_pairs \ --seed 42 \ -j 1 echo "--Pairs matched.--" +deactivate # Run ex-ante evaluation -if [ "$ex_ante" == "true" ]; then +if [ "$verbose" == "true" ]; then +evaluations_dir="~/evaluations" ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" -Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))" +Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}'))" fi \ No newline at end of file From 98e800b0dd043b11bfd877817f68d53ca6570a54 Mon Sep 17 00:00:00 2001 From: Patrick Ferris Date: Tue, 20 Aug 2024 16:50:09 +0100 Subject: [PATCH 12/19] Merge pull request #122 from quantifyearth/mwd-fix-121 Fix Issue #121: force layer to disk before saving. --- methods/inputs/generate_slope.py | 241 +++++++++++++++---------------- requirements.txt | 2 +- 2 files changed, 114 insertions(+), 129 deletions(-) diff --git a/methods/inputs/generate_slope.py b/methods/inputs/generate_slope.py index 891e26f..7d58988 100644 --- a/methods/inputs/generate_slope.py +++ b/methods/inputs/generate_slope.py @@ -111,138 +111,123 @@ def generate_slope(input_elevation_directory: str, output_slope_directory: str): continue with tempfile.TemporaryDirectory() as tmpdir: - elevation = RasterLayer.layer_from_file(elev_path) - - logging.info("Area of elevation tile %a", elevation.area) - _easting, _northing, lower_code, lower_letter = utm.from_latlon( - elevation.area.bottom, elevation.area.left - ) - _easting, _northing, upper_code, upper_letter = utm.from_latlon( - elevation.area.top, elevation.area.right - ) - - # FAST PATH -- with only one UTM zone the reprojection back has no issues - if lower_code == upper_code and lower_letter == upper_letter: - actual_utm_code = lower_code - warp( - actual_utm_code, - elev_path, - elevation.pixel_scale.xstep, - elevation.pixel_scale.ystep, - out_path, + with RasterLayer.layer_from_file(elev_path) as elevation: + + logging.info("Area of elevation tile %a", elevation.area) + _easting, _northing, lower_code, lower_letter = utm.from_latlon( + elevation.area.bottom, elevation.area.left ) - else: - # SLOW PATH -- in the slow path, we have to break the elevation raster into - # UTM sections and do the above to each before reprojecting back and recombining - - # To capture the results here for later inspection just override the tmpdir variable - for actual_utm_code in range(lower_code, upper_code + 1): - for utm_letter in crange(lower_letter, upper_letter): - logging.debug("UTM(%s,%s)", actual_utm_code, utm_letter) - - # Note: we go a little bit around the UTM tiles and will crop them down to size later - # this is to remove some aliasing effects. - bbox = bounding_box_of_utm(actual_utm_code, utm_letter, UTM_EXPANSION_DEGREES) - - # Crop the elevation tile to a UTM zone - utm_layer = RasterLayer.empty_raster_layer_like( - elevation, area=bbox - ) - utm_id = f"{actual_utm_code}-{utm_letter}-{elevation_path}" - utm_clip_path = os.path.join(tmpdir, utm_id) - intersection = RasterLayer.find_intersection( - [elevation, utm_layer] - ) - result = RasterLayer.empty_raster_layer( - intersection, - elevation.pixel_scale, - elevation.datatype, - utm_clip_path, - elevation.projection, - ) - result.set_window_for_intersection(intersection) - elevation.set_window_for_intersection(intersection) - elevation.save(result) - - # Flush elevation utm clip to disk - del result - - # Now warp into UTM, calculate slopes, and warp back - slope_out_path = os.path.join(tmpdir, "out-slope-" + utm_id) - warp( - actual_utm_code, - utm_clip_path, - elevation.pixel_scale.xstep, - elevation.pixel_scale.ystep, - slope_out_path, - ) - - # We now recrop the out-slope back to the bounding box we assumed at the start - bbox_no_expand = bounding_box_of_utm( - actual_utm_code, utm_letter, 0.0 - ) - slope_tif = RasterLayer.layer_from_file(slope_out_path) - grid = RasterLayer.empty_raster_layer_like( - slope_tif, area=bbox_no_expand - ) - output_final = f"final-slope-{actual_utm_code}-{utm_letter}-{elevation_path}" - final_path = os.path.join(tmpdir, output_final) - logging.debug("Slope underlying %s", slope_tif._underlying_area) # pylint: disable=W0212 - logging.debug("Grid underling %s", grid._underlying_area) # pylint: disable=W0212 - try: - intersection = RasterLayer.find_intersection([slope_tif, grid]) - except ValueError: - logging.debug( - "UTM (%s, %s) didn't intersect actual area %s", - actual_utm_code, - utm_letter, - grid._underlying_area # pylint: disable=W0212 - ) - continue - slope_tif.set_window_for_intersection(intersection) - final = RasterLayer.empty_raster_layer( - intersection, - slope_tif.pixel_scale, - slope_tif.datatype, - final_path, - slope_tif.projection, - ) - logging.debug("Final underlying %s", final._underlying_area) # pylint: disable=W0212 - final.set_window_for_intersection(intersection) - slope_tif.save(final) - - # Flush - del final - - # Now to recombine the UTM gridded slopes into the slope tile - slopes = glob("final-slope-*", root_dir=tmpdir) - assert len(slopes) > 0 - - # This sets the order a little better for the union of the layers - slopes.sort() - slopes.reverse() - - logging.info("Render order %s", slopes) - - combined = GroupLayer( - [ - RasterLayer.layer_from_file(os.path.join(tmpdir, filename)) - for filename in slopes - ] + _easting, _northing, upper_code, upper_letter = utm.from_latlon( + elevation.area.top, elevation.area.right ) - elevation = RasterLayer.layer_from_file(elev_path) - intersection = RasterLayer.find_intersection([elevation, combined]) - combined.set_window_for_intersection(intersection) - elevation.set_window_for_intersection(intersection) - - assembled_path = os.path.join(tmpdir, "patched.tif") - result = RasterLayer.empty_raster_layer_like( - elevation, filename=assembled_path - ) - combined.save(result) + # FAST PATH -- with only one UTM zone the reprojection back has no issues + if lower_code == upper_code and lower_letter == upper_letter: + actual_utm_code = lower_code + warp( + actual_utm_code, + elev_path, + elevation.pixel_scale.xstep, + elevation.pixel_scale.ystep, + out_path, + ) + else: + # SLOW PATH -- in the slow path, we have to break the elevation raster into + # UTM sections and do the above to each before reprojecting back and recombining + + # To capture the results here for later inspection just override the tmpdir variable + for actual_utm_code in range(lower_code, upper_code + 1): + for utm_letter in crange(lower_letter, upper_letter): + logging.debug("UTM(%s,%s)", actual_utm_code, utm_letter) + + # Note: we go a little bit around the UTM tiles and will crop them down to size later + # this is to remove some aliasing effects. + bbox = bounding_box_of_utm(actual_utm_code, utm_letter, UTM_EXPANSION_DEGREES) + + # Crop the elevation tile to a UTM zone + with RasterLayer.empty_raster_layer_like(elevation, area=bbox) as utm_layer: + utm_id = f"{actual_utm_code}-{utm_letter}-{elevation_path}" + utm_clip_path = os.path.join(tmpdir, utm_id) + intersection = RasterLayer.find_intersection( + [elevation, utm_layer] + ) + with RasterLayer.empty_raster_layer( + intersection, + elevation.pixel_scale, + elevation.datatype, + utm_clip_path, + elevation.projection, + ) as result: + result.set_window_for_intersection(intersection) + elevation.set_window_for_intersection(intersection) + elevation.save(result) + + # Now warp into UTM, calculate slopes, and warp back + slope_out_path = os.path.join(tmpdir, "out-slope-" + utm_id) + warp( + actual_utm_code, + utm_clip_path, + elevation.pixel_scale.xstep, + elevation.pixel_scale.ystep, + slope_out_path, + ) - shutil.move(assembled_path, out_path) + # We now recrop the out-slope back to the bounding box we assumed at the start + bbox_no_expand = bounding_box_of_utm( + actual_utm_code, utm_letter, 0.0 + ) + with RasterLayer.layer_from_file(slope_out_path) as slope_tif: + with RasterLayer.empty_raster_layer_like(slope_tif, area=bbox_no_expand) as grid: + output_final = f"final-slope-{actual_utm_code}-{utm_letter}-{elevation_path}" + final_path = os.path.join(tmpdir, output_final) + logging.debug("Slope underlying %s", slope_tif._underlying_area) # pylint: disable=W0212 + logging.debug("Grid underling %s", grid._underlying_area) # pylint: disable=W0212 + try: + intersection = RasterLayer.find_intersection([slope_tif, grid]) + except ValueError: + logging.debug( + "UTM (%s, %s) didn't intersect actual area %s", + actual_utm_code, + utm_letter, + grid._underlying_area # pylint: disable=W0212 + ) + continue + slope_tif.set_window_for_intersection(intersection) + with RasterLayer.empty_raster_layer( + intersection, + slope_tif.pixel_scale, + slope_tif.datatype, + final_path, + slope_tif.projection, + ) as final: + logging.debug("Final underlying %s", final._underlying_area) # pylint: disable=W0212 + final.set_window_for_intersection(intersection) + slope_tif.save(final) + + # Now to recombine the UTM gridded slopes into the slope tile + slopes = glob("final-slope-*", root_dir=tmpdir) + assert len(slopes) > 0 + + # This sets the order a little better for the union of the layers + slopes.sort() + slopes.reverse() + + logging.info("Render order %s", slopes) + + files = [os.path.join(tmpdir, filename) for filename in slopes] + with GroupLayer.layer_from_files(files) as combined: + with RasterLayer.layer_from_file(elev_path) as elevation: + intersection = RasterLayer.find_intersection([elevation, combined]) + combined.set_window_for_intersection(intersection) + elevation.set_window_for_intersection(intersection) + + assembled_path = os.path.join(tmpdir, "patched.tif") + with RasterLayer.empty_raster_layer_like( + elevation, filename=assembled_path + ) as result: + combined.save(result) + + shutil.move(assembled_path, out_path) def main() -> None: diff --git a/requirements.txt b/requirements.txt index bad4673..8118a36 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ scipy numba matplotlib geojson -git+https://github.com/quantifyearth/yirgacheffe@bd2e91c773a414f66340ebb8c13044a1b1a6045f +git+https://github.com/quantifyearth/yirgacheffe@cc89b4d8a0e97c1a11b730cd688a58b680023336 git+https://github.com/carboncredits/biomass-recovery@9e54f80832a7eca915ebd13b03df9db2a08aee9d # developement From fb7ac4642626bcd93c0c88d473fba77f1b2bc83c Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Tue, 27 Aug 2024 16:30:00 +0000 Subject: [PATCH 13/19] Added functionality to print git branch in R notebook --- scripts/tmfpython.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index 4cdd570..c30367e 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -1,6 +1,6 @@ #!/bin/bash -#run with command: scripts/tmfpython.sh -i 'maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ... +#run with command: scripts/tmfpython.sh -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ... #i: input dir - directory containing project shapefiles #o: output dir - directory containing pipeline outputs #p: project name/ID - must match name of shapefile @@ -11,14 +11,14 @@ #NB running evaluations requires the evaluations code # Check which branch is currently checked out -#current_branch=$(git rev-parse --abbrev-ref HEAD) +branch=$(git rev-parse --abbrev-ref HEAD) set -e ############ DEFAULTS ############### -input_dir="/maps/aew85/projects" -output_dir="/maps/aew85/tmf_pipe_out" +input_dir="" +output_dir="" eval_year=2022 verbose=true @@ -200,5 +200,5 @@ deactivate if [ "$verbose" == "true" ]; then evaluations_dir="~/evaluations" ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" -Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}'))" +Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" fi \ No newline at end of file From 83ddf336f9faf6e09a5e3365e58c614e6e50659e Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Wed, 28 Aug 2024 09:31:21 +0000 Subject: [PATCH 14/19] Added functionality to knit R notebook --- scripts/ex_ante_evaluation_template.Rmd | 1001 +++++++++++++++++++++++ scripts/scripts/def_rate.R | 328 ++++++++ scripts/scripts/land_cover_timeseries.R | 111 +++ scripts/scripts/plot_matchingvars.R | 42 + scripts/scripts/plot_transitions.R | 63 ++ scripts/scripts/std_mean_diff.R | 57 ++ scripts/tmfpython.sh | 7 +- 7 files changed, 1604 insertions(+), 5 deletions(-) create mode 100644 scripts/ex_ante_evaluation_template.Rmd create mode 100644 scripts/scripts/def_rate.R create mode 100644 scripts/scripts/land_cover_timeseries.R create mode 100644 scripts/scripts/plot_matchingvars.R create mode 100644 scripts/scripts/plot_transitions.R create mode 100644 scripts/scripts/std_mean_diff.R diff --git a/scripts/ex_ante_evaluation_template.Rmd b/scripts/ex_ante_evaluation_template.Rmd new file mode 100644 index 0000000..be95ce1 --- /dev/null +++ b/scripts/ex_ante_evaluation_template.Rmd @@ -0,0 +1,1001 @@ +--- +output: + html_document: + theme: spacelab + df_print: paged + toc: yes + toc_float: yes + pdf_document: + toc: yes +params: + proj: null + t0: null + input_dir: null + output_dir: null + fullname: null + country_path: null + shapefile_path: null + pairs_path: null + carbon_density_path: null + branch: null +--- + +```{r include=FALSE} + +# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A SHELL TERMINAL: + +# Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd', output_file='~/evaluations/R/example_project.html', params=list(proj='example_project', t0=2010, ...))" + +# Mandatory args: proj, t0 +# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path +# You must either specify input dir and output dir OR provide absolute paths to each of the objects required + +``` + +```{r settings, include=FALSE} +knitr::opts_chunk$set( + echo = FALSE, warning=FALSE,message=FALSE) + +library(tidyverse) +library(sf) +library(reshape2) +library(maps) +library(mapdata) +library(ggspatial) +library(arrow) +library(rnaturalearth) +library(rnaturalearthdata) +library(rnaturalearthhires) +library(stringr) +library(jsonlite) +library(countrycode) +library(scales) +library(here) +library(patchwork) +library(knitr) +library(kableExtra) + +``` + +```{r read_inputs, echo=FALSE,warning=FALSE, message=FALSE} + +project_name <- params$proj +start_year <- as.numeric(params$t0) +branch <- params$branch + +``` + +--- +title: "`r paste0('4C Ex-Ante Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`" +subtitle: "`r format(Sys.Date(), "%B %Y")`" +--- + +```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE} + +# get output format + +output_format <- ifelse(knitr::is_latex_output(), "latex", "html") + +# get script path + +script_path <- here('scripts') + +# get explainer diagram path + +diagram_path <- here('methods_diagram.png') + +# get data path + +if (!is.null(params$output_dir)) { + data_path <- paste0(params$output_dir,'/',project_name) +} + +# get path to pairs + +if (!is.null(params$pairs_path)) { + pairs_path <- params$pairs_path +} else { pairs_path <- file.path(data_path,'pairs') } + +# read shapefile + +if (!is.null(params$input_dir)) { + input_dir <- params$input_dir +} + +if (!is.null(params$shapefile_path)) { + shapefile_path <- params$shapefile_path +} else { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') } +shapefile <- read_sf(shapefile_path) + +# read carbon density + +if (!is.null(params$carbon_density_path)) { + carbon_density_path <- params$carbon_density_path +} else { carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] } +carbon_density <- read.csv(carbon_density_path) + +# read country path + +if (!is.null(params$country_path)) { + country_path <- params$country_path +} else { country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)} + +``` + +```{r read_pairs, echo=FALSE} + +# get filenames and filter for matched points + +files_full_raw <- list.files(pairs_path, + pattern='*.parquet',full.names=T,recursive=F) +files_full <- files_full_raw[!grepl('matchless',files_full_raw)] +files_short_raw <- list.files(path=pairs_path, + pattern='*.parquet',full.names=F,recursive=F) +files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + +# initialise dfs + +vars <- c(colnames(read_parquet(files_full[1])),'pair') +paired_data_raw <- data.frame(matrix(ncol = length(vars), nrow = 0)) %>% + setNames(vars) %>% + mutate( + pair = as.factor(pair), + k_trt = as.factor(k_trt), + s_trt = as.factor(s_trt) + ) + +for(j in 1:length(files_full)){ + + # read parquet file + + f <- data.frame(read_parquet(files_full[j]),check.names = FALSE) + + # add identity column + + f$pair <- as.factor(c(replicate(nrow(f),str_remove(files_short[j], "\\.parquet$")))) + + # append data to bottom of df + + paired_data_raw <- bind_rows(paired_data_raw,f) + +} + +# generate separate datasets for project and counterfactual + +project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair) +cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair) + +# create project-counterfactual merged dataset + +colnames(cf) <- colnames(project) +pair_merged <- bind_rows(project,cf) +names(pair_merged) <- str_sub(names(pair_merged),3) +names(pair_merged)[names(pair_merged) == "ir"] <- "pair" + +# add type column and remove excess cols + +data <- pair_merged %>% + mutate(type=c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual'))) %>% + select(-c(contains('trt'),ID)) + +``` + +```{r get_shapefile_area, echo=FALSE} + +project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000) + +``` + +```{r get_country_names} + +# define function for extracting country names + +get_country_names <- function(country_codes_path) { + codes <- as.character(fromJSON(country_codes_path)) + country_names <- countrycode(codes, 'iso2c', 'country.name') + country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo' + return(country_names) + } + +# get country names + +country_vec <- get_country_names(country_path) + + # define function for printing the country names if there are multiple + + if (length(country_vec) > 1) { + country_string <- paste(country_vec[-length(country_vec)], collapse = ", ") + country_string <- paste(country_string, "and", country_vec[length(country_vec)]) + } else { + country_string <- country_vec[1] + } + + +``` + +\ + +# Introduction + +This Report has been prepared by researchers at the Cambridge Centre for Carbon Credits (4C) and has been funded by a charitable grant from the Tezos Foundation. 4C utilises innovative, evidence-based approaches to examine the scientific basis of nature-based carbon conservation initiatives and, insodoing, provides a way for different stakeholders to assess the quality of carbon credits (ex post and/or ex ante). + +**Disclaimer: Nothing in this Report constitutes formal advice or recommendations, an endorsement of proposed action, or intention to collaborate; instead, it sets out the details of an evaluation using a method which is still under development. The Report is considered complete as of the publication date shown, though methods are likely to change in future.** + +\ + +# About the project + +`r if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() }` is located in `r country_string`. The proposed area measures `r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. + +For the purposes of this evaluation, we have set the proposed start date to `r start_year`. + +```{r echo=FALSE} + +# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________ + +# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document. + +``` + + + +\ + +# Introduction to the 4C method + +*Our method for forecasting ex-ante additionality remains under development.* + +The 4C approach to forecasting additionality involves identifying places that experienced similar deforestation levels in the past as the project area does today. We start by analyzing forest cover changes in the project area between 10 years ago and the present day. Using pixel-matching techniques, we then identify comparable places outside the project that experienced similar deforestation trends between 20 and 10 years ago (the *matching period*). This allows us to match the deforestation trajectory of the project with that of the matched pixels, but offset in time. This concept is illustrated by the left-hand diagonal arrow in the figure below. + +We can consider the matched pixels as a historical representation of the project as it is today. By examining deforestation in the matched pixels over the subsequent 10 years (the *baseline period*), we estimate a *baseline prediction* — the deforestation expected in the project area under the counterfactual (business-as-usual) scenario. This rate is then projected forward over the next 10 years, as illustrated by the right-hand diagonal arrow in the figure below. We convert the deforestation rate to carbon dioxide emissions using best estimates of carbon density. + +```{r, echo=FALSE, fig.align='center', fig.width=6} + +knitr::include_graphics(diagram_path) + +``` + + +Making predictions about future deforestation is challenging, and there are multiple sources of uncertainty at play. These include: the quantification of carbon, the choice of matching pixels, the effect of leakage and impermanence, future political changes and market forces. We are constantly improving our method in order to minimise these uncertainties. Due to the inherent uncertainty associated with ex-ante (before-the-fact) predictions, carbon credits should only ever be quantified and issued ex-post (after the fact). + +More information about 4C's approach to impact evaluation can be found below. + +[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1) + +[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence) + +[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae) + +[Our paper on the social value of impermanent carbon credits](https://www.nature.com/articles/s41558-023-01815-0) + +[The PACT methodology for ex-post evaluations](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f) + +\ + + +# Methods + +The following sections will detail how we arrived at a forecast of future deforestation and the potential to generate additionality by reducing this deforestation. This includes the location and quality of the matched points, the deforestation rates in each set of points, and the carbon density values used to convert these deforestation rates to carbon emissions reductions. + +\ + +### Location of matched points + +We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. We used these matched points to make a prediction of the counterfactual scenario for deforestation. + +Below we show the location of the matching points (shown in blue) relative to the project (shown in red), both at the country and project scale. + +`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}` + +```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE} + +# downsample no. of points by 90% + +if(nrow(data) > 20000){ + data_forplot <- data %>% sample_frac(0.1) +} else { + data_forplot <- data +} + +# plot location of matching points + +country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large") + +# transform crs + +shapefile <- st_transform(shapefile, st_crs(country_map)) + +ggplot(data=country_map) + + geom_sf(colour=NA,fill='grey80')+ + geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + + scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+ + coord_sf()+ + theme_void()+ + annotation_scale(text_cex=1.5,location='bl')+ + theme(legend.title = element_blank(), + text=element_text(size=16), + legend.position='none') + +xmin <- filter(data, type=='Project') %>% select(lng) %>% min() +xmax <- filter(data, type=='Project') %>% select(lng) %>% max() +ymin <- filter(data, type=='Project') %>% select(lat) %>% min() +ymax <- filter(data, type=='Project') %>% select(lat) %>% max() + +ggplot(data=country_map) + + geom_sf(colour=NA,fill='grey80')+ + geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + + scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+ + coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+ + theme_void()+ + annotation_scale(text_cex=1.5,location='bl')+ + theme(legend.title = element_blank(), + text=element_text(size=16), + legend.position='none') + +``` + +### Quality of matches + +Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the matched points (shown in blue) indicates that the the matched points are composed of places that are similar to the project in terms of the drivers of deforestation and are expected to exhibit similar deforestation trends. + +- Inaccessibility (motorized travel time to healthcare, minutes) + +- Slope ($^\circ$) + +- Elevation (meters) + +- Forest cover at t0 (start year, %) + +- Deforestation at t0 (%) + +- Forest cover at t-5 (5 years prior to start year, %) + +- Deforestation at t-5 (%) + +- Forest cover at t-10 (10 years prior to start year, %) + +- Deforestation at t-10 (%) + +Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset. + +More information about the datasets we use can be found below: + +[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/) + +[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm) + +[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF) + +\ + +```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE} + +# plot matches + +source(file.path(script_path,'plot_matchingvars.R')) + +plot_matching_variables(data,ex_ante='true') + +``` + +\ + +### Standardised mean differences + +We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). The SMD allows us to quantify the similarity between the project and the matched points in a way that is comparable across variables. + +In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and matched points, in standard deviations) for each variable. Values further from zero indicate a larger difference. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our SMDs would ideally fall in order for the project and matched points to be considered well-matched. + +\ + +```{r smd} + +std_mean_diff <- function(pairs_path) { + + # clean data + + files_full_raw <- list.files(pairs_path, + pattern='*.parquet',full.names=T,recursive=F) + files_full <- files_full_raw[!grepl('matchless',files_full_raw)] + files_short_raw <- list.files(path=pairs_path, + pattern='*.parquet',full.names=F,recursive=F) + files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + + # initialise dfs + + vars <- c(colnames(read_parquet(files_full[1])),'pair') + df <- data.frame(matrix(ncol=length(vars),nrow=0)) %>% + setNames(vars) %>% + mutate(k_trt=as.factor(k_trt), + s_trt=as.factor(s_trt)) + + for(j in 1:length(files_full)){ + + # read in all parquet files for a given project + + f <- data.frame(read_parquet(files_full[j])) %>% + mutate(k_trt=as.factor(k_trt), + s_trt=as.factor(s_trt)) + + # append data to bottom of df + + df <- bind_rows(df,f) + + } + + # calculate smd + + smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) + + variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + + for (var in variables) { + k_var <- df[[paste0("k_", var)]] + s_var <- df[[paste0("s_", var)]] + + k_mean <- mean(k_var, na.rm = TRUE) + s_mean <- mean(s_var, na.rm = TRUE) + k_sd <- sd(k_var, na.rm = TRUE) + s_sd <- sd(s_var, na.rm = TRUE) + + pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) + smd <- (k_mean - s_mean) / pooled_sd + + smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) + } + + return(smd_results) +} + +results <- std_mean_diff(pairs_path) + +# changing sign for interpretation + +results$smd <- (-1)*results$smd + +# changing order of variables + +variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + +results$variable <- factor(results$variable, levels=variables) + +# plotting + + ggplot(results,aes(x=smd,y=variable))+ + #geom_boxplot(outlier.shape=NA,colour='blue')+ + geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+ + geom_vline(xintercept=0)+ + geom_vline(xintercept=0.25,lty=2,colour='grey30')+ + geom_vline(xintercept=-0.25,lty=2,colour='grey30')+ + scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")), + bquote(Deforestation~t[-5]~("%")), + bquote(Deforestation~t[0]~("%")), + bquote(Forest~cover~t[-10]~("%")), + bquote(Forest~cover~t[-5]~("%")), + bquote(Forest~cover~t[0]~("%")), + 'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+ + xlab('Standardised mean difference')+ + xlim(-1,1)+ + theme_classic()+ + theme(axis.title.y=element_blank(), + legend.title=element_blank(), + legend.box.background=element_rect(), + legend.position='none', + text=element_text(size=14), + axis.text.y=element_text(size=14)) + + +``` + +\ + +### Deforestation within the project + +Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes: + +- Undisturbed forest to degraded forest + +- Degraded forest to deforested land + +- Undisturbed forest to deforested land + +- Undisturbed land to reforested land (which indicates that regrowth occurred after a deforestation event) + +\ + +These transitions are shown in the plot below for the 10-year period between `r start_year-10` and `r start_year`. They are overlaid on the project area which is shown in grey. If a transition is not shown, it did not occur in the period examined. + +Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF). + +\ + +```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE} + +# plot deforestation within project + +source(file.path(script_path,'plot_transitions.R')) + +proj_coords <- data %>% + filter(type=='Project') %>% + select(lat,lng) + +proj_input_defplot <- data %>% + filter(type=='Project') %>% + select(contains('luc')) %>% + setNames(paste0("luc_", (start_year-10):start_year)) %>% + cbind(proj_coords) + +proj_input_defplot <- proj_input_defplot[, !is.na(colnames(proj_input_defplot))] + +plot_transitions(data=proj_input_defplot,t0=start_year-10,period_length=10,shapefile=shapefile) + +``` + +\ + +### Land cover changes within project and matched pixels + +In the below plots, we show the changes in land classes over time for both the project (red) and the matched points (blue). + +Note the following: + +- The vertical grey dashed line represents the start year of the project (`r start_year`). The timings shown on the x-axis are relative to this start year. + +- As explained in the 'Methods' section, the matched points are offset in time relative to the project by 10 years. This means that all changes observed in the matched points happened 10 years prior to the equivalent time point in the project. This time offset allows us to use the last 10 years in the matched points as a prediction of the next 10 years for the project. + +- Solid lines represent ex-post observed changes, whereas the dotted line represents the prediction for the future of the project. + +```{r make_inputs, echo=FALSE} + +# preparing inputs + +proj_input <- data %>% + filter(type=='Project') %>% + select(contains('luc')) %>% + setNames(paste0("luc_", (start_year-10):start_year)) +proj_input <- proj_input[, !is.na(colnames(proj_input))] + + +cf_input <- data %>% + filter(type=='Counterfactual') %>% + select(contains('luc')) %>% + setNames(paste0("luc_", (start_year-20):(start_year))) %>% + select(where(~ all(!is.na(.)))) + +``` + +```{r luc_timeseries_all, echo=FALSE} + +source(file.path(script_path,'land_cover_timeseries.R')) + +# getting results + +proj_results <- get_luc_timeseries(proj_input,t0=start_year-10,tend=start_year,type='single') %>% + mutate(type='Project') + +cf_results <- get_luc_timeseries(cf_input,t0=start_year-20,tend=start_year,type='single') %>% + mutate(type='Counterfactual') + +# combining results + +results <- bind_rows(proj_results, cf_results) + +``` + +Showing the trend for undisturbed, degraded, deforested and regrowth in turn: + +```{r undisturbed_timeseries, fig.width=8,fig.height=13} + +# add prediction from the matched pixels: + +prediction <- cf_results %>% + filter(year >= (start_year-10)) %>% + mutate(type='Project', + year=year+10) + +results <- bind_rows(results,prediction) + +# make a custom function for plotting the results + +plot_timeseries <- function(luc_value, title_str) { + + #remove gap between solid and dotted project line + percent_val <- results %>% + filter(year == start_year + & type == "Project" + & luc == luc_value) %>% + pull(percentage) + + # df wrangling + extended_results <- results %>% + mutate( + luc = as.numeric(luc), + year = as.numeric(year), + line_type = ifelse(type == "Project" & year > start_year, "dotted", "solid"), + type = case_when( + type == "Counterfactual" ~ "Matched points", + TRUE ~ type + ) + ) %>% + bind_rows(data.frame( + year = start_year, + luc = luc_value, + percentage = percent_val, + type = 'Project', + line_type = 'dotted' + )) + + extended_results %>% + filter(luc == luc_value) %>% + ggplot(aes(x = year, y = percentage, color = type, linetype = line_type)) + + geom_line(linewidth = 1.5) + + geom_vline(xintercept = start_year, linetype = 2, color = 'grey30') + + #geom_vline(xintercept = start_year-10, linetype = 2, color = 'grey30') + + scale_colour_manual(name = 'Location', + values = c('red','blue'), + breaks = c('Project', 'Matched points'), + labels = c('Project', 'Matched points'))+ + xlab('Year') + + ylab('% cover') + + ggtitle(title_str) + + guides(linetype = "none") + + theme_classic() + + scale_linetype_manual(values = c("solid" = "solid", "dotted" = "dotted"))+ + facet_wrap(~type)+ + xlim(start_year-20,start_year+10) + +} + +plot_1 <- plot_timeseries(luc_value=1, title_str='Undisturbed forest') + theme(legend.position='none',axis.title.x = element_blank()) +plot_2 <- plot_timeseries(luc_value=2, title_str='Degraded forest') + theme(legend.position='none', axis.title.x = element_blank()) +plot_3 <- plot_timeseries(luc_value=3, title_str='Deforested land') + theme(legend.position='none', axis.title.x = element_blank()) +plot_4 <- plot_timeseries(luc_value=4, title_str='Regrowth') + theme(legend.position='none', axis.title.x = element_text(size=14)) + +plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(ncol=1) + +``` + +### Deforestation rates in the matched points during the baseline period + +```{r proportions_undisturbed_degraded, echo=FALSE} + +# obtaining the area of undisturbed and degraded forest at t0, for use later + +source(file.path(script_path,'def_rate.R')) + +prop_und <- get_prop_class(data=proj_input,t0=start_year-10,class=1) +prop_deg <- get_prop_class(data=proj_input,t0=start_year-10,class=2) + +``` + +Here we present the deforestation rates observed in the matched pixels over the past 10 years (the baseline period). + +Forest loss transitions can be broken down into the following processes: + +- degradation of undisturbed forest + +- deforestation of undisturbed forest + +- deforestation of degraded forest + +- regrowth of undisturbed forest (implies previous deforestation) + +We calculate the rate at which these processes occur in the matched pixels using the following method: + +1. Calculate the percentage of matched pixels which have undergone one of the above processes (according to the JRC classification) during the baseline period. +2. Divide this percentage by 10 to give an annual rate of change, in %/year, relative to the amount of (undisturbed or degraded) forest present at the beginning of the project. +3. Multiply this rate by the area of the (undisturbed or degraded) forest 10 years prior to the project start to give an annual rate, in hectares per year. + +The amounts of forest in the project area 10 years prior to project start are as follows: + +- Undisturbed forest: `r format(100*prop_und, big.mark = ",", scientific = FALSE, digits = 3)`% + +- Degraded forest: `r format(100*prop_deg, big.mark = ",", scientific = FALSE, digits = 3)`% + +The rates are given below. + +```{r rate_of_forest_loss_ha, echo=FALSE} + +source(file.path(script_path,'def_rate.R')) + +df_rate_percent <- def_rate_single(data=cf_input,t0=start_year-10,period_length=10) + +df_rate_ha <- df_rate_percent + +df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3]/100)*project_area_ha*prop_und + +df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3]/100)*project_area_ha*prop_deg + +knitr::kable( + df_rate_ha %>% + rename('Rate (ha/year)' = 3) %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) +) + + +``` + +\ + +### Carbon stock changes in the matched points during the baseline period + +Here we present the carbon density calculations for this project. + +In order to convert land cover changes to carbon emissions, we use regional aboveground (AGB) carbon density values generated through NASA GEDI data. + +More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi). + +Note that, in calculating carbon stock changes, we assume the following: + +- Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997) + +- Deadwod biomass is 11% of AGB (based on IPCC 2003) + +- Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011) + + +\ +```{r additionality_forecast} + +baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8)) +colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea') +luc_counter <- 1 +row_counter <- 1 + +carbon_density <- filter(carbon_density, land.use.class %in% c(1:6)) + +for(i in carbon_density$land.use.class){ + + for(j in c("Start","End")) { + + # get agb + + agb <- carbon_density$carbon.density[luc_counter] + + # get other values + + bgb <- agb*0.2 + dw <- agb*0.11 + total <- agb + bgb + dw + #total_co2 <- total*0.47 # we're doing this step later + + # get area of class i + + if (j == "Start") { + area_of_forest <- get_prop_class(cf_input,t0=start_year-10,class=i)*project_area_ha + } else if (j == "End") { + area_of_forest <- get_prop_class(cf_input,t0=start_year,class=i)*project_area_ha } + + # multiply total by area + + total_byarea <- total*area_of_forest + + # adding to df + + baseline_stocks[row_counter,1] <- j + baseline_stocks[row_counter,2] <- i + baseline_stocks[row_counter,3] <- agb + baseline_stocks[row_counter,4] <- bgb + baseline_stocks[row_counter,5] <- dw + baseline_stocks[row_counter,6] <- total + baseline_stocks[row_counter,7] <- area_of_forest + baseline_stocks[row_counter,8] <- total_byarea + + row_counter <- row_counter+1 + + } + + # advance counter + + luc_counter <- luc_counter + 1 + +} + +# formatting bits + +baseline_stocks_format <- baseline_stocks +baseline_stocks_format <- baseline_stocks_format %>% filter(time == 'Start') +baseline_stocks_format <- baseline_stocks_format[2:6] + +colnames(baseline_stocks_format) <- c( + 'Land use class', + 'AGB density (t C / ha)', + 'BGB density (t C / ha)', + 'Deadwood biomass density (t C / ha)', + 'Total biomass density (t C / ha)', + 'Total biomass (t C)') + + +# renaming classes + +baseline_stocks_format <- baseline_stocks_format %>% + mutate(`Land use class` = case_when( + `Land use class` == "1" ~ 'Undisturbed', + `Land use class` == "2" ~ 'Degraded', + `Land use class` == "3" ~ 'Deforested', + `Land use class` == "4" ~ 'Reforested', + `Land use class` == "5" ~ 'Water', + `Land use class` == "6" ~ 'Other', + TRUE ~ as.character(`Land use class`) # ensure no unexpected values + )) + + +baseline_stocks_format[2:6] <- lapply(baseline_stocks_format[, 2:6], function(x) { + if (is.numeric(x)) comma(x) else x +}) + +# Print only carbon calculations at this stage + +baseline_stocks_format %>% + drop_na() %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) %>% + kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>% + kable_styling(bootstrap_options = "striped") + +``` + +# Results: baseline rate of carbon emissions + +In this section we present the annual rate of carbon loss due to deforestation in the matched points during the baseline period. We can take this to be a prediction of the counterfactual scenario for the project (the *baseline*). + +First we present the carbon stock changes observed in the matched points during the baseline period: + +```{r results} + +baseline_stock_changes <- baseline_stocks[c(1:2,7:8)] + +# reshape + +reshaped_data <- baseline_stock_changes %>% + mutate(luc = as.character(luc)) %>% + group_by(luc) %>% + summarize( + area_start = area[time == "Start"], + area_end = area[time == "End"], + area_diff = area_start - area_end, + c_start = total_byarea[time == "Start"], + c_end = total_byarea[time == "End"], + c_diff = c_start - c_end, + .groups = 'drop' + ) + +# get totals + +total_row <- reshaped_data %>% + summarize( + luc = "Total", + area_start = sum(area_start, na.rm = TRUE), + area_end = sum(area_end, na.rm = TRUE), + area_diff = sum(area_diff, na.rm = TRUE), + c_start = sum(c_start, na.rm = TRUE), + c_end = sum(c_end, na.rm = TRUE), + c_diff = sum(c_diff, na.rm = TRUE) + ) %>% + mutate(luc = as.character(luc)) + +baseline_stock_changes <- bind_rows(reshaped_data, total_row) + +# add in conversion to CO2 + +baseline_stock_changes <- baseline_stock_changes %>% + mutate(co2_diff = 0.47*c_diff) + +# formatting bits + +baseline_stock_changes_format <- baseline_stock_changes %>% + mutate(across(where(is.numeric), ~ comma(.))) %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", as.numeric(.)))) + +if (knitr::is_html_output()) { + colnames(baseline_stock_changes_format) <- c( + 'Land use class', + 'Area at start (ha)', + 'Area at end (ha)', + 'Area loss (ha)', + 'Biomass at start (t)', + 'Biomass at end (t)', + 'Biomass loss (t)' + 'CO2 loss (t)') +} else if (knitr::is_latex_output()) { + colnames(baseline_stock_changes_format) <- c( + 'Land use class', + 'Area at start (ha)', + 'Area at end (ha)', + 'Area loss (ha)', + 'Biomass at start (t)', + 'Biomass at end (t)', + 'Biomass loss (t)' + 'CO$_{2}$ loss (t)') +} + +baseline_stock_changes_format <- baseline_stock_changes_format %>% + mutate(`Land use class` = case_when( + `Land use class` == "1" ~ 'Undisturbed', + `Land use class` == "2" ~ 'Degraded', + `Land use class` == "3" ~ 'Deforested', + `Land use class` == "4" ~ 'Reforested', + `Land use class` == "5" ~ 'Water', + `Land use class` == "6" ~ 'Other', + TRUE ~ as.character(`Land use class`) # ensure no unexpected values + )) + +baseline_stock_changes_format[nrow(baseline_stock_changes_format), 1] <- 'Total' + +filtered_data <- baseline_stock_changes_format %>% + drop_na() %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) + +last_row_index <- nrow(filtered_data) + +filtered_data %>% + kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>% + kable_styling(bootstrap_options = "striped") %>% + row_spec(last_row_index, bold = TRUE) + +``` + +```{r results_summary} + +# find the difference + +delta_c <- as.numeric(baseline_stock_changes[nrow(baseline_stock_changes), ncol(baseline_stock_changes)]) +delta_c_annual <- delta_c/10 + +``` + +To calculate the baseline annual rate of carbon emissions, we sum the the differences in carbon stocks between the start and end of the baseline period, then divide the total by the length of the baseline period (10 years). + +**For this project, the baseline annual rate of carbon emissions, in tonnes of carbon dioxide per year, is `r format(delta_c_annual, big.mark = ",", scientific = FALSE, digits = 3)`.** This should be interpreted as the maximum estimated number of carbon credits that could be generated if the project mitigated 100% of the baseline level of deforestation, assuming this is confirmed by ex post observations. We present alternative mitigation scenarios below. + +### Expected additionality under different mitigation scenarios + +Additionality depends not only on baseline deforestation rate but the ability of the project to mitigate that deforestation. Therefore, we present the expected additionality under different mitigation scenarios (100% to 10% mitigation success). The 100% mitigation scenario occurs where the project is able to mitigate all of the deforestation that would have occurred under the counterfactual scenario. This scenario is unlikely to be realistic, but gives a sense of the deforestation risk faced by each area. It is more likely that only a proportion of this baseline deforestation is mitigated - for example, under the 50% mitigation scenario, the project is able to reduce the deforestation rate by half. The higher the proportion of the baseline deforestation that is mitigated, the greater the additionality of a project. + +Note that we present the raw additionality, without accounting for leakage and impermenance (discussed later). + +We are in the process of producing confidence intervals that reflect the uncertainty associated with the baseline, which will be added to future revisions of this document. + +```{r} + +scenarios <- data.frame(matrix(ncol=2,nrow=5)) +scenarios[1] <- c("10%","25%","50%","75%","100%") +scenarios[2] <- delta_c_annual*c(0.1,0.25,0.5,0.75,1) + +if (knitr::is_html_output()) { + colnames(scenarios) <- c('Scenario', + 'Additionality (t CO2 / year)') +} else if (knitr::is_latex_output()) { + colnames(scenarios) <- c('Scenario', + 'Additionality (t CO$_{2}$ / year)') +} + +scenarios <- scenarios %>% + mutate(across(where(is.numeric), comma)) + +knitr::kable( + scenarios +) + +``` + +\ + +# Accounting for leakage and impermanence + +Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**. + +**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage is likely to be lower if the processes leading deforestation and degradation do not result in high yielding land uses, or if the carbon densities within the project are high compared with those in other areas where these activities are taking place. Leakage can also be reduced by interventions which improve yields in areas already under production. We can provide guidance on how this could be achieved. + +**Impermanence** occurs when the additionality generated by a project is reversed. Additional carbon stocks in forests are inherently vulnerable to these reversals. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. In future revisions of this document we aim to include indicative estimates of the equivalent permanence (the relative value of a impermanent credit relative to a permanent credit) for this project. + +You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence). + +--- + +### Reproducibility + +This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation). diff --git a/scripts/scripts/def_rate.R b/scripts/scripts/def_rate.R new file mode 100644 index 0000000..6a4c417 --- /dev/null +++ b/scripts/scripts/def_rate.R @@ -0,0 +1,328 @@ + + + +def_rate <- function(data,t0,period_length,process='all'){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and match + + proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() + cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # choosing processes to measure + + if(process=='def_only'){ + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + } else if(process=='deg_only'){ + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + } else { + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 1, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + } + + + data_filtered$response <- response + + # count up number of pixels where there have been changes for each type + + proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% + nrow() + cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + proj_rate <- 100*(proj_changes/proj_1s)/period_length + cf_rate <- 100*(cf_changes/cf_1s)/period_length + + # make df + + df <- data.frame(matrix(ncol=2,nrow=1)) + colnames(df) <- c('Project','Counterfactual') + df[1,1] <- proj_rate + df[1,2] <- cf_rate + + return(df) + +} + + + +def_rate_seperate <- function(data,t0,period_length){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and cf + + proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() + cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # measuring responses + + def_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + deg_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + ref_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 0, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + data_filtered$def_response <- def_response + data_filtered$deg_response <- deg_response + data_filtered$ref_response <- ref_response + + # count up number of pixels where there have been changes for each type + + proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% + nrow() + cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% + nrow() + + proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% + nrow() + cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% + nrow() + + proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% + nrow() + cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + proj_def <- 100*(proj_def_changes/proj_1s)/period_length + cf_def <- 100*(cf_def_changes/cf_1s)/period_length + + proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length + cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length + + proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length + cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length + + # adding the degraded-to-deforested transition + + data_filtered_2 <- data[data[,t0_index]==2,] + + # count 2s at t0 in project and cf + + proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow() + cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + luc_tend_2 <- data_filtered_2 %>% + select(paste0('luc_',tend)) + + def_response_2 <- case_when( + luc_tend_2==1 ~ 0, + luc_tend_2==2 ~ 0, + luc_tend_2==3 ~ 1, + luc_tend_2==4 ~ 0, + luc_tend_2>4 ~ 0) + + data_filtered_2$def_response_2 <- def_response_2 + + proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% + nrow() + cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% + nrow() + + proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length + cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length + + # make df + + df <- data.frame(matrix(ncol=4,nrow=8)) + + colnames(df) <- c('Process','Forest type','Location','Rate (%/year)') + + df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2)) + df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2)) + df[3] <- c(rep(c('Project','Counterfactual'),times=4)) + df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref) + + return(df) + +} + +get_prop_class <- function(data,t0,class){ + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + data_filtered <- data[data[,t0_index]==class,] + + total_count <- data %>% nrow() + class_count <- data_filtered %>% nrow() + prop <- class_count/total_count + + return(prop) + +} + + +def_rate_single <- function(data,t0,period_length){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and cf + + no_1s <- nrow(data_filtered) + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # measuring responses + + def_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + deg_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + ref_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 0, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + data_filtered$def_response <- def_response + data_filtered$deg_response <- deg_response + data_filtered$ref_response <- ref_response + + # count up number of pixels where there have been changes for each type + + def_changes <- data_filtered %>% filter(def_response==1) %>% + nrow() + + deg_changes <- data_filtered %>% filter(deg_response==1) %>% + nrow() + + ref_changes <- data_filtered %>% filter(ref_response==1) %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + def <- 100*(def_changes/no_1s)/period_length + + deg <- 100*(deg_changes/no_1s)/period_length + + ref <- 100*(ref_changes/no_1s)/period_length + + # adding the degraded-to-deforested transition + + data_filtered_2 <- data[data[,t0_index]==2,] + + # count 2s at t0 in project and cf + + no_2s <- data_filtered_2 %>% nrow() + + # identify where there have been changes during the evaluation period + + luc_tend_2 <- data_filtered_2 %>% + select(paste0('luc_',tend)) + + def_response_2 <- case_when( + luc_tend_2==1 ~ 0, + luc_tend_2==2 ~ 0, + luc_tend_2==3 ~ 1, + luc_tend_2==4 ~ 0, + luc_tend_2>4 ~ 0) + + data_filtered_2$def_response_2 <- def_response_2 + + def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% + nrow() + + deg_to_def <- 100*(def_changes_2/no_2s)/period_length + + # make df + + df <- data.frame(matrix(ncol=3,nrow=4)) + + colnames(df) <- c('Process','Forest type','Rate (%/year)') + + df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation') + df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest') + df[3] <- c(deg,def,deg_to_def,ref) + + return(df) + +} \ No newline at end of file diff --git a/scripts/scripts/land_cover_timeseries.R b/scripts/scripts/land_cover_timeseries.R new file mode 100644 index 0000000..6490bf1 --- /dev/null +++ b/scripts/scripts/land_cover_timeseries.R @@ -0,0 +1,111 @@ + +get_luc_timeseries <- function(data,t0,tend,type='both'){ + + years_list <- seq(t0,tend) + + if(type=='both'){ + + df <- data.frame(matrix(ncol=4,nrow=8*length(years_list))) + + colnames(df) <- c('year','type','luc','percentage') + + counter <- 1 + + for(year in years_list) { + + for(i in seq (1:4)) { + + for(type_value in c('Project','Counterfactual')) { + + total <- data %>% filter(type == type_value) %>% nrow() + + no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- type_value + df[counter,3] <- i + df[counter,4] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + } else if(type=='single'){ + + df <- data.frame(matrix(ncol=3,nrow=4*length(years_list))) + + colnames(df) <- c('year','luc','percentage') + + counter <- 1 + + for(year in years_list) { + + for(i in seq (1:4)) { + + total <- data %>% nrow() + + no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- i + df[counter,3] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + return(drop_na(df)) + +} + +luc_class1_uncertainty <- function(data,t0,tend) { + + years_list <- seq(t0-10,tend) + + df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list))) + + colnames(df) <- c('year','type','pair','percent_class1') + + counter <- 1 + + for(year in years_list) { + + for(type_value in c('Project','Counterfactual')) { + + for(pair_id in unique(data$pair)) { + + total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow() + + no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- type_value + df[counter,3] <- pair_id + df[counter,4] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + return(drop_na(df)) + +} + diff --git a/scripts/scripts/plot_matchingvars.R b/scripts/scripts/plot_matchingvars.R new file mode 100644 index 0000000..ec47f01 --- /dev/null +++ b/scripts/scripts/plot_matchingvars.R @@ -0,0 +1,42 @@ +plot_matching_variables <- function(data, ex_ante = 'false') { + + cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc')) + cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages + cont_data <- melt(cont_data) + + # rename labels + cont_data$variable <- factor(cont_data$variable, + levels = c('access', 'cpc0_u', 'cpc0_d', + 'slope', 'cpc5_u', 'cpc5_d', + 'elevation', 'cpc10_u', 'cpc10_d')) + + levels(cont_data$variable) <- c('Inaccessibility', + 'Forest~cover~t[0]', + 'Deforestation~t[0]', + 'Slope', + 'Forest~cover~t[-5]', + 'Deforestation~t[-5]', + 'Elevation', + 'Forest~cover~t[-10]', + 'Deforestation~t[-10]') + + # determine labels based on ex_ante + if (ex_ante == 'false') { + plot_labels <- c('Counterfactual', 'Project') + } else if (ex_ante == 'true') { + plot_labels <- c('Matched points', 'Project')} + + # plot + matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) + + geom_density(adjust = 10, size = 1) + + facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) + + ylab('Density') + + scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) + + theme_classic() + + theme(legend.title = element_blank(), + axis.title.x = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank()) + + return(matchingvars) +} \ No newline at end of file diff --git a/scripts/scripts/plot_transitions.R b/scripts/scripts/plot_transitions.R new file mode 100644 index 0000000..2931a60 --- /dev/null +++ b/scripts/scripts/plot_transitions.R @@ -0,0 +1,63 @@ +library(ggspatial) + +plot_transitions <- function(data,t0,period_length,shapefile){ + + # count number of 1s at project start + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + data_filtered <- data[data[,t0_index]==1,] + + # identify where there have been changes + + tend <- t0 + period_length + + luc_tend <- data_filtered[[paste0('luc_', tend)]] + + response <- case_when( + luc_tend==1 ~ NA, + luc_tend==2 ~ 'deg', + luc_tend==3 ~ 'def', + luc_tend==4 ~ 'ref', + luc_tend>4 ~ NA) + + data_filtered$response <- as.factor(response) + data_filtered <- data_filtered %>% filter(!is.na(response)) + + # adding deg --> def transition + + # count number of 2s at project start + + data_filtered_2s <- data[data[,t0_index]==2,] + + # identify where there have been changes + + luc_tend <- data_filtered_2s[[paste0('luc_', tend)]] + + response <- case_when( + luc_tend==1 ~ NA, + luc_tend==2 ~ NA, + luc_tend==3 ~ 'deg_to_def', + luc_tend==4 ~ NA, + luc_tend>4 ~ NA) + + data_filtered_2s$response <- as.factor(response) + data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response)) + + combined_dat <- bind_rows(data_filtered, data_filtered_2s) + combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref')) + + # plotting + + plot <- combined_dat %>% + filter(response != 0) %>% + ggplot(aes(x=lng,y=lat,colour=response))+ + geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+ + geom_point(alpha=0.5,size=0.5)+ + scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+ + annotation_scale(text_cex = 1.3)+ + theme_void() + + return(plot) + +} diff --git a/scripts/scripts/std_mean_diff.R b/scripts/scripts/std_mean_diff.R new file mode 100644 index 0000000..63d81ba --- /dev/null +++ b/scripts/scripts/std_mean_diff.R @@ -0,0 +1,57 @@ + +std_mean_diff <- function(path_to_pairs) { + + # clean data + + files_full_raw <- list.files(path_to_pairs, + pattern='*.parquet',full.names=T,recursive=F) + files_full <- files_full_raw[!grepl('matchless',files_full_raw)] + files_short_raw <- list.files(path=path_to_pairs, + pattern='*.parquet',full.names=F,recursive=F) + files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + + # initialise dfs + + vars <- c(colnames(read_parquet(files_full[1])),'pair') + df <- data.frame(matrix(ncol=length(vars),nrow=0)) + colnames(df) <- vars + + for(j in 1:length(files_full)){ + + # read in all parquet files for a given project + + f <- data.frame(read_parquet(files_full[j])) + + # append data to bottom of df + + df <- bind_rows(df,f) + + } + + # calculate smd + + smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) + + variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + + for (var in variables) { + k_var <- df[[paste0("k_", var)]] + s_var <- df[[paste0("s_", var)]] + + k_mean <- mean(k_var, na.rm = TRUE) + s_mean <- mean(s_var, na.rm = TRUE) + k_sd <- sd(k_var, na.rm = TRUE) + s_sd <- sd(s_var, na.rm = TRUE) + + pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) + smd <- (k_mean - s_mean) / pooled_sd + + smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) + } + + return(smd_results) +} + + \ No newline at end of file diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index c30367e..7361eb3 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -8,8 +8,6 @@ #e: evaluation year (default: 2022) #v: verbose - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false). -#NB running evaluations requires the evaluations code - # Check which branch is currently checked out branch=$(git rev-parse --abbrev-ref HEAD) @@ -198,7 +196,6 @@ deactivate # Run ex-ante evaluation if [ "$verbose" == "true" ]; then -evaluations_dir="~/evaluations" -ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html" -Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" +ea_output_file="${output_dir}/${proj}_ex_ante_evaluation.html" +Rscript -e "rmarkdown::render(input='scripts/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" fi \ No newline at end of file From 9d92d17f388e0257c4123d26a842132d7bc7d915 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Wed, 28 Aug 2024 09:34:09 +0000 Subject: [PATCH 15/19] Moved things around --- evaluations/ex_ante_evaluation_template.Rmd | 1001 +++++++++++++++++++ evaluations/methods_diagram.png | Bin 0 -> 37672 bytes evaluations/scripts/def_rate.R | 328 ++++++ evaluations/scripts/land_cover_timeseries.R | 111 ++ evaluations/scripts/plot_matchingvars.R | 42 + evaluations/scripts/plot_transitions.R | 63 ++ evaluations/scripts/std_mean_diff.R | 57 ++ 7 files changed, 1602 insertions(+) create mode 100644 evaluations/ex_ante_evaluation_template.Rmd create mode 100644 evaluations/methods_diagram.png create mode 100644 evaluations/scripts/def_rate.R create mode 100644 evaluations/scripts/land_cover_timeseries.R create mode 100644 evaluations/scripts/plot_matchingvars.R create mode 100644 evaluations/scripts/plot_transitions.R create mode 100644 evaluations/scripts/std_mean_diff.R diff --git a/evaluations/ex_ante_evaluation_template.Rmd b/evaluations/ex_ante_evaluation_template.Rmd new file mode 100644 index 0000000..be95ce1 --- /dev/null +++ b/evaluations/ex_ante_evaluation_template.Rmd @@ -0,0 +1,1001 @@ +--- +output: + html_document: + theme: spacelab + df_print: paged + toc: yes + toc_float: yes + pdf_document: + toc: yes +params: + proj: null + t0: null + input_dir: null + output_dir: null + fullname: null + country_path: null + shapefile_path: null + pairs_path: null + carbon_density_path: null + branch: null +--- + +```{r include=FALSE} + +# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A SHELL TERMINAL: + +# Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd', output_file='~/evaluations/R/example_project.html', params=list(proj='example_project', t0=2010, ...))" + +# Mandatory args: proj, t0 +# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path +# You must either specify input dir and output dir OR provide absolute paths to each of the objects required + +``` + +```{r settings, include=FALSE} +knitr::opts_chunk$set( + echo = FALSE, warning=FALSE,message=FALSE) + +library(tidyverse) +library(sf) +library(reshape2) +library(maps) +library(mapdata) +library(ggspatial) +library(arrow) +library(rnaturalearth) +library(rnaturalearthdata) +library(rnaturalearthhires) +library(stringr) +library(jsonlite) +library(countrycode) +library(scales) +library(here) +library(patchwork) +library(knitr) +library(kableExtra) + +``` + +```{r read_inputs, echo=FALSE,warning=FALSE, message=FALSE} + +project_name <- params$proj +start_year <- as.numeric(params$t0) +branch <- params$branch + +``` + +--- +title: "`r paste0('4C Ex-Ante Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`" +subtitle: "`r format(Sys.Date(), "%B %Y")`" +--- + +```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE} + +# get output format + +output_format <- ifelse(knitr::is_latex_output(), "latex", "html") + +# get script path + +script_path <- here('scripts') + +# get explainer diagram path + +diagram_path <- here('methods_diagram.png') + +# get data path + +if (!is.null(params$output_dir)) { + data_path <- paste0(params$output_dir,'/',project_name) +} + +# get path to pairs + +if (!is.null(params$pairs_path)) { + pairs_path <- params$pairs_path +} else { pairs_path <- file.path(data_path,'pairs') } + +# read shapefile + +if (!is.null(params$input_dir)) { + input_dir <- params$input_dir +} + +if (!is.null(params$shapefile_path)) { + shapefile_path <- params$shapefile_path +} else { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') } +shapefile <- read_sf(shapefile_path) + +# read carbon density + +if (!is.null(params$carbon_density_path)) { + carbon_density_path <- params$carbon_density_path +} else { carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] } +carbon_density <- read.csv(carbon_density_path) + +# read country path + +if (!is.null(params$country_path)) { + country_path <- params$country_path +} else { country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)} + +``` + +```{r read_pairs, echo=FALSE} + +# get filenames and filter for matched points + +files_full_raw <- list.files(pairs_path, + pattern='*.parquet',full.names=T,recursive=F) +files_full <- files_full_raw[!grepl('matchless',files_full_raw)] +files_short_raw <- list.files(path=pairs_path, + pattern='*.parquet',full.names=F,recursive=F) +files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + +# initialise dfs + +vars <- c(colnames(read_parquet(files_full[1])),'pair') +paired_data_raw <- data.frame(matrix(ncol = length(vars), nrow = 0)) %>% + setNames(vars) %>% + mutate( + pair = as.factor(pair), + k_trt = as.factor(k_trt), + s_trt = as.factor(s_trt) + ) + +for(j in 1:length(files_full)){ + + # read parquet file + + f <- data.frame(read_parquet(files_full[j]),check.names = FALSE) + + # add identity column + + f$pair <- as.factor(c(replicate(nrow(f),str_remove(files_short[j], "\\.parquet$")))) + + # append data to bottom of df + + paired_data_raw <- bind_rows(paired_data_raw,f) + +} + +# generate separate datasets for project and counterfactual + +project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair) +cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair) + +# create project-counterfactual merged dataset + +colnames(cf) <- colnames(project) +pair_merged <- bind_rows(project,cf) +names(pair_merged) <- str_sub(names(pair_merged),3) +names(pair_merged)[names(pair_merged) == "ir"] <- "pair" + +# add type column and remove excess cols + +data <- pair_merged %>% + mutate(type=c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual'))) %>% + select(-c(contains('trt'),ID)) + +``` + +```{r get_shapefile_area, echo=FALSE} + +project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000) + +``` + +```{r get_country_names} + +# define function for extracting country names + +get_country_names <- function(country_codes_path) { + codes <- as.character(fromJSON(country_codes_path)) + country_names <- countrycode(codes, 'iso2c', 'country.name') + country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo' + return(country_names) + } + +# get country names + +country_vec <- get_country_names(country_path) + + # define function for printing the country names if there are multiple + + if (length(country_vec) > 1) { + country_string <- paste(country_vec[-length(country_vec)], collapse = ", ") + country_string <- paste(country_string, "and", country_vec[length(country_vec)]) + } else { + country_string <- country_vec[1] + } + + +``` + +\ + +# Introduction + +This Report has been prepared by researchers at the Cambridge Centre for Carbon Credits (4C) and has been funded by a charitable grant from the Tezos Foundation. 4C utilises innovative, evidence-based approaches to examine the scientific basis of nature-based carbon conservation initiatives and, insodoing, provides a way for different stakeholders to assess the quality of carbon credits (ex post and/or ex ante). + +**Disclaimer: Nothing in this Report constitutes formal advice or recommendations, an endorsement of proposed action, or intention to collaborate; instead, it sets out the details of an evaluation using a method which is still under development. The Report is considered complete as of the publication date shown, though methods are likely to change in future.** + +\ + +# About the project + +`r if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() }` is located in `r country_string`. The proposed area measures `r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. + +For the purposes of this evaluation, we have set the proposed start date to `r start_year`. + +```{r echo=FALSE} + +# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________ + +# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document. + +``` + + + +\ + +# Introduction to the 4C method + +*Our method for forecasting ex-ante additionality remains under development.* + +The 4C approach to forecasting additionality involves identifying places that experienced similar deforestation levels in the past as the project area does today. We start by analyzing forest cover changes in the project area between 10 years ago and the present day. Using pixel-matching techniques, we then identify comparable places outside the project that experienced similar deforestation trends between 20 and 10 years ago (the *matching period*). This allows us to match the deforestation trajectory of the project with that of the matched pixels, but offset in time. This concept is illustrated by the left-hand diagonal arrow in the figure below. + +We can consider the matched pixels as a historical representation of the project as it is today. By examining deforestation in the matched pixels over the subsequent 10 years (the *baseline period*), we estimate a *baseline prediction* — the deforestation expected in the project area under the counterfactual (business-as-usual) scenario. This rate is then projected forward over the next 10 years, as illustrated by the right-hand diagonal arrow in the figure below. We convert the deforestation rate to carbon dioxide emissions using best estimates of carbon density. + +```{r, echo=FALSE, fig.align='center', fig.width=6} + +knitr::include_graphics(diagram_path) + +``` + + +Making predictions about future deforestation is challenging, and there are multiple sources of uncertainty at play. These include: the quantification of carbon, the choice of matching pixels, the effect of leakage and impermanence, future political changes and market forces. We are constantly improving our method in order to minimise these uncertainties. Due to the inherent uncertainty associated with ex-ante (before-the-fact) predictions, carbon credits should only ever be quantified and issued ex-post (after the fact). + +More information about 4C's approach to impact evaluation can be found below. + +[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1) + +[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence) + +[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae) + +[Our paper on the social value of impermanent carbon credits](https://www.nature.com/articles/s41558-023-01815-0) + +[The PACT methodology for ex-post evaluations](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f) + +\ + + +# Methods + +The following sections will detail how we arrived at a forecast of future deforestation and the potential to generate additionality by reducing this deforestation. This includes the location and quality of the matched points, the deforestation rates in each set of points, and the carbon density values used to convert these deforestation rates to carbon emissions reductions. + +\ + +### Location of matched points + +We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. We used these matched points to make a prediction of the counterfactual scenario for deforestation. + +Below we show the location of the matching points (shown in blue) relative to the project (shown in red), both at the country and project scale. + +`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}` + +```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE} + +# downsample no. of points by 90% + +if(nrow(data) > 20000){ + data_forplot <- data %>% sample_frac(0.1) +} else { + data_forplot <- data +} + +# plot location of matching points + +country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large") + +# transform crs + +shapefile <- st_transform(shapefile, st_crs(country_map)) + +ggplot(data=country_map) + + geom_sf(colour=NA,fill='grey80')+ + geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + + scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+ + coord_sf()+ + theme_void()+ + annotation_scale(text_cex=1.5,location='bl')+ + theme(legend.title = element_blank(), + text=element_text(size=16), + legend.position='none') + +xmin <- filter(data, type=='Project') %>% select(lng) %>% min() +xmax <- filter(data, type=='Project') %>% select(lng) %>% max() +ymin <- filter(data, type=='Project') %>% select(lat) %>% min() +ymax <- filter(data, type=='Project') %>% select(lat) %>% max() + +ggplot(data=country_map) + + geom_sf(colour=NA,fill='grey80')+ + geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + + scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+ + coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+ + theme_void()+ + annotation_scale(text_cex=1.5,location='bl')+ + theme(legend.title = element_blank(), + text=element_text(size=16), + legend.position='none') + +``` + +### Quality of matches + +Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the matched points (shown in blue) indicates that the the matched points are composed of places that are similar to the project in terms of the drivers of deforestation and are expected to exhibit similar deforestation trends. + +- Inaccessibility (motorized travel time to healthcare, minutes) + +- Slope ($^\circ$) + +- Elevation (meters) + +- Forest cover at t0 (start year, %) + +- Deforestation at t0 (%) + +- Forest cover at t-5 (5 years prior to start year, %) + +- Deforestation at t-5 (%) + +- Forest cover at t-10 (10 years prior to start year, %) + +- Deforestation at t-10 (%) + +Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset. + +More information about the datasets we use can be found below: + +[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/) + +[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm) + +[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF) + +\ + +```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE} + +# plot matches + +source(file.path(script_path,'plot_matchingvars.R')) + +plot_matching_variables(data,ex_ante='true') + +``` + +\ + +### Standardised mean differences + +We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). The SMD allows us to quantify the similarity between the project and the matched points in a way that is comparable across variables. + +In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and matched points, in standard deviations) for each variable. Values further from zero indicate a larger difference. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our SMDs would ideally fall in order for the project and matched points to be considered well-matched. + +\ + +```{r smd} + +std_mean_diff <- function(pairs_path) { + + # clean data + + files_full_raw <- list.files(pairs_path, + pattern='*.parquet',full.names=T,recursive=F) + files_full <- files_full_raw[!grepl('matchless',files_full_raw)] + files_short_raw <- list.files(path=pairs_path, + pattern='*.parquet',full.names=F,recursive=F) + files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + + # initialise dfs + + vars <- c(colnames(read_parquet(files_full[1])),'pair') + df <- data.frame(matrix(ncol=length(vars),nrow=0)) %>% + setNames(vars) %>% + mutate(k_trt=as.factor(k_trt), + s_trt=as.factor(s_trt)) + + for(j in 1:length(files_full)){ + + # read in all parquet files for a given project + + f <- data.frame(read_parquet(files_full[j])) %>% + mutate(k_trt=as.factor(k_trt), + s_trt=as.factor(s_trt)) + + # append data to bottom of df + + df <- bind_rows(df,f) + + } + + # calculate smd + + smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) + + variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + + for (var in variables) { + k_var <- df[[paste0("k_", var)]] + s_var <- df[[paste0("s_", var)]] + + k_mean <- mean(k_var, na.rm = TRUE) + s_mean <- mean(s_var, na.rm = TRUE) + k_sd <- sd(k_var, na.rm = TRUE) + s_sd <- sd(s_var, na.rm = TRUE) + + pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) + smd <- (k_mean - s_mean) / pooled_sd + + smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) + } + + return(smd_results) +} + +results <- std_mean_diff(pairs_path) + +# changing sign for interpretation + +results$smd <- (-1)*results$smd + +# changing order of variables + +variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + +results$variable <- factor(results$variable, levels=variables) + +# plotting + + ggplot(results,aes(x=smd,y=variable))+ + #geom_boxplot(outlier.shape=NA,colour='blue')+ + geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+ + geom_vline(xintercept=0)+ + geom_vline(xintercept=0.25,lty=2,colour='grey30')+ + geom_vline(xintercept=-0.25,lty=2,colour='grey30')+ + scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")), + bquote(Deforestation~t[-5]~("%")), + bquote(Deforestation~t[0]~("%")), + bquote(Forest~cover~t[-10]~("%")), + bquote(Forest~cover~t[-5]~("%")), + bquote(Forest~cover~t[0]~("%")), + 'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+ + xlab('Standardised mean difference')+ + xlim(-1,1)+ + theme_classic()+ + theme(axis.title.y=element_blank(), + legend.title=element_blank(), + legend.box.background=element_rect(), + legend.position='none', + text=element_text(size=14), + axis.text.y=element_text(size=14)) + + +``` + +\ + +### Deforestation within the project + +Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes: + +- Undisturbed forest to degraded forest + +- Degraded forest to deforested land + +- Undisturbed forest to deforested land + +- Undisturbed land to reforested land (which indicates that regrowth occurred after a deforestation event) + +\ + +These transitions are shown in the plot below for the 10-year period between `r start_year-10` and `r start_year`. They are overlaid on the project area which is shown in grey. If a transition is not shown, it did not occur in the period examined. + +Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF). + +\ + +```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE} + +# plot deforestation within project + +source(file.path(script_path,'plot_transitions.R')) + +proj_coords <- data %>% + filter(type=='Project') %>% + select(lat,lng) + +proj_input_defplot <- data %>% + filter(type=='Project') %>% + select(contains('luc')) %>% + setNames(paste0("luc_", (start_year-10):start_year)) %>% + cbind(proj_coords) + +proj_input_defplot <- proj_input_defplot[, !is.na(colnames(proj_input_defplot))] + +plot_transitions(data=proj_input_defplot,t0=start_year-10,period_length=10,shapefile=shapefile) + +``` + +\ + +### Land cover changes within project and matched pixels + +In the below plots, we show the changes in land classes over time for both the project (red) and the matched points (blue). + +Note the following: + +- The vertical grey dashed line represents the start year of the project (`r start_year`). The timings shown on the x-axis are relative to this start year. + +- As explained in the 'Methods' section, the matched points are offset in time relative to the project by 10 years. This means that all changes observed in the matched points happened 10 years prior to the equivalent time point in the project. This time offset allows us to use the last 10 years in the matched points as a prediction of the next 10 years for the project. + +- Solid lines represent ex-post observed changes, whereas the dotted line represents the prediction for the future of the project. + +```{r make_inputs, echo=FALSE} + +# preparing inputs + +proj_input <- data %>% + filter(type=='Project') %>% + select(contains('luc')) %>% + setNames(paste0("luc_", (start_year-10):start_year)) +proj_input <- proj_input[, !is.na(colnames(proj_input))] + + +cf_input <- data %>% + filter(type=='Counterfactual') %>% + select(contains('luc')) %>% + setNames(paste0("luc_", (start_year-20):(start_year))) %>% + select(where(~ all(!is.na(.)))) + +``` + +```{r luc_timeseries_all, echo=FALSE} + +source(file.path(script_path,'land_cover_timeseries.R')) + +# getting results + +proj_results <- get_luc_timeseries(proj_input,t0=start_year-10,tend=start_year,type='single') %>% + mutate(type='Project') + +cf_results <- get_luc_timeseries(cf_input,t0=start_year-20,tend=start_year,type='single') %>% + mutate(type='Counterfactual') + +# combining results + +results <- bind_rows(proj_results, cf_results) + +``` + +Showing the trend for undisturbed, degraded, deforested and regrowth in turn: + +```{r undisturbed_timeseries, fig.width=8,fig.height=13} + +# add prediction from the matched pixels: + +prediction <- cf_results %>% + filter(year >= (start_year-10)) %>% + mutate(type='Project', + year=year+10) + +results <- bind_rows(results,prediction) + +# make a custom function for plotting the results + +plot_timeseries <- function(luc_value, title_str) { + + #remove gap between solid and dotted project line + percent_val <- results %>% + filter(year == start_year + & type == "Project" + & luc == luc_value) %>% + pull(percentage) + + # df wrangling + extended_results <- results %>% + mutate( + luc = as.numeric(luc), + year = as.numeric(year), + line_type = ifelse(type == "Project" & year > start_year, "dotted", "solid"), + type = case_when( + type == "Counterfactual" ~ "Matched points", + TRUE ~ type + ) + ) %>% + bind_rows(data.frame( + year = start_year, + luc = luc_value, + percentage = percent_val, + type = 'Project', + line_type = 'dotted' + )) + + extended_results %>% + filter(luc == luc_value) %>% + ggplot(aes(x = year, y = percentage, color = type, linetype = line_type)) + + geom_line(linewidth = 1.5) + + geom_vline(xintercept = start_year, linetype = 2, color = 'grey30') + + #geom_vline(xintercept = start_year-10, linetype = 2, color = 'grey30') + + scale_colour_manual(name = 'Location', + values = c('red','blue'), + breaks = c('Project', 'Matched points'), + labels = c('Project', 'Matched points'))+ + xlab('Year') + + ylab('% cover') + + ggtitle(title_str) + + guides(linetype = "none") + + theme_classic() + + scale_linetype_manual(values = c("solid" = "solid", "dotted" = "dotted"))+ + facet_wrap(~type)+ + xlim(start_year-20,start_year+10) + +} + +plot_1 <- plot_timeseries(luc_value=1, title_str='Undisturbed forest') + theme(legend.position='none',axis.title.x = element_blank()) +plot_2 <- plot_timeseries(luc_value=2, title_str='Degraded forest') + theme(legend.position='none', axis.title.x = element_blank()) +plot_3 <- plot_timeseries(luc_value=3, title_str='Deforested land') + theme(legend.position='none', axis.title.x = element_blank()) +plot_4 <- plot_timeseries(luc_value=4, title_str='Regrowth') + theme(legend.position='none', axis.title.x = element_text(size=14)) + +plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(ncol=1) + +``` + +### Deforestation rates in the matched points during the baseline period + +```{r proportions_undisturbed_degraded, echo=FALSE} + +# obtaining the area of undisturbed and degraded forest at t0, for use later + +source(file.path(script_path,'def_rate.R')) + +prop_und <- get_prop_class(data=proj_input,t0=start_year-10,class=1) +prop_deg <- get_prop_class(data=proj_input,t0=start_year-10,class=2) + +``` + +Here we present the deforestation rates observed in the matched pixels over the past 10 years (the baseline period). + +Forest loss transitions can be broken down into the following processes: + +- degradation of undisturbed forest + +- deforestation of undisturbed forest + +- deforestation of degraded forest + +- regrowth of undisturbed forest (implies previous deforestation) + +We calculate the rate at which these processes occur in the matched pixels using the following method: + +1. Calculate the percentage of matched pixels which have undergone one of the above processes (according to the JRC classification) during the baseline period. +2. Divide this percentage by 10 to give an annual rate of change, in %/year, relative to the amount of (undisturbed or degraded) forest present at the beginning of the project. +3. Multiply this rate by the area of the (undisturbed or degraded) forest 10 years prior to the project start to give an annual rate, in hectares per year. + +The amounts of forest in the project area 10 years prior to project start are as follows: + +- Undisturbed forest: `r format(100*prop_und, big.mark = ",", scientific = FALSE, digits = 3)`% + +- Degraded forest: `r format(100*prop_deg, big.mark = ",", scientific = FALSE, digits = 3)`% + +The rates are given below. + +```{r rate_of_forest_loss_ha, echo=FALSE} + +source(file.path(script_path,'def_rate.R')) + +df_rate_percent <- def_rate_single(data=cf_input,t0=start_year-10,period_length=10) + +df_rate_ha <- df_rate_percent + +df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3]/100)*project_area_ha*prop_und + +df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3]/100)*project_area_ha*prop_deg + +knitr::kable( + df_rate_ha %>% + rename('Rate (ha/year)' = 3) %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) +) + + +``` + +\ + +### Carbon stock changes in the matched points during the baseline period + +Here we present the carbon density calculations for this project. + +In order to convert land cover changes to carbon emissions, we use regional aboveground (AGB) carbon density values generated through NASA GEDI data. + +More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi). + +Note that, in calculating carbon stock changes, we assume the following: + +- Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997) + +- Deadwod biomass is 11% of AGB (based on IPCC 2003) + +- Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011) + + +\ +```{r additionality_forecast} + +baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8)) +colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea') +luc_counter <- 1 +row_counter <- 1 + +carbon_density <- filter(carbon_density, land.use.class %in% c(1:6)) + +for(i in carbon_density$land.use.class){ + + for(j in c("Start","End")) { + + # get agb + + agb <- carbon_density$carbon.density[luc_counter] + + # get other values + + bgb <- agb*0.2 + dw <- agb*0.11 + total <- agb + bgb + dw + #total_co2 <- total*0.47 # we're doing this step later + + # get area of class i + + if (j == "Start") { + area_of_forest <- get_prop_class(cf_input,t0=start_year-10,class=i)*project_area_ha + } else if (j == "End") { + area_of_forest <- get_prop_class(cf_input,t0=start_year,class=i)*project_area_ha } + + # multiply total by area + + total_byarea <- total*area_of_forest + + # adding to df + + baseline_stocks[row_counter,1] <- j + baseline_stocks[row_counter,2] <- i + baseline_stocks[row_counter,3] <- agb + baseline_stocks[row_counter,4] <- bgb + baseline_stocks[row_counter,5] <- dw + baseline_stocks[row_counter,6] <- total + baseline_stocks[row_counter,7] <- area_of_forest + baseline_stocks[row_counter,8] <- total_byarea + + row_counter <- row_counter+1 + + } + + # advance counter + + luc_counter <- luc_counter + 1 + +} + +# formatting bits + +baseline_stocks_format <- baseline_stocks +baseline_stocks_format <- baseline_stocks_format %>% filter(time == 'Start') +baseline_stocks_format <- baseline_stocks_format[2:6] + +colnames(baseline_stocks_format) <- c( + 'Land use class', + 'AGB density (t C / ha)', + 'BGB density (t C / ha)', + 'Deadwood biomass density (t C / ha)', + 'Total biomass density (t C / ha)', + 'Total biomass (t C)') + + +# renaming classes + +baseline_stocks_format <- baseline_stocks_format %>% + mutate(`Land use class` = case_when( + `Land use class` == "1" ~ 'Undisturbed', + `Land use class` == "2" ~ 'Degraded', + `Land use class` == "3" ~ 'Deforested', + `Land use class` == "4" ~ 'Reforested', + `Land use class` == "5" ~ 'Water', + `Land use class` == "6" ~ 'Other', + TRUE ~ as.character(`Land use class`) # ensure no unexpected values + )) + + +baseline_stocks_format[2:6] <- lapply(baseline_stocks_format[, 2:6], function(x) { + if (is.numeric(x)) comma(x) else x +}) + +# Print only carbon calculations at this stage + +baseline_stocks_format %>% + drop_na() %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) %>% + kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>% + kable_styling(bootstrap_options = "striped") + +``` + +# Results: baseline rate of carbon emissions + +In this section we present the annual rate of carbon loss due to deforestation in the matched points during the baseline period. We can take this to be a prediction of the counterfactual scenario for the project (the *baseline*). + +First we present the carbon stock changes observed in the matched points during the baseline period: + +```{r results} + +baseline_stock_changes <- baseline_stocks[c(1:2,7:8)] + +# reshape + +reshaped_data <- baseline_stock_changes %>% + mutate(luc = as.character(luc)) %>% + group_by(luc) %>% + summarize( + area_start = area[time == "Start"], + area_end = area[time == "End"], + area_diff = area_start - area_end, + c_start = total_byarea[time == "Start"], + c_end = total_byarea[time == "End"], + c_diff = c_start - c_end, + .groups = 'drop' + ) + +# get totals + +total_row <- reshaped_data %>% + summarize( + luc = "Total", + area_start = sum(area_start, na.rm = TRUE), + area_end = sum(area_end, na.rm = TRUE), + area_diff = sum(area_diff, na.rm = TRUE), + c_start = sum(c_start, na.rm = TRUE), + c_end = sum(c_end, na.rm = TRUE), + c_diff = sum(c_diff, na.rm = TRUE) + ) %>% + mutate(luc = as.character(luc)) + +baseline_stock_changes <- bind_rows(reshaped_data, total_row) + +# add in conversion to CO2 + +baseline_stock_changes <- baseline_stock_changes %>% + mutate(co2_diff = 0.47*c_diff) + +# formatting bits + +baseline_stock_changes_format <- baseline_stock_changes %>% + mutate(across(where(is.numeric), ~ comma(.))) %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", as.numeric(.)))) + +if (knitr::is_html_output()) { + colnames(baseline_stock_changes_format) <- c( + 'Land use class', + 'Area at start (ha)', + 'Area at end (ha)', + 'Area loss (ha)', + 'Biomass at start (t)', + 'Biomass at end (t)', + 'Biomass loss (t)' + 'CO2 loss (t)') +} else if (knitr::is_latex_output()) { + colnames(baseline_stock_changes_format) <- c( + 'Land use class', + 'Area at start (ha)', + 'Area at end (ha)', + 'Area loss (ha)', + 'Biomass at start (t)', + 'Biomass at end (t)', + 'Biomass loss (t)' + 'CO$_{2}$ loss (t)') +} + +baseline_stock_changes_format <- baseline_stock_changes_format %>% + mutate(`Land use class` = case_when( + `Land use class` == "1" ~ 'Undisturbed', + `Land use class` == "2" ~ 'Degraded', + `Land use class` == "3" ~ 'Deforested', + `Land use class` == "4" ~ 'Reforested', + `Land use class` == "5" ~ 'Water', + `Land use class` == "6" ~ 'Other', + TRUE ~ as.character(`Land use class`) # ensure no unexpected values + )) + +baseline_stock_changes_format[nrow(baseline_stock_changes_format), 1] <- 'Total' + +filtered_data <- baseline_stock_changes_format %>% + drop_na() %>% + mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) + +last_row_index <- nrow(filtered_data) + +filtered_data %>% + kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>% + kable_styling(bootstrap_options = "striped") %>% + row_spec(last_row_index, bold = TRUE) + +``` + +```{r results_summary} + +# find the difference + +delta_c <- as.numeric(baseline_stock_changes[nrow(baseline_stock_changes), ncol(baseline_stock_changes)]) +delta_c_annual <- delta_c/10 + +``` + +To calculate the baseline annual rate of carbon emissions, we sum the the differences in carbon stocks between the start and end of the baseline period, then divide the total by the length of the baseline period (10 years). + +**For this project, the baseline annual rate of carbon emissions, in tonnes of carbon dioxide per year, is `r format(delta_c_annual, big.mark = ",", scientific = FALSE, digits = 3)`.** This should be interpreted as the maximum estimated number of carbon credits that could be generated if the project mitigated 100% of the baseline level of deforestation, assuming this is confirmed by ex post observations. We present alternative mitigation scenarios below. + +### Expected additionality under different mitigation scenarios + +Additionality depends not only on baseline deforestation rate but the ability of the project to mitigate that deforestation. Therefore, we present the expected additionality under different mitigation scenarios (100% to 10% mitigation success). The 100% mitigation scenario occurs where the project is able to mitigate all of the deforestation that would have occurred under the counterfactual scenario. This scenario is unlikely to be realistic, but gives a sense of the deforestation risk faced by each area. It is more likely that only a proportion of this baseline deforestation is mitigated - for example, under the 50% mitigation scenario, the project is able to reduce the deforestation rate by half. The higher the proportion of the baseline deforestation that is mitigated, the greater the additionality of a project. + +Note that we present the raw additionality, without accounting for leakage and impermenance (discussed later). + +We are in the process of producing confidence intervals that reflect the uncertainty associated with the baseline, which will be added to future revisions of this document. + +```{r} + +scenarios <- data.frame(matrix(ncol=2,nrow=5)) +scenarios[1] <- c("10%","25%","50%","75%","100%") +scenarios[2] <- delta_c_annual*c(0.1,0.25,0.5,0.75,1) + +if (knitr::is_html_output()) { + colnames(scenarios) <- c('Scenario', + 'Additionality (t CO2 / year)') +} else if (knitr::is_latex_output()) { + colnames(scenarios) <- c('Scenario', + 'Additionality (t CO$_{2}$ / year)') +} + +scenarios <- scenarios %>% + mutate(across(where(is.numeric), comma)) + +knitr::kable( + scenarios +) + +``` + +\ + +# Accounting for leakage and impermanence + +Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**. + +**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage is likely to be lower if the processes leading deforestation and degradation do not result in high yielding land uses, or if the carbon densities within the project are high compared with those in other areas where these activities are taking place. Leakage can also be reduced by interventions which improve yields in areas already under production. We can provide guidance on how this could be achieved. + +**Impermanence** occurs when the additionality generated by a project is reversed. Additional carbon stocks in forests are inherently vulnerable to these reversals. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. In future revisions of this document we aim to include indicative estimates of the equivalent permanence (the relative value of a impermanent credit relative to a permanent credit) for this project. + +You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence). + +--- + +### Reproducibility + +This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation). diff --git a/evaluations/methods_diagram.png b/evaluations/methods_diagram.png new file mode 100644 index 0000000000000000000000000000000000000000..0e544b378e8f6fab3384fcbb5afd0cea6a64b5cb GIT binary patch literal 37672 zcmb5WXIN8R*DV}6f*=Zr5Slj%3J4+rq=P6Z5Cmxg0-_K?7ij`grPwGI5Q20Fp^FK< zNw*{rx}g`P_g<54<$j*`yx))W=lt-}Wbf>?_9}CZImTGNGBnU(W8q_gKp<>)Zr{8I zfzZJq5ZYO0M(~YoDeVpL7me3Fog0vnPX2lDf!O|Y;y$Bf#5FJf=-W$;f2JW}p{Mz? z)uwAhObby93!}63f#&_>;F7`;8`GebG2-^{uH~-tx>FPl=$8iVacbZR@DIoCKLMG)5h-fNg$USQ z_*+juwQwqA?}gh)RkyO*!JLR6_si&svV1y7{BwF#M6#@7?DBZyxTt>8L5qg>EKjWQA9g@^)~0%C-d0HHaOb(EKS z*K=@};V_EVIu8sx1oE0YOq2%ajz$x+zHp|#rcM(VD=i#<`8x({;RahS%F1d^rM_?5 z3k^=Mf(8tW2ptnUZlKsI@Nu{QLwu5qd&HYy)V}}GNNP?W+ufv8)nBfQf_^lw3{S?v zC&7y%4d_wlj737%vMBk+c{T4mn!*{}?QJ5_nQXM)y`E2Jv}_JSYYkr<4(@s6rIfW3 z;e-}xx#b!3`ScJSQOdIFgu%UqyCE`%znE4oM2B}o1lxM%mP!YFR%$uL(ik*cZ*+2Q zv8PM6=$v56J$&3)(6Xv4v; z=XN9Mk?$c3#lh?M)wZP{A)aE2Iq7Z2jb365J9DPwT~cmpt)cx;1*~>LB04rrfM7G^ z?7N)xEE0uzt|~u`{w@#?avRk%EXMQYGCq+TKR9&x@e|ARtFG1StHOlDTc z*%xNuj}u5tLbV_MK|4luH4ZL?m9NmUEF|e?R3A=hb%Yxs#+Tf7)P<6-*eQZJx|Q3O z$Lsz!$?)p@NVV7BAzo8EG(2S{W*FyW>>o7V5M=YayANmA66HT=KV0FKa6V7I=8o-- zGKF_#BG;XrE&zS_nsR;XfOFV9#MeS-x(z?PtU8xY(Q?63N*RU$%+$($J3&S03 z5g{uFN?l5doHnwyUTZ-SA!N34_fe(8xokYsuessMQUTAKGtsFc-Qym@M}h3>vDuAx zkH<{AtN$Jq#HGv)&pSRnIAN-uI5*x#=YS}67?au)a12xqO6@Mq&5wht&S!}6hL2mp z$MsCsp7|drVXe}43N6kYrMy-{_mcM>)b0>vIq4YfgCUU6o7p!wwI;P8ETRR?eTX?? zrrEuFQ=Qio&at_2tR;k8UHnw--xG7=SxslE#4+@hC#A#sWmQFAh|nm0o>8mAQC8vp zZpn^^@6Ew2eDz#zrw<)Bv4$z=?fdED!TFinE+@USxt_oMW_wLRURKt&x2*UGsxhiE zAEl>&+%_VMHJ<5Td_z85L>MU8`KeBi32Tj0`tpuSCrj`#$?TEmMos2j@MjUt!Q?Db ziZVglzF}~YF%zq&Sr&@gi}do~Xcm!a zAs>&AnHd~G%Bm&XvOQhm8I`@b3P17`CH6&S?`mF2IkG&z>#^~cTwv3AFR#+B;?l`t zYXs$Jl_XJQG#>OjB!AV2GJr$tjQj142g#_Yz333NV}$gCX>iJRw;f{MOlOK(Kn@>j8yH7pD3fcT=RNFg80}o1pwT4&)8Pxa5)-_=X+v`H( z>*MCp;>sJ&Gl**$^SS{8$Bv&4ZS&aSjy+vyHJ4Z3FVK$T+d*>{jzRqOrD+N5m$v4j zOczgX$saUqD{f)&71}zP$o2l{U~SSI!ngaNK*&70y#_tVpio!i-_M5JbKBltaQwi< zQW?Ogem(qQQ9P8)tGXFT+4VoRhz=g;mEQ7b*xq9g7PTDDtvozX7YtBV)#lpTzJBWH zSpwx=qxmsDys~%t=48!fm!gFLS4N7cn0*6J(5Zuw1HLVf8(~&)g*8XU=9IFtN1fYc zTS3u6!zyYtkULr#uXrQhWhiC;d=q zFf-vEHfs}D>%7}mpVr0dqf2w5&P^W2&~|8a zm3g0rf|f%vR9RKIL~-+Ybz_jZ&zysU>mi>+m5iI@Xig-<>}1NL81AixXK!nX8q3!d zqO(#aCKxRhCQt*MYT>%ZPhJdh4DTFBtB2qy*Oo9M$Db@`h0E*R?p<<~*kX(xh^EqRp-B=DMcZ&|=Coy0rL`ydr3|+GyjV z^7P`t?~~;Eo9dP9C!*7%3|}w=-EE%cgv8&ZM43qvlx#5 zH0ZP4g|bn)XLEPW3+;+Y#J+OM z&}{VKoOV+8Rch(G&0{YVR(9-W7D!B%@$|gM6@ADTy`ic$n%C$^MA<(9^7S0BdVNu~Ng>NiE5)@)`-@I-fio8B&|inw1(&PNXWZ;F zbQ6`F=e6&f+#wROOCs~iUyUg!aThd1dG8k9M*YKGzQIQEX}P=E)n5$H@+d6T8LfD~ zWbUyq9T@T>BtQQ{cfJ*=;jY{%BCcTWAYS+;eQ(iLQeVegMxNqB;;K{tXWEr>Ff%yC zX6ty)%cG!!54Cn6+`XH*Dz>NQ|C|=`{-wqX?fs!wWIdmoJ@EJLZ((veZwj)*83q@& zaw}Qg`L^||(`W|a@5juAblmJ5RaeI+=zGUey%WYqK3qdLjZL#A49hWUU7>{{i32us zf!)I{3#bA4h{jJDZsj&SM@w7y%E8!5t7&n~n$z2sdn;;N`##7bmw?nU?XC4~|7MTE z^jb@bb7Sg-m%fUAloBw{2yk zb8O;q&UPhUs@mHJ6U%}}F(KBy1GGx!SBZ<+lNmBwa`emnFIIJI3k7u!hWfu{NF6Bk zpRrhlk}G#gXR8#xQ!({zK3X=uTMBP2T{562uVf9xyc_7jd_A2D{rIq{JdNShYDH;9 z5vsJb9=7Gl<=e--)Nnf5{Ryg9bnuR@?S}!&Ip&@2?f{n2&jZ@VYbK|OG1+4_TV5>g zFK?+U!$VkhCWu`(4_)TGw3Tb+BHYp2)v#LzLgqSON_F?Nt+xE@(gSDEMjm;pYB=pn z`PQA@n4m8Wd25zu%GKF#ga&uAj94ym7nA2@_>K&=YI}#BgOUW_Om^S|KPV0Q)Hk3j zIm=(0>A+Ok!`pV?xjR1&1h=1c&kn2_*m?!!-CS5UvF<-Cco){3UJ^+uUf-jKT!6p1 zTTUzRsvF?LpB0^lJIgLT0lX%D3vcl;b~B9EhhtL2Ll4;RrEG?(~oUiYVX zG|X!w#8SQALo*Fc>`bl`xpdd|=tFUtH|Qez`VQoiuH`ru8&$?Y6^5eNpmo#PaZi8$ zI`=*7e@YTQ;hWs&in*BwF2N9~w4gRXoeE=QP7ZSFmt#@Fp{Mp9J3KRB2tnA|8MOkt<>0R`p2es2VG-0B4bsC$tf%pJ; zS=UZ7Qt0`}mw&7{e7%v90&0#sxfO`|#y>*TM(=ST&!CMzmF#qce0JP91U7CG%`;4SE*V0dJAD+72l~wDqR3KKs;|`+)0uS z-Zu%%qm&ZZ)jvv^+~NJKs2i&=ObH`(HX>qo@#H-o>~bJpz9A491UE*$Mxk@ImyiSb zZZpW)XnuzD&~|!imtZ`!@aAVONHzS0*Y4)O)#HvROHr(9vsZRXeqoCfVjH#HXd779e&RU1;(ii9Ih`0uN})juMG*!c_hSVa?fQprAt<# zk+f87ML^q3+~y!2z8~C{xW#SjPuN}?*%S_SSOx?$(iHyq+)UdNuUL< zy(P0aaDKO+3$9md-$1HP?#i9_0QYMIUHvO*2WoZ)Zf@?es=B%nU0vNe&-2LR7(R{& zy6=aS$2%LoA;qysuda$>yi$ey;hzu!ImzG3jn=Z_FR=!732bqnIVRiP!9T+ONl&A$ zKmN%J$zcTxWr_=lx-mKP?h_tSv<5wEpS>-2>cPt0NUg6NNDfCpHr(b(qiAblHm)6< z#12vjrS0~^-;5Vs4APzCS9q%Dzk$UYx^XiQ=utHP;<^t;r-gL!R4 zw6SgT3T0m5BK7?}MTo+_eP;qzDhUH)4+jqQRvx_!$GiPHeu&**^E(Z0x%Eop#qWE5 zSRM~p$~Ix?a16PSyi?4(pV1h{1}?t_p`r}!ivkrEC&)8M+i}$!8IrC49zKqz(I_^hW9ET{7!GwXJ=}tppLIF zH_lAk1!0ryI#B$g_FPsJO#~o<{=H(V8@sDTAWGJs;c{N_A4mni5awov-4-x)5o{cC z-OFFO@ty7NUK2IU%xh$5iaT9_&zq98V$f+$)@&(HKrFo=6vmj(9f2yd_2RkFx9|G< zL5P+CxcveT_#SO2yE`1*5{hwXlwI^mA$g^YF#N#-E=J;%u=}&;v>G)WNJFl^9{(oH zf?I0klDx8G1?l1@L43@Ur`!JT-@g^0eqP-JduPN{STMW;%?{DnIE^%fX{S#rup0J; zV%nfb&V&rWL>v{9$frgmK+7C1^n6_0RJ`9lus-BIFb}p2t8}U6){wPZSe_?vZ2?( z&*!)I*`h-pp(D5w_lCrISH(!Ne`)HCbcIOl^=6PK3@t4ev=JgYvECveNS;QXK@c-E zD-Z-ekGg~n~bh~N9qugH9rM``~l1PJMtJnnxREwLWM(klv0)x z1@(S<(^k{ycePtJ=|^%`vAYY41{g_<5G@-=rqiQ^m*``cpC0mo6fwl=h*QN>U@$Q` zAWXN3sqleyp<>{14do2@na^W3c=MMi(S|XQ@utk1P`X-&irr1`xHD`D=+1GAHdqEd#E_!I1PH7k(=W)E5zQmn&B5wamHV6xb zvb1c5!p_S@PA)8prIZvBNvzg9;=%S+7iZ8xXTXR)(ik1UY0v&7uY_kiIXymG!wD{! zP%lhmVDB2)o%);HB@E$pQrvqgvfX=XDk?ZB_79?ID&?Z==p@vXWz=ZZ1eg} zjo?~K2f&x#bU^F6#$E2mQOcgU21N7p3O)E5+}a7-E;{5Hd4*l8)K>+WnwbL`dd^s4 zmPu;YHp;1m5VJ~cDD)p~@PHCq9&A2O4X4hP)M!HS&=4T#GR~bwhSFEuf+iu8dWOo1PqnVMYPi=k~EIBHrNNe8nvA=TO%E1>*;UKq3$fWcs%WH&nv{#;rw zEG*5tJp4$0!X29$;=}XkVM`QSh3~TE>h))98MQ2}T5G@iXdt)MzyzGq`y^Ga`GS!| zK0X{xXr4R17!ra)Vn}tpMP18eJ`nw;58(S5eYFxQnxAAvCG$- z1yGIE*yg$M{-ri3LFsv?^^?AB{Y=CcAw}I^4TjS%Ruu;fxiNCnes2m+{OsolM za$RNv8?cJCQ;gywzd?ibZ}g=$e($vg9W1Q*?Tvpq>4cfV@_N$3YDxnCs?>EBx#?g# z-&x&%9~{XH(@?G39Sd~-IGQB$xXuX~ndRhI(1+x+?1Ss!ORWAd@q>59ou@8pZJL4s z*YSo?YbP6Ljtt!xKE|7)l|$*_=xsUBrV|*Kr?-uXCi$294n&vlW+!$IvOfg@_QJ$} z=Lp=C14iZhyD&PYq4yW{TmegMic)|nM4|$9k_ZQgZMN(Eo&MiO5yQJR)YNhUtS%G= zV8{v(hgQ!ic_TGj^ndSRLHv9dUCY`zlZwb2cQRX#tpzv7Kcwf&cm-Ro$5(B8I+}&jg1ZOqpg}76>cSODrsgf zU}N1z7NxNjwvwLD%=9S&ze>)>4|Yq(?tRo&fDg z*eUQ|Z`Z$i__dbf>slp3H5mC{Bq0oNlZI*^ju^7gm+#I+5syKZqD(pQQj3dH5lg=v zefJwvwrdC;>v(ODPK8f^yE)$qs@CWxXs5Wer&=ni9|>Ts8jb3Uv6w?@hEfZ8q0a5r zErAdUUa|q1ghQ4(V9lne`QE%*qodL1X`~^2WcLBe6-SeY&Wd?XT>8D5V!dbF7zTlQ z(LzG6gH-_k*eVXs0Q+@N)_*DNS7D_uyRi<=p>Y$+%pa0XdHTzJR~IZ8S`a@J1oEL2)?ysC*f&t0#fS<<>k?j7Cb6abVg%_s;|u6H`^`d_=h!E-sE(k!JXsQ3 z>AtBjebhAQk`{aRzU8FCX+v2WIc_k^NOurhbU>$1aEewLZD1aG?fgLa*}7IZ1&B_J zIQw>7#lD{&h1zHOc1>1v)%q?MsDxHgC6amS^1M(Z4L?%|BYC@d_)c?RH&TuXiaJ|A z#suI{uY!ACEu0AHdR(W?vq^@3Lqj0z|1X1w@^{z2ZXeQVWK;k|uX z0EM`o;stmE#!1T-ujN$SbJNFlT7iT35eC2eMrlt$0rdMji0<)P1wB+QEHxLa%ZZLf zItypW$%AW=8lY>gM|U7ux zOGGiK90~ZHxpwIP@1sEwiDxS4p|9{5x8XA7Gs5r&7bttC1rS_kZAZaGYs7+Bs$!TR z%J2L-S68GmxP|RAWjDW(Mhsk21F=%&MCB8+1jKw#?uz7SjSpt5bKgAV2?*Mu{31k% z|2@1Zsxgip1rX8U+G9VEHR83vEX09Xc>VfyR({V-2^bR@Q325D|G8$8%c~lIQI6?r_!&6;FVkQ^ZjoMB2bHGt7!~1c&xpR{{AO)6HmS_ z)9sz~A~(S79IWtyl3w8TAc|mYQJQ5Xxz&%8EC6EVbZmYM>Z*Tpk0@BAgbNRHjVfiA z;U9bpdM@=vsG!gscOBh zW2ID-9Lh-Thvb7?i>fDf-6l>oK3AS&O(?59hXMcv;|)w?BC^|#YM%@zD?#MksQT&ipK_x zkFc|$wakC+yHN?-4e(Uj+kpuPF!4LhtkTaqnqDxC_G{r32*#90zKaTY_BTh;TR$iV zF3tvZZLUMc^w2*x{~HE{+iDU;mu5EcW^l|H4SXjg{zSkpJcu>akdt`ac}f|~f*r#t zn1h4EK-MkR)pOVEU)HkZ*xtGG%8&jvK!NUA4$ARafG)mw_N$h=WV0zn5mEtWjG zk-lULmLWS}O}L2R46GxVXN9mV-%hVaw~8G`^@+ZlnllxD5vgJ+qqv9?z%U3ihM_|! zWOw@=ylhw1X+wTYj|sQ8X0Objk4xN8qj*rlP)k#{=Y!S&r`)=HmK)_c-Cq5Uly&RP z2=gWO$#NI~09S^@;YkHBi3Z1tj+-$^<7C?I8Yefwio(sF#d zS>#o$aL4j7y?q{PNC(l7p$g|vo>owtxUEmMjqTO%j$r{P8EObRT#Dc{_)=nB8!!l+ z!u1+pd1Vn**sbKNcHx9H)nTZMSNr<3te@ZQNI=(^Gx9QgcIrj@PhSYw_#q~-n_dm_ zU6I4D#IftQFfxw^gl z+$arr0?J(gZ#Adq7|{ zD`SPRNCh2it;-LnP>wd;dUEcJoo>hs+#U6+cNuD&NqQVxw!b-BI|!NAOh9|as*Jey zJoKAFiWn3C_AbY`h?XEX_+Yx6_@FVkzV@ENl_WuB-=f_H4b({irH|rp5$l1jB9}nF zsUzU36k-(4Q9#_=|HbEGNN}88&}?B2=t+ zip8vbvsAbgKi;rBZog<)bz`#aA0qOKJ_my9;S3?gF>q&Tedp`mf_dYD^{GV?Hg8v{ zo`6r59W})&uh#YMti)`&GMsrU4xi4ahWM4%WHLx}kW09FCc+H)XDTedK-pv2SB&9n zPV>?ZH`!?J34^U9Eg@)kbXJ?pK5cFrR9@~jJYb!lJcgKReU*@@-P>hWZ231P3}%>s z=)-6!Yu(GuS2uMMW%Cs4Z1Gl{b zs1E|CqpGG}ON@M9+8(s^6A>1``4@!8T*%eU7UK!ej&Bbx4oH|uq!cymi@w%-AegRE z=7RLmG8HiO3&n&pp`I-MEe*QjwcE{H(6_ZRIt*<*SSTFR6TZTQC}|`I_#xz}Mw8d2 zK8pkXy*GTgHqb@_T`Phs2A}CdWT7qi`mD1m@;(rx&2dfA@$qT$edCUmfOet5TZ?8U z{U_#t_vXF{l2$<3*E>6^u!~DhINv1q1Qow0N?9*-8h-k9fU;iW_M0l&*AneJ8gZpC ztp@!GLmL_@5rdMVj&-!>$u zChiu)c&(cD*0kK;!&uV{Cl(ZqFadNfns1;7h9B{82#!kk5$gnLjq+}+;xk-KnjEKIB0c)Y_8_$JC1sz`pTaWdLM-pNHtD=~gsw~| zdDXxkNf#s_?Iw3vdJJM`17lE-`BAjc1wP$=si0$)2i>bOdt|Px1x{h^*IPdJhyG4O z=?%d_LI=SMo9kQhELWO#1=EzyJy!{8sA+x(XAwj;tVK#!y(2+A>L#_>uwZh2*1H%e zFT++Z#>-Eh?F733{TPZ3)T#Qw;Z~I|Cr3r}Bcgz}i}y2+HshT`S64970Gk8PHf*t{ zGqtbRPe}f7l-=)rreTiwJ8@N|hBGMPoF|(T9t9z)wkqvQtvlq?5i;Fd)e`|v_)~HN za-ac00+UZTg2o?;r_DKLeKE9Uk!1~D3_0F7ReUOFl2Auk2}b`D7kmwO%KN2n2k9Tzg~#Qu>W$mNx0FvZDISk)%Q4!fvmZ-t$)1ixMZIqOh&?TGlxJY-kgOik zpiok}7WJ7$d-k(etO8@tr3bnB7)*w4uf}xOHLJiE7C+WkZnW0fI))P@v8qfX3=oa6 z2O%?=jx#jPL~iM4$j=eS zQ{G~y5B!hUvI-g-Q3bA(!PhO7GL7gpMYd5=>ELk2?ZrX3Ga{ zHaoH1T(S)5Aj_LY@m5dADkIrLO^g!|ROQZ*%;gS|MXHDly6J=bt7}(gyLN&>7p+qpp|LEg|0};C3u` zw^;`KFi^8EE4NHTL)`w*e|{Jf6&td!ASExtfLl9AC2#NXWi>4?tJi8?;i6x7s(*=S zzj;XUeeu@D*87Fh*8Y2_{n5O%JI5Jv(t+yW>|)>JtW2O%-846r4GIw9~$lO?om=RwhcMX;HoMDO_@ zwn2~Gkw(~b1?63_B?~&p{u!`lBiG^9GC;K%kQ0TbIY7Fd?tRZcMj+!d>eXLb(DL*-_GB#z`4P#Po* z+mbhUO2xD1oi)nruE19SuS=5nfL&E9PVBTa&(23eLPuB?n33O6Zw{E{U-mJgIAt|w z#`?*Gssha#&r?FxiEhQZ4Y zy55Nh6HuG=)2#J;S<8_nN<+6IZzUvNN(Xr=G9;|AMQ?1JXO-XbyZp`3+8O$b3}U9i z;X$T-782Jddb|EnSuSqS$>dTYeoRR4e{b|fVJlwUL-{Uz^_rw!`vl5pua?(+Eq?M-CqgXc9^%?R)1w(U-^I zmHB)CzW-qj*NA2g!9QPFKU;g*(Yd!s{Ti5;O?kqDhUe9RY@rx~RHd>+(YP9)wG@Uq zha_=19&%sE3CPR5O5bYDb_+2c^e+pXskepdZiQlCBI$?mu@?Lcs^v~*qYw9Lm8L+S zjq;w7&&{Cqa|+*|iRoF*KDhiWLvjDl2e+|0e~Vg_Q!jqAOMN_6aps)#NM8j>)UXIp z0FkUbUlT<;a3zTC-NI--Ip!*RwJWJK!X?eDJ+*?1jO7@oyu+agZKn0QSF(nbztL8+Rwz2oTw7cXEuB9gCqS#-9)ViMZXCQzD!b zPFvN;oM6gKFqCFs*13QU3u zf11{3B}ePHbh!+gn{?8ZkAf#dGayFBNk0^h`TNFdO43v2+!k15?+xxs9o!$sA2qB% z{V_p)K^OlaMx^?U`TJ6icd3!@6JHCP;agn0zD{U4Y+rV}8Q*pI%x_BasMxli2GSbs zj$4IJsxJ!J7-y$AMlzO)%a%bq+ly0c>(Z^98%>DNa2SwMpjM2tdwxgYh>YY-rPTpB5!Q7v!1=ik4o(|E^X?zFQy zZu>=?09YZ3INUvjPx(AmoI1syKbci=wweJ;DNROFs~QpWf=Zz{v&qz`dClmT+J&OC zY1$;~61Cy9EMtO9(5Ar3A83H|fNA>+gi{L9lK zMb@>xV~;4vm)$kI#p&HKA5aYXpRd1LI2oVm9?}GpHFfF}hU|O(nM8bu0{y6!P`?*P z_zu`HcaSXYS!EU;oXy?A=jP_x7(+}SAMCEK+RzDKbs3OUxRxU8$S%;efm|B=YT{&O zVr>QJmyNuMFzv-h3oJ7Afx8v{n=_;|)j)BrQ=urMTqiYB0nAd*%P}DE17OP~0Ee~O zaOgh3k?kV`P{7VWTiwP_3E%C0^Spo-ec{1o7blZkP4WZf?;dA3(B&}bxDS;-@OV@n zC3>GH``7^|rJdsRRf-Y$8BiYP`8{|l6C<+QhxQ*(7=4x94iOQ0xB#@T{j+a0IE&u+ zFBVtU`N#~ZaBB2|TV79#|8yQdOl)@lb>t^^=uf3Fo{Ia1%}_z-jW;M-%pz&Ia@@U3WmEre zCL`?!QTW&YO$-nW;B2)zut$7*4%pi0+7Iz3;!Xnz^LzAt20l}JVCMMhtNKe@IbLF9 zYreN*xPwu{cT$`Xae{M%dH)(&vT@Vm;|;=J%8Ew=;I5x$Q;qr$3|V9gzTadMRQVm~ zKa=bjkV5Ayj#A3VPxVULDX^%1>aZ|4iDbL^L`y07Xipq^iMIS-Uq53C>siS)afSV? zkT(I%nlMunfs=2#t89}9hvmwz=92c`d8-bjM)2>YkkaT@E@WFvgjgGlcOs5!5#-WE zlOSCvpEevxOCqlTI{M<-kh9$PN~&l0{AN})v(>Nvw5*Zok@1|mSxSd$*UU&(^g;)o z!fr1P4#$NYJ@#W1ek~5)tX1$TRXH;8J5wg|VRzst^Pr3z2lOpLCGUo3FG$qG9BKQ~ zt3YUh2mYj%;B+()!#|IVGP|2YiW5#RYk7fHFi_i*C8zL$k*(ml{C!O^Lg1ibrsiX( zWBpQ_dQ`D4UHVS8(I?sB^dEoIV9pv(mNl>;y!HctC;10(oRez2`{K)?3tVdh|rTFVlk%zPZ%cJVs|c`Law57yL} z=I-xIgw7vzqcf@hzP0dKO2l;%jP## zDt{E|h~_yfp$1j~1Lq6CZzISyqnJtDyl7}mzAop=GwlRXf0JCJer@g)ATPDWXPjW%WgzsU`igFc&h+PG-( zaY$o#FO$^60_25IuGhCBwNoCwu1QS*=yWuQ zV`%!MRF**{xKZxnU>1-|Y8jWIs`oPXW2FRx#@^K=(0x?vqsMUGYxcJje6F&HeXD_j?&)S5h!Zo0* z=0Y>8fwjX=CGfIdua1v}@hooLv6}VPOR9-T&y_Z2=e-fef=WsDT%|_ma?xdA3%j_i zm(Auz7m)RE;#=5C(Yjg51E<5C{lf0;Riz_EqDtSnhX=+8zY_e|vi_pS*|!_JrI+Oo z0#Bb)e40ZgZ^h-UJ8_@hx`*Rr5L$EEcM_5xG0SR{08!!DWB5Q%PV|t<_yHTBYXNco z=;e^|ZuKYt#2WA0M2RnN#Q5%q0KVOAude?fK^cn4NCaI-$D## zjXg()QW7kNb7iCRCHDij!Ja7)#NUgXg^3b#^>|Wb9!J%KEP&?qWV!A#ShCHYO8F}t zQg-XjS$|XBNMT3}PjSx|Y4`yq7t6( zkf&n*ictly`JA23QvJ>b-n^ZHZY~@_nW__=`R8;Pqeie%?qxyXxv9VY!*z>y<;Kfq zd}ytOIUVHoSz0#8U7T)qy;Hn_^Jo#fBPty2`8$m+dZ6EaF&0eqNUIbPiPd*2%HNhT zM(cZ;7m7<#GeDp^PjcS(=rgQ|FO_&cH{s(M^L@W>hKTRhUnv25Yh-$Ip)r~&uTW|sv5VUau= zHj@^(LOlfY*$Ir$7mMB%%21$oY40$mzKAv+$--yKmjTi{uaY-LIa}A*71)S&0z`We z!!M0}mwu^;Chj+B_ja#Xx$R7LtRw(uhX&AfzI?H5vIk-a+mIx%fTa0O$0*duVoeLp zs!xZuXX$d<63ToHjET6@d3=oCl4$*-f`pyxA<6OoNRz&C(I0Nhc4p3 z9)8c((+Nx{12-JEB)2OHo4aF|+<3i28UaDAOIjE7`vSn)joHzy1T5QXp1io!=Po-@XT9Q&OA@2dk50xYKuz8RVKzx2S?+s zMxwkzZU8$aEH0dh@5;g{q%5JGS%LQYcAxPmvxYGMOsIvM3Ft3brhGpBk~kM82l6RF zkAVf*m2l&VNFbjf1L}=~4H{uLkJ3I*>MhxW74Suvz6VG7G-F_7f z<8Ed-Na&51w=$}ChFnI!yRM8>j|s|dTa1+Yc2Se^(tGq(&XPR{QprcTHwK5%&1R&z z^!d7(Qv_({(#Nyuh$>Ib{GVA@Ini+W!>xH^e_8I|U+i9}9xh=MG_`TjgoXY>5MaG! z6@k?4QRL{{;=&*LygOzS*n)s8mhU!P5wq>gkVByhf0+NR+WyBG4}K<=s|(&cKPWY< zu}Zo5$FdMe_1P2+5z-BK5SL!kX<9axi*=M&1@uFl43U}RFM@@_Ha0fI1?E|b(FJ(m z1!6n;3dAd6ok#r@`{cvbN`QU+X>uE21lQw(ZGAbq4M~-LC!jzAV~~zekcq$W?i3Ju zGv*!j+D*qxXLO-uO_9x1c?)Ryn*%+rq_1r*=Y*TGD6(BDy-}xx+^uW8 z*E`=ZaPx89tJ0MPP#mfrT&EBlK;EBq+g%wQyH7ETQ(n35@pj}@V1hPP@CLg0ohaJQ zka4$xl7w1AVURG~?d6~TS8Xg3huO`h`lXqd0pOR*gZxwRuQkkHSrAmh>QpEI{<7mRmpx`%XiBaJDX|G`QmJyvTUo zio66I5r2RoN+a7a_f_0=bEhnBK2k#mwHwsCwL!4CWQscX$|YgudrRS}vT<3-IJlLa*c886kgw?^Yu_Mo2wP9R9YoT-VD?LE*{I zYhh9Cz;5sW*m#gQWI|Ms6N`jpE>MQwLb(Tk+c3;)!UM4xybQXF%mQ(N8rY*w@=gf= z%J!)_6rAAsH)}v(Or}n>!<8O+y2cMD#bd-_o|qo*MZ=AJ4Icg7PUl zA4yYeOU7WURsClV01}QAN<&w-4x~LdAj6GSWm@8pllA;6&%P8tsP2L0y+gk{36;k~ zU5s{sL99;3@OCd6URcx#vJyB_;UpBY)OA_uXszWx=NXWK1$g?VuQ@rbKY#tI_dKt9 ze}?JP!5ldfPw}{jFyHDL+o{fH7gU+kMgSi3Uw5m_-;)|2ZWY3QeUjdF=knn|pltaI zVdjYDA1~Eba@e8ca^b+SBZE zMY)%Ys!DSG_mmOF(rZ?VgM&T#XBXf3Umsngj{!wB>7o+Rb+o2GQ1rfwBW+%)FA7S7 zG?p<=aZ1-eW1J+8gp*k1=V z<~PA9KT@Q+9I)dy%JM-ns&fA!Qng84J-uZRCH%}GTsIyJx^op?ps3YU><*3;k%PAr zyTY#29#?cZ^M9clOCXn&2x@=z7j4FZcCGyoJD}e9O@$6+;*#SB1C@22LHs^_Mgv&K zyMAW;z@A?<+!)eW?&LMcZqoZ1HhLoRUDS9qie{v8$hG}7es?ph_FU#pC$NyO`h##P zl~0OB-{(Na(@&N|RSa0AuehXeAy%v&@AT?dk$`VUCZ}cg=rwyLBjmLncxA}T=(QoX z$R_|*m{|_U1FDNenBBMTSvhA`e~i5mJ222^(nbS>^Cmh1Ps18R>9y?Xx3Zm2%xNx= z3QHTDZ4ZYAM|i=uw;URfzx;jVdh1f#o&Lc=q&30k&Bz@#+|0G>5XcYnHH}_?2R%^9 z>`EVL<~_l=iQ0Fzxuk}mT4k;CIc0R#Kh_bY`a5~3NT*U1IB;k0TFjIqyPK_5|4^%O zn>!{I15ShMO^l}mmpY%D+xX`~X~dBWu4M@l=0kvk zDe_w#8`}o8v~e!YqnW+wxZRzs6NX^V!OWeaw0Bso?_|(~W^ugH*uVtUq>z({(DY!+M#2+9Dda|d|wYh(-0JcqQl058_)OK~`VPPwk^p3vwwDut*+ zhzx;f=z=m1?AP+A*7z#V>AKJ}+e_H4ns=uD2$_t5F3ki4AW+BjAnTc<^Z7lkI+lg7 zQXF2F0~~4# zvWU^f34_n(Ct#5H0DwWvQmBaVo%*Px6+>RqTyj5TlI>2GcuXwB6xFQj0_>b_QEn<* zpLrp|xsQa9!N-fXz#O0Km^QE7pZ}gtc&yo!#*`i)ZZlx{QdO=A1AdgyEs6h7_W-W6 zz2N#ur9*{`(fWMv?Et;dZ%Yx}0vKbq;@ zsIYN)Ptvww#AT8n=VmsixLUVaCllJNIQ3HGUk_5IXHre75A{e};eU`7&eX{=WT423 z$!|*KxCVN?>9H{rM0g*lbpUSISmd5wzR0=aiXOBM+U{S$D>(IBPzFw#!X?ek59vp( zF_2b^D)Fn1o%{+nGINS`y>>E_@oBl-|4r2SoPd0Djpb#Rqp@a8&Ud9zG` z{DJm=%%we40Q;m({z&DFzTQq?gv1MiMV&>EvGxDU9SACAw;6o$SSFRfB_Z9(fRJ|v zbzwQxe^m&1FI<-xB@Um=1vZw(LW{iH(G=SvFJR?bz){)!cM$6TI6|$39{aqttPdzx zV0@49n(a)0(|BvC1|L21c-wpqRN#M6qW@oEZypb2`~DA)J-eb5iBc&-F^nh%B`ssg zQe>$x_N6RSWJ@Yp(p|#Ho_$MW385rf3}cDxku2G>MV8-jPWR{YeZJ50yq@Qe`}Mlx zGIL$mc^=1kEbsUG$g@3!67bU=pZ&?bB|Y)hI~PgBxbzFtw^=dTRbPQ^Cy?kCb$1)^ zyZ|y-`lmc_?&Aj8oMZ<@7yJW0_;pyj^PP^rHOSP%DUN?Jjvt|VA-)2I)yX(PR|uN| z+U|QMHP?{=yw|t6FLD1X93GGbbGgU4%5IoUTHQ`+xWQMtC*J>@Xj)?yNRVWA2$IIujDSlZHZe)?;%V|0Zn z#Vu<8M;y)m5j^Q{%38kdmB-a)4!UcW8;Gx31zwNkALzlAQ|`9@k+Iz6V9z9WB;`*F z-J3xA)cYTz)}G8Zz(XQr64^wl!OnHZqpnHr(|DZIgJ;wwVYKw#wqW!=|> z>4^;5`J>!e@O)?8qlVcmaXxGPtLY5-Tc=|d%7PBsVfaXp4|5V<{St?xsx-+oRY=pw7{0}bekQr`|lP7`QzxNqM+ zMbebnv}a2N$@Nc%|Jns8EsuFHiS&I1zdZm;V(8^zs!Q1>Jg zrdBglSH9=fH-U>aan|k8H4UGlr1IcIoI2w@7W>np()*@WaQUxa*MYnA5&AomsSj0u z)j_0OL??#fo}?R<*tgcj$1dII_`WMR^-}}Nn+g}Xx=Nhma6127o|b1(jW7tnv5F1a z|3bXM8$;rCOmUn;9?>sba#j@-uK)#>du1~~j>LGb8YQ!LmwV>mdM9-KR~|2&G8nlI zo2IMzl&CZ0tWuvMh=z80PH6Rnva#HNJr(S`;JX$kiOp`yyb7UX*jH8|>r+YUN6y>q z&twKTWZTn2CWGPn+nXcHF2B7k8C6sy**#e57LlK*pLrANMeF8Jcf(U8e?O6TevoBM zRcBb>S_QYDh%MJr*Q|6C(ZnwD`k z&f84rGhwLvpe9$;TCLdD#?uHj5dg=tr*mIV1RC*QCruU}hya8rhir%u8946Zg384T zaetgigSUPcb`m@U*#e0AGx*eRasZp2Ea91ge<`6-s$$u1*_QER@Qtu+GjM5F!e7x5$eF zp27fMw~;0Uo`iHX=J4C_2cPL@e~Ti8$sY(20nY$_@VPmBC`d9w}7Uw9Y?IB z+!b&^B_Q$!4d^%7U()Lkf*0j2l~I<(JFs?qQ9?=g_0vM4uNJiIe7ZcYJR|_I<;zCc zBuBS~`r3W^^yzh_BqQpM;L>v86;GY<@>V-8|7Wq=)n|kQFLfb*zRo_4r72#8v{-ko z?-kGcNdUroeS5>;+LLW0g&HKdM%-HWCNg6KSs~|46o#rB6{|FLmg3kU14r!CtM5Gr zmWo`4|5=`@y=n8(WLg&O2%Xv#jk(X3>n-680Fxe3V9u0k(B4>Ia|W_T)E6OwTB<%q z?*_-=a{abuvnyIPTLUlGdkg58Iqh8d%CtQfQ{OgB{wb#$AjPlI^SB)lol4|vjK)G$)PIaYrVxRI96TT6+ z2(gRtn~h9{n;v2(3*G^tW74PpPlC$aQVIjw%Lchd^BVryuVx%fZl&&4=0i?mPn}pN#&~N_N{cqlhO&ac+zL z-&pYpc8{-klAZxOwPLW9e^YLOx@qn8PqEob z#BMcCOgx6|^5$_5P!^3&eA#ncnVT@~DBuL&zt@VJJ0N9qUdv2_^}trEDB{}#l(?q3 zhxh7Vh2rID)i{DC>I$bnXTdHUx@h?hi`}bFfo@}VXnI_Tscg!sbpe;0J()NZWn2)vR#i;;f z=Yups)3G%@9LFu7%?R=-2WWOy`cT%swH*l+NE4QJ;;HjVJO67f*=S2ZWMZG4werOTvMM%$t8QyFDp+S8*!}O0g z%MOA9XF&nOKLH%v0nZEqn!{5flbb3En}xgQG0t6acXkK}uT`A?9u6dK zBX)UjMs;;9-3dM-2!;t^PM*d!PC)oko+VilVT*U)lpv;OusiP|dwU)8#&D5txDdnP z_gtqz;Z(1v2G_J=r3>Yg2hNs4V*S`kClF%j^JoEh!)dx32`Yzq8 zG|sAwhl}B$D@&HJo1Z>)W&i7t7wJ#z9b#*^T0ci_Lth^*Nm$iOV<2m7xD@GVwSH#w zeu*Eq7O<|-9m3e_MatMB^tJC&MgKxw=jh$5sJHbhzchShP_Wt#%4u#8AeGKAn&jm) z>vSDB6dW@=dV}@(BiPK3*blrp(7>!KfU!4-#6Q+8y2(n@BS#=lL?ZSvnyk+VK6Uvp zufi@#9`3aGO9zk}#j6{ormKD&VLma z0{Em;xMA_{NfNk*QqMqBofn$wcfdyBPcZ>xbYpvN&E-mRb3Byf#w6{BH+=to-!Z6( zbqlAyfR(_!gzgG{+d!PHVpka3?fn=rVfX~=w^a9Io3RW?L;;h@fNPr7q%mVoz&a+e z9y;+*0$qnhGm0=@Q9D&YW zoW{Z*pV!jvcl)OhoaNr<1el1d2n*$Ukg)(5#FMN zUY7BK9(&uLx&g#7lt{QyI1vvOnRaEBnX7~ZeUlCMXD>|%k z{_D3;eveDQxh9}mV4qSRE4Pp})Yz`s_LwnH10@N^kc@Bb%Yy;gGS}hM>exX%!9WB+ zNT1SVYXR`2O!qb1cPYZMaO2lUi8tx3w&2lg>h{A8yFHH4Ha;CqsU$Us$ARFfan_sE4coSf{CqzQMJMsq)X}A9W0C!8cm9iO{skTcv>!xPX$+mP)!R?mw#ZZ0gb*A=0L36+1x@IN1!rDMMQIM>_g z05}hU$l=EL@%RkSYp)sOvJV$GQ{GPyn1(I@2*f_=z%kfW0tG~8j-;7cNC0mJ?stQ3 zcNgT&t&;H{ktMng_#9q1XC%UsgnG3q(Vzp-WC8FFpXDFXQvEu;G{ED;ev+|;(s$15 zYv*%_EYX*};~RrK6}ld$gD{gY(+6HiC9*NuTu$93RImVW4w&~@V&;tk7X=&=ZVvC5 zHZy2|;5I_C_hU+Ox{N)KJck?ymO(+4c9Rbp-f3z>3|&f0%W3{TZ`&>o^qU={&2nKL z7^njdWA1G5LsaBT$NPJO7y%rk~#!u91$59TebgF2*W&`g8p)OkbGs zwZn2xKE33UWyT~fZ`)WTmH%qbg0OY_y>&4*+}ZMw^9owBTW4-_5ZvBv?-SRY`)L0o zOv9wY%h|?&*ZH6B$cW(6a46bkXQbasdGXUAt(o5@5Y-zuW5lvJs(8aSLKowjxg#ZI zQpW5+Fs=B<=V+gi_mW+k@Vig?C+*uLpW>)o6MEJDJJ(S3Iu(!$hFaf~tB|F8Lv4V$ zC~z2{9j||ey@rU<3%ce$B27OjB!Wx)lZb88s zN>FzPN+Z;QuBXle_Z@O)UPF6m1p<0V8wg{s-;+%zL|uev8M=v|(i!ZRaxB+Ag}=lx zW`^|1^f-qcmkU@OJbmXz3b@br;!k1M_1zq~8Q-{HH(?^az_Pk8?mP;A3u5tP-|xcx zGZsJpaBp16f5^hQ%}`b4;~pEa>VF;Gfn6-q*NrVv?U~FMETsQq_|d6dvl_bOG2Uw> zSwv~mA~$gg*)Le3QZ~}np?hbpuR##6k$LO6jrJOmrnd0IuVS?Z+S+ErWNa~!GAuc? zQR1nnZ3OXF>dpC4=%bmtQxz;CH!E<{EaV-8_|9n>v|=XB9}lN95ZL$L*^zW}v|k8v zm1OiF$zp-S`4v;YOtJlbe0v}YtzuNj$R4Y3f6sX*z%!Ej1Uw`RJ@Mn6dN&}`%zrsHb6O&P zC_ETW$H-e$z|sIU!?ir7=ANlshgWK9b_4Wm28|p}^addp9nRz(C%W;w-lF(a+mwk& zOG^YkNJ*+4pBhK(J;sLC84RPV0ZAqV3^}#~$E0lMp-;Jxa(0(ESE?N&C+wH4od0J@ z?wqT1dz|`=+4aRsp$;3{#%p^#w0zDxy(;t5kOvRG?(VdO$*7GnYQ_oZ4^bZQ;&yT!Ge&vMsr;5Js@M(osfr3xc=E)uM z;G9XuxZv2t_4y17CJYEF?A43W&)mGGA}uSM@!f>$gi6A!54=;U4|U|Ood&HRz!uD` zY~?%^g~5U4nOC&uH1H9jw%^_|%6FXottt7&eXnDji`+o2zo0IOwG)!W;^hvRm$!jd zf`k(U@(`CB>MNMpinq}o9v=mNc6YDn;-W@ zzmh^%eWYI%#H;!Ol6jxM8WDqA^H@<61?qVvUFf1^xQND8V>+H=wEPf{}T=&5A{x?>Hp=<|IZfHk`44$43+<+)l(AV?xqY~~X$sQSse zBMujrXKATW1gU^!9InZ!VLPSCzt3q%oAQ8TiFFKZ0`y&S^zZTqVpm(ML}HAzsdal! zv}G}S@FF%vh)=hK?Z#xg6N}dzQGiQigj#n=kq-ixb9vNPPKJ zSqcZ85MQ7fHyWfIHGQj$;T-gJ_VhSDQf)6Vx*-Lgh@mcPZnqdtad>m9)SPLrno4yGkjzL!d3;(hY_xt;5I zNJl!Ms$5S2DJXAU&;cNyn)#Z0uwfJ0GCY!4_7lOZS*7k7#-IMgH@(#V)?EI~v4E;- z#QlK*0`W*L*!Z=&_jrhDX@2|+1clJ9>VDp18HpBi0t!hX^woq=y*8S>>GgWKW%$uw z+N2{V#$5y)9$cOl!m24JFyQ4t3{>0yA`>}?_c{4?So!mVUk`k<=ASUv9Jd;Rf+yy_ z8ep)jX>IZZ^;SbH;dE$ApMq6g4EOfPu1ZJRDlq#@Pokq?79?#V4~c8VICmM&^(olc z5RZGu^m;t*3}l+xL%Z?wgkTc73a#cbvP0Hlqr+UTSCf9?t*c%m@qRB%%392sh3-K% zQ7a6ft?LWpwu^(IrlXhJv!g)y(6*(R6yE*~L%@T<41!Rr*ocMynx_wh0hLz3f}3(k z0EwmFv+LNRfp@*{HyuySzYM5dYg8FuIQiedQT_dmsk_prlmt*KnmLscAwu2d-!qYw z(O{O-D~VkQ(+dg%Gfv%&-rX^0_-*!Gt>^h8UXiLS(L8j({VzW)v4!$pMu>Jw8=ktdgu7Ij;++YAK_{9R7$b=xQxsO0PZpA-u?xVdE0Q;IfM8uPpd7;VfE$om z9pK?mD0qFfJvpf2Tc-M(qa!hd7(4~jCX5ygaNuRb{3+MKNMBtMiw99VW>66>(*|99rEZ`Mzi2edle_WKHVz30_ zB)!3#2*)d?RsDSlaf<7G3Z*?)OY9!?p9sjZ|3F_^Fmt_UZDNq+RGadbAI#Glu=#O@ z((NKIe6%I^f-EI9LT$wce?w#E?Q8u!w#pGS*SI@H)7^%fMzx_J; z>MQ80O9PpxpFUI1oTZkNNP^?)xcuz)1FsL%_P9UnB;b|@w0578PTuMrq1KmsGF0Zne6UYteB9ah(Giwz~@&k563|bwdJ%# znf`Yp>)Xzi^Iy`R1Xu{lSf5r^IeS~8JW&CFL*ua*av{=iN~0wWR(q**9BFy$2= z;ZRn_^5i>qwGLC)^}DNZ+1v9js%M_Yun4gT4{Q)IcB;%|W{_Pv2t6hp2j1h>CD&5H z+Ui&wKUG^;lA+Jm%)6*T#8(@qR598_RnvkUuP(sGHw!$ zXJuol36nZ}7tl)GM1PZC#HQm>kafxB%1@I9`d`3t9qHZ`;tTY-K3Fmt(pK*FLaT)= zTZh?WS8l7QTOl)F)&R2udFcIti;mw_Gd$-zFNv|vZ|{X_?q731nHsjs-1ld!=`q<$ z!#XrQz{{MfLTAG;IHiDB6-#|T&711N4X%4g+!u!jryYYsj*ry!?Aj-2gJhPwE&rHe zzJ^Y0G8@P@|MQJajVAI^0~a>%{f6k%emik;H1=I=|`Y#!bh9TOE*#(6&ujjP!VEBJv8;)J~+36s^!MAG0+rORGZ zWge=b2jO-iEx~BDg^qvsU+kQ!1$8sU2B+pdB!+9~xqsb5E|EuV)eR^22Vi2on~w75 zW&mg6FhLFM&uP&KE>!>bLd@y5{P@mLS@Od4w9K7zF_*pGal@0*e;tH>jPD9Q?gmZ9 zip>Ejt>wj?54OT#`%UH!GCQLS%SOVrjOb2LNoa3umy)c>RmH1!D)@eV3tAVz<`t+P zLhrN(8f+iRKmhv^V4Ev7t%giqq!U+fkak7D+V-BJd&*(l1b8|e-kQPh2Q1HT`K-GeP>&VdqyX? zPHFR>GD&>@{e2!Q&-_yMSldU?o)r7`&sk_WxeMxTLhFY^>jw&9%gO1_Yt7w?)Tpnn z5plin@nvYA_K4B;(g49fqD3ZbiPd4m=!+=F3k9b)CVZ5(*hX|> zhNEqFHHlj((fiZHC#l}x$djR~l@3K!tki4}dqnmS0$-?isBK{)knQEP(Oi!^vC?~4 zwmBEwEQqQ>bq^Y?0N3;EOFnW{%Lho!dX;(ZGzRB}7{d#Wpt>8OgnTmrox1K7r0J5p zOQqiQsMVQSG| zl$c`t8Hz3dNnlK*@V;_?8T2)pCKedQ!+laz{U zq_?s15{iG?kjuPw3f%`N$ws<^K2p)H&GlN7w7ojIX-(wIEfy5QPEpoZLW*EayFVg z*)yg?f2U$9u*<~!^Iz3)gScSa(bo9CS9_slU$^^Y0y+2Vo5VO=1wZyg(HR4M{eebI z(hd3-C;+EW33MlgN8qzycLC?NcM~*WA=Yf11keIxN+TjdsUH@mEq$dy)t10d4g1oC6LNqW$+n=x6{w2=x${jrJb24Y#6I@fT4c zQ{%^VH1aDNoo26+Zm*#8;T|jthH6#2i8A}WHw0Fa7;Sd`#hC4B6r=L&2LAy?+g@#X zr9B2p7_r8ws)`kFp=R#oU_tU^=XaST8Q5A#Y#4)0e8(d%C9NQ=+y+>ul7@--R%&|z zNl;H1?%*3ZUko`~gZlkpoViaPNUGX|iaNNIe@u+HYf3#&VSzY1;3}M|7_1tI&Q2>_ zdhurBgLv=BQ|iNwDpc=znt6G}eJAW>47v_|oj`<3-RIBrI7~y)RS~!e5YnlA)hqn? z;J=p`m%0#TtL(gfL``YjaiMK>QYI}{Bo~@EZiSl`Kx>yYV_Gcd^NoSBRjR0~*E~!w zvYEOvTmRSc1L!ven-No6AGw=YP@SJpD|!4?sL7M4W#@!H^T|@CtgsUAQG?*VpphfC z7_3k2E0~4Fqu`pb@|0e$xC*`3u=x)dLsSEF&RTQW|C7%j>-^%`m1>7Q?sfa}T${oA z-Dk}ab}2*#h+q|6%qoT0kSW-2$@_~G!HD(#8!iGc%t&<(gmGN}dIsYg&g+ib?Ydbz zaY{|Dn^JGir9awS7w^DnerNv91!Ofl{KYW>Y?82ZhiLN@VOOlrxHRRn3@G1klU0O6 zI9>phOHrf2g&8x3UjE*P>8Z$5^^z($cXKG5Jk~1-N&#PPK^sP!zSYb8W|(^40(HET z5~|n+)}rTx48vMjt!h{=HJ$O~ef08ylY`l7Nk}HvaZ#eAdQZ=S%TJ0y8*MC7x-Bz& z&gg>8^!=yT(JF8v3py^~P#57T5n+1Vk`%9UaF?Nd0P$C&3b-D}_Uh_RUjI>VqW1N$*uLkW?AGi#eeJTPP|}VA4RzrJ|;x0=F89r_@?Mpamln@Ep)Er^^-mwuZ1q0H+5ab1f5 zZ9^xp3;zw$QX~63NHGTt!;}3zwxCMZsxr@Ygf>y`IhTtDCi{-4gk%XM9Y&YVld&IjjigBDVLOT)F`U$6 zdg#XK6-=&}x-NnDMDEHv>x?@X%JWz722$*}pJg9q2;P~*ihfmY|JDUya+_1m1l2CD zbb!K^5!o&%0CcbPNZ4>DitUBTTw{KMRmIBBpXOhMy6g##w`!&>MZHCRBc^(DstOtx z6cmUL%nAJuRnt{3I6FS7aovdPe-*kzZAJe}$JqS=@_w`pdX>Gf6^o{%^F7eOhuo-K zUhJ;U(f8cX%DZ;wgao(@OPtH}Hb@9(q}gZ0u1(l}=~wKrCtMMWj&HwBP%8drkqAW2 z9;u!22BvOy}QXEvUbi_##d+U*1W=VF60OO4AOtMHl@J97&OQSHYQt>@2Y)- zCAj={Sf*+6C(Uo}iE|mHS!AEFG^3y=H37j zInsGhKNUM7bpG^>i9*dUJi-6G59RgL;WV((`tl&)_8m#J75{GuJ`LNX<+@)r`)`Pu z*_Fp>5DcV*OMAUW9oy=SPvPK^EJPfZbLZ~uQWlV$FTR`6dh%qosPDAH?!0LUC68Tx zzcl(E))-v;J?S$KvTjw&)a-)R zNG2UJ_PuvCPF{2tHkNccz(5SG z*x!yOFOB3<@~iu}yN=|l{R(0=N}T#f&d^?uue|VK2?LL)X~#AT{yiK$ zo<`W8;yWeRXGMFHcsSSxf>%0kpY&yRv>lqQUn`dN6d73hsJKsYb!Ej*&6J5hS329i z+#J$r^mW_eBpDY$$OzEM4_f*<(ciyfL5f%NwVt?0dB@{k;K{RdCWHL^#>x&Oq1uF+ zI|&zQ`{M{(<0ZH89Q0&YJJJzT?A6?q_a>eGMVM`0o=i)C>O2Vfe}P=ArwpB(?Zr72 zqX!zE-@^IA3lj<6o)_J0WzbvZ;K)ql12{BmIIZmk?MajQ<6E~Hn9&(et0)z9sBxcB z@%)8LK6@k^Yvj$yrx3fi(})3yQVO!-_^PH1R}7 zUr2oLRKwHAzR2i}ljk$We+==E3Mc6evN};fP+vU;X=a?G>N!~nhSd>bCy@z#H|ZibQ|RA5LCoJhDUmX zSusEE-tQKqmjCfCH&NrnqS4;}3kEPo>xlWz>`DTlMjGPE;p;vm)lSWfq?LZDq${M);CGQ>1LJx9UwZ)o*SE7bYs~Eu z@$fGI<}j`w8#2A)5yZq($v_b5y@f|3+9o`L)M zg6LaAiC*oIY?i{B`x+tGq3=J_EpZzR!CTiXlRm+;2%<2~IPcu(0*BH-$K>D4%mY()&zHS3+Uo1#tK;h;GH^IGTgO+GH2-I_@3U{4?!~U$Qd@aFa6IdFqORLe z%Gpbx;||o^Tuxam#Gz_6t*5u70QZ$s0o7S91nfEDUj>xK6?HK)Dqkl zCIxX3_X;q>h^886h2qV?JPp7$-TH;?~=LfZ->x zW1%y53xXYU#%7i`nNS7=5$p;?K-M?1;9v=GHjug3P{v=s^JRx3O7m=qZF z9Ic8^7z1{S89hgO3%L%8!yy!6c&B?4fOWG{+8dPH2Am)^g#BH~h*bSCVuI=CvZ|gA zLy19kl@A%ORs27f4tB@2=m4*M4#b@7lBGxc@4zA0WRVUnFXI!_NUB) z_#2N2gtLDV0e7!(vFzpWx7Lc%>evxlGUyQtNI+hJ8G;#mpheT0x*vor;-KCo7P>G_ zy~`~>{^he%ryK~1JmB3*z5_Dx81}`F5`Lxfm4?A-GGtRlJH55^819{RijKw$Qx2HO z>oDC7=1Z|do6`pBLW^}UKfDyxtf44ALw73e!%J?d+gq{4Cqo_R74_2MvrER-@eRLdlE`|k2aY)%6k`yp0~KwjA|)y%3pFd*_K#0bEruq zPT=@6rGEp-6q-&%f4k}1^!_9!$!>IG9aI$!ldBhuH;$c&BlUS0tYzdd;BO;K*WP2~ z;!@S=)ViNiXhr2wX{g+D!l11Udl|B7222Zz=<)y9|Aa%-bmtJsAoRS=22|~cO)vsR z%p8tb%yV_fOdUHwvit?WRX_nJJqeO&oxeR_%=3>n(Xmm{aq+$-6?`>aXE6tng@`$~ zy}Su~SY>J^<9V^ekB5{z3PS+>7>d;5kj3@cE%t9rihi%JxBzbgS&O!c5mXn`&=>x% zS%T~o%RlT&d1J-^+6>qOW*X`4+*MSF7!s#IE28(`@mSV{ewCe2PUI9ML(c~2uzhHt-8V*j|4ii;Du`MhHLER+&M1>@rV%3%&8QN`;O zyvI4vj{tZf;8eZF^XRR-zsns8h0$RK$95Px5a|y)w-cE1!uFFyM;s$Ytf@#4)j2BX zB05D#F`nbY7MlxgA3|aGw~;U~g&=-8S_(Y_9~e(iX$2$ArzH7!5mi=5U4-WHx;gPd zy4C9MpRLVBwxGe>4D3F6xQh>E(BJPyBPp%e=EgS5qdy>P8<9;Yt7CTi_D!Xv7AwHO z1b!GgiC&SN5aHCNiuPQ8f6`mvawLO>eIzPIVrU)?yIx!k?gUvUjXGNLIx!v-c?$g^ zQkVlwIj-_|%EoyDGdu;_H)5qL3pu7)fK5PQJ8Y>#=)*AqFlJ#d7hr|q|LFy!|204o zUHEsNB$hXisrOZLJg5MoAlVFdR~`VZp!&xrL4SZ|_oi6SDJLl3A`C&|tw9The`~`a zPEv>_i~Psq;-S{3U#%fN-Vt~Fcaj=w02&ekCAsdhR_K(SEnW@y<#WAL0%mdwlQKG${vU5 zZe8A%ETZ&H=WlTs+CW^QQ>+o?@mW!GN$TrIu0QXTE{(}EJ56KIbEUu4WBJtELQ~x&PLU{R0kea|r??sD^+FK1A$u8{I(Q8|n$nPlxkN8*h|!iV zidSxfjeSAUeGklua2jvV`|o?=xcq^S@+F6Q=1fnpcg*o_cvu+c2IUOs9K)kDs=)Zj z;e5tfKBx{R;a3*qrDT_asJ)~(LcGIbkmzqmr<6Zmv~!GswSm3>x|v)%N7+D+X!Y{k z@9#wKEkk*w4K9^mTo-anO$uu&nJ|Xv^v2AlEH>i$ok|>{EZ>jy_g6AvvQgMe-v@-6 z2{caQuRIaj2oLy?={>K1D($`46puN8Haei6x=Pcb*UGPpRav&Cj3l@hfRfwa-#k2a z)mVSvA!!PW8bVFGHg@8(>38=lOWri5rs1LAkPhbg4)m#e6BWn9N=SY)>i&{K0fy)% z0)|v34pTrjAS6|i+tEItjs8Oqr9V#4M2AXdsO2k$fFjr=AIbowXA1EtG=0Q$APnSr zPP2(F3xtZVFh*Mnkm~d~Th{Gpju+!#npJ&-SG$Sr&3Xjk`uC-Z1y{{-c>6~8i{ zr=t{5$r$$5Tm%HFYIty$O1n98q$0W&g9$;!Ls;yK|3Cjkw;3N1|M!put(zz*s@D`e^FQ=w zet_UTFqI8DjKsu^*PSn}(7iCDHV1lb2XtgBtzp0D$UT)&7;k<@Z#Ck#{MWAl2%zs8 znos!eGaa}N^=@$N&<-v30Zb8Kc~l_IV&=1zR;9XG?R^7{cc-ib^-mWeeB}ntsW1~J zK?KzP=;T;l;xBefw#-ZB6yj4szX^aH zL;UgMZ%;GVa+;(rCFL%NsbxSrUo6*Lg_4mV2mSk>2DCO7!k>0Nhk&ohk@>Bi`dQ@C z+Z7H5-Pn6075bunZ`ch~M%){Z0Aj!#qo+;(c=9vf5;c~*OE}V7{UFTb1gyWBlz%h-JeU;rnC>|Gaty)h1&)_~RcpSgX9jZO%n{JNo#$jkZXbWK$((-L# zb6M4wbL#Y0xnPpY&<@84X(SV;%qaDuID~Cr+jUs@NKNGyygS|DVDF|Upu%+21>^n# zRy+ln!2dpt8t82bk-Y#fgWu>qqkq-^ZG!UhOv_o~Qje9f@vzL_#x@3@Z)6w~?;g>~ zjf(*YdWlo44@-a<{X2)c;BbON;Gx=bt<&&1e=e~R_|WT9Gw*my!J~NJtHVD-#)I$M zdGooj64?v7w<-*gXa8<5JV=1%YS>JK0gW4xhU#D`wH7Cis4QLt1|vKQxFCK^nMN>L zB&e#9+FSP2W=^xJ5?B6<6N-xXFe)zsv)rQZzn zsc6;JCii%zYL}ZfJec6~kRg7E1|S2I?bvy+W-0m>xK%cP9tYjHD_uJxt^WQ5)c=k1 zdP3e6$%|#sU)FL8x8glsk_1m{W56@I--M=87pM@v{*T{GGBJbMJm^WS?x`@)4B>;0 zmWt2aTS3YN5vU|?&N8?g(jyExI6eMkVFc@k9msU?s`la&Q@wCHy2L{SBngj4KRW_` zBgrU`$^@na9LEQ)g0M}36c%Z1fTl+$^y+)Jid6z~u-&xjH`vMVapdzJa8P*FJS57} z_~h+qVsWaz<)|xwCV^QnJ*S^RV*M#w$-1gXE>-u%4?RtonSk zeEQfqcvGd&Ay)QqlhE14L`9Pi(TIizuFw*p6N#0D=-v>o54S`3!GO)Nivq3P9oGeH9O7~Ps}{1tv=hq;v<}g6JHE3ZSC|)<21A7PW7I*E*rXCN3BAMaC z*x8VuF|!g90$2O^u(#F^&_8)eg~p^5SW8*j4l&64toCo(5-4NKo*J!VF0%i z@C2zd30gyrVYsh>*@v17YPyGm+LBN*5xIwKyp3Bf_pv&?Pm4K!B2Y{C8rxlvnkUGj z3ECBmFi{#Al}+2n36Nr<=%;EODdx(e2Q+X*VbT2K|L0Mao25CjFicvAwm_K z^(Zf2$^i!$g!bv;3|#P`)tj)H8n|>ORxUSN;<00TqJP2eAegk}4g+vct^i+FthlYj zShsETD?l+TI565UeGq=OpFkcmiuXKZ0iKx!^PC;sBYocbWi)=OlvU%@jkPf5jvzV% zP~iZPT*dEac4zE}fS>vN0$f^#l0bCh9!G=VOFy7(9R8)`CMYcQT|@)|t*TZX6)GA) z(WsYOL^E~dX9OYWFyYt*DA1X94+A%|d~>8fcNRxW6fYX$7bFSFv2~mJ@z;ayYrtCI zuhH+`$k2o*LeaVFRSpp-Xc|!+V@e3>?Z|*ReKMHU2q_%He`Z;aP9NTP9W-?mL1-Da zwKgw0B>z{U`TzY-yKNv}_p2)-yVBJy49Lrl{N}=w+6AR5z*&F2)cn~ozpM@gGJTBp z>Ggz}8(9w#IfqWlRTUbyf_*Z-hbA^HfQP~7CS`&rz5#N7Vexg=*j;y^6&s&FKUmod zfizvP>N8AV^R@~WdET+QJXcvy@ICnY9CXCm3oNIdlQOxtDzw+rtoR^$1^-dpRY58c zR*0y1z!$*=Yv*K z&HIme~HjDJ(oV$j!!YOV88q+^O>vyzQ><-`)-{lm27VQ;;`x13GiXYlk8EI<-VWA)A()^p)T_-p1+Yq-!?Vle&K^EAJshmGwK+cf- zd@$qLV17Ft5X7CK;ey3}o*8&o8O`ygCv00OqBVuTOhpfWB1v7Le{G7?`@wP~rI?+} z_}mJ{NK7d#@Sl?nHTGkC%Y8T9q>I;L%J<|f~T9Log@t%FW1QE$Mfee?+#yav)Fzzzoqxu05{`tZ=! zduT~ypcbY4V$~A>I77332e-QB!NaGKpjPyjsawVdd@DU8sw0(cR5lLXYKh~iA~b^K zObmOr$MP>UnNju~E?@s`xV?E3P29KY_j}p$N0vwO^Uj>I*^c=KM#4MH-}3>r*}kqI zw-+B3j(^S)wmd$*;~Zyp0bOiomF4w;W5-kTvaSv+V6Qbp8ZvF;;UL~J{mi{_cKsnc zmk>7d%);T-4YsH&7;Wg3zC54VJTFQW@ z6r67RVqrVG8qq_bD=Q=6XJrr6D3-Ro*W4&Q;KXM5h5G_;(1uzj7ZU>W(A6Y+(x&wM5U z+i)lGsPla{qy1)i>aXn|=$6!nEk62~ktD}&=%9DY(6bqPVNzEhe^8Om>zUwsZ=wVlBY=!i?eU^bU z;v<_{RN?dDYlUUCU*FlJBj}j{N*zh!kcZjoxm@xKomP9ly3N6^U$nS8*133D^nSAD zJ#Sh4*F_`YlAhkTSRRJH07j>eY;ox(TyiEmS_c_Mps#O|FK5VfjoCaIoVuic$<|Yo zGo5SfM``t%^YpM0E^}e+hPJF{Vfk#GaAn~xse!m=pqO<(KmGXitaEAaa3!;Y2HGN7 z?=#5L3`2p#anVnnyrPt6aq9mzIo}(W;J>~+)ll8JDV^uw1Udw5dkl02tltDM02O#6 zo9+sK>wWz)=*nVge};)efL9?y57YkL7V0ch*}A&v&u$EcaA#wg03W(lXb9lF7&I}% zSYKbi)4>Rr}^4~KE{p3GI9{M5n|Mt%(og1R!O8O2LRk~%-?@yo9Igzh%`S$+-mUzbb literal 0 HcmV?d00001 diff --git a/evaluations/scripts/def_rate.R b/evaluations/scripts/def_rate.R new file mode 100644 index 0000000..6a4c417 --- /dev/null +++ b/evaluations/scripts/def_rate.R @@ -0,0 +1,328 @@ + + + +def_rate <- function(data,t0,period_length,process='all'){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and match + + proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() + cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # choosing processes to measure + + if(process=='def_only'){ + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + } else if(process=='deg_only'){ + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + } else { + + response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 1, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + } + + + data_filtered$response <- response + + # count up number of pixels where there have been changes for each type + + proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% + nrow() + cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + proj_rate <- 100*(proj_changes/proj_1s)/period_length + cf_rate <- 100*(cf_changes/cf_1s)/period_length + + # make df + + df <- data.frame(matrix(ncol=2,nrow=1)) + colnames(df) <- c('Project','Counterfactual') + df[1,1] <- proj_rate + df[1,2] <- cf_rate + + return(df) + +} + + + +def_rate_seperate <- function(data,t0,period_length){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and cf + + proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() + cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # measuring responses + + def_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + deg_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + ref_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 0, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + data_filtered$def_response <- def_response + data_filtered$deg_response <- deg_response + data_filtered$ref_response <- ref_response + + # count up number of pixels where there have been changes for each type + + proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% + nrow() + cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% + nrow() + + proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% + nrow() + cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% + nrow() + + proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% + nrow() + cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + proj_def <- 100*(proj_def_changes/proj_1s)/period_length + cf_def <- 100*(cf_def_changes/cf_1s)/period_length + + proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length + cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length + + proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length + cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length + + # adding the degraded-to-deforested transition + + data_filtered_2 <- data[data[,t0_index]==2,] + + # count 2s at t0 in project and cf + + proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow() + cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() + + # identify where there have been changes during the evaluation period + + luc_tend_2 <- data_filtered_2 %>% + select(paste0('luc_',tend)) + + def_response_2 <- case_when( + luc_tend_2==1 ~ 0, + luc_tend_2==2 ~ 0, + luc_tend_2==3 ~ 1, + luc_tend_2==4 ~ 0, + luc_tend_2>4 ~ 0) + + data_filtered_2$def_response_2 <- def_response_2 + + proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% + nrow() + cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% + nrow() + + proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length + cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length + + # make df + + df <- data.frame(matrix(ncol=4,nrow=8)) + + colnames(df) <- c('Process','Forest type','Location','Rate (%/year)') + + df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2)) + df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2)) + df[3] <- c(rep(c('Project','Counterfactual'),times=4)) + df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref) + + return(df) + +} + +get_prop_class <- function(data,t0,class){ + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + data_filtered <- data[data[,t0_index]==class,] + + total_count <- data %>% nrow() + class_count <- data_filtered %>% nrow() + prop <- class_count/total_count + + return(prop) + +} + + +def_rate_single <- function(data,t0,period_length){ + + # get name of column for start year + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + # filter down to pixels with undisturbed forest (JRC class 1) + + data_filtered <- data[data[,t0_index]==1,] + + # count 1s at t0 in project and cf + + no_1s <- nrow(data_filtered) + + # identify where there have been changes during the evaluation period + + tend <- t0 + period_length + + luc_tend <- data_filtered %>% + select(paste0('luc_',tend)) + + # measuring responses + + def_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 1, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + deg_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 1, + luc_tend==3 ~ 0, + luc_tend==4 ~ 0, + luc_tend>4 ~ 0) + + ref_response <- case_when( + luc_tend==1 ~ 0, + luc_tend==2 ~ 0, + luc_tend==3 ~ 0, + luc_tend==4 ~ 1, + luc_tend>4 ~ 0) + + data_filtered$def_response <- def_response + data_filtered$deg_response <- deg_response + data_filtered$ref_response <- ref_response + + # count up number of pixels where there have been changes for each type + + def_changes <- data_filtered %>% filter(def_response==1) %>% + nrow() + + deg_changes <- data_filtered %>% filter(deg_response==1) %>% + nrow() + + ref_changes <- data_filtered %>% filter(ref_response==1) %>% + nrow() + + # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage + + def <- 100*(def_changes/no_1s)/period_length + + deg <- 100*(deg_changes/no_1s)/period_length + + ref <- 100*(ref_changes/no_1s)/period_length + + # adding the degraded-to-deforested transition + + data_filtered_2 <- data[data[,t0_index]==2,] + + # count 2s at t0 in project and cf + + no_2s <- data_filtered_2 %>% nrow() + + # identify where there have been changes during the evaluation period + + luc_tend_2 <- data_filtered_2 %>% + select(paste0('luc_',tend)) + + def_response_2 <- case_when( + luc_tend_2==1 ~ 0, + luc_tend_2==2 ~ 0, + luc_tend_2==3 ~ 1, + luc_tend_2==4 ~ 0, + luc_tend_2>4 ~ 0) + + data_filtered_2$def_response_2 <- def_response_2 + + def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% + nrow() + + deg_to_def <- 100*(def_changes_2/no_2s)/period_length + + # make df + + df <- data.frame(matrix(ncol=3,nrow=4)) + + colnames(df) <- c('Process','Forest type','Rate (%/year)') + + df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation') + df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest') + df[3] <- c(deg,def,deg_to_def,ref) + + return(df) + +} \ No newline at end of file diff --git a/evaluations/scripts/land_cover_timeseries.R b/evaluations/scripts/land_cover_timeseries.R new file mode 100644 index 0000000..6490bf1 --- /dev/null +++ b/evaluations/scripts/land_cover_timeseries.R @@ -0,0 +1,111 @@ + +get_luc_timeseries <- function(data,t0,tend,type='both'){ + + years_list <- seq(t0,tend) + + if(type=='both'){ + + df <- data.frame(matrix(ncol=4,nrow=8*length(years_list))) + + colnames(df) <- c('year','type','luc','percentage') + + counter <- 1 + + for(year in years_list) { + + for(i in seq (1:4)) { + + for(type_value in c('Project','Counterfactual')) { + + total <- data %>% filter(type == type_value) %>% nrow() + + no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- type_value + df[counter,3] <- i + df[counter,4] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + } else if(type=='single'){ + + df <- data.frame(matrix(ncol=3,nrow=4*length(years_list))) + + colnames(df) <- c('year','luc','percentage') + + counter <- 1 + + for(year in years_list) { + + for(i in seq (1:4)) { + + total <- data %>% nrow() + + no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- i + df[counter,3] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + return(drop_na(df)) + +} + +luc_class1_uncertainty <- function(data,t0,tend) { + + years_list <- seq(t0-10,tend) + + df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list))) + + colnames(df) <- c('year','type','pair','percent_class1') + + counter <- 1 + + for(year in years_list) { + + for(type_value in c('Project','Counterfactual')) { + + for(pair_id in unique(data$pair)) { + + total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow() + + no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow() + + prop <- no_class_i/total + + df[counter,1] <- year + df[counter,2] <- type_value + df[counter,3] <- pair_id + df[counter,4] <- prop*100 + + counter <- counter+1 + + } + + } + + } + + return(drop_na(df)) + +} + diff --git a/evaluations/scripts/plot_matchingvars.R b/evaluations/scripts/plot_matchingvars.R new file mode 100644 index 0000000..ec47f01 --- /dev/null +++ b/evaluations/scripts/plot_matchingvars.R @@ -0,0 +1,42 @@ +plot_matching_variables <- function(data, ex_ante = 'false') { + + cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc')) + cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages + cont_data <- melt(cont_data) + + # rename labels + cont_data$variable <- factor(cont_data$variable, + levels = c('access', 'cpc0_u', 'cpc0_d', + 'slope', 'cpc5_u', 'cpc5_d', + 'elevation', 'cpc10_u', 'cpc10_d')) + + levels(cont_data$variable) <- c('Inaccessibility', + 'Forest~cover~t[0]', + 'Deforestation~t[0]', + 'Slope', + 'Forest~cover~t[-5]', + 'Deforestation~t[-5]', + 'Elevation', + 'Forest~cover~t[-10]', + 'Deforestation~t[-10]') + + # determine labels based on ex_ante + if (ex_ante == 'false') { + plot_labels <- c('Counterfactual', 'Project') + } else if (ex_ante == 'true') { + plot_labels <- c('Matched points', 'Project')} + + # plot + matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) + + geom_density(adjust = 10, size = 1) + + facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) + + ylab('Density') + + scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) + + theme_classic() + + theme(legend.title = element_blank(), + axis.title.x = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank()) + + return(matchingvars) +} \ No newline at end of file diff --git a/evaluations/scripts/plot_transitions.R b/evaluations/scripts/plot_transitions.R new file mode 100644 index 0000000..2931a60 --- /dev/null +++ b/evaluations/scripts/plot_transitions.R @@ -0,0 +1,63 @@ +library(ggspatial) + +plot_transitions <- function(data,t0,period_length,shapefile){ + + # count number of 1s at project start + + t0_index <- grep(paste0('luc_',t0),colnames(data)) + + data_filtered <- data[data[,t0_index]==1,] + + # identify where there have been changes + + tend <- t0 + period_length + + luc_tend <- data_filtered[[paste0('luc_', tend)]] + + response <- case_when( + luc_tend==1 ~ NA, + luc_tend==2 ~ 'deg', + luc_tend==3 ~ 'def', + luc_tend==4 ~ 'ref', + luc_tend>4 ~ NA) + + data_filtered$response <- as.factor(response) + data_filtered <- data_filtered %>% filter(!is.na(response)) + + # adding deg --> def transition + + # count number of 2s at project start + + data_filtered_2s <- data[data[,t0_index]==2,] + + # identify where there have been changes + + luc_tend <- data_filtered_2s[[paste0('luc_', tend)]] + + response <- case_when( + luc_tend==1 ~ NA, + luc_tend==2 ~ NA, + luc_tend==3 ~ 'deg_to_def', + luc_tend==4 ~ NA, + luc_tend>4 ~ NA) + + data_filtered_2s$response <- as.factor(response) + data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response)) + + combined_dat <- bind_rows(data_filtered, data_filtered_2s) + combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref')) + + # plotting + + plot <- combined_dat %>% + filter(response != 0) %>% + ggplot(aes(x=lng,y=lat,colour=response))+ + geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+ + geom_point(alpha=0.5,size=0.5)+ + scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+ + annotation_scale(text_cex = 1.3)+ + theme_void() + + return(plot) + +} diff --git a/evaluations/scripts/std_mean_diff.R b/evaluations/scripts/std_mean_diff.R new file mode 100644 index 0000000..63d81ba --- /dev/null +++ b/evaluations/scripts/std_mean_diff.R @@ -0,0 +1,57 @@ + +std_mean_diff <- function(path_to_pairs) { + + # clean data + + files_full_raw <- list.files(path_to_pairs, + pattern='*.parquet',full.names=T,recursive=F) + files_full <- files_full_raw[!grepl('matchless',files_full_raw)] + files_short_raw <- list.files(path=path_to_pairs, + pattern='*.parquet',full.names=F,recursive=F) + files_short <- files_short_raw[!grepl('matchless',files_short_raw)] + + # initialise dfs + + vars <- c(colnames(read_parquet(files_full[1])),'pair') + df <- data.frame(matrix(ncol=length(vars),nrow=0)) + colnames(df) <- vars + + for(j in 1:length(files_full)){ + + # read in all parquet files for a given project + + f <- data.frame(read_parquet(files_full[j])) + + # append data to bottom of df + + df <- bind_rows(df,f) + + } + + # calculate smd + + smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) + + variables <- c('cpc10_d','cpc5_d','cpc0_d', + 'cpc10_u','cpc5_u','cpc0_u', + 'access','slope','elevation') + + for (var in variables) { + k_var <- df[[paste0("k_", var)]] + s_var <- df[[paste0("s_", var)]] + + k_mean <- mean(k_var, na.rm = TRUE) + s_mean <- mean(s_var, na.rm = TRUE) + k_sd <- sd(k_var, na.rm = TRUE) + s_sd <- sd(s_var, na.rm = TRUE) + + pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) + smd <- (k_mean - s_mean) / pooled_sd + + smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) + } + + return(smd_results) +} + + \ No newline at end of file From 435ebfbcba15eb2b0f93261ebf03d806ffd201c4 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Wed, 28 Aug 2024 09:46:28 +0000 Subject: [PATCH 16/19] Deleted unnecessary stuff --- scripts/ex_ante_evaluation_template.Rmd | 1001 ----------------------- scripts/scripts/def_rate.R | 328 -------- scripts/scripts/land_cover_timeseries.R | 111 --- scripts/scripts/plot_matchingvars.R | 42 - scripts/scripts/plot_transitions.R | 63 -- scripts/scripts/std_mean_diff.R | 57 -- 6 files changed, 1602 deletions(-) delete mode 100644 scripts/ex_ante_evaluation_template.Rmd delete mode 100644 scripts/scripts/def_rate.R delete mode 100644 scripts/scripts/land_cover_timeseries.R delete mode 100644 scripts/scripts/plot_matchingvars.R delete mode 100644 scripts/scripts/plot_transitions.R delete mode 100644 scripts/scripts/std_mean_diff.R diff --git a/scripts/ex_ante_evaluation_template.Rmd b/scripts/ex_ante_evaluation_template.Rmd deleted file mode 100644 index be95ce1..0000000 --- a/scripts/ex_ante_evaluation_template.Rmd +++ /dev/null @@ -1,1001 +0,0 @@ ---- -output: - html_document: - theme: spacelab - df_print: paged - toc: yes - toc_float: yes - pdf_document: - toc: yes -params: - proj: null - t0: null - input_dir: null - output_dir: null - fullname: null - country_path: null - shapefile_path: null - pairs_path: null - carbon_density_path: null - branch: null ---- - -```{r include=FALSE} - -# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A SHELL TERMINAL: - -# Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd', output_file='~/evaluations/R/example_project.html', params=list(proj='example_project', t0=2010, ...))" - -# Mandatory args: proj, t0 -# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path -# You must either specify input dir and output dir OR provide absolute paths to each of the objects required - -``` - -```{r settings, include=FALSE} -knitr::opts_chunk$set( - echo = FALSE, warning=FALSE,message=FALSE) - -library(tidyverse) -library(sf) -library(reshape2) -library(maps) -library(mapdata) -library(ggspatial) -library(arrow) -library(rnaturalearth) -library(rnaturalearthdata) -library(rnaturalearthhires) -library(stringr) -library(jsonlite) -library(countrycode) -library(scales) -library(here) -library(patchwork) -library(knitr) -library(kableExtra) - -``` - -```{r read_inputs, echo=FALSE,warning=FALSE, message=FALSE} - -project_name <- params$proj -start_year <- as.numeric(params$t0) -branch <- params$branch - -``` - ---- -title: "`r paste0('4C Ex-Ante Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`" -subtitle: "`r format(Sys.Date(), "%B %Y")`" ---- - -```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE} - -# get output format - -output_format <- ifelse(knitr::is_latex_output(), "latex", "html") - -# get script path - -script_path <- here('scripts') - -# get explainer diagram path - -diagram_path <- here('methods_diagram.png') - -# get data path - -if (!is.null(params$output_dir)) { - data_path <- paste0(params$output_dir,'/',project_name) -} - -# get path to pairs - -if (!is.null(params$pairs_path)) { - pairs_path <- params$pairs_path -} else { pairs_path <- file.path(data_path,'pairs') } - -# read shapefile - -if (!is.null(params$input_dir)) { - input_dir <- params$input_dir -} - -if (!is.null(params$shapefile_path)) { - shapefile_path <- params$shapefile_path -} else { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') } -shapefile <- read_sf(shapefile_path) - -# read carbon density - -if (!is.null(params$carbon_density_path)) { - carbon_density_path <- params$carbon_density_path -} else { carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] } -carbon_density <- read.csv(carbon_density_path) - -# read country path - -if (!is.null(params$country_path)) { - country_path <- params$country_path -} else { country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)} - -``` - -```{r read_pairs, echo=FALSE} - -# get filenames and filter for matched points - -files_full_raw <- list.files(pairs_path, - pattern='*.parquet',full.names=T,recursive=F) -files_full <- files_full_raw[!grepl('matchless',files_full_raw)] -files_short_raw <- list.files(path=pairs_path, - pattern='*.parquet',full.names=F,recursive=F) -files_short <- files_short_raw[!grepl('matchless',files_short_raw)] - -# initialise dfs - -vars <- c(colnames(read_parquet(files_full[1])),'pair') -paired_data_raw <- data.frame(matrix(ncol = length(vars), nrow = 0)) %>% - setNames(vars) %>% - mutate( - pair = as.factor(pair), - k_trt = as.factor(k_trt), - s_trt = as.factor(s_trt) - ) - -for(j in 1:length(files_full)){ - - # read parquet file - - f <- data.frame(read_parquet(files_full[j]),check.names = FALSE) - - # add identity column - - f$pair <- as.factor(c(replicate(nrow(f),str_remove(files_short[j], "\\.parquet$")))) - - # append data to bottom of df - - paired_data_raw <- bind_rows(paired_data_raw,f) - -} - -# generate separate datasets for project and counterfactual - -project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair) -cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair) - -# create project-counterfactual merged dataset - -colnames(cf) <- colnames(project) -pair_merged <- bind_rows(project,cf) -names(pair_merged) <- str_sub(names(pair_merged),3) -names(pair_merged)[names(pair_merged) == "ir"] <- "pair" - -# add type column and remove excess cols - -data <- pair_merged %>% - mutate(type=c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual'))) %>% - select(-c(contains('trt'),ID)) - -``` - -```{r get_shapefile_area, echo=FALSE} - -project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000) - -``` - -```{r get_country_names} - -# define function for extracting country names - -get_country_names <- function(country_codes_path) { - codes <- as.character(fromJSON(country_codes_path)) - country_names <- countrycode(codes, 'iso2c', 'country.name') - country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo' - return(country_names) - } - -# get country names - -country_vec <- get_country_names(country_path) - - # define function for printing the country names if there are multiple - - if (length(country_vec) > 1) { - country_string <- paste(country_vec[-length(country_vec)], collapse = ", ") - country_string <- paste(country_string, "and", country_vec[length(country_vec)]) - } else { - country_string <- country_vec[1] - } - - -``` - -\ - -# Introduction - -This Report has been prepared by researchers at the Cambridge Centre for Carbon Credits (4C) and has been funded by a charitable grant from the Tezos Foundation. 4C utilises innovative, evidence-based approaches to examine the scientific basis of nature-based carbon conservation initiatives and, insodoing, provides a way for different stakeholders to assess the quality of carbon credits (ex post and/or ex ante). - -**Disclaimer: Nothing in this Report constitutes formal advice or recommendations, an endorsement of proposed action, or intention to collaborate; instead, it sets out the details of an evaluation using a method which is still under development. The Report is considered complete as of the publication date shown, though methods are likely to change in future.** - -\ - -# About the project - -`r if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() }` is located in `r country_string`. The proposed area measures `r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. - -For the purposes of this evaluation, we have set the proposed start date to `r start_year`. - -```{r echo=FALSE} - -# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________ - -# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document. - -``` - - - -\ - -# Introduction to the 4C method - -*Our method for forecasting ex-ante additionality remains under development.* - -The 4C approach to forecasting additionality involves identifying places that experienced similar deforestation levels in the past as the project area does today. We start by analyzing forest cover changes in the project area between 10 years ago and the present day. Using pixel-matching techniques, we then identify comparable places outside the project that experienced similar deforestation trends between 20 and 10 years ago (the *matching period*). This allows us to match the deforestation trajectory of the project with that of the matched pixels, but offset in time. This concept is illustrated by the left-hand diagonal arrow in the figure below. - -We can consider the matched pixels as a historical representation of the project as it is today. By examining deforestation in the matched pixels over the subsequent 10 years (the *baseline period*), we estimate a *baseline prediction* — the deforestation expected in the project area under the counterfactual (business-as-usual) scenario. This rate is then projected forward over the next 10 years, as illustrated by the right-hand diagonal arrow in the figure below. We convert the deforestation rate to carbon dioxide emissions using best estimates of carbon density. - -```{r, echo=FALSE, fig.align='center', fig.width=6} - -knitr::include_graphics(diagram_path) - -``` - - -Making predictions about future deforestation is challenging, and there are multiple sources of uncertainty at play. These include: the quantification of carbon, the choice of matching pixels, the effect of leakage and impermanence, future political changes and market forces. We are constantly improving our method in order to minimise these uncertainties. Due to the inherent uncertainty associated with ex-ante (before-the-fact) predictions, carbon credits should only ever be quantified and issued ex-post (after the fact). - -More information about 4C's approach to impact evaluation can be found below. - -[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1) - -[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence) - -[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae) - -[Our paper on the social value of impermanent carbon credits](https://www.nature.com/articles/s41558-023-01815-0) - -[The PACT methodology for ex-post evaluations](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f) - -\ - - -# Methods - -The following sections will detail how we arrived at a forecast of future deforestation and the potential to generate additionality by reducing this deforestation. This includes the location and quality of the matched points, the deforestation rates in each set of points, and the carbon density values used to convert these deforestation rates to carbon emissions reductions. - -\ - -### Location of matched points - -We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. We used these matched points to make a prediction of the counterfactual scenario for deforestation. - -Below we show the location of the matching points (shown in blue) relative to the project (shown in red), both at the country and project scale. - -`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}` - -```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE} - -# downsample no. of points by 90% - -if(nrow(data) > 20000){ - data_forplot <- data %>% sample_frac(0.1) -} else { - data_forplot <- data -} - -# plot location of matching points - -country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large") - -# transform crs - -shapefile <- st_transform(shapefile, st_crs(country_map)) - -ggplot(data=country_map) + - geom_sf(colour=NA,fill='grey80')+ - geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + - scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+ - coord_sf()+ - theme_void()+ - annotation_scale(text_cex=1.5,location='bl')+ - theme(legend.title = element_blank(), - text=element_text(size=16), - legend.position='none') - -xmin <- filter(data, type=='Project') %>% select(lng) %>% min() -xmax <- filter(data, type=='Project') %>% select(lng) %>% max() -ymin <- filter(data, type=='Project') %>% select(lat) %>% min() -ymax <- filter(data, type=='Project') %>% select(lat) %>% max() - -ggplot(data=country_map) + - geom_sf(colour=NA,fill='grey80')+ - geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + - scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+ - coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+ - theme_void()+ - annotation_scale(text_cex=1.5,location='bl')+ - theme(legend.title = element_blank(), - text=element_text(size=16), - legend.position='none') - -``` - -### Quality of matches - -Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the matched points (shown in blue) indicates that the the matched points are composed of places that are similar to the project in terms of the drivers of deforestation and are expected to exhibit similar deforestation trends. - -- Inaccessibility (motorized travel time to healthcare, minutes) - -- Slope ($^\circ$) - -- Elevation (meters) - -- Forest cover at t0 (start year, %) - -- Deforestation at t0 (%) - -- Forest cover at t-5 (5 years prior to start year, %) - -- Deforestation at t-5 (%) - -- Forest cover at t-10 (10 years prior to start year, %) - -- Deforestation at t-10 (%) - -Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset. - -More information about the datasets we use can be found below: - -[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/) - -[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm) - -[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF) - -\ - -```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE} - -# plot matches - -source(file.path(script_path,'plot_matchingvars.R')) - -plot_matching_variables(data,ex_ante='true') - -``` - -\ - -### Standardised mean differences - -We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). The SMD allows us to quantify the similarity between the project and the matched points in a way that is comparable across variables. - -In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and matched points, in standard deviations) for each variable. Values further from zero indicate a larger difference. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our SMDs would ideally fall in order for the project and matched points to be considered well-matched. - -\ - -```{r smd} - -std_mean_diff <- function(pairs_path) { - - # clean data - - files_full_raw <- list.files(pairs_path, - pattern='*.parquet',full.names=T,recursive=F) - files_full <- files_full_raw[!grepl('matchless',files_full_raw)] - files_short_raw <- list.files(path=pairs_path, - pattern='*.parquet',full.names=F,recursive=F) - files_short <- files_short_raw[!grepl('matchless',files_short_raw)] - - # initialise dfs - - vars <- c(colnames(read_parquet(files_full[1])),'pair') - df <- data.frame(matrix(ncol=length(vars),nrow=0)) %>% - setNames(vars) %>% - mutate(k_trt=as.factor(k_trt), - s_trt=as.factor(s_trt)) - - for(j in 1:length(files_full)){ - - # read in all parquet files for a given project - - f <- data.frame(read_parquet(files_full[j])) %>% - mutate(k_trt=as.factor(k_trt), - s_trt=as.factor(s_trt)) - - # append data to bottom of df - - df <- bind_rows(df,f) - - } - - # calculate smd - - smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) - - variables <- c('cpc10_d','cpc5_d','cpc0_d', - 'cpc10_u','cpc5_u','cpc0_u', - 'access','slope','elevation') - - for (var in variables) { - k_var <- df[[paste0("k_", var)]] - s_var <- df[[paste0("s_", var)]] - - k_mean <- mean(k_var, na.rm = TRUE) - s_mean <- mean(s_var, na.rm = TRUE) - k_sd <- sd(k_var, na.rm = TRUE) - s_sd <- sd(s_var, na.rm = TRUE) - - pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) - smd <- (k_mean - s_mean) / pooled_sd - - smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) - } - - return(smd_results) -} - -results <- std_mean_diff(pairs_path) - -# changing sign for interpretation - -results$smd <- (-1)*results$smd - -# changing order of variables - -variables <- c('cpc10_d','cpc5_d','cpc0_d', - 'cpc10_u','cpc5_u','cpc0_u', - 'access','slope','elevation') - -results$variable <- factor(results$variable, levels=variables) - -# plotting - - ggplot(results,aes(x=smd,y=variable))+ - #geom_boxplot(outlier.shape=NA,colour='blue')+ - geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+ - geom_vline(xintercept=0)+ - geom_vline(xintercept=0.25,lty=2,colour='grey30')+ - geom_vline(xintercept=-0.25,lty=2,colour='grey30')+ - scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")), - bquote(Deforestation~t[-5]~("%")), - bquote(Deforestation~t[0]~("%")), - bquote(Forest~cover~t[-10]~("%")), - bquote(Forest~cover~t[-5]~("%")), - bquote(Forest~cover~t[0]~("%")), - 'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+ - xlab('Standardised mean difference')+ - xlim(-1,1)+ - theme_classic()+ - theme(axis.title.y=element_blank(), - legend.title=element_blank(), - legend.box.background=element_rect(), - legend.position='none', - text=element_text(size=14), - axis.text.y=element_text(size=14)) - - -``` - -\ - -### Deforestation within the project - -Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes: - -- Undisturbed forest to degraded forest - -- Degraded forest to deforested land - -- Undisturbed forest to deforested land - -- Undisturbed land to reforested land (which indicates that regrowth occurred after a deforestation event) - -\ - -These transitions are shown in the plot below for the 10-year period between `r start_year-10` and `r start_year`. They are overlaid on the project area which is shown in grey. If a transition is not shown, it did not occur in the period examined. - -Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF). - -\ - -```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE} - -# plot deforestation within project - -source(file.path(script_path,'plot_transitions.R')) - -proj_coords <- data %>% - filter(type=='Project') %>% - select(lat,lng) - -proj_input_defplot <- data %>% - filter(type=='Project') %>% - select(contains('luc')) %>% - setNames(paste0("luc_", (start_year-10):start_year)) %>% - cbind(proj_coords) - -proj_input_defplot <- proj_input_defplot[, !is.na(colnames(proj_input_defplot))] - -plot_transitions(data=proj_input_defplot,t0=start_year-10,period_length=10,shapefile=shapefile) - -``` - -\ - -### Land cover changes within project and matched pixels - -In the below plots, we show the changes in land classes over time for both the project (red) and the matched points (blue). - -Note the following: - -- The vertical grey dashed line represents the start year of the project (`r start_year`). The timings shown on the x-axis are relative to this start year. - -- As explained in the 'Methods' section, the matched points are offset in time relative to the project by 10 years. This means that all changes observed in the matched points happened 10 years prior to the equivalent time point in the project. This time offset allows us to use the last 10 years in the matched points as a prediction of the next 10 years for the project. - -- Solid lines represent ex-post observed changes, whereas the dotted line represents the prediction for the future of the project. - -```{r make_inputs, echo=FALSE} - -# preparing inputs - -proj_input <- data %>% - filter(type=='Project') %>% - select(contains('luc')) %>% - setNames(paste0("luc_", (start_year-10):start_year)) -proj_input <- proj_input[, !is.na(colnames(proj_input))] - - -cf_input <- data %>% - filter(type=='Counterfactual') %>% - select(contains('luc')) %>% - setNames(paste0("luc_", (start_year-20):(start_year))) %>% - select(where(~ all(!is.na(.)))) - -``` - -```{r luc_timeseries_all, echo=FALSE} - -source(file.path(script_path,'land_cover_timeseries.R')) - -# getting results - -proj_results <- get_luc_timeseries(proj_input,t0=start_year-10,tend=start_year,type='single') %>% - mutate(type='Project') - -cf_results <- get_luc_timeseries(cf_input,t0=start_year-20,tend=start_year,type='single') %>% - mutate(type='Counterfactual') - -# combining results - -results <- bind_rows(proj_results, cf_results) - -``` - -Showing the trend for undisturbed, degraded, deforested and regrowth in turn: - -```{r undisturbed_timeseries, fig.width=8,fig.height=13} - -# add prediction from the matched pixels: - -prediction <- cf_results %>% - filter(year >= (start_year-10)) %>% - mutate(type='Project', - year=year+10) - -results <- bind_rows(results,prediction) - -# make a custom function for plotting the results - -plot_timeseries <- function(luc_value, title_str) { - - #remove gap between solid and dotted project line - percent_val <- results %>% - filter(year == start_year - & type == "Project" - & luc == luc_value) %>% - pull(percentage) - - # df wrangling - extended_results <- results %>% - mutate( - luc = as.numeric(luc), - year = as.numeric(year), - line_type = ifelse(type == "Project" & year > start_year, "dotted", "solid"), - type = case_when( - type == "Counterfactual" ~ "Matched points", - TRUE ~ type - ) - ) %>% - bind_rows(data.frame( - year = start_year, - luc = luc_value, - percentage = percent_val, - type = 'Project', - line_type = 'dotted' - )) - - extended_results %>% - filter(luc == luc_value) %>% - ggplot(aes(x = year, y = percentage, color = type, linetype = line_type)) + - geom_line(linewidth = 1.5) + - geom_vline(xintercept = start_year, linetype = 2, color = 'grey30') + - #geom_vline(xintercept = start_year-10, linetype = 2, color = 'grey30') + - scale_colour_manual(name = 'Location', - values = c('red','blue'), - breaks = c('Project', 'Matched points'), - labels = c('Project', 'Matched points'))+ - xlab('Year') + - ylab('% cover') + - ggtitle(title_str) + - guides(linetype = "none") + - theme_classic() + - scale_linetype_manual(values = c("solid" = "solid", "dotted" = "dotted"))+ - facet_wrap(~type)+ - xlim(start_year-20,start_year+10) - -} - -plot_1 <- plot_timeseries(luc_value=1, title_str='Undisturbed forest') + theme(legend.position='none',axis.title.x = element_blank()) -plot_2 <- plot_timeseries(luc_value=2, title_str='Degraded forest') + theme(legend.position='none', axis.title.x = element_blank()) -plot_3 <- plot_timeseries(luc_value=3, title_str='Deforested land') + theme(legend.position='none', axis.title.x = element_blank()) -plot_4 <- plot_timeseries(luc_value=4, title_str='Regrowth') + theme(legend.position='none', axis.title.x = element_text(size=14)) - -plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(ncol=1) - -``` - -### Deforestation rates in the matched points during the baseline period - -```{r proportions_undisturbed_degraded, echo=FALSE} - -# obtaining the area of undisturbed and degraded forest at t0, for use later - -source(file.path(script_path,'def_rate.R')) - -prop_und <- get_prop_class(data=proj_input,t0=start_year-10,class=1) -prop_deg <- get_prop_class(data=proj_input,t0=start_year-10,class=2) - -``` - -Here we present the deforestation rates observed in the matched pixels over the past 10 years (the baseline period). - -Forest loss transitions can be broken down into the following processes: - -- degradation of undisturbed forest - -- deforestation of undisturbed forest - -- deforestation of degraded forest - -- regrowth of undisturbed forest (implies previous deforestation) - -We calculate the rate at which these processes occur in the matched pixels using the following method: - -1. Calculate the percentage of matched pixels which have undergone one of the above processes (according to the JRC classification) during the baseline period. -2. Divide this percentage by 10 to give an annual rate of change, in %/year, relative to the amount of (undisturbed or degraded) forest present at the beginning of the project. -3. Multiply this rate by the area of the (undisturbed or degraded) forest 10 years prior to the project start to give an annual rate, in hectares per year. - -The amounts of forest in the project area 10 years prior to project start are as follows: - -- Undisturbed forest: `r format(100*prop_und, big.mark = ",", scientific = FALSE, digits = 3)`% - -- Degraded forest: `r format(100*prop_deg, big.mark = ",", scientific = FALSE, digits = 3)`% - -The rates are given below. - -```{r rate_of_forest_loss_ha, echo=FALSE} - -source(file.path(script_path,'def_rate.R')) - -df_rate_percent <- def_rate_single(data=cf_input,t0=start_year-10,period_length=10) - -df_rate_ha <- df_rate_percent - -df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3]/100)*project_area_ha*prop_und - -df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3]/100)*project_area_ha*prop_deg - -knitr::kable( - df_rate_ha %>% - rename('Rate (ha/year)' = 3) %>% - mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) -) - - -``` - -\ - -### Carbon stock changes in the matched points during the baseline period - -Here we present the carbon density calculations for this project. - -In order to convert land cover changes to carbon emissions, we use regional aboveground (AGB) carbon density values generated through NASA GEDI data. - -More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi). - -Note that, in calculating carbon stock changes, we assume the following: - -- Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997) - -- Deadwod biomass is 11% of AGB (based on IPCC 2003) - -- Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011) - - -\ -```{r additionality_forecast} - -baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8)) -colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea') -luc_counter <- 1 -row_counter <- 1 - -carbon_density <- filter(carbon_density, land.use.class %in% c(1:6)) - -for(i in carbon_density$land.use.class){ - - for(j in c("Start","End")) { - - # get agb - - agb <- carbon_density$carbon.density[luc_counter] - - # get other values - - bgb <- agb*0.2 - dw <- agb*0.11 - total <- agb + bgb + dw - #total_co2 <- total*0.47 # we're doing this step later - - # get area of class i - - if (j == "Start") { - area_of_forest <- get_prop_class(cf_input,t0=start_year-10,class=i)*project_area_ha - } else if (j == "End") { - area_of_forest <- get_prop_class(cf_input,t0=start_year,class=i)*project_area_ha } - - # multiply total by area - - total_byarea <- total*area_of_forest - - # adding to df - - baseline_stocks[row_counter,1] <- j - baseline_stocks[row_counter,2] <- i - baseline_stocks[row_counter,3] <- agb - baseline_stocks[row_counter,4] <- bgb - baseline_stocks[row_counter,5] <- dw - baseline_stocks[row_counter,6] <- total - baseline_stocks[row_counter,7] <- area_of_forest - baseline_stocks[row_counter,8] <- total_byarea - - row_counter <- row_counter+1 - - } - - # advance counter - - luc_counter <- luc_counter + 1 - -} - -# formatting bits - -baseline_stocks_format <- baseline_stocks -baseline_stocks_format <- baseline_stocks_format %>% filter(time == 'Start') -baseline_stocks_format <- baseline_stocks_format[2:6] - -colnames(baseline_stocks_format) <- c( - 'Land use class', - 'AGB density (t C / ha)', - 'BGB density (t C / ha)', - 'Deadwood biomass density (t C / ha)', - 'Total biomass density (t C / ha)', - 'Total biomass (t C)') - - -# renaming classes - -baseline_stocks_format <- baseline_stocks_format %>% - mutate(`Land use class` = case_when( - `Land use class` == "1" ~ 'Undisturbed', - `Land use class` == "2" ~ 'Degraded', - `Land use class` == "3" ~ 'Deforested', - `Land use class` == "4" ~ 'Reforested', - `Land use class` == "5" ~ 'Water', - `Land use class` == "6" ~ 'Other', - TRUE ~ as.character(`Land use class`) # ensure no unexpected values - )) - - -baseline_stocks_format[2:6] <- lapply(baseline_stocks_format[, 2:6], function(x) { - if (is.numeric(x)) comma(x) else x -}) - -# Print only carbon calculations at this stage - -baseline_stocks_format %>% - drop_na() %>% - mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) %>% - kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>% - kable_styling(bootstrap_options = "striped") - -``` - -# Results: baseline rate of carbon emissions - -In this section we present the annual rate of carbon loss due to deforestation in the matched points during the baseline period. We can take this to be a prediction of the counterfactual scenario for the project (the *baseline*). - -First we present the carbon stock changes observed in the matched points during the baseline period: - -```{r results} - -baseline_stock_changes <- baseline_stocks[c(1:2,7:8)] - -# reshape - -reshaped_data <- baseline_stock_changes %>% - mutate(luc = as.character(luc)) %>% - group_by(luc) %>% - summarize( - area_start = area[time == "Start"], - area_end = area[time == "End"], - area_diff = area_start - area_end, - c_start = total_byarea[time == "Start"], - c_end = total_byarea[time == "End"], - c_diff = c_start - c_end, - .groups = 'drop' - ) - -# get totals - -total_row <- reshaped_data %>% - summarize( - luc = "Total", - area_start = sum(area_start, na.rm = TRUE), - area_end = sum(area_end, na.rm = TRUE), - area_diff = sum(area_diff, na.rm = TRUE), - c_start = sum(c_start, na.rm = TRUE), - c_end = sum(c_end, na.rm = TRUE), - c_diff = sum(c_diff, na.rm = TRUE) - ) %>% - mutate(luc = as.character(luc)) - -baseline_stock_changes <- bind_rows(reshaped_data, total_row) - -# add in conversion to CO2 - -baseline_stock_changes <- baseline_stock_changes %>% - mutate(co2_diff = 0.47*c_diff) - -# formatting bits - -baseline_stock_changes_format <- baseline_stock_changes %>% - mutate(across(where(is.numeric), ~ comma(.))) %>% - mutate(across(where(is.numeric), ~ sprintf("%.2f", as.numeric(.)))) - -if (knitr::is_html_output()) { - colnames(baseline_stock_changes_format) <- c( - 'Land use class', - 'Area at start (ha)', - 'Area at end (ha)', - 'Area loss (ha)', - 'Biomass at start (t)', - 'Biomass at end (t)', - 'Biomass loss (t)' - 'CO2 loss (t)') -} else if (knitr::is_latex_output()) { - colnames(baseline_stock_changes_format) <- c( - 'Land use class', - 'Area at start (ha)', - 'Area at end (ha)', - 'Area loss (ha)', - 'Biomass at start (t)', - 'Biomass at end (t)', - 'Biomass loss (t)' - 'CO$_{2}$ loss (t)') -} - -baseline_stock_changes_format <- baseline_stock_changes_format %>% - mutate(`Land use class` = case_when( - `Land use class` == "1" ~ 'Undisturbed', - `Land use class` == "2" ~ 'Degraded', - `Land use class` == "3" ~ 'Deforested', - `Land use class` == "4" ~ 'Reforested', - `Land use class` == "5" ~ 'Water', - `Land use class` == "6" ~ 'Other', - TRUE ~ as.character(`Land use class`) # ensure no unexpected values - )) - -baseline_stock_changes_format[nrow(baseline_stock_changes_format), 1] <- 'Total' - -filtered_data <- baseline_stock_changes_format %>% - drop_na() %>% - mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) - -last_row_index <- nrow(filtered_data) - -filtered_data %>% - kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>% - kable_styling(bootstrap_options = "striped") %>% - row_spec(last_row_index, bold = TRUE) - -``` - -```{r results_summary} - -# find the difference - -delta_c <- as.numeric(baseline_stock_changes[nrow(baseline_stock_changes), ncol(baseline_stock_changes)]) -delta_c_annual <- delta_c/10 - -``` - -To calculate the baseline annual rate of carbon emissions, we sum the the differences in carbon stocks between the start and end of the baseline period, then divide the total by the length of the baseline period (10 years). - -**For this project, the baseline annual rate of carbon emissions, in tonnes of carbon dioxide per year, is `r format(delta_c_annual, big.mark = ",", scientific = FALSE, digits = 3)`.** This should be interpreted as the maximum estimated number of carbon credits that could be generated if the project mitigated 100% of the baseline level of deforestation, assuming this is confirmed by ex post observations. We present alternative mitigation scenarios below. - -### Expected additionality under different mitigation scenarios - -Additionality depends not only on baseline deforestation rate but the ability of the project to mitigate that deforestation. Therefore, we present the expected additionality under different mitigation scenarios (100% to 10% mitigation success). The 100% mitigation scenario occurs where the project is able to mitigate all of the deforestation that would have occurred under the counterfactual scenario. This scenario is unlikely to be realistic, but gives a sense of the deforestation risk faced by each area. It is more likely that only a proportion of this baseline deforestation is mitigated - for example, under the 50% mitigation scenario, the project is able to reduce the deforestation rate by half. The higher the proportion of the baseline deforestation that is mitigated, the greater the additionality of a project. - -Note that we present the raw additionality, without accounting for leakage and impermenance (discussed later). - -We are in the process of producing confidence intervals that reflect the uncertainty associated with the baseline, which will be added to future revisions of this document. - -```{r} - -scenarios <- data.frame(matrix(ncol=2,nrow=5)) -scenarios[1] <- c("10%","25%","50%","75%","100%") -scenarios[2] <- delta_c_annual*c(0.1,0.25,0.5,0.75,1) - -if (knitr::is_html_output()) { - colnames(scenarios) <- c('Scenario', - 'Additionality (t CO2 / year)') -} else if (knitr::is_latex_output()) { - colnames(scenarios) <- c('Scenario', - 'Additionality (t CO$_{2}$ / year)') -} - -scenarios <- scenarios %>% - mutate(across(where(is.numeric), comma)) - -knitr::kable( - scenarios -) - -``` - -\ - -# Accounting for leakage and impermanence - -Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**. - -**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage is likely to be lower if the processes leading deforestation and degradation do not result in high yielding land uses, or if the carbon densities within the project are high compared with those in other areas where these activities are taking place. Leakage can also be reduced by interventions which improve yields in areas already under production. We can provide guidance on how this could be achieved. - -**Impermanence** occurs when the additionality generated by a project is reversed. Additional carbon stocks in forests are inherently vulnerable to these reversals. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. In future revisions of this document we aim to include indicative estimates of the equivalent permanence (the relative value of a impermanent credit relative to a permanent credit) for this project. - -You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence). - ---- - -### Reproducibility - -This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation). diff --git a/scripts/scripts/def_rate.R b/scripts/scripts/def_rate.R deleted file mode 100644 index 6a4c417..0000000 --- a/scripts/scripts/def_rate.R +++ /dev/null @@ -1,328 +0,0 @@ - - - -def_rate <- function(data,t0,period_length,process='all'){ - - # get name of column for start year - - t0_index <- grep(paste0('luc_',t0),colnames(data)) - - # filter down to pixels with undisturbed forest (JRC class 1) - - data_filtered <- data[data[,t0_index]==1,] - - # count 1s at t0 in project and match - - proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() - cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() - - # identify where there have been changes during the evaluation period - - tend <- t0 + period_length - - luc_tend <- data_filtered %>% - select(paste0('luc_',tend)) - - # choosing processes to measure - - if(process=='def_only'){ - - response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 0, - luc_tend==3 ~ 1, - luc_tend==4 ~ 0, - luc_tend>4 ~ 0) - - } else if(process=='deg_only'){ - - response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 1, - luc_tend==3 ~ 0, - luc_tend==4 ~ 0, - luc_tend>4 ~ 0) - - } else { - - response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 1, - luc_tend==3 ~ 1, - luc_tend==4 ~ 1, - luc_tend>4 ~ 0) - - } - - - data_filtered$response <- response - - # count up number of pixels where there have been changes for each type - - proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% - nrow() - cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% - nrow() - - # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage - - proj_rate <- 100*(proj_changes/proj_1s)/period_length - cf_rate <- 100*(cf_changes/cf_1s)/period_length - - # make df - - df <- data.frame(matrix(ncol=2,nrow=1)) - colnames(df) <- c('Project','Counterfactual') - df[1,1] <- proj_rate - df[1,2] <- cf_rate - - return(df) - -} - - - -def_rate_seperate <- function(data,t0,period_length){ - - # get name of column for start year - - t0_index <- grep(paste0('luc_',t0),colnames(data)) - - # filter down to pixels with undisturbed forest (JRC class 1) - - data_filtered <- data[data[,t0_index]==1,] - - # count 1s at t0 in project and cf - - proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() - cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() - - # identify where there have been changes during the evaluation period - - tend <- t0 + period_length - - luc_tend <- data_filtered %>% - select(paste0('luc_',tend)) - - # measuring responses - - def_response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 0, - luc_tend==3 ~ 1, - luc_tend==4 ~ 0, - luc_tend>4 ~ 0) - - deg_response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 1, - luc_tend==3 ~ 0, - luc_tend==4 ~ 0, - luc_tend>4 ~ 0) - - ref_response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 0, - luc_tend==3 ~ 0, - luc_tend==4 ~ 1, - luc_tend>4 ~ 0) - - data_filtered$def_response <- def_response - data_filtered$deg_response <- deg_response - data_filtered$ref_response <- ref_response - - # count up number of pixels where there have been changes for each type - - proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% - nrow() - cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% - nrow() - - proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% - nrow() - cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% - nrow() - - proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% - nrow() - cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% - nrow() - - # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage - - proj_def <- 100*(proj_def_changes/proj_1s)/period_length - cf_def <- 100*(cf_def_changes/cf_1s)/period_length - - proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length - cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length - - proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length - cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length - - # adding the degraded-to-deforested transition - - data_filtered_2 <- data[data[,t0_index]==2,] - - # count 2s at t0 in project and cf - - proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow() - cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() - - # identify where there have been changes during the evaluation period - - luc_tend_2 <- data_filtered_2 %>% - select(paste0('luc_',tend)) - - def_response_2 <- case_when( - luc_tend_2==1 ~ 0, - luc_tend_2==2 ~ 0, - luc_tend_2==3 ~ 1, - luc_tend_2==4 ~ 0, - luc_tend_2>4 ~ 0) - - data_filtered_2$def_response_2 <- def_response_2 - - proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% - nrow() - cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% - nrow() - - proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length - cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length - - # make df - - df <- data.frame(matrix(ncol=4,nrow=8)) - - colnames(df) <- c('Process','Forest type','Location','Rate (%/year)') - - df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2)) - df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2)) - df[3] <- c(rep(c('Project','Counterfactual'),times=4)) - df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref) - - return(df) - -} - -get_prop_class <- function(data,t0,class){ - - t0_index <- grep(paste0('luc_',t0),colnames(data)) - data_filtered <- data[data[,t0_index]==class,] - - total_count <- data %>% nrow() - class_count <- data_filtered %>% nrow() - prop <- class_count/total_count - - return(prop) - -} - - -def_rate_single <- function(data,t0,period_length){ - - # get name of column for start year - - t0_index <- grep(paste0('luc_',t0),colnames(data)) - - # filter down to pixels with undisturbed forest (JRC class 1) - - data_filtered <- data[data[,t0_index]==1,] - - # count 1s at t0 in project and cf - - no_1s <- nrow(data_filtered) - - # identify where there have been changes during the evaluation period - - tend <- t0 + period_length - - luc_tend <- data_filtered %>% - select(paste0('luc_',tend)) - - # measuring responses - - def_response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 0, - luc_tend==3 ~ 1, - luc_tend==4 ~ 0, - luc_tend>4 ~ 0) - - deg_response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 1, - luc_tend==3 ~ 0, - luc_tend==4 ~ 0, - luc_tend>4 ~ 0) - - ref_response <- case_when( - luc_tend==1 ~ 0, - luc_tend==2 ~ 0, - luc_tend==3 ~ 0, - luc_tend==4 ~ 1, - luc_tend>4 ~ 0) - - data_filtered$def_response <- def_response - data_filtered$deg_response <- deg_response - data_filtered$ref_response <- ref_response - - # count up number of pixels where there have been changes for each type - - def_changes <- data_filtered %>% filter(def_response==1) %>% - nrow() - - deg_changes <- data_filtered %>% filter(deg_response==1) %>% - nrow() - - ref_changes <- data_filtered %>% filter(ref_response==1) %>% - nrow() - - # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage - - def <- 100*(def_changes/no_1s)/period_length - - deg <- 100*(deg_changes/no_1s)/period_length - - ref <- 100*(ref_changes/no_1s)/period_length - - # adding the degraded-to-deforested transition - - data_filtered_2 <- data[data[,t0_index]==2,] - - # count 2s at t0 in project and cf - - no_2s <- data_filtered_2 %>% nrow() - - # identify where there have been changes during the evaluation period - - luc_tend_2 <- data_filtered_2 %>% - select(paste0('luc_',tend)) - - def_response_2 <- case_when( - luc_tend_2==1 ~ 0, - luc_tend_2==2 ~ 0, - luc_tend_2==3 ~ 1, - luc_tend_2==4 ~ 0, - luc_tend_2>4 ~ 0) - - data_filtered_2$def_response_2 <- def_response_2 - - def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% - nrow() - - deg_to_def <- 100*(def_changes_2/no_2s)/period_length - - # make df - - df <- data.frame(matrix(ncol=3,nrow=4)) - - colnames(df) <- c('Process','Forest type','Rate (%/year)') - - df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation') - df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest') - df[3] <- c(deg,def,deg_to_def,ref) - - return(df) - -} \ No newline at end of file diff --git a/scripts/scripts/land_cover_timeseries.R b/scripts/scripts/land_cover_timeseries.R deleted file mode 100644 index 6490bf1..0000000 --- a/scripts/scripts/land_cover_timeseries.R +++ /dev/null @@ -1,111 +0,0 @@ - -get_luc_timeseries <- function(data,t0,tend,type='both'){ - - years_list <- seq(t0,tend) - - if(type=='both'){ - - df <- data.frame(matrix(ncol=4,nrow=8*length(years_list))) - - colnames(df) <- c('year','type','luc','percentage') - - counter <- 1 - - for(year in years_list) { - - for(i in seq (1:4)) { - - for(type_value in c('Project','Counterfactual')) { - - total <- data %>% filter(type == type_value) %>% nrow() - - no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow() - - prop <- no_class_i/total - - df[counter,1] <- year - df[counter,2] <- type_value - df[counter,3] <- i - df[counter,4] <- prop*100 - - counter <- counter+1 - - } - - } - - } - - } else if(type=='single'){ - - df <- data.frame(matrix(ncol=3,nrow=4*length(years_list))) - - colnames(df) <- c('year','luc','percentage') - - counter <- 1 - - for(year in years_list) { - - for(i in seq (1:4)) { - - total <- data %>% nrow() - - no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow() - - prop <- no_class_i/total - - df[counter,1] <- year - df[counter,2] <- i - df[counter,3] <- prop*100 - - counter <- counter+1 - - } - - } - - } - - return(drop_na(df)) - -} - -luc_class1_uncertainty <- function(data,t0,tend) { - - years_list <- seq(t0-10,tend) - - df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list))) - - colnames(df) <- c('year','type','pair','percent_class1') - - counter <- 1 - - for(year in years_list) { - - for(type_value in c('Project','Counterfactual')) { - - for(pair_id in unique(data$pair)) { - - total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow() - - no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow() - - prop <- no_class_i/total - - df[counter,1] <- year - df[counter,2] <- type_value - df[counter,3] <- pair_id - df[counter,4] <- prop*100 - - counter <- counter+1 - - } - - } - - } - - return(drop_na(df)) - -} - diff --git a/scripts/scripts/plot_matchingvars.R b/scripts/scripts/plot_matchingvars.R deleted file mode 100644 index ec47f01..0000000 --- a/scripts/scripts/plot_matchingvars.R +++ /dev/null @@ -1,42 +0,0 @@ -plot_matching_variables <- function(data, ex_ante = 'false') { - - cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc')) - cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages - cont_data <- melt(cont_data) - - # rename labels - cont_data$variable <- factor(cont_data$variable, - levels = c('access', 'cpc0_u', 'cpc0_d', - 'slope', 'cpc5_u', 'cpc5_d', - 'elevation', 'cpc10_u', 'cpc10_d')) - - levels(cont_data$variable) <- c('Inaccessibility', - 'Forest~cover~t[0]', - 'Deforestation~t[0]', - 'Slope', - 'Forest~cover~t[-5]', - 'Deforestation~t[-5]', - 'Elevation', - 'Forest~cover~t[-10]', - 'Deforestation~t[-10]') - - # determine labels based on ex_ante - if (ex_ante == 'false') { - plot_labels <- c('Counterfactual', 'Project') - } else if (ex_ante == 'true') { - plot_labels <- c('Matched points', 'Project')} - - # plot - matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) + - geom_density(adjust = 10, size = 1) + - facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) + - ylab('Density') + - scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) + - theme_classic() + - theme(legend.title = element_blank(), - axis.title.x = element_blank(), - axis.text.y = element_blank(), - axis.ticks.y = element_blank()) - - return(matchingvars) -} \ No newline at end of file diff --git a/scripts/scripts/plot_transitions.R b/scripts/scripts/plot_transitions.R deleted file mode 100644 index 2931a60..0000000 --- a/scripts/scripts/plot_transitions.R +++ /dev/null @@ -1,63 +0,0 @@ -library(ggspatial) - -plot_transitions <- function(data,t0,period_length,shapefile){ - - # count number of 1s at project start - - t0_index <- grep(paste0('luc_',t0),colnames(data)) - - data_filtered <- data[data[,t0_index]==1,] - - # identify where there have been changes - - tend <- t0 + period_length - - luc_tend <- data_filtered[[paste0('luc_', tend)]] - - response <- case_when( - luc_tend==1 ~ NA, - luc_tend==2 ~ 'deg', - luc_tend==3 ~ 'def', - luc_tend==4 ~ 'ref', - luc_tend>4 ~ NA) - - data_filtered$response <- as.factor(response) - data_filtered <- data_filtered %>% filter(!is.na(response)) - - # adding deg --> def transition - - # count number of 2s at project start - - data_filtered_2s <- data[data[,t0_index]==2,] - - # identify where there have been changes - - luc_tend <- data_filtered_2s[[paste0('luc_', tend)]] - - response <- case_when( - luc_tend==1 ~ NA, - luc_tend==2 ~ NA, - luc_tend==3 ~ 'deg_to_def', - luc_tend==4 ~ NA, - luc_tend>4 ~ NA) - - data_filtered_2s$response <- as.factor(response) - data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response)) - - combined_dat <- bind_rows(data_filtered, data_filtered_2s) - combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref')) - - # plotting - - plot <- combined_dat %>% - filter(response != 0) %>% - ggplot(aes(x=lng,y=lat,colour=response))+ - geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+ - geom_point(alpha=0.5,size=0.5)+ - scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+ - annotation_scale(text_cex = 1.3)+ - theme_void() - - return(plot) - -} diff --git a/scripts/scripts/std_mean_diff.R b/scripts/scripts/std_mean_diff.R deleted file mode 100644 index 63d81ba..0000000 --- a/scripts/scripts/std_mean_diff.R +++ /dev/null @@ -1,57 +0,0 @@ - -std_mean_diff <- function(path_to_pairs) { - - # clean data - - files_full_raw <- list.files(path_to_pairs, - pattern='*.parquet',full.names=T,recursive=F) - files_full <- files_full_raw[!grepl('matchless',files_full_raw)] - files_short_raw <- list.files(path=path_to_pairs, - pattern='*.parquet',full.names=F,recursive=F) - files_short <- files_short_raw[!grepl('matchless',files_short_raw)] - - # initialise dfs - - vars <- c(colnames(read_parquet(files_full[1])),'pair') - df <- data.frame(matrix(ncol=length(vars),nrow=0)) - colnames(df) <- vars - - for(j in 1:length(files_full)){ - - # read in all parquet files for a given project - - f <- data.frame(read_parquet(files_full[j])) - - # append data to bottom of df - - df <- bind_rows(df,f) - - } - - # calculate smd - - smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE) - - variables <- c('cpc10_d','cpc5_d','cpc0_d', - 'cpc10_u','cpc5_u','cpc0_u', - 'access','slope','elevation') - - for (var in variables) { - k_var <- df[[paste0("k_", var)]] - s_var <- df[[paste0("s_", var)]] - - k_mean <- mean(k_var, na.rm = TRUE) - s_mean <- mean(s_var, na.rm = TRUE) - k_sd <- sd(k_var, na.rm = TRUE) - s_sd <- sd(s_var, na.rm = TRUE) - - pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2) - smd <- (k_mean - s_mean) / pooled_sd - - smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE)) - } - - return(smd_results) -} - - \ No newline at end of file From 62c0b38ee086863c3d05cff49319d29622ab0d7d Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Wed, 28 Aug 2024 09:48:59 +0000 Subject: [PATCH 17/19] Update tmfpython.sh --- scripts/tmfpython.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index 7361eb3..f2ece30 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -197,5 +197,5 @@ deactivate # Run ex-ante evaluation if [ "$verbose" == "true" ]; then ea_output_file="${output_dir}/${proj}_ex_ante_evaluation.html" -Rscript -e "rmarkdown::render(input='scripts/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" +Rscript -e "rmarkdown::render(input='evaluations/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" fi \ No newline at end of file From 7823ebf1b15e16f5874e7962d045d17cd30774c8 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Thu, 29 Aug 2024 16:04:28 +0000 Subject: [PATCH 18/19] Updated tmfpython.sh to mirror ex post evaluations code --- scripts/tmfpython.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh index f2ece30..fdecea6 100755 --- a/scripts/tmfpython.sh +++ b/scripts/tmfpython.sh @@ -6,7 +6,7 @@ #p: project name/ID - must match name of shapefile #t: year of project start (t0) #e: evaluation year (default: 2022) -#v: verbose - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false). +#v: report - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: true). # Check which branch is currently checked out branch=$(git rev-parse --abbrev-ref HEAD) @@ -18,7 +18,7 @@ set -e input_dir="" output_dir="" eval_year=2022 -verbose=true +report=true ##################################### @@ -31,7 +31,7 @@ function display_help() { echo " -p Project name" echo " -t Start year" echo " -e Evaluation year" - echo " -v Knit ex ante evaluation as .Rmd? (true/false)" + echo " -r Knit ex ante evaluation as .Rmd? (true/false)" echo " -h Display this help message" echo "Example:" echo " $0 -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out -p 1201 -t 2012" @@ -46,7 +46,7 @@ do p) proj=${OPTARG};; t) t0=${OPTARG};; e) eval_year=${OPTARG};; - r) verbose=${OPTARG};; + r) report=${OPTARG};; h) display_help; exit 0;; *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;; esac @@ -57,7 +57,7 @@ echo "Output directory: $output_dir" echo "Project: $proj" echo "t0: $t0" echo "Evaluation year: $eval_year" -echo "Ex-ante evaluation: $verbose" +echo "Ex-ante evaluation: $report" if [ $# -eq 0 ]; then display_help @@ -195,7 +195,7 @@ echo "--Pairs matched.--" deactivate # Run ex-ante evaluation -if [ "$verbose" == "true" ]; then +if [ "$report" == "true" ]; then ea_output_file="${output_dir}/${proj}_ex_ante_evaluation.html" Rscript -e "rmarkdown::render(input='evaluations/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))" fi \ No newline at end of file From 8d391fa259774e46fa26f941cb4ddb7feb4fb761 Mon Sep 17 00:00:00 2001 From: Abby Williams Date: Fri, 30 Aug 2024 14:57:11 +0000 Subject: [PATCH 19/19] Update ex_ante_evaluations_template.Rmd --- evaluations/ex_ante_evaluation_template.Rmd | 30 +++++++++++---------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/evaluations/ex_ante_evaluation_template.Rmd b/evaluations/ex_ante_evaluation_template.Rmd index be95ce1..3a92a4c 100644 --- a/evaluations/ex_ante_evaluation_template.Rmd +++ b/evaluations/ex_ante_evaluation_template.Rmd @@ -733,7 +733,7 @@ Note that, in calculating carbon stock changes, we assume the following: - Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997) -- Deadwod biomass is 11% of AGB (based on IPCC 2003) +- Deadwood biomass is 11% of AGB (based on IPCC 2003) - Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011) @@ -741,8 +741,8 @@ Note that, in calculating carbon stock changes, we assume the following: \ ```{r additionality_forecast} -baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8)) -colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea') +baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=9)) +colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total','total_c','area','total_byarea') luc_counter <- 1 row_counter <- 1 @@ -761,7 +761,7 @@ for(i in carbon_density$land.use.class){ bgb <- agb*0.2 dw <- agb*0.11 total <- agb + bgb + dw - #total_co2 <- total*0.47 # we're doing this step later + total_c <- total*0.47 # get area of class i @@ -772,7 +772,7 @@ for(i in carbon_density$land.use.class){ # multiply total by area - total_byarea <- total*area_of_forest + total_byarea <- total_c*area_of_forest # adding to df @@ -782,8 +782,9 @@ for(i in carbon_density$land.use.class){ baseline_stocks[row_counter,4] <- bgb baseline_stocks[row_counter,5] <- dw baseline_stocks[row_counter,6] <- total - baseline_stocks[row_counter,7] <- area_of_forest - baseline_stocks[row_counter,8] <- total_byarea + baseline_stocks[row_counter,7] <- total_c + baseline_stocks[row_counter,8] <- area_of_forest + baseline_stocks[row_counter,9] <- total_byarea row_counter <- row_counter+1 @@ -803,11 +804,11 @@ baseline_stocks_format <- baseline_stocks_format[2:6] colnames(baseline_stocks_format) <- c( 'Land use class', - 'AGB density (t C / ha)', - 'BGB density (t C / ha)', - 'Deadwood biomass density (t C / ha)', - 'Total biomass density (t C / ha)', - 'Total biomass (t C)') + 'AGB (t / ha)', + 'BGB (t / ha)', + 'Deadwood biomass (t / ha)', + 'Total biomass (t / ha)', + 'Total carbon (t / ha)') # renaming classes @@ -846,7 +847,7 @@ First we present the carbon stock changes observed in the matched points during ```{r results} -baseline_stock_changes <- baseline_stocks[c(1:2,7:8)] +baseline_stock_changes <- baseline_stocks[c(1:2,8:9)] # reshape @@ -882,7 +883,7 @@ baseline_stock_changes <- bind_rows(reshaped_data, total_row) # add in conversion to CO2 baseline_stock_changes <- baseline_stock_changes %>% - mutate(co2_diff = 0.47*c_diff) + mutate(co2_diff = (44/12)*c_diff) # formatting bits @@ -938,6 +939,7 @@ filtered_data %>% ``` + ```{r results_summary} # find the difference