From 2cfc1ba4ea7f9cb2bd5c7e8707f8baa8f2c081e4 Mon Sep 17 00:00:00 2001
From: swinersha <twswinfield@gmai.com>
Date: Wed, 31 Jul 2024 13:44:46 +0000
Subject: [PATCH 01/19] feat: k_all pixels and interactive find pairs drafted

---
 .gitignore                                    |   2 +-
 methods/matching/calculate_k.py               |  29 +-
 .../cluster_find_pairs_interactive.py         | 581 ++++++++++++++++++
 3 files changed, 599 insertions(+), 13 deletions(-)
 create mode 100644 methods/matching/cluster_find_pairs_interactive.py

diff --git a/.gitignore b/.gitignore
index eac8079..123285e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 .DS_Store
 *.npy
 data
-
+Figures
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py
index dfecd05..dba2284 100644
--- a/methods/matching/calculate_k.py
+++ b/methods/matching/calculate_k.py
@@ -21,10 +21,13 @@
 LARGE_PROJECT_PIXEL_DENSITY_PER_HECTARE = 0.05
 
 # The '2 *' in this is because I'm just considering one axis, rather than area
-PIXEL_SKIP_SMALL_PROJECT = \
-    round((HECTARE_WIDTH_IN_METERS / (2 * SMALL_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS)
-PIXEL_SKIP_LARGE_PROJECT = \
-    round((HECTARE_WIDTH_IN_METERS / (2 * LARGE_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS)
+# PIXEL_SKIP_SMALL_PROJECT = \
+#     round((HECTARE_WIDTH_IN_METERS / (2 * SMALL_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS)
+# PIXEL_SKIP_LARGE_PROJECT = \
+#     round((HECTARE_WIDTH_IN_METERS / (2 * LARGE_PROJECT_PIXEL_DENSITY_PER_HECTARE)) / PIXEL_WIDTH_IN_METERS)
+
+PIXEL_SKIP_SMALL_PROJECT = 1
+PIXEL_SKIP_LARGE_PROJECT = 1
 
 MatchingCollection = namedtuple('MatchingCollection',
     ['boundary', 'lucs', 'cpcs', 'ecoregions', 'elevation', 'slope', 'access', 'countries'])
@@ -66,10 +69,11 @@ def build_layer_collection(
 
     # ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once
     # as part of import of the ecoregions data
-    ecoregions = GroupLayer([
-        RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
-            glob.glob("*.tif", root_dir=ecoregions_directory_path)
-    ], name="ecoregions")
+    # ecoregions = GroupLayer([
+    #     RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
+    #         glob.glob("*.tif", root_dir=ecoregions_directory_path)
+    # ], name="ecoregions")
+    ecoregions = RasterLayer.layer_from_file(ecoregions_directory_path)
 
     elevation = GroupLayer([
         RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in
@@ -80,10 +84,11 @@ def build_layer_collection(
             glob.glob("slope*.tif", root_dir=slope_directory_path)
     ], name="slopes")
 
-    access = GroupLayer([
-        RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
-            glob.glob("*.tif", root_dir=access_directory_path)
-    ], name="access")
+    # access = GroupLayer([
+    #     RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
+    #         glob.glob("*.tif", root_dir=access_directory_path)
+    # ], name="access")
+    access = RasterLayer.layer_from_file(access_directory_path)
 
     countries = RasterLayer.layer_from_file(countries_raster_filename)
 
diff --git a/methods/matching/cluster_find_pairs_interactive.py b/methods/matching/cluster_find_pairs_interactive.py
new file mode 100644
index 0000000..9ac25fb
--- /dev/null
+++ b/methods/matching/cluster_find_pairs_interactive.py
@@ -0,0 +1,581 @@
+# source myenv/bin/activate
+
+import pandas as pd
+import numpy as np
+from numba import njit
+from sklearn.decomposition import PCA
+from sklearn.cluster import KMeans
+import matplotlib.pyplot as plt
+import geopandas as gpd
+import os
+import time
+import sys
+import faiss
+
+# Read K and M
+# Repeat 100 times - Ultimately we would like to remove this step
+#   PCA across K and M
+#   Divide into clusters - Check how much splitting into clusters speeds things up
+#   Sample K to find K_sub
+#   Sample M to find M_sub
+#   Split M into LUC combinations - How big is each set?
+#   For each cluster in K_sub
+#       Split cluster in K_sub into categorical combinations - How big is each set?
+#       For each categorical combination
+#           Sample from the equivalent cluster and categorical combindation in M_sub
+#           Find pairs for the categorical combinations in K_sub from the categorical combinations in M_sub
+#       RowBind categorical combination sets
+#   RowBind cluster sets
+#   Save Pairs
+
+
+# NOTES
+# 1. We might need to combine some categorical subsets because at least for comparing validity of the matches
+#    because if the subsets are too small the standardised mean differences can be very wrong
+# 2. One option would be to combine clusters based on the proximity of the cluster centres. For the LUCs, we might 
+#    combine groups that are in the same state when the project begins, even if the LUC history is not the same.
+# 3. There is a question of how much supposed additionality is created by each categorical subset? If it is 
+#    nothing, it might not matter. If it is substantive then it definitely does matter.
+
+
+
+# this function uses loops instead of numpy vector operations
+@njit
+def loop_match(m_pca, k_pca):
+    picked = np.ones((m_pca.shape[0],), dtype=np.bool_)
+    fast_matches = np.full(k_pca.shape[0], -1, dtype=np.int32)
+    for i in range(0, k_pca.shape[0]):
+        min_squared_diff_sum = np.inf
+        min_squared_diff_j = -1
+        for j in range(0, m_pca.shape[0]):
+            if picked[j]:
+                squared_diff_sum = np.sum((m_pca[j, :] - k_pca[i, :])**2)
+                if squared_diff_sum < min_squared_diff_sum:
+                    min_squared_diff_sum = squared_diff_sum
+                    min_squared_diff_j = j
+        fast_matches[i] = min_squared_diff_j
+        picked[min_squared_diff_j] = False
+    return fast_matches
+
+### Now try with real numbers
+
+def to_int32(x):
+    # Normalize the data to the range 0 to 1
+    min_val = np.min(x)
+    max_val = np.max(x)
+    normalized_data = (x - min_val) / (max_val - min_val)
+    
+    # Scale the normalized data to the range 0 to 255 for unsigned 8-bit integers
+    scaled_data = normalized_data * 255
+    
+    # Convert to 32-bit integers (0 to 255)
+    int32_data = scaled_data.astype(np.int32)
+    
+    return int32_data
+
+def to_pca_int32(x):
+    # Perform PCA and convert to dataframe
+    pca = PCA(n_components=min(len(x), len(x.columns)), 
+            whiten=False)  # Centering and scaling done by default
+    pca_result = pca.fit_transform(x)
+    pca_df = pd.DataFrame(pca_result)
+    # Convert all columns to int8
+    pca_32 = pca_df.apply(to_int32)
+    return pca_32
+
+
+def calculate_smd(group1, group2):
+    # Means
+    mean1, mean2 = np.mean(group1), np.mean(group2)
+    # Standard deviations
+    std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)
+    # Sample sizes
+    n1, n2 = len(group1), len(group2)
+    # Pooled standard deviation
+    pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2))
+    # Standardized mean difference
+    smd = (mean1 - mean2) / pooled_std
+    return smd, mean1, mean2, pooled_std
+
+
+K_SUB_PROPORTION = 0.01
+M_SUB_PROPORTION = 0.1
+# Number of clusters
+N_CLUSTERS = 9
+# Number of iterations for K means fitting
+N_ITERATIONS = 100
+VERBOSE = True
+
+# Define the start year
+t0 = 2012 # READ THIS IN
+match_years = [t0-10, t0-5, t0]
+
+# Read in the data
+boundary = gpd.read_file('/maps/aew85/projects/1201.geojson')
+
+k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k_all.parquet')
+m_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/1201/matches.parquet')
+
+# concat m and k
+km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
+                       m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
+
+# Select columns (excluding 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt', and those starting with 'luc')
+exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt']
+exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')]
+
+match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
+
+# Extract only the continuous columns
+continuous_columns = km_pixels.columns.difference(exclude_columns)
+km_pixels_selected = km_pixels[continuous_columns]
+
+# PCA transform and conversion to 32 bit ints
+km_pca = to_pca_int32(km_pixels_selected)
+
+# Looks good
+km_pca.head()
+
+#------------------------------------------
+
+# Initialize the KMeans object
+kmeans = faiss.Kmeans(d=km_pca.shape[1], k=N_CLUSTERS, niter=N_ITERATIONS, verbose=True)
+# Perform clustering
+kmeans.train(km_pca)
+
+# Get cluster assignments
+km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten()
+
+# Frequency distribution in each cluster
+cluster_counts = pd.Series(km_pixels['cluster']).value_counts()
+if VERBOSE:
+    print("Cluster counts:\n", cluster_counts)
+
+
+# Convert to spatial (simple features)
+km_pixels_sf = gpd.GeoDataFrame(
+    km_pixels,
+    geometry=gpd.points_from_xy(km_pixels['lng'], km_pixels['lat']),
+    crs="EPSG:4326"
+)
+
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Plot cluster centres
+
+# Get cluster centers
+centroids = kmeans.centroids
+
+if VERBOSE:
+    # Plot the cluster centers
+    plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=100, marker='x', label='Cluster centers')
+    # Add cluster IDs as labels on the plot
+    for i, center in enumerate(centroids):
+        plt.text(center[0], center[1], str(i), color='red', fontsize=12, weight='bold')
+    
+    plt.title('K-means Clustering with Faiss')
+    plt.xlabel('PCA Component 1')
+    plt.ylabel('PCA Component 2')
+    plt.legend()
+    plt.show()
+    plt.savefig('Figures/cluster_centres_faiss_1.png')
+    plt.close()  # Close the plot to free up memory
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+# Plot clusters as separate facets
+if VERBOSE:
+    fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))
+    axes = axes.flatten()
+    
+    clusters = sorted(km_pixels_sf['cluster'].unique())
+    
+    for i, cluster in enumerate(clusters):
+        ax = axes[i]
+        cluster_data = km_pixels_sf[km_pixels_sf['cluster'] == cluster]
+        cluster_data.plot(ax=ax, color='blue', markersize=0.2)
+        boundary.plot(ax=ax, edgecolor='black', facecolor='none')
+        ax.set_title(f'Cluster {cluster}')
+        ax.set_axis_off()
+    
+    # Turn off any unused subplots
+    for j in range(len(clusters), len(axes)):
+        fig.delaxes(axes[j])
+    
+    plt.tight_layout()
+    plt.savefig('Figures/cluster_faiss_1_facet.png')
+    plt.close()
+
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+# Extract K and M pixels 
+k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt']
+m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl']
+
+# Extract K and M PCA transforms
+k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy()
+m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy()
+
+k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION)
+m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION)
+
+# Define indexs for the samples from K and M
+k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False)
+m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False)
+
+# Take random samples from K and M pixels
+k_sub = k_pixels.iloc[k_random_indices]
+m_sub = m_pixels.iloc[m_random_indices]
+
+# Take corresponding random samples from the PCA transformed K and M 
+k_sub_pca = k_pca[k_random_indices,:]
+m_sub_pca = m_pca[m_random_indices,:]
+
+if VERBOSE:
+    # Handy code for displaying the number of counts in each unique category combination
+    # In K
+    k_combination_counts = k_sub.groupby(match_cats).size().reset_index(name='counts')
+    print("k_combination_counts")
+    print(k_combination_counts)
+    # In M
+    m_combination_counts = m_sub.groupby(match_cats).size().reset_index(name='counts')
+    print("m_combination_counts")
+    print(m_combination_counts)
+
+
+# Identify the unique combinations of luc columns
+k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats))
+
+pairs_list = []
+
+start_time = time.time()
+for i in range(0, k_cat_combinations.shape[0]):
+    # i = 6 # ith element of the unique combinations of the luc time series in k
+    # for in range()
+    k_cat_comb = k_cat_combinations.iloc[i]
+    k_cat = k_sub[(k_sub[match_cats] == k_cat_comb).all(axis=1)]
+    k_cat_pca = k_sub_pca[(k_sub[match_cats] == k_cat_comb).all(axis=1)]
+    
+    # Find the subset in km_pixels that matches this combination
+    m_cat = m_sub[(m_sub[match_cats] == k_cat_comb).all(axis=1)]
+    m_cat_pca = m_sub_pca[(m_sub[match_cats] == k_cat_comb).all(axis=1)]
+    
+    if VERBOSE:
+        print('ksub_cat:' + str(k_cat.shape[0]))
+        print('msub_cat:' + str(m_cat.shape[0]))
+    
+    # If there is no suitable match for the pre-project luc time series
+    # Then it may be preferable to just take the luc state at t0
+    # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)]
+    # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)]
+    # For if there are no matches return nothing
+    
+    if(m_cat.shape[0] < k_cat.shape[0] * 5):
+        print("M insufficient for matching. Set VERBOSE to True for more details.")
+        continue
+    
+    matches_index = loop_match(m_cat_pca, k_cat_pca)
+    m_cat_matches = m_cat.iloc[matches_index]
+    
+    # i = 0
+    # matched = pd.concat([k_cat.iloc[i], m_cat.iloc[matches[i]]], axis=1, ignore_index=True)
+    # matched.columns = ['trt', 'ctrl']
+    # matched
+    #Looks great!
+    columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope']
+    # Calculate SMDs for the specified columns
+    smd_results = []
+    for column in columns_to_compare:
+        smd, mean1, mean2, pooled_std = calculate_smd(k_cat[column], m_cat_matches[column])
+        smd_results.append((column, smd, mean1, mean2, pooled_std))
+    
+    # Convert the results to a DataFrame for better readability
+    smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
+    
+    if VERBOSE:
+        # Print the results
+        print("categorical combination:")
+        print(k_cat_comb)
+        # Count how many items in 'column1' are not equal to the specified integer value
+        print("LUC flips in K:")
+        (k_cat['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum()
+        print("LUC flips in matches:")
+        (m_cat_matches['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum()
+        print("Standardized Mean Differences:")
+        print(smd_df)
+    
+    # Join the pairs into one dataframe:
+    k_cat = k_cat.reset_index(drop = True)
+    m_cat_matches = m_cat_matches.reset_index(drop = True)  
+    pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1)
+    
+    # Append the resulting DataFrame to the list
+    pairs_list.append(pairs_df)
+
+# Combine all the DataFrames in the list into a single DataFrame
+combined_pairs = pd.concat(pairs_list, ignore_index=True)
+
+end_time = time.time()
+elapsed_time = end_time - start_time
+if VERBOSE:
+    print(f"Elapsed time: {elapsed_time:.2f} seconds")
+
+columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope']
+# Calculate SMDs for the specified columns
+smd_results = []
+for column in columns_to_compare:
+    smd, mean1, mean2, pooled_std = calculate_smd(combined_pairs['k_' + column], combined_pairs['s_' + column])
+    smd_results.append((column, smd, mean1, mean2, pooled_std))
+
+# Convert the results to a DataFrame for better readability
+smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
+print(smd_df)
+
+smd_results = []
+for column in columns_to_compare:
+    smd, mean1, mean2, pooled_std = calculate_smd(k_pixels[column], m_pixels[column])
+    smd_results.append((column, smd, mean1, mean2, pooled_std))
+
+# Convert the results to a DataFrame for better readability
+smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
+print(smd_df)
+
+
+
+
+
+
+
+def find_match_iteration(
+    k_parquet_filename: str,
+    m_parquet_filename: str,
+    start_year: int,
+    output_folder: str,
+    idx_and_seed: tuple[int, int]
+) -> None:
+    logging.info("Find match iteration %d of %d", idx_and_seed[0] + 1, REPEAT_MATCH_FINDING)
+    rng = np.random.default_rng(idx_and_seed[1])
+
+    logging.info("Loading K from %s", k_parquet_filename)
+    k_pixels = pd.read_parquet(k_parquet_filename)
+    
+    logging.info("Loading M from %s", m_parquet_filename)
+    m_pixels = pd.read_parquet(m_parquet_filename)
+    
+    # concat m and k
+    km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
+                           m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
+    
+    # Find the continuous columns
+    exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt']
+    exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')]
+    continuous_columns = km_pixels.columns.difference(exclude_columns)
+    # Categorical columns
+    match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
+    
+    logging.info("Starting PCA transformation of k and m union. km_pixels.shape: %a", {km_pixels.shape})
+    # PCA transform and conversion to 32 bit ints for continuous only
+    km_pca = to_pca_int32(km_pixels[continuous_columns])
+    logging.info("Done PCA transformation")
+    
+    # Extract K and M pixels  - this might be unnecessary I just wanted to make sure 
+    # K and M were in the same order here and in the PCA transform
+    k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt']
+    m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl']
+    # Extract K and M PCA transforms
+    k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy()
+    m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy()
+    
+    # Sample from K and M
+    k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION)
+    m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION)
+    # Define indexs for the samples from K and M
+    k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False)
+    m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False)
+    # Take random samples from K and M pixels
+    k_sub = k_pixels.iloc[k_random_indices]
+    m_sub = m_pixels.iloc[m_random_indices]
+    # Take corresponding random samples from the PCA transformed K and M 
+    k_sub_pca = k_pca[k_random_indices,:]
+    m_sub_pca = m_pca[m_random_indices,:]
+    
+    logging.info("Samples taken from K and M. k_sub.shape: %a; m_sub.shape: %a", {k_sub.shape, m_sub.shape})
+    
+    # Identify the unique combinations of luc columns
+    k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats))
+    
+    pairs_list = []
+    matchless_list = []
+    
+    logging.info("Starting greedy matching... k_sub.shape: %s, m_sub.shape: %s",
+                                  k_sub.shape, m_sub.shape)
+    
+    start_time = time.time()
+    for i in range(0, k_cat_combinations.shape[0]):
+        # i = 6 # ith element of the unique combinations of the luc time series in k
+        # for in range()
+        k_cat_comb = k_cat_combinations.iloc[i]
+        k_cat = k_sub[(k_sub[match_cats] == k_cat_comb).all(axis=1)]
+        k_cat_pca = k_sub_pca[(k_sub[match_cats] == k_cat_comb).all(axis=1)]
+        
+        # Find the subset in km_pixels that matches this combination
+        m_cat = m_sub[(m_sub[match_cats] == k_cat_comb).all(axis=1)]
+        m_cat_pca = m_sub_pca[(m_sub[match_cats] == k_cat_comb).all(axis=1)]
+        
+        if VERBOSE:
+            print('ksub_cat:' + str(k_cat.shape[0]))
+            print('msub_cat:' + str(m_cat.shape[0]))
+        
+        # If there is no suitable match for the pre-project luc time series
+        # Then it may be preferable to just take the luc state at t0
+        # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)]
+        # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)]
+        # For if there are no matches return nothing
+        
+        if(m_cat.shape[0] < k_cat.shape[0] * 5):
+            # print("M insufficient for matching. Set VERBOSE to True for more details.")
+            # Append the matchless DataFrame to the list
+            matchless_list.append(k_cat)
+            continue
+        
+        # Find the matches
+        matches_index = loop_match(m_cat_pca, k_cat_pca)
+        m_cat_matches = m_cat.iloc[matches_index]
+        
+        # i = 0
+        # matched = pd.concat([k_cat.iloc[i], m_cat.iloc[matches[i]]], axis=1, ignore_index=True)
+        # matched.columns = ['trt', 'ctrl']
+        # matched
+        #Looks great!
+        columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope']
+        # Calculate SMDs for the specified columns
+        smd_results = []
+        for column in columns_to_compare:
+            smd, mean1, mean2, pooled_std = calculate_smd(k_cat[column], m_cat_matches[column])
+            smd_results.append((column, smd, mean1, mean2, pooled_std))
+        
+        # Convert the results to a DataFrame for better readability
+        smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
+        
+        if VERBOSE:
+            # Print the results
+            print("categorical combination:")
+            print(k_cat_comb)
+            # Count how many items in 'column1' are not equal to the specified integer value
+            print("LUC flips in K:")
+            (k_cat['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum()
+            print("LUC flips in matches:")
+            (m_cat_matches['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum()
+            print("Standardized Mean Differences:")
+            print(smd_df)
+        
+        # Join the pairs into one dataframe:
+        k_cat = k_cat.reset_index(drop = True)
+        m_cat_matches = m_cat_matches.reset_index(drop = True)  
+        pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1)
+        
+        # Append the resulting DataFrame to the list
+        pairs_list.append(pairs_df)
+
+    # Combine all the DataFrames in the list into a single DataFrame
+    pairs = pd.concat(pairs_list, ignore_index=True)
+    matchless = pd.concat(matchless_list, ignore_index=True)
+    
+    logging.info("Finished greedy matching... pairs.shape: %s, matchless.shape: %s",
+                                  pairs.shape, matchless.shape)
+    
+    logging.info("Starting storing matches...")
+    pairs.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet'))
+    matchless.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet'))
+    
+    logging.info("Finished find match iteration")
+
+
+def find_pairs(
+    k_parquet_filename: str,
+    m_parquet_filename: str,
+    start_year: int,
+    seed: int,
+    output_folder: str,
+    processes_count: int
+) -> None:
+    logging.info("Starting find pairs")
+    os.makedirs(output_folder, exist_ok=True)
+
+    rng = np.random.default_rng(seed)
+    iteration_seeds = zip(range(REPEAT_MATCH_FINDING), rng.integers(0, 1000000, REPEAT_MATCH_FINDING))
+
+    with Pool(processes=processes_count) as pool:
+        pool.map(
+            partial(
+                find_match_iteration,
+                k_parquet_filename,
+                m_parquet_filename,
+                start_year,
+                output_folder
+            ),
+            iteration_seeds
+        )
+
+
+def main():
+    # If you use the default multiprocess model then you risk deadlocks when logging (which we
+    # have hit). Spawn is the default on macOS, but not on Linux.
+    set_start_method("spawn")
+
+    parser = argparse.ArgumentParser(description="Takes K and M and finds 100 sets of matches.")
+    parser.add_argument(
+        "--k",
+        type=str,
+        required=True,
+        dest="k_filename",
+        help="Parquet file containing pixels from K as generated by calculate_k.py"
+    )
+    parser.add_argument(
+        "--m",
+        type=str,
+        required=True,
+        dest="m_filename",
+        help="Parquet file containing pixels from M as generated by build_m_table.py"
+    )
+    parser.add_argument(
+        "--start_year",
+        type=int,
+        required=True,
+        dest="start_year",
+        help="Year project started."
+    )
+    parser.add_argument(
+        "--seed",
+        type=int,
+        required=True,
+        dest="seed",
+        help="Random number seed, to ensure experiments are repeatable."
+    )
+    parser.add_argument(
+        "--output",
+        type=str,
+        required=True,
+        dest="output_directory_path",
+        help="Directory into which output matches will be written. Will be created if it does not exist."
+    )
+    parser.add_argument(
+        "-j",
+        type=int,
+        required=False,
+        default=round(cpu_count() / 2),
+        dest="processes_count",
+        help="Number of concurrent threads to use."
+    )
+    args = parser.parse_args()
+
+    find_pairs(
+        args.k_filename,
+        args.m_filename,
+        args.start_year,
+        args.seed,
+        args.output_directory_path,
+        args.processes_count
+    )
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 088048eb0907685ec24ba6afc20cffe7ee71e2c7 Mon Sep 17 00:00:00 2001
From: swinersha <twswinfield@gmai.com>
Date: Wed, 31 Jul 2024 18:54:55 +0000
Subject: [PATCH 02/19] feat: fast find pairs experiment incorporated into
 executable

---
 methods/matching/find_pairs.py | 469 +++++++++++++++------------------
 1 file changed, 209 insertions(+), 260 deletions(-)

diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py
index 7f17782..62e9e9c 100644
--- a/methods/matching/find_pairs.py
+++ b/methods/matching/find_pairs.py
@@ -3,16 +3,35 @@
 import logging
 from functools import partial
 from multiprocessing import Pool, cpu_count, set_start_method
-from numba import jit  # type: ignore
+from numba import njit  # type: ignore
 import numpy as np
 import pandas as pd
+from sklearn.decomposition import PCA
+import faiss
 
 from methods.common.luc import luc_matching_columns
 
+# TO DO: 
+# 1. Rename columns to luc10, luc5 and luc0 to align with the pipeline
+
+# to delete: 
+# start_year = 2012
+# k_parquet_filename = '/maps/tws36/tmf_pipe_out/1201/k_all.parquet'
+# m_parquet_filename = '/maps/aew85/tmf_pipe_out/1201/tom_pairs/matches.parquet'
+# luc_match = True
+
 REPEAT_MATCH_FINDING = 100
-DEFAULT_DISTANCE = 10000000.0
 DEBUG = False
 
+K_SUB_PROPORTION = 0.01
+M_SUB_PROPORTION = 0.1
+# Number of clusters
+NUM_CLUSTERS = 9
+# Number of iterations for K means fitting
+NUM_ITERATIONS = 100
+RELATIVE_MATCH_YEARS = [-10, -5, 0]
+
+
 DISTANCE_COLUMNS = [
     "elevation", "slope", "access",
     "cpc0_u", "cpc0_d",
@@ -23,297 +42,219 @@
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 
+# this function uses loops instead of numpy vector operations
+@njit
+def loop_match(m_pca, k_pca):
+    picked = np.ones((m_pca.shape[0],), dtype=np.bool_)
+    fast_matches = np.full(k_pca.shape[0], -1, dtype=np.int32)
+    for i in range(0, k_pca.shape[0]):
+        min_squared_diff_sum = np.inf
+        min_squared_diff_j = -1
+        for j in range(0, m_pca.shape[0]):
+            if picked[j]:
+                squared_diff_sum = np.sum((m_pca[j, :] - k_pca[i, :])**2)
+                if squared_diff_sum < min_squared_diff_sum:
+                    min_squared_diff_sum = squared_diff_sum
+                    min_squared_diff_j = j
+        fast_matches[i] = min_squared_diff_j
+        picked[min_squared_diff_j] = False
+    return fast_matches
+
+### Now try with real numbers
+
+def to_int32(x):
+    # Normalize the data to the range 0 to 1
+    min_val = np.min(x)
+    max_val = np.max(x)
+    normalized_data = (x - min_val) / (max_val - min_val)
+    # Scale the normalized data to the range 0 to 255 for unsigned 8-bit integers
+    scaled_data = normalized_data * 255
+    # Convert to 32-bit integers (0 to 255)
+    int32_data = scaled_data.astype(np.int32)
+    return int32_data
+
+def to_pca_int32(x):
+    # Perform PCA and convert to dataframe
+    pca = PCA(n_components=min(len(x), len(x.columns)), 
+            whiten=False)  # Centering and scaling done by default
+    pca_result = pca.fit_transform(x)
+    pca_df = pd.DataFrame(pca_result)
+    # Convert all columns to int8
+    pca_32 = pca_df.apply(to_int32)
+    return pca_32
+
+
+def calculate_smd(group1, group2):
+    # Means
+    mean1, mean2 = np.mean(group1), np.mean(group2)
+    # Standard deviations
+    std1, std2 = np.std(group1, ddof=1), np.std(group2, ddof=1)
+    # Sample sizes
+    n1, n2 = len(group1), len(group2)
+    # Pooled standard deviation
+    pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2))
+    # Standardized mean difference
+    smd = (mean1 - mean2) / pooled_std
+    return smd, mean1, mean2, pooled_std
+
 def find_match_iteration(
-    k_parquet_filename: str,
-    m_parquet_filename: str,
+    km_pixels: pd.DataFrame,
+    km_pca: np.ndarray,
     start_year: int,
+    luc_match: bool,
     output_folder: str,
     idx_and_seed: tuple[int, int]
 ) -> None:
     logging.info("Find match iteration %d of %d", idx_and_seed[0] + 1, REPEAT_MATCH_FINDING)
     rng = np.random.default_rng(idx_and_seed[1])
-
-    logging.info("Loading K from %s", k_parquet_filename)
-
-    # Methodology 6.5.7: For a 10% sample of K
-    k_set = pd.read_parquet(k_parquet_filename)
-    k_subset = k_set.sample(
-        frac=0.1,
-        random_state=rng
-    ).reset_index()
-
-    logging.info("Loading M from %s", m_parquet_filename)
-    m_set = pd.read_parquet(m_parquet_filename)
-
-    # get the column ids for DISTANCE_COLUMNS
-    thresholds_for_columns = np.array([
-            200.0, # Elev
-            2.5, # Slope
-            10.0,  # Access
-            0.1, # CPCs
-            0.1, # CPCs
-            0.1, # CPCs
-            0.1, # CPCs
-            0.1, # CPCs
-            0.1, # CPCs
-    ])
-
-    logging.info("Preparing s_set...")
-
-    m_dist_thresholded_df = m_set[DISTANCE_COLUMNS] / thresholds_for_columns
-    k_subset_dist_thresholded_df = k_subset[DISTANCE_COLUMNS] / thresholds_for_columns
-
-    # convert to float32 numpy arrays and make them contiguous for numba to vectorise
-    m_dist_thresholded = np.ascontiguousarray(m_dist_thresholded_df, dtype=np.float32)
-    k_subset_dist_thresholded = np.ascontiguousarray(k_subset_dist_thresholded_df, dtype=np.float32)
-
-    # LUC columns are all named with the year in, so calculate the column names
-    # for the years we are intested in
-    luc0, luc5, luc10 = luc_matching_columns(start_year)
-    # As well as all the LUC columns for later use
-    luc_columns = [x for x in m_set.columns if x.startswith('luc')]
-
-    hard_match_columns = ['country', 'ecoregion', luc10, luc5, luc0]
-    assert len(hard_match_columns) == HARD_COLUMN_COUNT
-
-    # similar to the above, make the hard match columns contiguous float32 numpy arrays
-    m_dist_hard = np.ascontiguousarray(m_set[hard_match_columns].to_numpy()).astype(np.int32)
-    k_subset_dist_hard = np.ascontiguousarray(k_subset[hard_match_columns].to_numpy()).astype(np.int32)
-
-    # Methodology 6.5.5: S should be 10 times the size of K, in order to achieve this for every
-    # pixel in the subsample (which is 10% the size of K) we select 100 pixels.
-    required = 100
-
-    logging.info("Running make_s_set_mask... required: %d", required)
-    starting_positions = rng.integers(0, int(m_dist_thresholded.shape[0]), int(k_subset_dist_thresholded.shape[0]))
-    s_set_mask_true, no_potentials = make_s_set_mask(
-        m_dist_thresholded,
-        k_subset_dist_thresholded,
-        m_dist_hard,
-        k_subset_dist_hard,
-        starting_positions,
-        required
+    
+    match_years = [start_year + year for year in RELATIVE_MATCH_YEARS]
+    # The categorical columns:
+    if luc_match:
+        match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
+    else:
+        match_cats = ["ecoregion", "country", "cluster"]
+    
+    # Extract K and M pixels 
+    k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt']
+    m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl']
+    # Extract K and M PCA transforms
+    k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy()
+    m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy()
+    # Draw subsamples
+    # Methodology 6.5.7: Needs to be updated !!!
+    k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION)
+    m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION)
+    # Define indexs for the samples from K and M
+    k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False)
+    m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False)
+    # Take random samples from K and M pixels
+    k_sub = k_pixels.iloc[k_random_indices]
+    m_sub = m_pixels.iloc[m_random_indices]
+    # Take corresponding random samples from the PCA transformed K and M 
+    k_sub_pca = k_pca[k_random_indices,:]
+    m_sub_pca = m_pca[m_random_indices,:]
+    
+    logging.info("Starting greedy matching... k_sub.shape: %s, m_sub.shape: %s",
+                 k_sub.shape, m_sub.shape)
+    
+    pairs, matchless = greedy_match(
+        k_sub,
+        m_sub,
+        k_sub_pca,
+        m_sub_pca,
+        match_cats
     )
-
-    logging.info("Done make_s_set_mask. s_set_mask.shape: %a", {s_set_mask_true.shape})
-
-    s_set = m_set[s_set_mask_true]
-    potentials = np.invert(no_potentials)
-
-    k_subset = k_subset[potentials]
-    logging.info("Finished preparing s_set. shape: %a", {s_set.shape})
-
-    # Notes:
-    # 1. Not all pixels may have matches
-    results = []
-    matchless = []
-
-    s_set_for_cov = s_set[DISTANCE_COLUMNS]
-    logging.info("Calculating covariance...")
-    covarience = np.cov(s_set_for_cov, rowvar=False)
-    logging.info("Calculating inverse covariance...")
-    invconv = np.linalg.inv(covarience).astype(np.float32)
-
-    # Match columns are luc10, luc5, luc0, "country" and "ecoregion"
-    s_set_match = s_set[hard_match_columns + DISTANCE_COLUMNS].to_numpy(dtype=np.float32)
-    # this is required so numba can vectorise the loop in greedy_match
-    s_set_match = np.ascontiguousarray(s_set_match)
-
-    # Now we do the same thing for k_subset
-    k_subset_match = k_subset[hard_match_columns + DISTANCE_COLUMNS].to_numpy(dtype=np.float32)
-    # this is required so numba can vectorise the loop in greedy_match
-    k_subset_match = np.ascontiguousarray(k_subset_match)
-
-    logging.info("Starting greedy matching... k_subset_match.shape: %s, s_set_match.shape: %s",
-                 k_subset_match.shape, s_set_match.shape)
-
-    add_results, k_idx_matchless = greedy_match(
-        k_subset_match,
-        s_set_match,
-        invconv
-    )
-
+    
+    # Combine all the pairs DataFrames in the list into a single DataFrame
+    combined_pairs = pd.concat(pairs, ignore_index=True)
+    # Combine all the matchess DataFrames in the list into a single DataFrame
+    combined_matchless = pd.concat(matchless, ignore_index=True)
     logging.info("Finished greedy matching...")
-
+    
     logging.info("Starting storing matches...")
-
-    for result in add_results:
-        (k_idx, s_idx) = result
-        k_row = k_subset.iloc[k_idx]
-        match = s_set.iloc[s_idx]
-
-        if DEBUG:
-            for hard_match_column in hard_match_columns:
-                if k_row[hard_match_column] != match[hard_match_column]:
-                    print(k_row)
-                    print(match)
-                    raise ValueError("Hard match inconsistency")
-
-        results.append(
-            [k_row.lat, k_row.lng] + [k_row[x] for x in luc_columns + DISTANCE_COLUMNS] + \
-            [match.lat, match.lng] + [match[x] for x in luc_columns + DISTANCE_COLUMNS]
-        )
-
-    logging.info("Finished storing matches...")
-
-    for k_idx in k_idx_matchless:
-        k_row = k_subset.iloc[k_idx]
-        matchless.append(k_row)
-
-    columns = ['k_lat', 'k_lng'] + \
-        [f'k_{x}' for x in luc_columns + DISTANCE_COLUMNS] + \
-        ['s_lat', 's_lng'] + \
-        [f's_{x}' for x in luc_columns + DISTANCE_COLUMNS]
-
-    results_df = pd.DataFrame(results, columns=columns)
-    results_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet'))
-
-    matchless_df = pd.DataFrame(matchless, columns=k_set.columns)
-    matchless_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet'))
-
+    combined_pairs_df = pd.DataFrame(combined_pairs)
+    combined_pairs_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet'))
+    
+    combined_matchless_df = pd.DataFrame(combined_matchless)
+    combined_matchless_df.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet'))
+    
     logging.info("Finished find match iteration")
 
-@jit(nopython=True, fastmath=True, error_model="numpy")
-def make_s_set_mask(
-    m_dist_thresholded: np.ndarray,
-    k_subset_dist_thresholded: np.ndarray,
-    m_dist_hard: np.ndarray,
-    k_subset_dist_hard: np.ndarray,
-    starting_positions: np.ndarray,
-    required: int
-):
-    m_size = m_dist_thresholded.shape[0]
-    k_size = k_subset_dist_thresholded.shape[0]
-
-    s_include = np.zeros(m_size, dtype=np.bool_)
-    k_miss = np.zeros(k_size, dtype=np.bool_)
-
-    for k in range(k_size):
-        matches = 0
-        k_row = k_subset_dist_thresholded[k, :]
-        k_hard = k_subset_dist_hard[k]
-
-        for index in range(m_size):
-            m_index = (index + starting_positions[k]) % m_size
-
-            m_row = m_dist_thresholded[m_index, :]
-            m_hard = m_dist_hard[m_index]
-
-            should_include = True
-
-            # check that every element of m_hard matches k_hard
-            hard_equals = True
-            for j in range(m_hard.shape[0]):
-                if m_hard[j] != k_hard[j]:
-                    hard_equals = False
-
-            if not hard_equals:
-                should_include = False
-            else:
-                for j in range(m_row.shape[0]):
-                    if abs(m_row[j] - k_row[j]) > 1.0:
-                        should_include = False
-
-            if should_include:
-                s_include[m_index] = True
-                matches += 1
-
-            # Don't find any more M's
-            if matches == required:
-                break
-
-        k_miss[k] = matches == 0
-
-    return s_include, k_miss
-
-# Function which returns a boolean array indicating whether all values in a row are true
-@jit(nopython=True, fastmath=True, error_model="numpy")
-def rows_all_true(rows: np.ndarray):
-    # Don't use np.all because not supported by numba
-
-    # Create an array of booleans for rows in s still available
-    all_true = np.ones((rows.shape[0],), dtype=np.bool_)
-    for row_idx in range(rows.shape[0]):
-        for col_idx in range(rows.shape[1]):
-            if not rows[row_idx, col_idx]:
-                all_true[row_idx] = False
-                break
-
-    return all_true
-
-
-@jit(nopython=True, fastmath=True, error_model="numpy")
 def greedy_match(
-    k_subset: np.ndarray,
-    s_set: np.ndarray,
-    invcov: np.ndarray
+    k_sub: pd.DataFrame,
+    m_sub: pd.DataFrame,
+    k_sub_pca: np.ndarray,
+    m_sub_pca: np.ndarray,
+    match_cats: list
 ):
-    # Create an array of booleans for rows in s still available
-    s_available = np.ones((s_set.shape[0],), dtype=np.bool_)
-    total_available = s_set.shape[0]
-
-    results = []
+    # Identify the unique combinations of categorical columns
+    k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats))
+    
+    # Not all pixels may have matches
+    pairs = []
     matchless = []
-
-    s_tmp = np.zeros((s_set.shape[0],), dtype=np.float32)
-
-    for k_idx in range(k_subset.shape[0]):
-        k_row = k_subset[k_idx, :]
-
-        hard_matches = rows_all_true(s_set[:, :HARD_COLUMN_COUNT] == k_row[:HARD_COLUMN_COUNT]) & s_available
-        hard_matches = hard_matches.reshape(
-            -1,
-        )
-
-        if total_available > 0:
-            # Now calculate the distance between the k_row and all the hard matches we haven't already matched
-            s_tmp[hard_matches] = batch_mahalanobis_squared(
-                s_set[hard_matches, HARD_COLUMN_COUNT:], k_row[HARD_COLUMN_COUNT:], invcov
-            )
-            # Find the index of the minimum distance in s_tmp[hard_matches] but map it back to the index in s_set
-            if np.any(hard_matches):
-                min_dist_idx = np.argmin(s_tmp[hard_matches])
-                min_dist_idx = np.arange(s_tmp.shape[0])[hard_matches][min_dist_idx]
-
-                results.append((k_idx, min_dist_idx))
-                s_available[min_dist_idx] = False
-                total_available -= 1
-        else:
-            matchless.append(k_idx)
-
-    return (results, matchless)
-
-# optimised batch implementation of mahalanobis distance that returns a distance per row
-@jit(nopython=True, fastmath=True, error_model="numpy")
-def batch_mahalanobis_squared(rows, vector, invcov):
-    # calculate the difference between the vector and each row (broadcasted)
-    diff = rows - vector
-    # calculate the distance for each row in one batch
-    dists = (np.dot(diff, invcov) * diff).sum(axis=1)
-    return dists
-
+    
+    for i in range(0, k_cat_combinations.shape[0]):
+        # i = 6 # ith element of the unique combinations of the luc time series in k
+        # for in range()
+        k_cat_comb = k_cat_combinations.iloc[i]
+        k_cat_index = k_sub[match_cats] == k_cat_comb
+        k_cat = k_sub[(k_cat_index).all(axis=1)]
+        k_cat_pca = k_sub_pca[(k_cat_index).all(axis=1)]
+        
+        # Find the subset in km_pixels that matches this combination
+        m_cat_index = m_sub[match_cats] == k_cat_comb
+        m_cat = m_sub[(m_cat_index).all(axis=1)]
+        m_cat_pca = m_sub_pca[(m_cat_index).all(axis=1)]
+        
+        # If there is no suitable match for the pre-project luc time series
+        # Then it may be preferable to just take the luc state at t0
+        # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)]
+        # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)]
+        # For now if there are no matches return nothing
+        
+        if(m_cat.shape[0] < k_cat.shape[0] * 5):
+            matchless.append(k_cat)
+            continue
+        
+        matches_index = loop_match(m_cat_pca, k_cat_pca)
+        m_cat_matches = m_cat.iloc[matches_index]
+        
+        # Join the pairs into one dataframe:
+        k_cat = k_cat.reset_index(drop = True)
+        m_cat_matches = m_cat_matches.reset_index(drop = True)  
+        pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1)
+        # Append the resulting DataFrame to the list
+        pairs.append(pairs_df)
+    
+    return (pairs, matchless)
 
 def find_pairs(
     k_parquet_filename: str,
     m_parquet_filename: str,
     start_year: int,
+    luc_match: bool,
     seed: int,
     output_folder: str,
     processes_count: int
 ) -> None:
+    logging.info("Loading K from %s", k_parquet_filename)
+    k_pixels = pd.read_parquet(k_parquet_filename)
+    logging.info("Loading M from %s", m_parquet_filename)
+    m_pixels = pd.read_parquet(m_parquet_filename)
+    # concat m and k
+    km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
+                        m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
+    
+    # Extract only the continuous columns
+    km_pixels_distance = km_pixels[DISTANCE_COLUMNS]
+    # PCA transform and conversion to 32 bit ints
+    logging.info("Transforming continuous variables to PCA space")
+    km_pca = to_pca_int32(km_pixels_distance)
+    # Find clusters using Kmeans
+    logging.info("Starting cluster assignment using kmeans")
+    # Initialize the KMeans object
+    kmeans = faiss.Kmeans(d=km_pca.shape[1], k=NUM_CLUSTERS, niter=NUM_ITERATIONS, verbose=True)
+    # Perform clustering
+    kmeans.train(km_pca)
+    # Get cluster assignments
+    km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten()
+    
     logging.info("Starting find pairs")
     os.makedirs(output_folder, exist_ok=True)
-
+    
     rng = np.random.default_rng(seed)
     iteration_seeds = zip(range(REPEAT_MATCH_FINDING), rng.integers(0, 1000000, REPEAT_MATCH_FINDING))
-
+    
     with Pool(processes=processes_count) as pool:
         pool.map(
             partial(
                 find_match_iteration,
-                k_parquet_filename,
-                m_parquet_filename,
+                km_pixels,
+                km_pca,
                 start_year,
+                luc_match,
                 output_folder
             ),
             iteration_seeds
@@ -346,6 +287,13 @@ def main():
         dest="start_year",
         help="Year project started."
     )
+    parser.add_argument(
+        "--luc_match",
+        type=bool,
+        required=True,
+        dest="luc_match",
+        help="Boolean determines whether matching should include LUCs."
+    )
     parser.add_argument(
         "--seed",
         type=int,
@@ -374,6 +322,7 @@ def main():
         args.k_filename,
         args.m_filename,
         args.start_year,
+        args.luc_match,
         args.seed,
         args.output_directory_path,
         args.processes_count

From f1d44eee7ab1f09df4d29d0a3be66eef278ca684 Mon Sep 17 00:00:00 2001
From: Abigail E Williams <abbyevewilliams@gmail.com>
Date: Thu, 18 Jul 2024 13:21:11 +0000
Subject: [PATCH 03/19] Added wrapper script

---
 README.md            |   3 +
 scripts/tmfpython.sh | 227 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100755 scripts/tmfpython.sh

diff --git a/README.md b/README.md
index f02d35b..027f896 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,9 @@ The code is broken into three main sections contained in the
     to generate the outputs from the methodology like the equivalent permanence
     and the additionality per year.
 
+Additionally, there is a wrapper script called `tmfpython.sh` in the `scripts/` folder. 
+This contains all of the pipeline commands and uses the `tmfpython3` magic wrapper.
+
 ## Bugs
 
 Should you find any bugs or issues with the code then please do open an issue on
diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
new file mode 100755
index 0000000..beb8178
--- /dev/null
+++ b/scripts/tmfpython.sh
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+#run with command: scripts/tmfpython.sh -p 1113 -t 2010 ...
+#p: project ID 
+#t: year of project start (t0)
+#e: evaluation year (default: 2022)
+#r: whether to run an ex-post evaluation and knit the results in an R notebook (true/false, default: false).
+#a: whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false).
+
+#NB running evaluations requires the evaluations code
+
+# Check which branch is currently checked out
+current_branch=$(git rev-parse --abbrev-ref HEAD)
+
+set -e
+
+############ DEFAULTS ###############
+
+input_dir="/maps/aew85/projects"
+output_dir="/maps/aew85/tmf_pipe_out"
+eval_year=2022
+ex_post=false
+ex_ante=false
+
+#####################################
+
+function display_help() {
+    echo "Usage: $0 [options]"
+    echo
+    echo "Options:"
+    echo "  -p <project>        Project name"
+    echo "  -t <t0>             Start year"
+    echo "  -e <year>           Evaluation year"
+    echo "  -r <ex_post>        Knit ex post evaluation? (true/false)"
+    echo "  -a <ex_ante>        Knit ex ante evaluation? (true/false)"
+    echo "  -h                  Display this help message"
+    echo
+    echo "Example:"
+    echo "  $0 -p 'gola' -t 2012 -e 2021 -r true -a true"
+}
+
+# Parse arguments
+while getopts "p:t:e:r:a:h" flag
+do
+    case "${flag}" in
+        p) proj=${OPTARG};;
+        t) t0=${OPTARG};;
+        e) eval_year=${OPTARG};;
+        r) ex_post=${OPTARG};;
+        a) ex_ante=${OPTARG};;
+        h) display_help; exit 0;;
+        *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;;
+    esac
+done
+
+echo "Project: $proj"
+echo "t0: $t0"
+echo "Evaluation year: $eval_year"
+echo "Ex-post evaluation: $ex_post"
+echo "Ex-ante evaluation: $ex_ante"
+
+if [ $# -eq 0 ]; then
+    display_help
+    exit 1
+fi
+
+# Make project output directory
+mkdir -p "${output_dir}/${proj}"
+echo "--Folder created.--"
+
+#Make buffer
+tmfpython3 -m methods.inputs.generate_boundary --project "${input_dir}/${proj}.geojson" --output "${output_dir}/${proj}/buffer.geojson"
+echo "--Buffer created.--"
+
+#Make leakage area
+tmfpython3 -m methods.inputs.generate_leakage --project "${input_dir}/${proj}.geojson" --output "${output_dir}/${proj}/leakage.geojson"
+echo "--Leakage created.--"
+
+# Get GEDI data
+tmfpython3 -m methods.inputs.locate_gedi_data "${output_dir}/${proj}/buffer.geojson" /maps/4C/gedi/granule/info/
+tmfpython3 -m methods.inputs.download_gedi_data /maps/4C/gedi/granule/info/* /maps/4C/gedi/granule/
+tmfpython3 -m methods.inputs.filter_gedi_data --buffer "${output_dir}/${proj}/buffer.geojson" \
+                                        --granules /maps/4C/gedi/granule/ \
+                                        --output  "${output_dir}/${proj}/gedi.geojson"
+tmfpython3 -m methods.inputs.generate_carbon_density --jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+                                                --gedi "${output_dir}/${proj}/gedi.geojson" \
+                                                --output "${output_dir}/${proj}/carbon-density.csv"
+
+echo "--GEDI data obtained.--"
+
+#Generate list of overlapping countries
+tmfpython3 -m methods.inputs.generate_country_list \
+--leakage "${output_dir}/${proj}/leakage.geojson" \
+--countries /maps/4C/osm_boundaries.geojson \
+--output "${output_dir}/${proj}/country-list.json"
+echo "--Country list created.--"
+
+#Generate matching area
+tmfpython3 -m methods.inputs.generate_matching_area --project "${input_dir}/${proj}.geojson" \
+--countrycodes "${output_dir}/${proj}/country-list.json" \
+--countries /maps/4C/osm_boundaries.geojson \
+--ecoregions /maps/4C/ecoregions/ecoregions.geojson \
+--projects /maps/mwd24/tmf-data/projects \
+--output "${output_dir}/${proj}/matching-area.geojson"
+echo "--Matching area created.--"
+
+#Download SRTM data
+tmfpython3 -m methods.inputs.download_srtm_data --project "${input_dir}/${proj}.geojson" \
+--matching "${output_dir}/${proj}/matching-area.geojson" \
+--zips "${output_dir}/srtm/zip" \
+--tifs "${output_dir}/srtm/tif"
+echo "--SRTM downloaded.--"
+
+#Generate slopes
+tmfpython3 -m methods.inputs.generate_slope --input "${output_dir}/srtm/tif" --output "${output_dir}/slopes"
+echo "--Slope created.--"
+
+#Rescale to JRC tiles
+tmfpython3 -m methods.inputs.rescale_tiles_to_jrc --jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+--tiles "${output_dir}/srtm/tif" \
+--output "${output_dir}/rescaled-elevation"
+tmfpython3 -m methods.inputs.rescale_tiles_to_jrc \
+--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+--tiles "${output_dir}/slopes" \
+--output "${output_dir}/rescaled-slopes"
+echo "--JRC rescaled.--"
+
+#Create country raster
+tmfpython3 -m methods.inputs.generate_country_raster --jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+--matching "${output_dir}/${proj}/matching-area.geojson" \
+--countries /maps/4C/osm_boundaries.geojson \
+--output "${output_dir}/${proj}/countries.tif"
+echo "--Country raster created.--"
+
+#Matching: calculate set K
+tmfpython3 -m methods.matching.calculate_k \
+--project "${input_dir}/${proj}.geojson" \
+--start_year "$t0" \
+--evaluation_year "$eval_year" \
+--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+--cpc /maps/rhm31/fine_circular_coverage/forecol_complete/ \
+--ecoregions /maps/4C/ecoregions/ \
+--elevation "${output_dir}/rescaled-elevation" \
+--slope "${output_dir}/rescaled-slopes" \
+--access /maps/4C/access \
+--countries-raster "${output_dir}/${proj}/countries.tif" \
+--output "${output_dir}/${proj}/k.parquet"
+echo "--Set K created.--"
+
+#Matching: calculate set M
+tmfpython3 -m methods.matching.find_potential_matches \
+--k "${output_dir}/${proj}/k.parquet" \
+--matching "${output_dir}/${proj}/matching-area.geojson" \
+--start_year "$t0" \
+--evaluation_year "$eval_year" \
+--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+--cpc /maps/rhm31/fine_circular_coverage/forecol_complete/ \
+--ecoregions /maps/4C/ecoregions/ \
+--elevation "${output_dir}/rescaled-elevation" \
+--slope "${output_dir}/rescaled-slopes" \
+--access /maps/4C/access \
+--countries-raster "${output_dir}/${proj}/countries.tif" \
+--output "${output_dir}/${proj}/matches"
+tmfpython3 -m methods.matching.build_m_raster \
+--rasters_directory "${output_dir}/${proj}/matches" \
+--output "${output_dir}/${proj}/matches.tif" \
+-j 20
+tmfpython3 -m methods.matching.build_m_table \
+--raster "${output_dir}/${proj}/matches.tif" \
+--matching "${output_dir}/${proj}/matching-area.geojson" \
+--start_year "$t0" \
+--evaluation_year "$eval_year" \
+--jrc /maps/forecol/data/JRC/v1_2022/AnnualChange/tifs \
+--cpc /maps/rhm31/fine_circular_coverage/forecol_complete/ \
+--ecoregions /maps/4C/ecoregions/ \
+--elevation "${output_dir}/rescaled-elevation" \
+--slope "${output_dir}/rescaled-slopes" \
+--access /maps/4C/access \
+--countries-raster "${output_dir}/${proj}/countries.tif" \
+--output "${output_dir}/${proj}/matches.parquet"
+echo "--Set M created.--"
+
+#Matching: find pairs
+tmfpython3 -m methods.matching.find_pairs \
+    --k "${output_dir}/${proj}/k.parquet" \
+    --m "${output_dir}/${proj}/matches.parquet" \
+    --start_year "$t0" \
+    --output "${output_dir}/${proj}/pairs" \
+    --seed 42 \
+    -j 1
+    echo "--Pairs matched.--"
+
+#Calculate additionality
+if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then
+    tmfpython3 -m methods.outputs.calculate_additionality \
+    --project "${input_dir}/${proj}.geojson" \
+    --project_start "$t0" \
+    --evaluation_year "$eval_year" \
+    --density "${output_dir}/${proj}/carbon-density.csv" \
+    --matches "${output_dir}/${proj}/pairs" \
+    --output "${output_dir}/${proj}/additionality.csv" \
+    --stopping "${output_dir}/${proj}/stopping.csv"
+    echo "--Additionality and stopping criteria calculated.--"
+    else
+    tmfpython3 -m methods.outputs.calculate_additionality \
+    --project "${input_dir}/${proj}.geojson" \
+    --project_start "$t0" \
+    --evaluation_year "$eval_year" \
+    --density "${output_dir}/${proj}/carbon-density.csv" \
+    --matches "${output_dir}/${proj}/pairs" \
+    --output "${output_dir}/${proj}/additionality.csv"
+    echo "--Additionality calculated.--"
+fi
+
+# Run ex post evaluation
+if [ "$ex_post" == "true" ]; then
+evaluations_dir="~/evaluations"
+ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html"
+Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))"
+fi
+
+# Run ex-ante evaluation
+if [ "$ex_ante" == "true" ]; then
+evaluations_dir="~/evaluations"
+ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
+Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))"
+fi
\ No newline at end of file

From 4a7abcec0f6af5838065e0650142db6e537929c8 Mon Sep 17 00:00:00 2001
From: swinersha <twswinfield@gmai.com>
Date: Wed, 7 Aug 2024 13:34:41 +0000
Subject: [PATCH 04/19] fix: sppeds up find pairs by sampling M to the size of
 K

---
 .../cluster_find_pairs_interactive.py         | 303 +++---------------
 methods/matching/find_pairs.py                |  54 ++--
 2 files changed, 80 insertions(+), 277 deletions(-)

diff --git a/methods/matching/cluster_find_pairs_interactive.py b/methods/matching/cluster_find_pairs_interactive.py
index 9ac25fb..215722b 100644
--- a/methods/matching/cluster_find_pairs_interactive.py
+++ b/methods/matching/cluster_find_pairs_interactive.py
@@ -7,6 +7,7 @@
 from sklearn.cluster import KMeans
 import matplotlib.pyplot as plt
 import geopandas as gpd
+from pyproj import Proj, transform
 import os
 import time
 import sys
@@ -108,14 +109,53 @@ def calculate_smd(group1, group2):
 
 # Define the start year
 t0 = 2012 # READ THIS IN
-match_years = [t0-10, t0-5, t0]
 
 # Read in the data
 boundary = gpd.read_file('/maps/aew85/projects/1201.geojson')
 
-k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k_all.parquet')
+k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k.parquet')
+# k_pixels = pd.read_parquet('/maps/tws36/tmf_pipe_out/1201/k_all.parquet')
 m_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/1201/matches.parquet')
 
+
+t0 = 2018
+boundary = gpd.read_file('/maps/aew85/projects/ona.geojson')
+k_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/fastfp_test_ona/k.parquet')
+m_pixels = pd.read_parquet('/maps/aew85/tmf_pipe_out/fastfp_test_ona/matches.parquet')
+
+if(m_pixels.shape[0] > (k_pixels.shape[0])):
+    m_sub_size = int(k_pixels.shape[0]) # First down sample M as it is ~230 million points    
+    m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False)
+    m_pixels = m_pixels.iloc[m_random_indices]
+
+# # Calculate the central coordinates (centroid)
+# central_lat = m_pixels['lat'].mean()
+# central_lon = m_pixels['lng'].mean()
+# aeqd_proj = f"+proj=aeqd +lat_0={central_lat} +lon_0={central_lon} +datum=WGS84"
+
+# # Convert the DataFrame to a GeoDataFrame
+# m_gdf = gpd.GeoDataFrame(m_pixels, geometry=gpd.points_from_xy(m_pixels.lng, m_pixels.lat))
+# # Set the original CRS to WGS84 (EPSG:4326)
+# m_gdf.set_crs(epsg=4326, inplace=True)
+
+# # Transform the GeoDataFrame to the AEQD projection
+# m_gdf_aeqd = m_gdf.to_crs(aeqd_proj)
+
+# # Extract the transformed coordinates
+# gdf_aeqd['aeqd_x'] = gdf_aeqd.geometry.x
+# gdf_aeqd['aeqd_y'] = gdf_aeqd.geometry.y
+
+# # Define the grid resolution in meters
+# grid_resolution_m = 5000  # 5 km
+
+# # Calculate grid cell indices
+# gdf_aeqd['grid_x'] = (gdf_aeqd['aeqd_x'] // grid_resolution_m).astype(int)
+# gdf_aeqd['grid_y'] = (gdf_aeqd['aeqd_y'] // grid_resolution_m).astype(int)
+
+# # Print the first few rows to verify
+# print(gdf_aeqd.head())
+
+
 # concat m and k
 km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
                        m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
@@ -124,8 +164,6 @@ def calculate_smd(group1, group2):
 exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt']
 exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')]
 
-match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
-
 # Extract only the continuous columns
 continuous_columns = km_pixels.columns.difference(exclude_columns)
 km_pixels_selected = km_pixels[continuous_columns]
@@ -178,7 +216,7 @@ def calculate_smd(group1, group2):
     plt.ylabel('PCA Component 2')
     plt.legend()
     plt.show()
-    plt.savefig('Figures/cluster_centres_faiss_1.png')
+    plt.savefig('Figures/ona_cluster_centres_faiss_1.png')
     plt.close()  # Close the plot to free up memory
 
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -202,11 +240,14 @@ def calculate_smd(group1, group2):
         fig.delaxes(axes[j])
     
     plt.tight_layout()
-    plt.savefig('Figures/cluster_faiss_1_facet.png')
+    plt.savefig('Figures/Ona_cluster_faiss_1_facet.png')
     plt.close()
 
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+match_years = [t0-10, t0-5, t0]
+match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
+
 # Extract K and M pixels 
 k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt']
 m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl']
@@ -216,7 +257,7 @@ def calculate_smd(group1, group2):
 m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy()
 
 k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION)
-m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION)
+m_sub_size = int(m_pixels.shape[0] * 1)
 
 # Define indexs for the samples from K and M
 k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False)
@@ -330,252 +371,4 @@ def calculate_smd(group1, group2):
 smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
 print(smd_df)
 
-smd_results = []
-for column in columns_to_compare:
-    smd, mean1, mean2, pooled_std = calculate_smd(k_pixels[column], m_pixels[column])
-    smd_results.append((column, smd, mean1, mean2, pooled_std))
 
-# Convert the results to a DataFrame for better readability
-smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
-print(smd_df)
-
-
-
-
-
-
-
-def find_match_iteration(
-    k_parquet_filename: str,
-    m_parquet_filename: str,
-    start_year: int,
-    output_folder: str,
-    idx_and_seed: tuple[int, int]
-) -> None:
-    logging.info("Find match iteration %d of %d", idx_and_seed[0] + 1, REPEAT_MATCH_FINDING)
-    rng = np.random.default_rng(idx_and_seed[1])
-
-    logging.info("Loading K from %s", k_parquet_filename)
-    k_pixels = pd.read_parquet(k_parquet_filename)
-    
-    logging.info("Loading M from %s", m_parquet_filename)
-    m_pixels = pd.read_parquet(m_parquet_filename)
-    
-    # concat m and k
-    km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
-                           m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
-    
-    # Find the continuous columns
-    exclude_columns = ['ID', 'x', 'y', 'lat', 'lng', 'country', 'ecoregion', 'trt']
-    exclude_columns += [col for col in km_pixels.columns if col.startswith('luc')]
-    continuous_columns = km_pixels.columns.difference(exclude_columns)
-    # Categorical columns
-    match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
-    
-    logging.info("Starting PCA transformation of k and m union. km_pixels.shape: %a", {km_pixels.shape})
-    # PCA transform and conversion to 32 bit ints for continuous only
-    km_pca = to_pca_int32(km_pixels[continuous_columns])
-    logging.info("Done PCA transformation")
-    
-    # Extract K and M pixels  - this might be unnecessary I just wanted to make sure 
-    # K and M were in the same order here and in the PCA transform
-    k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt']
-    m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl']
-    # Extract K and M PCA transforms
-    k_pca = km_pca.loc[km_pixels['trt'] == 'trt'].to_numpy()
-    m_pca = km_pca.loc[km_pixels['trt'] == 'ctrl'].to_numpy()
-    
-    # Sample from K and M
-    k_sub_size = int(k_pixels.shape[0]* K_SUB_PROPORTION)
-    m_sub_size = int(m_pixels.shape[0] * M_SUB_PROPORTION)
-    # Define indexs for the samples from K and M
-    k_random_indices = np.random.choice(k_pixels.shape[0], size=k_sub_size, replace=False)
-    m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False)
-    # Take random samples from K and M pixels
-    k_sub = k_pixels.iloc[k_random_indices]
-    m_sub = m_pixels.iloc[m_random_indices]
-    # Take corresponding random samples from the PCA transformed K and M 
-    k_sub_pca = k_pca[k_random_indices,:]
-    m_sub_pca = m_pca[m_random_indices,:]
-    
-    logging.info("Samples taken from K and M. k_sub.shape: %a; m_sub.shape: %a", {k_sub.shape, m_sub.shape})
-    
-    # Identify the unique combinations of luc columns
-    k_cat_combinations = k_sub[match_cats].drop_duplicates().sort_values(by=match_cats, ascending=[True] * len(match_cats))
-    
-    pairs_list = []
-    matchless_list = []
-    
-    logging.info("Starting greedy matching... k_sub.shape: %s, m_sub.shape: %s",
-                                  k_sub.shape, m_sub.shape)
-    
-    start_time = time.time()
-    for i in range(0, k_cat_combinations.shape[0]):
-        # i = 6 # ith element of the unique combinations of the luc time series in k
-        # for in range()
-        k_cat_comb = k_cat_combinations.iloc[i]
-        k_cat = k_sub[(k_sub[match_cats] == k_cat_comb).all(axis=1)]
-        k_cat_pca = k_sub_pca[(k_sub[match_cats] == k_cat_comb).all(axis=1)]
-        
-        # Find the subset in km_pixels that matches this combination
-        m_cat = m_sub[(m_sub[match_cats] == k_cat_comb).all(axis=1)]
-        m_cat_pca = m_sub_pca[(m_sub[match_cats] == k_cat_comb).all(axis=1)]
-        
-        if VERBOSE:
-            print('ksub_cat:' + str(k_cat.shape[0]))
-            print('msub_cat:' + str(m_cat.shape[0]))
-        
-        # If there is no suitable match for the pre-project luc time series
-        # Then it may be preferable to just take the luc state at t0
-        # m_luc_comb = m_pixels[(m_pixels[match_luc_years[1:3]] == K_luc_comb[1:3]).all(axis=1)]
-        # m_luc_comb = m_pixels[(m_pixels[match_luc_years[2:3]] == K_luc_comb[2:3]).all(axis=1)]
-        # For if there are no matches return nothing
-        
-        if(m_cat.shape[0] < k_cat.shape[0] * 5):
-            # print("M insufficient for matching. Set VERBOSE to True for more details.")
-            # Append the matchless DataFrame to the list
-            matchless_list.append(k_cat)
-            continue
-        
-        # Find the matches
-        matches_index = loop_match(m_cat_pca, k_cat_pca)
-        m_cat_matches = m_cat.iloc[matches_index]
-        
-        # i = 0
-        # matched = pd.concat([k_cat.iloc[i], m_cat.iloc[matches[i]]], axis=1, ignore_index=True)
-        # matched.columns = ['trt', 'ctrl']
-        # matched
-        #Looks great!
-        columns_to_compare = ['access', 'cpc0_d', 'cpc0_u', 'cpc10_d', 'cpc10_u', 'cpc5_d', 'cpc5_u', 'elevation', 'slope']
-        # Calculate SMDs for the specified columns
-        smd_results = []
-        for column in columns_to_compare:
-            smd, mean1, mean2, pooled_std = calculate_smd(k_cat[column], m_cat_matches[column])
-            smd_results.append((column, smd, mean1, mean2, pooled_std))
-        
-        # Convert the results to a DataFrame for better readability
-        smd_df = pd.DataFrame(smd_results, columns=['Variable', 'SMD', 'Mean_k_cat', 'Mean_m_cat', 'Pooled_std'])
-        
-        if VERBOSE:
-            # Print the results
-            print("categorical combination:")
-            print(k_cat_comb)
-            # Count how many items in 'column1' are not equal to the specified integer value
-            print("LUC flips in K:")
-            (k_cat['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum()
-            print("LUC flips in matches:")
-            (m_cat_matches['luc_2022'] != k_cat_comb['luc_' + str(t0)]).sum()
-            print("Standardized Mean Differences:")
-            print(smd_df)
-        
-        # Join the pairs into one dataframe:
-        k_cat = k_cat.reset_index(drop = True)
-        m_cat_matches = m_cat_matches.reset_index(drop = True)  
-        pairs_df = pd.concat([k_cat.add_prefix('k_'), m_cat_matches.add_prefix('s_')], axis=1)
-        
-        # Append the resulting DataFrame to the list
-        pairs_list.append(pairs_df)
-
-    # Combine all the DataFrames in the list into a single DataFrame
-    pairs = pd.concat(pairs_list, ignore_index=True)
-    matchless = pd.concat(matchless_list, ignore_index=True)
-    
-    logging.info("Finished greedy matching... pairs.shape: %s, matchless.shape: %s",
-                                  pairs.shape, matchless.shape)
-    
-    logging.info("Starting storing matches...")
-    pairs.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}.parquet'))
-    matchless.to_parquet(os.path.join(output_folder, f'{idx_and_seed[1]}_matchless.parquet'))
-    
-    logging.info("Finished find match iteration")
-
-
-def find_pairs(
-    k_parquet_filename: str,
-    m_parquet_filename: str,
-    start_year: int,
-    seed: int,
-    output_folder: str,
-    processes_count: int
-) -> None:
-    logging.info("Starting find pairs")
-    os.makedirs(output_folder, exist_ok=True)
-
-    rng = np.random.default_rng(seed)
-    iteration_seeds = zip(range(REPEAT_MATCH_FINDING), rng.integers(0, 1000000, REPEAT_MATCH_FINDING))
-
-    with Pool(processes=processes_count) as pool:
-        pool.map(
-            partial(
-                find_match_iteration,
-                k_parquet_filename,
-                m_parquet_filename,
-                start_year,
-                output_folder
-            ),
-            iteration_seeds
-        )
-
-
-def main():
-    # If you use the default multiprocess model then you risk deadlocks when logging (which we
-    # have hit). Spawn is the default on macOS, but not on Linux.
-    set_start_method("spawn")
-
-    parser = argparse.ArgumentParser(description="Takes K and M and finds 100 sets of matches.")
-    parser.add_argument(
-        "--k",
-        type=str,
-        required=True,
-        dest="k_filename",
-        help="Parquet file containing pixels from K as generated by calculate_k.py"
-    )
-    parser.add_argument(
-        "--m",
-        type=str,
-        required=True,
-        dest="m_filename",
-        help="Parquet file containing pixels from M as generated by build_m_table.py"
-    )
-    parser.add_argument(
-        "--start_year",
-        type=int,
-        required=True,
-        dest="start_year",
-        help="Year project started."
-    )
-    parser.add_argument(
-        "--seed",
-        type=int,
-        required=True,
-        dest="seed",
-        help="Random number seed, to ensure experiments are repeatable."
-    )
-    parser.add_argument(
-        "--output",
-        type=str,
-        required=True,
-        dest="output_directory_path",
-        help="Directory into which output matches will be written. Will be created if it does not exist."
-    )
-    parser.add_argument(
-        "-j",
-        type=int,
-        required=False,
-        default=round(cpu_count() / 2),
-        dest="processes_count",
-        help="Number of concurrent threads to use."
-    )
-    args = parser.parse_args()
-
-    find_pairs(
-        args.k_filename,
-        args.m_filename,
-        args.start_year,
-        args.seed,
-        args.output_directory_path,
-        args.processes_count
-    )
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py
index 62e9e9c..e253014 100644
--- a/methods/matching/find_pairs.py
+++ b/methods/matching/find_pairs.py
@@ -20,11 +20,15 @@
 # m_parquet_filename = '/maps/aew85/tmf_pipe_out/1201/tom_pairs/matches.parquet'
 # luc_match = True
 
+t0 = 2018
+k_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/k.parquet'
+m_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/matches.parquet'
+
 REPEAT_MATCH_FINDING = 100
 DEBUG = False
 
 K_SUB_PROPORTION = 0.01
-M_SUB_PROPORTION = 0.1
+M_SUB_PROPORTION = 1
 # Number of clusters
 NUM_CLUSTERS = 9
 # Number of iterations for K means fitting
@@ -98,8 +102,8 @@ def calculate_smd(group1, group2):
     return smd, mean1, mean2, pooled_std
 
 def find_match_iteration(
-    km_pixels: pd.DataFrame,
-    km_pca: np.ndarray,
+    k_pixels: pd.DataFrame,
+    m_pixels: pd.DataFrame,
     start_year: int,
     luc_match: bool,
     output_folder: str,
@@ -115,6 +119,29 @@ def find_match_iteration(
     else:
         match_cats = ["ecoregion", "country", "cluster"]
     
+    if(m_pixels.shape[0] > (k_pixels.shape[0])):
+        m_sub_size = int(k_pixels.shape[0]) # First down sample M as it is ~230 million points    
+        m_random_indices = np.random.choice(m_pixels.shape[0], size=m_sub_size, replace=False)
+        m_pixels = m_pixels.iloc[m_random_indices]
+    
+    # concat m and k
+    km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
+                        m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
+    
+    # Extract only the continuous columns
+    km_pixels_distance = km_pixels[DISTANCE_COLUMNS]
+    # PCA transform and conversion to 32 bit ints
+    logging.info("Transforming continuous variables to PCA space")
+    km_pca = to_pca_int32(km_pixels_distance)
+    # Find clusters using Kmeans
+    logging.info("Starting cluster assignment using kmeans")
+    # Initialize the KMeans object
+    kmeans = faiss.Kmeans(d=km_pca.shape[1], k=NUM_CLUSTERS, niter=NUM_ITERATIONS, verbose=True)
+    # Perform clustering
+    kmeans.train(km_pca)
+    # Get cluster assignments
+    km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten()
+    
     # Extract K and M pixels 
     k_pixels = km_pixels.loc[km_pixels['trt'] == 'trt']
     m_pixels = km_pixels.loc[km_pixels['trt'] == 'ctrl']
@@ -223,23 +250,6 @@ def find_pairs(
     k_pixels = pd.read_parquet(k_parquet_filename)
     logging.info("Loading M from %s", m_parquet_filename)
     m_pixels = pd.read_parquet(m_parquet_filename)
-    # concat m and k
-    km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
-                        m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
-    
-    # Extract only the continuous columns
-    km_pixels_distance = km_pixels[DISTANCE_COLUMNS]
-    # PCA transform and conversion to 32 bit ints
-    logging.info("Transforming continuous variables to PCA space")
-    km_pca = to_pca_int32(km_pixels_distance)
-    # Find clusters using Kmeans
-    logging.info("Starting cluster assignment using kmeans")
-    # Initialize the KMeans object
-    kmeans = faiss.Kmeans(d=km_pca.shape[1], k=NUM_CLUSTERS, niter=NUM_ITERATIONS, verbose=True)
-    # Perform clustering
-    kmeans.train(km_pca)
-    # Get cluster assignments
-    km_pixels['cluster'] = kmeans.index.search(km_pca, 1)[1].flatten()
     
     logging.info("Starting find pairs")
     os.makedirs(output_folder, exist_ok=True)
@@ -251,8 +261,8 @@ def find_pairs(
         pool.map(
             partial(
                 find_match_iteration,
-                km_pixels,
-                km_pca,
+                k_pixels,
+                m_pixels,
                 start_year,
                 luc_match,
                 output_folder

From 81377a6217247dd8635223a4cac2bb66785c50de Mon Sep 17 00:00:00 2001
From: Abby Williams <149403544+abbyevewilliams@users.noreply.github.com>
Date: Wed, 7 Aug 2024 14:42:37 +0100
Subject: [PATCH 05/19] Update calculate_k.py

Removed the problematic lines for ecoregion and access
---
 methods/matching/calculate_k.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py
index dba2284..60ad146 100644
--- a/methods/matching/calculate_k.py
+++ b/methods/matching/calculate_k.py
@@ -73,7 +73,6 @@ def build_layer_collection(
     #     RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
     #         glob.glob("*.tif", root_dir=ecoregions_directory_path)
     # ], name="ecoregions")
-    ecoregions = RasterLayer.layer_from_file(ecoregions_directory_path)
 
     elevation = GroupLayer([
         RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in
@@ -88,7 +87,6 @@ def build_layer_collection(
     #     RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
     #         glob.glob("*.tif", root_dir=access_directory_path)
     # ], name="access")
-    access = RasterLayer.layer_from_file(access_directory_path)
 
     countries = RasterLayer.layer_from_file(countries_raster_filename)
 

From 2bec9d97789b45c077481591c0c2465d9fee7b66 Mon Sep 17 00:00:00 2001
From: Abby Williams <149403544+abbyevewilliams@users.noreply.github.com>
Date: Thu, 8 Aug 2024 12:46:30 +0100
Subject: [PATCH 06/19] Update calculate_k.py

Uncommented lines needed for ecoregion and access
---
 methods/matching/calculate_k.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py
index 60ad146..a5fa156 100644
--- a/methods/matching/calculate_k.py
+++ b/methods/matching/calculate_k.py
@@ -69,10 +69,10 @@ def build_layer_collection(
 
     # ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once
     # as part of import of the ecoregions data
-    # ecoregions = GroupLayer([
-    #     RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
-    #         glob.glob("*.tif", root_dir=ecoregions_directory_path)
-    # ], name="ecoregions")
+    ecoregions = GroupLayer([
+        RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
+            glob.glob("*.tif", root_dir=ecoregions_directory_path)
+    ], name="ecoregions")
 
     elevation = GroupLayer([
         RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in
@@ -83,10 +83,10 @@ def build_layer_collection(
             glob.glob("slope*.tif", root_dir=slope_directory_path)
     ], name="slopes")
 
-    # access = GroupLayer([
-    #     RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
-    #         glob.glob("*.tif", root_dir=access_directory_path)
-    # ], name="access")
+    access = GroupLayer([
+        RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
+            glob.glob("*.tif", root_dir=access_directory_path)
+    ], name="access")
 
     countries = RasterLayer.layer_from_file(countries_raster_filename)
 

From bc64c053cc3c73eeb01c8dab1ab4900c2660079f Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Mon, 12 Aug 2024 11:45:53 +0000
Subject: [PATCH 07/19] First attempt at ex ante offset

---
 methods/matching/build_m_table.py          |  6 +++---
 methods/matching/calculate_k.py            | 18 ++++++++--------
 methods/matching/find_pairs.py             | 24 +++++++++++++++++++---
 methods/matching/find_potential_matches.py |  4 ++--
 scripts/tmfpython.sh                       | 14 ++++++++++++-
 5 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/methods/matching/build_m_table.py b/methods/matching/build_m_table.py
index 66a27e2..ddf24e6 100644
--- a/methods/matching/build_m_table.py
+++ b/methods/matching/build_m_table.py
@@ -28,8 +28,8 @@ def build_m_table(
     matching_collection = build_layer_collection(
         merged_raster.pixel_scale,
         merged_raster.projection,
-        list(luc_range(start_year, evaluation_year)),
-        [start_year, start_year - 5, start_year - 10],
+        list(luc_range(start_year-10, start_year)),
+        [start_year-10, start_year - 15, start_year - 20],
         matching_zone_filename,
         jrc_directory_path,
         cpc_directory_path,
@@ -44,7 +44,7 @@ def build_m_table(
     assert matching_collection.boundary.area == merged_raster.area
 
     results = []
-    luc_columns = [f'luc_{year}' for year in luc_range(start_year, evaluation_year)]
+    luc_columns = [f'luc_{year}' for year in luc_range(start_year-10, start_year)]
     cpc_columns = ['cpc0_u', 'cpc0_d', 'cpc5_u', 'cpc5_d', 'cpc10_u', 'cpc10_d']
     columns = ['lat', 'lng', 'ecoregion', 'elevation', 'slope', 'access', 'country'] + luc_columns + cpc_columns
 
diff --git a/methods/matching/calculate_k.py b/methods/matching/calculate_k.py
index dba2284..a5fa156 100644
--- a/methods/matching/calculate_k.py
+++ b/methods/matching/calculate_k.py
@@ -69,11 +69,10 @@ def build_layer_collection(
 
     # ecoregions is such a heavy layer it pays to just rasterize it once - we should possibly do this once
     # as part of import of the ecoregions data
-    # ecoregions = GroupLayer([
-    #     RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
-    #         glob.glob("*.tif", root_dir=ecoregions_directory_path)
-    # ], name="ecoregions")
-    ecoregions = RasterLayer.layer_from_file(ecoregions_directory_path)
+    ecoregions = GroupLayer([
+        RasterLayer.layer_from_file(os.path.join(ecoregions_directory_path, filename)) for filename in
+            glob.glob("*.tif", root_dir=ecoregions_directory_path)
+    ], name="ecoregions")
 
     elevation = GroupLayer([
         RasterLayer.layer_from_file(os.path.join(elevation_directory_path, filename)) for filename in
@@ -84,11 +83,10 @@ def build_layer_collection(
             glob.glob("slope*.tif", root_dir=slope_directory_path)
     ], name="slopes")
 
-    # access = GroupLayer([
-    #     RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
-    #         glob.glob("*.tif", root_dir=access_directory_path)
-    # ], name="access")
-    access = RasterLayer.layer_from_file(access_directory_path)
+    access = GroupLayer([
+        RasterLayer.layer_from_file(os.path.join(access_directory_path, filename)) for filename in
+            glob.glob("*.tif", root_dir=access_directory_path)
+    ], name="access")
 
     countries = RasterLayer.layer_from_file(countries_raster_filename)
 
diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py
index 62e9e9c..24c48c5 100644
--- a/methods/matching/find_pairs.py
+++ b/methods/matching/find_pairs.py
@@ -111,7 +111,7 @@ def find_match_iteration(
     match_years = [start_year + year for year in RELATIVE_MATCH_YEARS]
     # The categorical columns:
     if luc_match:
-        match_cats = ["ecoregion", "country", "cluster"] + ["luc_" + str(year) for year in match_years]
+        match_cats = ["ecoregion", "country", "cluster"] + ["luc_-10", "luc_-5", "luc_0"]
     else:
         match_cats = ["ecoregion", "country", "cluster"]
     
@@ -210,6 +210,17 @@ def greedy_match(
     
     return (pairs, matchless)
 
+def rename_luc_columns(df, start_year):
+
+    # Define the range of years based on the central start_year
+    years = range(start_year - 10, start_year + 11)  # Adjust the range as needed
+    new_column_names = {f'luc_{year}': f'luc_{year - start_year}' for year in years}
+    
+    # Rename columns based on the new column names mapping
+    renamed_df = df.rename(columns=new_column_names)
+    
+    return renamed_df
+
 def find_pairs(
     k_parquet_filename: str,
     m_parquet_filename: str,
@@ -223,9 +234,16 @@ def find_pairs(
     k_pixels = pd.read_parquet(k_parquet_filename)
     logging.info("Loading M from %s", m_parquet_filename)
     m_pixels = pd.read_parquet(m_parquet_filename)
+    
+    # rename columns of each 
+    k_pixels_renamed = rename_luc_columns(k_pixels, start_year)
+    m_pixels_renamed = rename_luc_columns(m_pixels, start_year-10)
+        
     # concat m and k
-    km_pixels = pd.concat([k_pixels.assign(trt='trt', ID=range(0, len(k_pixels))),
-                        m_pixels.assign(trt='ctrl', ID=range(0, len(m_pixels)))], ignore_index=True)
+    km_pixels = pd.concat([k_pixels_renamed.assign(trt='trt', ID=range(0, len(k_pixels))),
+                          m_pixels_renamed.assign(trt='ctrl', 
+                                                ID=range(0, len(m_pixels)))], 
+                                                ignore_index=True)
     
     # Extract only the continuous columns
     km_pixels_distance = km_pixels[DISTANCE_COLUMNS]
diff --git a/methods/matching/find_potential_matches.py b/methods/matching/find_potential_matches.py
index f69865a..51ae8bf 100644
--- a/methods/matching/find_potential_matches.py
+++ b/methods/matching/find_potential_matches.py
@@ -119,8 +119,8 @@ def worker(
     matching_collection = build_layer_collection(
         example_jrc_layer.pixel_scale,
         example_jrc_layer.projection,
-        [start_year, start_year - 5, start_year - 10],
-        [start_year, start_year - 5, start_year - 10],
+        [start_year - 10, start_year - 15, start_year - 20], # create time offset in matching set
+        [start_year - 10, start_year - 15, start_year - 20], # create time offset in matching set
         matching_zone_filename,
         jrc_directory_path,
         cpc_directory_path,
diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index beb8178..44e23d8 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -181,14 +181,26 @@ tmfpython3 -m methods.matching.build_m_table \
 echo "--Set M created.--"
 
 #Matching: find pairs
+if [ "$current_branch" == "tws_cluster_find_pairs" ] -o [ "$current_branch" == "aew85_cluster_find_pairs" ]; then
 tmfpython3 -m methods.matching.find_pairs \
     --k "${output_dir}/${proj}/k.parquet" \
     --m "${output_dir}/${proj}/matches.parquet" \
     --start_year "$t0" \
+    --luc_match False \
     --output "${output_dir}/${proj}/pairs" \
     --seed 42 \
     -j 1
     echo "--Pairs matched.--"
+    else
+    tmfpython3 -m methods.matching.find_pairs \
+    --k "${output_dir}/${proj}/k.parquet" \
+    --m "${output_dir}/${proj}/matches.parquet" \
+    --start_year "$t0" \
+    --output "${output_dir}/${proj}/pairs" \
+    --seed 42 \
+    -j 1
+    echo "--Pairs matched.--"
+fi
 
 #Calculate additionality
 if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then
@@ -223,5 +235,5 @@ fi
 if [ "$ex_ante" == "true" ]; then
 evaluations_dir="~/evaluations"
 ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
-Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))"
+Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))"
 fi
\ No newline at end of file

From 91e6e2f31c7de02d6436aec1f39564efe51cacf1 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Mon, 12 Aug 2024 12:56:41 +0000
Subject: [PATCH 08/19] Removed manual inputs from tws_cluster_find_pairs

---
 methods/matching/find_pairs.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py
index 0ae02c9..ee8d824 100644
--- a/methods/matching/find_pairs.py
+++ b/methods/matching/find_pairs.py
@@ -11,19 +11,6 @@
 
 from methods.common.luc import luc_matching_columns
 
-# TO DO: 
-# 1. Rename columns to luc10, luc5 and luc0 to align with the pipeline
-
-# to delete: 
-# start_year = 2012
-# k_parquet_filename = '/maps/tws36/tmf_pipe_out/1201/k_all.parquet'
-# m_parquet_filename = '/maps/aew85/tmf_pipe_out/1201/tom_pairs/matches.parquet'
-# luc_match = True
-
-t0 = 2018
-k_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/k.parquet'
-m_parquet_filename = '/maps/aew85/tmf_pipe_out/fastfp_test_ona/matches.parquet'
-
 REPEAT_MATCH_FINDING = 100
 DEBUG = False
 

From 6c7e5524fbd67515c0f2f09395f20782b4db0ee3 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Mon, 12 Aug 2024 13:14:34 +0000
Subject: [PATCH 09/19] Update tmfpython.sh

---
 scripts/tmfpython.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index 44e23d8..44d233f 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -186,7 +186,7 @@ tmfpython3 -m methods.matching.find_pairs \
     --k "${output_dir}/${proj}/k.parquet" \
     --m "${output_dir}/${proj}/matches.parquet" \
     --start_year "$t0" \
-    --luc_match False \
+    --luc_match True \
     --output "${output_dir}/${proj}/pairs" \
     --seed 42 \
     -j 1
@@ -213,6 +213,7 @@ if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then
     --output "${output_dir}/${proj}/additionality.csv" \
     --stopping "${output_dir}/${proj}/stopping.csv"
     echo "--Additionality and stopping criteria calculated.--"
+    else if [ "$ex_ante" == "true" ]; then
     else
     tmfpython3 -m methods.outputs.calculate_additionality \
     --project "${input_dir}/${proj}.geojson" \
@@ -226,14 +227,12 @@ fi
 
 # Run ex post evaluation
 if [ "$ex_post" == "true" ]; then
-evaluations_dir="~/evaluations"
 ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html"
 Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))"
 fi
 
 # Run ex-ante evaluation
 if [ "$ex_ante" == "true" ]; then
-evaluations_dir="~/evaluations"
 ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
 Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))"
 fi
\ No newline at end of file

From 154bef5846e67baa0a1e87c04667677bb777474c Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Wed, 14 Aug 2024 10:42:36 +0000
Subject: [PATCH 10/19] Update find_pairs.py

---
 methods/matching/find_pairs.py | 20 +++++++++++---
 scripts/tmfpython.sh           | 48 +++++++---------------------------
 2 files changed, 26 insertions(+), 42 deletions(-)

diff --git a/methods/matching/find_pairs.py b/methods/matching/find_pairs.py
index ee8d824..668736b 100644
--- a/methods/matching/find_pairs.py
+++ b/methods/matching/find_pairs.py
@@ -88,10 +88,11 @@ def calculate_smd(group1, group2):
     smd = (mean1 - mean2) / pooled_std
     return smd, mean1, mean2, pooled_std
 
-def rename_luc_columns(df, start_year):
+def rename_luc_columns(df, start_year, eval_year):
 
     # Define the range of years based on the central start_year
-    years = range(start_year - 10, start_year + 11)  # Adjust the range as needed
+    no_years_post = (eval_year - start_year) + 1
+    years = range(start_year - 10, start_year + no_years_post)  # Adjust the range as needed
     new_column_names = {f'luc_{year}': f'luc_{year - start_year}' for year in years}
     
     # Rename columns based on the new column names mapping
@@ -103,6 +104,7 @@ def find_match_iteration(
     k_pixels: pd.DataFrame,
     m_pixels: pd.DataFrame,
     start_year: int,
+    eval_year: int,
     luc_match: bool,
     output_folder: str,
     idx_and_seed: tuple[int, int]
@@ -123,8 +125,8 @@ def find_match_iteration(
         m_pixels = m_pixels.iloc[m_random_indices]
     
     # rename columns of each 
-    k_pixels_renamed = rename_luc_columns(k_pixels, start_year)
-    m_pixels_renamed = rename_luc_columns(m_pixels, start_year-10)
+    k_pixels_renamed = rename_luc_columns(k_pixels, start_year, eval_year)
+    m_pixels_renamed = rename_luc_columns(m_pixels, start_year-10, eval_year)
         
     # concat m and k
     km_pixels = pd.concat([k_pixels_renamed.assign(trt='trt', ID=range(0, len(k_pixels))),
@@ -245,6 +247,7 @@ def find_pairs(
     k_parquet_filename: str,
     m_parquet_filename: str,
     start_year: int,
+    eval_year: int,
     luc_match: bool,
     seed: int,
     output_folder: str,
@@ -268,6 +271,7 @@ def find_pairs(
                 k_pixels,
                 m_pixels,
                 start_year,
+                eval_year,
                 luc_match,
                 output_folder
             ),
@@ -301,6 +305,13 @@ def main():
         dest="start_year",
         help="Year project started."
     )
+    parser.add_argument(
+        "--eval_year",
+        type=int,
+        required=True,
+        dest="eval_year",
+        help="Year of evaluation."
+    )
     parser.add_argument(
         "--luc_match",
         type=bool,
@@ -336,6 +347,7 @@ def main():
         args.k_filename,
         args.m_filename,
         args.start_year,
+        args.eval_year,
         args.luc_match,
         args.seed,
         args.output_directory_path,
diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index beb8178..1201087 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -182,46 +182,18 @@ echo "--Set M created.--"
 
 #Matching: find pairs
 tmfpython3 -m methods.matching.find_pairs \
-    --k "${output_dir}/${proj}/k.parquet" \
-    --m "${output_dir}/${proj}/matches.parquet" \
-    --start_year "$t0" \
-    --output "${output_dir}/${proj}/pairs" \
-    --seed 42 \
-    -j 1
-    echo "--Pairs matched.--"
-
-#Calculate additionality
-if [ "$current_branch" == "mwd-check-stopping-criteria" ]; then
-    tmfpython3 -m methods.outputs.calculate_additionality \
-    --project "${input_dir}/${proj}.geojson" \
-    --project_start "$t0" \
-    --evaluation_year "$eval_year" \
-    --density "${output_dir}/${proj}/carbon-density.csv" \
-    --matches "${output_dir}/${proj}/pairs" \
-    --output "${output_dir}/${proj}/additionality.csv" \
-    --stopping "${output_dir}/${proj}/stopping.csv"
-    echo "--Additionality and stopping criteria calculated.--"
-    else
-    tmfpython3 -m methods.outputs.calculate_additionality \
-    --project "${input_dir}/${proj}.geojson" \
-    --project_start "$t0" \
-    --evaluation_year "$eval_year" \
-    --density "${output_dir}/${proj}/carbon-density.csv" \
-    --matches "${output_dir}/${proj}/pairs" \
-    --output "${output_dir}/${proj}/additionality.csv"
-    echo "--Additionality calculated.--"
-fi
-
-# Run ex post evaluation
-if [ "$ex_post" == "true" ]; then
-evaluations_dir="~/evaluations"
-ep_output_file="${evaluations_dir}/${proj}_ex_post_evaluation.html"
-Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_post_evaluation_template.Rmd',output_file='${ep_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))"
-fi
+--k "${output_dir}/${proj}/k.parquet" \
+--m "${output_dir}/${proj}/matches.parquet" \
+--start_year "$t0" \
+--eval_year "$eval_year" \
+--luc_match True \
+--output "${output_dir}/${proj}/pairs" \
+--seed 42 \
+-j 1
+echo "--Pairs matched.--"
 
 # Run ex-ante evaluation
 if [ "$ex_ante" == "true" ]; then
-evaluations_dir="~/evaluations"
 ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
-Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}',evaluations_dir='${evaluations_dir}'))"
+Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))"
 fi
\ No newline at end of file

From 9a960d9af98c1c28b76612578d8389c0f00375a5 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Thu, 15 Aug 2024 13:53:32 +0000
Subject: [PATCH 11/19] Update tmfpython.sh

---
 scripts/tmfpython.sh | 47 ++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index 1201087..4cdd570 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -1,16 +1,17 @@
 #!/bin/bash
 
-#run with command: scripts/tmfpython.sh -p 1113 -t 2010 ...
-#p: project ID 
+#run with command: scripts/tmfpython.sh -i 'maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ...
+#i: input dir - directory containing project shapefiles
+#o: output dir - directory containing pipeline outputs
+#p: project name/ID - must match name of shapefile
 #t: year of project start (t0)
 #e: evaluation year (default: 2022)
-#r: whether to run an ex-post evaluation and knit the results in an R notebook (true/false, default: false).
-#a: whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false).
+#v: verbose - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false).
 
 #NB running evaluations requires the evaluations code
 
 # Check which branch is currently checked out
-current_branch=$(git rev-parse --abbrev-ref HEAD)
+#current_branch=$(git rev-parse --abbrev-ref HEAD)
 
 set -e
 
@@ -19,8 +20,7 @@ set -e
 input_dir="/maps/aew85/projects"
 output_dir="/maps/aew85/tmf_pipe_out"
 eval_year=2022
-ex_post=false
-ex_ante=false
+verbose=true
 
 #####################################
 
@@ -28,43 +28,45 @@ function display_help() {
     echo "Usage: $0 [options]"
     echo
     echo "Options:"
-    echo "  -p <project>        Project name"
+    echo "  -i <input_dir>      Input directory"
+    echo "  -o <output_dir>     Output directory"
+    echo "  -p <proj>           Project name"
     echo "  -t <t0>             Start year"
     echo "  -e <year>           Evaluation year"
-    echo "  -r <ex_post>        Knit ex post evaluation? (true/false)"
-    echo "  -a <ex_ante>        Knit ex ante evaluation? (true/false)"
+    echo "  -v <verbose>        Knit ex ante evaluation as .Rmd? (true/false)"
     echo "  -h                  Display this help message"
-    echo
     echo "Example:"
-    echo "  $0 -p 'gola' -t 2012 -e 2021 -r true -a true"
+    echo "  $0 -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out -p 1201 -t 2012"
 }
 
 # Parse arguments
-while getopts "p:t:e:r:a:h" flag
+while getopts "i:o:p:t:e:v:h" flag
 do
     case "${flag}" in
+        i) input_dir=${OPTARG};;
+        o) output_dir=${OPTARG};;
         p) proj=${OPTARG};;
         t) t0=${OPTARG};;
         e) eval_year=${OPTARG};;
-        r) ex_post=${OPTARG};;
-        a) ex_ante=${OPTARG};;
+        r) verbose=${OPTARG};;
         h) display_help; exit 0;;
         *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;;
     esac
 done
 
+echo "Input directory: $input_dir"
+echo "Output directory: $output_dir"
 echo "Project: $proj"
 echo "t0: $t0"
 echo "Evaluation year: $eval_year"
-echo "Ex-post evaluation: $ex_post"
-echo "Ex-ante evaluation: $ex_ante"
+echo "Ex-ante evaluation: $verbose"
 
 if [ $# -eq 0 ]; then
     display_help
     exit 1
 fi
 
-# Make project output directory
+# Make project output folder
 mkdir -p "${output_dir}/${proj}"
 echo "--Folder created.--"
 
@@ -181,7 +183,8 @@ tmfpython3 -m methods.matching.build_m_table \
 echo "--Set M created.--"
 
 #Matching: find pairs
-tmfpython3 -m methods.matching.find_pairs \
+. ./venv/bin/activate
+python3 -m methods.matching.find_pairs \
 --k "${output_dir}/${proj}/k.parquet" \
 --m "${output_dir}/${proj}/matches.parquet" \
 --start_year "$t0" \
@@ -191,9 +194,11 @@ tmfpython3 -m methods.matching.find_pairs \
 --seed 42 \
 -j 1
 echo "--Pairs matched.--"
+deactivate
 
 # Run ex-ante evaluation
-if [ "$ex_ante" == "true" ]; then
+if [ "$verbose" == "true" ]; then
+evaluations_dir="~/evaluations"
 ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
-Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}'))"
+Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}'))"
 fi
\ No newline at end of file

From 98e800b0dd043b11bfd877817f68d53ca6570a54 Mon Sep 17 00:00:00 2001
From: Patrick Ferris <patrick@sirref.org>
Date: Tue, 20 Aug 2024 16:50:09 +0100
Subject: [PATCH 12/19] Merge pull request #122 from quantifyearth/mwd-fix-121

Fix Issue #121: force layer to disk before saving.
---
 methods/inputs/generate_slope.py | 241 +++++++++++++++----------------
 requirements.txt                 |   2 +-
 2 files changed, 114 insertions(+), 129 deletions(-)

diff --git a/methods/inputs/generate_slope.py b/methods/inputs/generate_slope.py
index 891e26f..7d58988 100644
--- a/methods/inputs/generate_slope.py
+++ b/methods/inputs/generate_slope.py
@@ -111,138 +111,123 @@ def generate_slope(input_elevation_directory: str, output_slope_directory: str):
             continue
 
         with tempfile.TemporaryDirectory() as tmpdir:
-            elevation = RasterLayer.layer_from_file(elev_path)
-
-            logging.info("Area of elevation tile %a", elevation.area)
-            _easting, _northing, lower_code, lower_letter = utm.from_latlon(
-                elevation.area.bottom, elevation.area.left
-            )
-            _easting, _northing, upper_code, upper_letter = utm.from_latlon(
-                elevation.area.top, elevation.area.right
-            )
-
-            # FAST PATH -- with only one UTM zone the reprojection back has no issues
-            if lower_code == upper_code and lower_letter == upper_letter:
-                actual_utm_code = lower_code
-                warp(
-                    actual_utm_code,
-                    elev_path,
-                    elevation.pixel_scale.xstep,
-                    elevation.pixel_scale.ystep,
-                    out_path,
+            with RasterLayer.layer_from_file(elev_path) as elevation:
+
+                logging.info("Area of elevation tile %a", elevation.area)
+                _easting, _northing, lower_code, lower_letter = utm.from_latlon(
+                    elevation.area.bottom, elevation.area.left
                 )
-            else:
-                # SLOW PATH -- in the slow path, we have to break the elevation raster into
-                # UTM sections and do the above to each before reprojecting back and recombining
-
-                # To capture the results here for later inspection just override the tmpdir variable
-                for actual_utm_code in range(lower_code, upper_code + 1):
-                    for utm_letter in crange(lower_letter, upper_letter):
-                        logging.debug("UTM(%s,%s)", actual_utm_code, utm_letter)
-
-                        # Note: we go a little bit around the UTM tiles and will crop them down to size later
-                        # this is to remove some aliasing effects.
-                        bbox = bounding_box_of_utm(actual_utm_code, utm_letter, UTM_EXPANSION_DEGREES)
-
-                        # Crop the elevation tile to a UTM zone
-                        utm_layer = RasterLayer.empty_raster_layer_like(
-                            elevation, area=bbox
-                        )
-                        utm_id = f"{actual_utm_code}-{utm_letter}-{elevation_path}"
-                        utm_clip_path = os.path.join(tmpdir, utm_id)
-                        intersection = RasterLayer.find_intersection(
-                            [elevation, utm_layer]
-                        )
-                        result = RasterLayer.empty_raster_layer(
-                            intersection,
-                            elevation.pixel_scale,
-                            elevation.datatype,
-                            utm_clip_path,
-                            elevation.projection,
-                        )
-                        result.set_window_for_intersection(intersection)
-                        elevation.set_window_for_intersection(intersection)
-                        elevation.save(result)
-
-                        # Flush elevation utm clip to disk
-                        del result
-
-                        # Now warp into UTM, calculate slopes, and warp back
-                        slope_out_path = os.path.join(tmpdir, "out-slope-" + utm_id)
-                        warp(
-                            actual_utm_code,
-                            utm_clip_path,
-                            elevation.pixel_scale.xstep,
-                            elevation.pixel_scale.ystep,
-                            slope_out_path,
-                        )
-
-                        # We now recrop the out-slope back to the bounding box we assumed at the start
-                        bbox_no_expand = bounding_box_of_utm(
-                            actual_utm_code, utm_letter, 0.0
-                        )
-                        slope_tif = RasterLayer.layer_from_file(slope_out_path)
-                        grid = RasterLayer.empty_raster_layer_like(
-                            slope_tif, area=bbox_no_expand
-                        )
-                        output_final = f"final-slope-{actual_utm_code}-{utm_letter}-{elevation_path}"
-                        final_path = os.path.join(tmpdir, output_final)
-                        logging.debug("Slope underlying %s", slope_tif._underlying_area) # pylint: disable=W0212
-                        logging.debug("Grid underling %s", grid._underlying_area) # pylint: disable=W0212
-                        try:
-                            intersection = RasterLayer.find_intersection([slope_tif, grid])
-                        except ValueError:
-                            logging.debug(
-                                "UTM (%s, %s) didn't intersect actual area %s",
-                                actual_utm_code,
-                                utm_letter,
-                                grid._underlying_area # pylint: disable=W0212
-                            )
-                            continue
-                        slope_tif.set_window_for_intersection(intersection)
-                        final = RasterLayer.empty_raster_layer(
-                            intersection,
-                            slope_tif.pixel_scale,
-                            slope_tif.datatype,
-                            final_path,
-                            slope_tif.projection,
-                        )
-                        logging.debug("Final underlying %s", final._underlying_area) # pylint: disable=W0212
-                        final.set_window_for_intersection(intersection)
-                        slope_tif.save(final)
-
-                        # Flush
-                        del final
-
-                # Now to recombine the UTM gridded slopes into the slope tile
-                slopes = glob("final-slope-*", root_dir=tmpdir)
-                assert len(slopes) > 0
-
-                # This sets the order a little better for the union of the layers
-                slopes.sort()
-                slopes.reverse()
-
-                logging.info("Render order %s", slopes)
-
-                combined = GroupLayer(
-                    [
-                        RasterLayer.layer_from_file(os.path.join(tmpdir, filename))
-                        for filename in slopes
-                    ]
+                _easting, _northing, upper_code, upper_letter = utm.from_latlon(
+                    elevation.area.top, elevation.area.right
                 )
 
-                elevation = RasterLayer.layer_from_file(elev_path)
-                intersection = RasterLayer.find_intersection([elevation, combined])
-                combined.set_window_for_intersection(intersection)
-                elevation.set_window_for_intersection(intersection)
-
-                assembled_path = os.path.join(tmpdir, "patched.tif")
-                result = RasterLayer.empty_raster_layer_like(
-                    elevation, filename=assembled_path
-                )
-                combined.save(result)
+                # FAST PATH -- with only one UTM zone the reprojection back has no issues
+                if lower_code == upper_code and lower_letter == upper_letter:
+                    actual_utm_code = lower_code
+                    warp(
+                        actual_utm_code,
+                        elev_path,
+                        elevation.pixel_scale.xstep,
+                        elevation.pixel_scale.ystep,
+                        out_path,
+                    )
+                else:
+                    # SLOW PATH -- in the slow path, we have to break the elevation raster into
+                    # UTM sections and do the above to each before reprojecting back and recombining
+
+                    # To capture the results here for later inspection just override the tmpdir variable
+                    for actual_utm_code in range(lower_code, upper_code + 1):
+                        for utm_letter in crange(lower_letter, upper_letter):
+                            logging.debug("UTM(%s,%s)", actual_utm_code, utm_letter)
+
+                            # Note: we go a little bit around the UTM tiles and will crop them down to size later
+                            # this is to remove some aliasing effects.
+                            bbox = bounding_box_of_utm(actual_utm_code, utm_letter, UTM_EXPANSION_DEGREES)
+
+                            # Crop the elevation tile to a UTM zone
+                            with RasterLayer.empty_raster_layer_like(elevation, area=bbox) as utm_layer:
+                                utm_id = f"{actual_utm_code}-{utm_letter}-{elevation_path}"
+                                utm_clip_path = os.path.join(tmpdir, utm_id)
+                                intersection = RasterLayer.find_intersection(
+                                    [elevation, utm_layer]
+                                )
+                            with RasterLayer.empty_raster_layer(
+                                intersection,
+                                elevation.pixel_scale,
+                                elevation.datatype,
+                                utm_clip_path,
+                                elevation.projection,
+                            ) as result:
+                                result.set_window_for_intersection(intersection)
+                                elevation.set_window_for_intersection(intersection)
+                                elevation.save(result)
+
+                            # Now warp into UTM, calculate slopes, and warp back
+                            slope_out_path = os.path.join(tmpdir, "out-slope-" + utm_id)
+                            warp(
+                                actual_utm_code,
+                                utm_clip_path,
+                                elevation.pixel_scale.xstep,
+                                elevation.pixel_scale.ystep,
+                                slope_out_path,
+                            )
 
-                shutil.move(assembled_path, out_path)
+                            # We now recrop the out-slope back to the bounding box we assumed at the start
+                            bbox_no_expand = bounding_box_of_utm(
+                                actual_utm_code, utm_letter, 0.0
+                            )
+                            with RasterLayer.layer_from_file(slope_out_path) as slope_tif:
+                                with RasterLayer.empty_raster_layer_like(slope_tif, area=bbox_no_expand) as grid:
+                                    output_final = f"final-slope-{actual_utm_code}-{utm_letter}-{elevation_path}"
+                                    final_path = os.path.join(tmpdir, output_final)
+                                    logging.debug("Slope underlying %s", slope_tif._underlying_area) # pylint: disable=W0212
+                                    logging.debug("Grid underling %s", grid._underlying_area) # pylint: disable=W0212
+                                    try:
+                                        intersection = RasterLayer.find_intersection([slope_tif, grid])
+                                    except ValueError:
+                                        logging.debug(
+                                            "UTM (%s, %s) didn't intersect actual area %s",
+                                            actual_utm_code,
+                                            utm_letter,
+                                            grid._underlying_area # pylint: disable=W0212
+                                        )
+                                        continue
+                                    slope_tif.set_window_for_intersection(intersection)
+                                    with RasterLayer.empty_raster_layer(
+                                        intersection,
+                                        slope_tif.pixel_scale,
+                                        slope_tif.datatype,
+                                        final_path,
+                                        slope_tif.projection,
+                                    ) as final:
+                                        logging.debug("Final underlying %s", final._underlying_area) # pylint: disable=W0212
+                                        final.set_window_for_intersection(intersection)
+                                        slope_tif.save(final)
+
+                    # Now to recombine the UTM gridded slopes into the slope tile
+                    slopes = glob("final-slope-*", root_dir=tmpdir)
+                    assert len(slopes) > 0
+
+                    # This sets the order a little better for the union of the layers
+                    slopes.sort()
+                    slopes.reverse()
+
+                    logging.info("Render order %s", slopes)
+
+                    files = [os.path.join(tmpdir, filename) for filename in slopes]
+                    with GroupLayer.layer_from_files(files) as combined:
+                        with RasterLayer.layer_from_file(elev_path) as elevation:
+                            intersection = RasterLayer.find_intersection([elevation, combined])
+                            combined.set_window_for_intersection(intersection)
+                            elevation.set_window_for_intersection(intersection)
+
+                            assembled_path = os.path.join(tmpdir, "patched.tif")
+                            with RasterLayer.empty_raster_layer_like(
+                                elevation, filename=assembled_path
+                            ) as result:
+                                combined.save(result)
+
+                        shutil.move(assembled_path, out_path)
 
 
 def main() -> None:
diff --git a/requirements.txt b/requirements.txt
index bad4673..8118a36 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,7 @@ scipy
 numba
 matplotlib
 geojson
-git+https://github.com/quantifyearth/yirgacheffe@bd2e91c773a414f66340ebb8c13044a1b1a6045f
+git+https://github.com/quantifyearth/yirgacheffe@cc89b4d8a0e97c1a11b730cd688a58b680023336
 git+https://github.com/carboncredits/biomass-recovery@9e54f80832a7eca915ebd13b03df9db2a08aee9d
 
 # developement

From fb7ac4642626bcd93c0c88d473fba77f1b2bc83c Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Tue, 27 Aug 2024 16:30:00 +0000
Subject: [PATCH 13/19] Added functionality to print git branch in R notebook

---
 scripts/tmfpython.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index 4cdd570..c30367e 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-#run with command: scripts/tmfpython.sh -i 'maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ...
+#run with command: scripts/tmfpython.sh -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out' -p 1113 -t 2010 ...
 #i: input dir - directory containing project shapefiles
 #o: output dir - directory containing pipeline outputs
 #p: project name/ID - must match name of shapefile
@@ -11,14 +11,14 @@
 #NB running evaluations requires the evaluations code
 
 # Check which branch is currently checked out
-#current_branch=$(git rev-parse --abbrev-ref HEAD)
+branch=$(git rev-parse --abbrev-ref HEAD)
 
 set -e
 
 ############ DEFAULTS ###############
 
-input_dir="/maps/aew85/projects"
-output_dir="/maps/aew85/tmf_pipe_out"
+input_dir=""
+output_dir=""
 eval_year=2022
 verbose=true
 
@@ -200,5 +200,5 @@ deactivate
 if [ "$verbose" == "true" ]; then
 evaluations_dir="~/evaluations"
 ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
-Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',eval_year='${eval_year}',input_dir='${input_dir}',output_dir='${output_dir}'))"
+Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))"
 fi
\ No newline at end of file

From 83ddf336f9faf6e09a5e3365e58c614e6e50659e Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Wed, 28 Aug 2024 09:31:21 +0000
Subject: [PATCH 14/19] Added functionality to knit R notebook

---
 scripts/ex_ante_evaluation_template.Rmd | 1001 +++++++++++++++++++++++
 scripts/scripts/def_rate.R              |  328 ++++++++
 scripts/scripts/land_cover_timeseries.R |  111 +++
 scripts/scripts/plot_matchingvars.R     |   42 +
 scripts/scripts/plot_transitions.R      |   63 ++
 scripts/scripts/std_mean_diff.R         |   57 ++
 scripts/tmfpython.sh                    |    7 +-
 7 files changed, 1604 insertions(+), 5 deletions(-)
 create mode 100644 scripts/ex_ante_evaluation_template.Rmd
 create mode 100644 scripts/scripts/def_rate.R
 create mode 100644 scripts/scripts/land_cover_timeseries.R
 create mode 100644 scripts/scripts/plot_matchingvars.R
 create mode 100644 scripts/scripts/plot_transitions.R
 create mode 100644 scripts/scripts/std_mean_diff.R

diff --git a/scripts/ex_ante_evaluation_template.Rmd b/scripts/ex_ante_evaluation_template.Rmd
new file mode 100644
index 0000000..be95ce1
--- /dev/null
+++ b/scripts/ex_ante_evaluation_template.Rmd
@@ -0,0 +1,1001 @@
+---
+output:
+  html_document:
+    theme: spacelab
+    df_print: paged
+    toc: yes
+    toc_float: yes
+  pdf_document:
+    toc: yes
+params:
+  proj: null
+  t0: null
+  input_dir: null
+  output_dir: null
+  fullname: null
+  country_path: null
+  shapefile_path: null
+  pairs_path: null
+  carbon_density_path: null
+  branch: null
+---
+
+```{r include=FALSE}
+
+# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A SHELL TERMINAL:
+
+# Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd', output_file='~/evaluations/R/example_project.html', params=list(proj='example_project', t0=2010, ...))"
+
+# Mandatory args: proj, t0
+# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path
+# You must either specify input dir and output dir OR provide absolute paths to each of the objects required
+
+```
+
+```{r settings, include=FALSE}
+knitr::opts_chunk$set(
+  echo = FALSE, warning=FALSE,message=FALSE)
+
+library(tidyverse)
+library(sf)
+library(reshape2)
+library(maps)
+library(mapdata)
+library(ggspatial)
+library(arrow)
+library(rnaturalearth)
+library(rnaturalearthdata)
+library(rnaturalearthhires)
+library(stringr)
+library(jsonlite)
+library(countrycode)
+library(scales)
+library(here)
+library(patchwork)
+library(knitr)
+library(kableExtra)
+
+```
+
+```{r read_inputs, echo=FALSE,warning=FALSE, message=FALSE}
+
+project_name <- params$proj
+start_year <- as.numeric(params$t0)
+branch <- params$branch
+
+```
+
+---
+title: "`r paste0('4C Ex-Ante Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`"
+subtitle: "`r format(Sys.Date(), "%B %Y")`"
+---
+
+```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE}
+
+# get output format
+
+output_format <- ifelse(knitr::is_latex_output(), "latex", "html")
+
+# get script path
+
+script_path <- here('scripts')
+
+# get explainer diagram path 
+
+diagram_path <- here('methods_diagram.png')
+
+# get data path
+
+if (!is.null(params$output_dir)) {
+  data_path <- paste0(params$output_dir,'/',project_name)
+}
+
+# get path to pairs
+
+if (!is.null(params$pairs_path)) {
+  pairs_path <- params$pairs_path
+} else { pairs_path <- file.path(data_path,'pairs') }
+
+# read shapefile
+
+if (!is.null(params$input_dir)) {
+  input_dir <- params$input_dir 
+}
+
+if (!is.null(params$shapefile_path)) {
+  shapefile_path <- params$shapefile_path
+} else { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') }
+shapefile <- read_sf(shapefile_path)
+
+# read carbon density
+
+if (!is.null(params$carbon_density_path)) {
+  carbon_density_path <- params$carbon_density_path
+} else { carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] }
+carbon_density <- read.csv(carbon_density_path)
+
+# read country path
+
+if (!is.null(params$country_path)) {
+  country_path <- params$country_path
+} else { country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)}
+
+```
+
+```{r read_pairs, echo=FALSE}
+  
+# get filenames and filter for matched points
+
+files_full_raw <- list.files(pairs_path,
+                          pattern='*.parquet',full.names=T,recursive=F)
+files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
+files_short_raw <- list.files(path=pairs_path,
+                          pattern='*.parquet',full.names=F,recursive=F)
+files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
+
+# initialise dfs
+
+vars <- c(colnames(read_parquet(files_full[1])),'pair')
+paired_data_raw <- data.frame(matrix(ncol = length(vars), nrow = 0)) %>%
+  setNames(vars) %>%
+  mutate(
+    pair = as.factor(pair),
+    k_trt = as.factor(k_trt),
+    s_trt = as.factor(s_trt)
+  )
+
+for(j in 1:length(files_full)){
+  
+  # read parquet file
+  
+  f <- data.frame(read_parquet(files_full[j]),check.names = FALSE)
+  
+  # add identity column
+  
+  f$pair <- as.factor(c(replicate(nrow(f),str_remove(files_short[j], "\\.parquet$"))))
+  
+  # append data to bottom of df
+  
+  paired_data_raw <- bind_rows(paired_data_raw,f)
+  
+}
+
+# generate separate datasets for project and counterfactual 
+
+project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair)
+cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair)
+
+# create project-counterfactual merged dataset
+
+colnames(cf) <- colnames(project)
+pair_merged <- bind_rows(project,cf)
+names(pair_merged) <- str_sub(names(pair_merged),3)
+names(pair_merged)[names(pair_merged) == "ir"] <- "pair"
+
+# add type column and remove excess cols
+
+data <- pair_merged %>%
+        mutate(type=c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual'))) %>% 
+        select(-c(contains('trt'),ID))
+
+```
+
+```{r get_shapefile_area, echo=FALSE}
+  
+project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000)
+
+```
+
+```{r get_country_names}
+
+# define function for extracting country names 
+
+get_country_names <- function(country_codes_path) {
+    codes <- as.character(fromJSON(country_codes_path))
+    country_names <- countrycode(codes, 'iso2c', 'country.name')
+    country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo'
+    return(country_names)
+  }
+
+# get country names
+
+country_vec <- get_country_names(country_path)
+
+  # define function for printing the country names if there are multiple
+
+  if (length(country_vec) > 1) {
+    country_string <- paste(country_vec[-length(country_vec)], collapse = ", ")
+    country_string <- paste(country_string, "and", country_vec[length(country_vec)])
+  } else {
+    country_string <- country_vec[1]
+  }
+
+
+```
+
+\
+
+# Introduction
+
+This Report has been prepared by researchers at the Cambridge Centre for Carbon Credits (4C) and has been funded by a charitable grant from the Tezos Foundation.  4C utilises innovative, evidence-based approaches to examine the scientific basis of nature-based carbon conservation initiatives and, insodoing, provides a way for different stakeholders to assess the quality of carbon credits (ex post and/or ex ante).  
+
+**Disclaimer:  Nothing in this Report constitutes formal advice or recommendations, an endorsement of proposed action, or intention to collaborate; instead, it sets out the details of an evaluation using a method which is still under development. The Report is considered complete as of the publication date shown, though methods are likely to change in future.**
+
+\
+
+# About the project
+
+`r if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() }` is located in `r country_string`. The proposed area measures `r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. 
+
+For the purposes of this evaluation, we have set the proposed start date to `r start_year`.
+
+```{r echo=FALSE}
+
+# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________
+
+# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document.
+
+```
+
+
+
+\
+
+# Introduction to the 4C method
+
+*Our method for forecasting ex-ante additionality remains under development.*
+
+The 4C approach to forecasting additionality involves identifying places that experienced similar deforestation levels in the past as the project area does today. We start by analyzing forest cover changes in the project area between 10 years ago and the present day. Using pixel-matching techniques, we then identify comparable places outside the project that experienced similar deforestation trends between 20 and 10 years ago (the *matching period*). This allows us to match the deforestation trajectory of the project with that of the matched pixels, but offset in time. This concept is illustrated by the left-hand diagonal arrow in the figure below.
+
+We can consider the matched pixels as a historical representation of the project as it is today. By examining deforestation in the matched pixels over the subsequent 10 years (the *baseline period*), we estimate a *baseline prediction* — the deforestation expected in the project area under the counterfactual (business-as-usual) scenario. This rate is then projected forward over the next 10 years, as illustrated by the right-hand diagonal arrow in the figure below. We convert the deforestation rate to carbon dioxide emissions using best estimates of carbon density.
+
+```{r, echo=FALSE, fig.align='center', fig.width=6}
+
+knitr::include_graphics(diagram_path)
+
+```
+
+
+Making predictions about future deforestation is challenging, and there are multiple sources of uncertainty at play. These include: the quantification of carbon, the choice of matching pixels, the effect of leakage and impermanence, future political changes and market forces. We are constantly improving our method in order to minimise these uncertainties. Due to the inherent uncertainty associated with ex-ante (before-the-fact) predictions, carbon credits should only ever be quantified and issued ex-post (after the fact).
+
+More information about 4C's approach to impact evaluation can be found below.
+
+[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1)
+
+[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence)
+
+[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae)
+
+[Our paper on the social value of impermanent carbon credits](https://www.nature.com/articles/s41558-023-01815-0)
+
+[The PACT methodology for ex-post evaluations](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f)
+
+\
+
+
+# Methods
+
+The following sections will detail how we arrived at a forecast of future deforestation and the potential to generate additionality by reducing this deforestation. This includes the location and quality of the matched points, the deforestation rates in each set of points, and the carbon density values used to convert these deforestation rates to carbon emissions reductions.
+
+\
+
+### Location of matched points
+
+We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. We used these matched points to make a prediction of the counterfactual scenario for deforestation.
+
+Below we show the location of the matching points (shown in blue) relative to the project (shown in red), both at the country and project scale.
+
+`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}`
+
+```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE}
+
+# downsample no. of points by 90%
+
+if(nrow(data) > 20000){
+  data_forplot <- data %>% sample_frac(0.1)
+} else {
+  data_forplot <- data
+}
+
+# plot location of matching points
+
+country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large")
+
+# transform crs
+
+shapefile <- st_transform(shapefile, st_crs(country_map))
+
+ggplot(data=country_map) + 
+  geom_sf(colour=NA,fill='grey80')+
+  geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + 
+  scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+
+  coord_sf()+
+  theme_void()+
+  annotation_scale(text_cex=1.5,location='bl')+
+  theme(legend.title = element_blank(),
+        text=element_text(size=16),
+        legend.position='none')
+
+xmin <- filter(data, type=='Project') %>% select(lng) %>% min()
+xmax <- filter(data, type=='Project') %>% select(lng) %>% max()
+ymin <- filter(data, type=='Project') %>% select(lat) %>% min()
+ymax <- filter(data, type=='Project') %>% select(lat) %>% max()
+
+ggplot(data=country_map) + 
+  geom_sf(colour=NA,fill='grey80')+
+  geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + 
+  scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+
+  coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+
+  theme_void()+
+  annotation_scale(text_cex=1.5,location='bl')+
+  theme(legend.title = element_blank(),
+        text=element_text(size=16),
+        legend.position='none')
+
+```
+
+### Quality of matches
+
+Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the matched points (shown in blue) indicates that the the matched points are composed of places that are similar to the project in terms of the drivers of deforestation and are expected to exhibit similar deforestation trends.
+
+-   Inaccessibility (motorized travel time to healthcare, minutes)
+
+-   Slope ($^\circ$)
+
+-   Elevation (meters)
+
+-   Forest cover at t0 (start year, %)
+
+-   Deforestation at t0 (%)
+
+-   Forest cover at t-5 (5 years prior to start year, %)
+
+-   Deforestation at t-5 (%)
+
+-   Forest cover at t-10 (10 years prior to start year, %)
+
+-   Deforestation at t-10 (%)
+
+Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset.
+
+More information about the datasets we use can be found below:
+
+[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/)
+
+[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm)
+
+[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF)
+
+\
+
+```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE}
+
+# plot matches
+
+source(file.path(script_path,'plot_matchingvars.R'))
+
+plot_matching_variables(data,ex_ante='true')
+
+```
+
+\
+
+### Standardised mean differences
+
+We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). The SMD allows us to quantify the similarity between the project and the matched points in a way that is comparable across variables. 
+
+In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and matched points, in standard deviations) for each variable. Values further from zero indicate a larger difference. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our SMDs would ideally fall in order for the project and matched points to be considered well-matched.
+
+\
+
+```{r smd}
+
+std_mean_diff <- function(pairs_path) {
+  
+  # clean data
+
+  files_full_raw <- list.files(pairs_path,
+                               pattern='*.parquet',full.names=T,recursive=F)
+  files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
+  files_short_raw <- list.files(path=pairs_path,
+                                pattern='*.parquet',full.names=F,recursive=F)
+  files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
+  
+  # initialise dfs
+  
+  vars <- c(colnames(read_parquet(files_full[1])),'pair')
+  df <- data.frame(matrix(ncol=length(vars),nrow=0))  %>% 
+  setNames(vars) %>% 
+  mutate(k_trt=as.factor(k_trt),
+         s_trt=as.factor(s_trt))
+  
+  for(j in 1:length(files_full)){
+    
+    # read in all parquet files for a given project
+    
+    f <- data.frame(read_parquet(files_full[j])) %>% 
+         mutate(k_trt=as.factor(k_trt),
+                s_trt=as.factor(s_trt))
+    
+    # append data to bottom of df
+    
+    df <- bind_rows(df,f)
+    
+  }
+
+  # calculate smd
+
+  smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE)
+  
+  variables <- c('cpc10_d','cpc5_d','cpc0_d',
+                 'cpc10_u','cpc5_u','cpc0_u',
+                 'access','slope','elevation')
+    
+    for (var in variables) {
+      k_var <- df[[paste0("k_", var)]]
+      s_var <- df[[paste0("s_", var)]]
+      
+      k_mean <- mean(k_var, na.rm = TRUE)
+      s_mean <- mean(s_var, na.rm = TRUE)
+      k_sd <- sd(k_var, na.rm = TRUE)
+      s_sd <- sd(s_var, na.rm = TRUE)
+      
+      pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2)
+      smd <- (k_mean - s_mean) / pooled_sd
+      
+      smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE))
+    }
+    
+  return(smd_results)
+}
+
+results <- std_mean_diff(pairs_path)
+
+# changing sign for interpretation
+
+results$smd <- (-1)*results$smd
+
+# changing order of variables
+
+variables <- c('cpc10_d','cpc5_d','cpc0_d',
+                 'cpc10_u','cpc5_u','cpc0_u',
+                 'access','slope','elevation')
+
+results$variable <- factor(results$variable, levels=variables)
+
+# plotting
+
+ ggplot(results,aes(x=smd,y=variable))+
+  #geom_boxplot(outlier.shape=NA,colour='blue')+
+  geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+
+  geom_vline(xintercept=0)+
+  geom_vline(xintercept=0.25,lty=2,colour='grey30')+
+  geom_vline(xintercept=-0.25,lty=2,colour='grey30')+
+  scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")),
+                            bquote(Deforestation~t[-5]~("%")),
+                            bquote(Deforestation~t[0]~("%")),
+                            bquote(Forest~cover~t[-10]~("%")),
+                            bquote(Forest~cover~t[-5]~("%")),
+                            bquote(Forest~cover~t[0]~("%")),
+                            'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+
+  xlab('Standardised mean difference')+
+  xlim(-1,1)+
+  theme_classic()+
+  theme(axis.title.y=element_blank(),
+        legend.title=element_blank(),
+        legend.box.background=element_rect(),
+        legend.position='none',
+        text=element_text(size=14),
+        axis.text.y=element_text(size=14))
+
+
+```
+
+\
+
+### Deforestation within the project
+
+Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes:
+
+-   Undisturbed forest to degraded forest 
+
+-   Degraded forest to deforested land 
+
+-   Undisturbed forest to deforested land 
+
+-   Undisturbed land to reforested land (which indicates that regrowth occurred after a deforestation event)
+
+\
+
+These transitions are shown in the plot below for the 10-year period between `r start_year-10` and `r start_year`. They are overlaid on the project area which is shown in grey. If a transition is not shown, it did not occur in the period examined.
+
+Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF).
+
+\
+
+```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE}
+
+# plot deforestation within project
+
+source(file.path(script_path,'plot_transitions.R'))
+
+proj_coords <- data %>% 
+  filter(type=='Project') %>% 
+  select(lat,lng)
+
+proj_input_defplot <- data %>% 
+  filter(type=='Project') %>% 
+  select(contains('luc')) %>% 
+  setNames(paste0("luc_", (start_year-10):start_year)) %>%
+  cbind(proj_coords)
+
+proj_input_defplot <- proj_input_defplot[, !is.na(colnames(proj_input_defplot))]
+
+plot_transitions(data=proj_input_defplot,t0=start_year-10,period_length=10,shapefile=shapefile)
+
+```
+
+\
+
+### Land cover changes within project and matched pixels
+
+In the below plots, we show the changes in land classes over time for both the project (red) and the matched points (blue).
+
+Note the following:
+
+-   The vertical grey dashed line represents the start year of the project (`r start_year`). The timings shown on the x-axis are relative to this start year.
+
+-   As explained in the 'Methods' section, the matched points are offset in time relative to the project by 10 years. This means that all changes observed in the matched points happened 10 years prior to the equivalent time point in the project. This time offset allows us to use the last 10 years in the matched points as a prediction of the next 10 years for the project.
+
+-   Solid lines represent ex-post observed changes, whereas the dotted line represents the prediction for the future of the project. 
+
+```{r make_inputs, echo=FALSE}
+
+# preparing inputs
+
+proj_input <- data %>% 
+  filter(type=='Project') %>% 
+  select(contains('luc')) %>% 
+  setNames(paste0("luc_", (start_year-10):start_year))
+proj_input <- proj_input[, !is.na(colnames(proj_input))]
+
+
+cf_input <- data %>% 
+  filter(type=='Counterfactual') %>% 
+  select(contains('luc')) %>% 
+  setNames(paste0("luc_", (start_year-20):(start_year))) %>%
+  select(where(~ all(!is.na(.))))
+
+```
+
+```{r luc_timeseries_all, echo=FALSE}
+
+source(file.path(script_path,'land_cover_timeseries.R'))
+
+# getting results
+
+proj_results <- get_luc_timeseries(proj_input,t0=start_year-10,tend=start_year,type='single') %>% 
+  mutate(type='Project')
+
+cf_results <- get_luc_timeseries(cf_input,t0=start_year-20,tend=start_year,type='single') %>% 
+  mutate(type='Counterfactual')
+
+# combining results
+
+results <- bind_rows(proj_results, cf_results)
+
+```
+
+Showing the trend for undisturbed, degraded, deforested and regrowth in turn:
+
+```{r undisturbed_timeseries, fig.width=8,fig.height=13}
+
+# add prediction from the matched pixels:
+
+prediction <- cf_results %>% 
+                filter(year >= (start_year-10))  %>% 
+                mutate(type='Project',
+                year=year+10)
+
+results <- bind_rows(results,prediction)
+
+# make a custom function for plotting the results
+
+plot_timeseries <- function(luc_value, title_str) {
+  
+  #remove gap between solid and dotted project line
+  percent_val <- results %>%
+  filter(year == start_year 
+          & type == "Project"
+          & luc == luc_value) %>%
+  pull(percentage)
+
+  # df wrangling
+  extended_results <- results %>%
+    mutate(
+      luc = as.numeric(luc),
+      year = as.numeric(year),
+      line_type = ifelse(type == "Project" & year > start_year, "dotted", "solid"),
+      type = case_when(
+      type == "Counterfactual" ~ "Matched points",
+      TRUE ~ type
+    )
+    ) %>%
+    bind_rows(data.frame(
+      year = start_year,
+      luc = luc_value,   
+      percentage = percent_val,
+      type = 'Project',
+      line_type = 'dotted'
+    ))
+
+   extended_results %>%
+    filter(luc == luc_value) %>%
+    ggplot(aes(x = year, y = percentage, color = type, linetype = line_type)) +
+    geom_line(linewidth = 1.5) +
+    geom_vline(xintercept = start_year, linetype = 2, color = 'grey30') +
+    #geom_vline(xintercept = start_year-10, linetype = 2, color = 'grey30') +
+    scale_colour_manual(name = 'Location',
+                        values = c('red','blue'),
+                        breaks = c('Project', 'Matched points'),
+                        labels = c('Project', 'Matched points'))+
+    xlab('Year') +
+    ylab('% cover') +
+    ggtitle(title_str) +
+    guides(linetype = "none") +
+    theme_classic() +
+    scale_linetype_manual(values = c("solid" = "solid", "dotted" = "dotted"))+
+    facet_wrap(~type)+
+    xlim(start_year-20,start_year+10)
+
+}
+
+plot_1 <- plot_timeseries(luc_value=1, title_str='Undisturbed forest') + theme(legend.position='none',axis.title.x = element_blank())
+plot_2 <- plot_timeseries(luc_value=2, title_str='Degraded forest') + theme(legend.position='none', axis.title.x = element_blank())
+plot_3 <- plot_timeseries(luc_value=3, title_str='Deforested land') + theme(legend.position='none', axis.title.x = element_blank())
+plot_4 <- plot_timeseries(luc_value=4, title_str='Regrowth') + theme(legend.position='none', axis.title.x = element_text(size=14))
+
+plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(ncol=1)
+
+```
+
+### Deforestation rates in the matched points during the baseline period
+
+```{r proportions_undisturbed_degraded, echo=FALSE}
+
+# obtaining the area of undisturbed and degraded forest at t0, for use later
+
+source(file.path(script_path,'def_rate.R'))
+
+prop_und <- get_prop_class(data=proj_input,t0=start_year-10,class=1)
+prop_deg <- get_prop_class(data=proj_input,t0=start_year-10,class=2)
+
+```
+
+Here we present the deforestation rates observed in the matched pixels over the past 10 years (the baseline period). 
+
+Forest loss transitions can be broken down into the following processes: 
+
+-   degradation of undisturbed forest
+
+-   deforestation of undisturbed forest
+
+-   deforestation of degraded forest
+
+-   regrowth of undisturbed forest (implies previous deforestation)
+
+We calculate the rate at which these processes occur in the matched pixels using the following method:
+
+1. Calculate the percentage of matched pixels which have undergone one of the above processes (according to the JRC classification) during the baseline period. 
+2. Divide this percentage by 10 to give an annual rate of change, in %/year, relative to the amount of (undisturbed or degraded) forest present at the beginning of the project.
+3. Multiply this rate by the area of the (undisturbed or degraded) forest  10 years prior to the project start to give an annual rate, in hectares per year.
+
+The amounts of forest in the project area 10 years prior to project start are as follows:
+
+-   Undisturbed forest: `r format(100*prop_und, big.mark = ",", scientific = FALSE, digits = 3)`%
+
+-   Degraded forest: `r format(100*prop_deg, big.mark = ",", scientific = FALSE, digits = 3)`%
+
+The rates are given below.
+
+```{r rate_of_forest_loss_ha, echo=FALSE}
+
+source(file.path(script_path,'def_rate.R'))
+
+df_rate_percent <- def_rate_single(data=cf_input,t0=start_year-10,period_length=10)
+
+df_rate_ha <- df_rate_percent
+
+df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3]/100)*project_area_ha*prop_und
+
+df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3]/100)*project_area_ha*prop_deg
+
+knitr::kable(
+  df_rate_ha %>% 
+  rename('Rate (ha/year)' = 3) %>%
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", .)))
+)
+
+
+```
+
+\
+
+### Carbon stock changes in the matched points during the baseline period
+
+Here we present the carbon density calculations for this project.
+
+In order to convert land cover changes to carbon emissions, we use regional aboveground (AGB) carbon density values generated through NASA GEDI data. 
+
+More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi).
+
+Note that, in calculating carbon stock changes, we assume the following:
+
+- Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997)
+
+- Deadwod biomass is 11% of AGB (based on IPCC 2003)
+
+- Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011)
+
+
+\
+```{r additionality_forecast}
+
+baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8))
+colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea')
+luc_counter <- 1
+row_counter <- 1
+
+carbon_density <- filter(carbon_density, land.use.class %in% c(1:6))
+
+for(i in carbon_density$land.use.class){
+
+  for(j in c("Start","End")) {
+
+    # get agb
+
+    agb <- carbon_density$carbon.density[luc_counter]
+
+    # get other values
+
+    bgb <- agb*0.2
+    dw <- agb*0.11
+    total <- agb + bgb + dw
+    #total_co2 <- total*0.47 # we're doing this step later
+
+    # get area of class i
+
+    if (j == "Start") {
+      area_of_forest <- get_prop_class(cf_input,t0=start_year-10,class=i)*project_area_ha
+    } else if (j == "End") {
+      area_of_forest <- get_prop_class(cf_input,t0=start_year,class=i)*project_area_ha }
+
+    # multiply total by area
+    
+    total_byarea <- total*area_of_forest
+
+    # adding to df
+
+    baseline_stocks[row_counter,1] <- j
+    baseline_stocks[row_counter,2] <- i
+    baseline_stocks[row_counter,3] <- agb
+    baseline_stocks[row_counter,4] <- bgb
+    baseline_stocks[row_counter,5] <- dw
+    baseline_stocks[row_counter,6] <- total
+    baseline_stocks[row_counter,7] <- area_of_forest
+    baseline_stocks[row_counter,8] <- total_byarea
+
+    row_counter <- row_counter+1
+
+  }
+
+  # advance counter
+
+  luc_counter <- luc_counter + 1
+
+}
+
+# formatting bits
+
+baseline_stocks_format <- baseline_stocks
+baseline_stocks_format <- baseline_stocks_format %>% filter(time == 'Start')
+baseline_stocks_format <- baseline_stocks_format[2:6]
+
+colnames(baseline_stocks_format) <- c(
+                                 'Land use class',
+                                 'AGB density (t C / ha)',
+                                 'BGB density (t C / ha)',
+                                 'Deadwood biomass density (t C / ha)', 
+                                 'Total biomass density (t C / ha)', 
+                                 'Total biomass (t C)')
+
+
+# renaming classes
+
+baseline_stocks_format <- baseline_stocks_format %>%
+  mutate(`Land use class` = case_when(
+    `Land use class` == "1" ~ 'Undisturbed',
+    `Land use class` == "2" ~ 'Degraded',
+    `Land use class` == "3" ~ 'Deforested',
+    `Land use class` == "4" ~ 'Reforested',
+    `Land use class` == "5" ~ 'Water',
+    `Land use class` == "6" ~ 'Other',
+    TRUE ~ as.character(`Land use class`)  # ensure no unexpected values
+  ))
+
+
+baseline_stocks_format[2:6] <- lapply(baseline_stocks_format[, 2:6], function(x) {
+  if (is.numeric(x)) comma(x) else x
+})
+
+# Print only carbon calculations at this stage
+
+baseline_stocks_format %>% 
+  drop_na() %>% 
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) %>% 
+  kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>%
+  kable_styling(bootstrap_options = "striped")
+
+```
+
+# Results: baseline rate of carbon emissions
+
+In this section we present the annual rate of carbon loss due to deforestation in the matched points during the baseline period. We can take this to be a prediction of the counterfactual scenario for the project (the *baseline*). 
+
+First we present the carbon stock changes observed in the matched points during the baseline period:
+
+```{r results}
+
+baseline_stock_changes <- baseline_stocks[c(1:2,7:8)]
+
+# reshape
+
+reshaped_data <- baseline_stock_changes %>%
+  mutate(luc = as.character(luc)) %>%
+  group_by(luc) %>%
+  summarize(
+    area_start = area[time == "Start"],
+    area_end = area[time == "End"],
+    area_diff = area_start - area_end,
+    c_start = total_byarea[time == "Start"],
+    c_end = total_byarea[time == "End"],
+    c_diff = c_start - c_end,
+    .groups = 'drop'
+  )
+
+# get totals
+
+total_row <- reshaped_data %>%
+  summarize(
+    luc = "Total",
+    area_start = sum(area_start, na.rm = TRUE),
+    area_end = sum(area_end, na.rm = TRUE),
+    area_diff = sum(area_diff, na.rm = TRUE),
+    c_start = sum(c_start, na.rm = TRUE),
+    c_end = sum(c_end, na.rm = TRUE),
+    c_diff = sum(c_diff, na.rm = TRUE)
+  ) %>% 
+  mutate(luc = as.character(luc))
+
+baseline_stock_changes <- bind_rows(reshaped_data, total_row)
+
+# add in conversion to CO2
+
+baseline_stock_changes <- baseline_stock_changes  %>% 
+    mutate(co2_diff = 0.47*c_diff)
+
+# formatting bits
+
+baseline_stock_changes_format <- baseline_stock_changes %>%
+  mutate(across(where(is.numeric), ~ comma(.))) %>%
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", as.numeric(.))))
+
+if (knitr::is_html_output()) {
+  colnames(baseline_stock_changes_format) <- c(
+                                 'Land use class',
+                                 'Area at start (ha)',
+                                 'Area at end (ha)',
+                                 'Area loss (ha)',
+                                 'Biomass at start (t)',
+                                 'Biomass at end (t)',
+                                 'Biomass loss (t)'
+                                 'CO<sub>2</sub> loss (t)')
+} else if (knitr::is_latex_output()) {
+  colnames(baseline_stock_changes_format) <- c(
+                                 'Land use class',
+                                 'Area at start (ha)',
+                                 'Area at end (ha)',
+                                 'Area loss (ha)',
+                                 'Biomass at start (t)',
+                                 'Biomass at end (t)',
+                                 'Biomass loss (t)'
+                                 'CO$_{2}$ loss (t)')
+}
+
+baseline_stock_changes_format <- baseline_stock_changes_format %>%
+  mutate(`Land use class` = case_when(
+    `Land use class` == "1" ~ 'Undisturbed',
+    `Land use class` == "2" ~ 'Degraded',
+    `Land use class` == "3" ~ 'Deforested',
+    `Land use class` == "4" ~ 'Reforested',
+    `Land use class` == "5" ~ 'Water',
+    `Land use class` == "6" ~ 'Other',
+    TRUE ~ as.character(`Land use class`)  # ensure no unexpected values
+  ))
+
+baseline_stock_changes_format[nrow(baseline_stock_changes_format), 1] <- 'Total'
+
+filtered_data <- baseline_stock_changes_format %>%
+  drop_na() %>%
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", .)))
+
+last_row_index <- nrow(filtered_data)
+
+filtered_data %>%
+  kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>%
+  kable_styling(bootstrap_options = "striped") %>%
+  row_spec(last_row_index, bold = TRUE) 
+
+```
+
+```{r results_summary}
+
+# find the difference
+
+delta_c <- as.numeric(baseline_stock_changes[nrow(baseline_stock_changes), ncol(baseline_stock_changes)])
+delta_c_annual <- delta_c/10
+
+```
+
+To calculate the baseline annual rate of carbon emissions, we sum the the differences in carbon stocks between the start and end of the baseline period, then divide the total by the length of the baseline period (10 years).
+
+**For this project, the baseline annual rate of carbon emissions, in tonnes of carbon dioxide per year, is `r format(delta_c_annual, big.mark = ",", scientific = FALSE, digits = 3)`.** This should be interpreted as the maximum estimated number of carbon credits that could be generated if the project mitigated 100% of the baseline level of deforestation, assuming this is confirmed by ex post observations. We present alternative mitigation scenarios below.
+
+### Expected additionality under different mitigation scenarios
+
+Additionality depends not only on baseline deforestation rate but the ability of the project to mitigate that deforestation. Therefore, we present the expected additionality under different mitigation scenarios (100% to 10% mitigation success). The 100% mitigation scenario occurs where the project is able to mitigate all of the deforestation that would have occurred under the counterfactual scenario. This scenario is unlikely to be realistic, but gives a sense of the deforestation risk faced by each area. It is more likely that only a proportion of this baseline deforestation is mitigated - for example, under the 50% mitigation scenario, the project is able to reduce the deforestation rate by half. The higher the proportion of the baseline deforestation that is mitigated, the greater the additionality of a project.
+
+Note that we present the raw additionality, without accounting for leakage and impermenance (discussed later).
+
+We are in the process of producing confidence intervals that reflect the uncertainty associated with the baseline, which will be added to future revisions of this document.
+
+```{r}
+
+scenarios <- data.frame(matrix(ncol=2,nrow=5))
+scenarios[1] <- c("10%","25%","50%","75%","100%")
+scenarios[2] <- delta_c_annual*c(0.1,0.25,0.5,0.75,1)
+
+if (knitr::is_html_output()) {
+  colnames(scenarios) <- c('Scenario', 
+                            'Additionality (t CO<sub>2</sub> / year)')
+} else if (knitr::is_latex_output()) {
+  colnames(scenarios) <- c('Scenario', 
+                            'Additionality (t CO$_{2}$ / year)')
+}
+
+scenarios <- scenarios %>% 
+  mutate(across(where(is.numeric), comma))
+
+knitr::kable(
+  scenarios
+)
+
+```
+
+\
+
+# Accounting for leakage and impermanence
+
+Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**.
+
+**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage is likely to be lower if the processes leading deforestation and degradation do not result in high yielding land uses, or if the carbon densities within the project are high compared with those in other areas where these activities are taking place. Leakage can also be reduced by interventions which improve yields in areas already under production. We can provide guidance on how this could be achieved.
+
+**Impermanence** occurs when the additionality generated by a project is reversed. Additional carbon stocks in forests are inherently vulnerable to these reversals. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. In future revisions of this document we aim to include indicative estimates of the equivalent permanence (the relative value of a impermanent credit relative to a permanent credit) for this project.
+
+You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence).
+
+---
+
+### Reproducibility
+
+This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation).
diff --git a/scripts/scripts/def_rate.R b/scripts/scripts/def_rate.R
new file mode 100644
index 0000000..6a4c417
--- /dev/null
+++ b/scripts/scripts/def_rate.R
@@ -0,0 +1,328 @@
+
+
+
+def_rate <- function(data,t0,period_length,process='all'){
+  
+  # get name of column for start year
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  # filter down to pixels with undisturbed forest (JRC class 1)
+  
+  data_filtered <- data[data[,t0_index]==1,] 
+  
+  # count 1s at t0 in project and match
+  
+  proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() 
+  cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() 
+  
+  # identify where there have been changes during the evaluation period
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered %>% 
+    select(paste0('luc_',tend))
+  
+  # choosing processes to measure
+  
+  if(process=='def_only'){
+    
+    response <- case_when(
+      luc_tend==1 ~ 0,
+      luc_tend==2 ~ 0,
+      luc_tend==3 ~ 1,
+      luc_tend==4 ~ 0,
+      luc_tend>4 ~ 0)
+    
+  } else if(process=='deg_only'){
+      
+      response <- case_when(
+        luc_tend==1 ~ 0,
+        luc_tend==2 ~ 1,
+        luc_tend==3 ~ 0,
+        luc_tend==4 ~ 0,
+        luc_tend>4 ~ 0)
+      
+    } else {
+    
+    response <- case_when(
+      luc_tend==1 ~ 0,
+      luc_tend==2 ~ 1,
+      luc_tend==3 ~ 1,
+      luc_tend==4 ~ 1,
+      luc_tend>4 ~ 0)
+    
+    }
+  
+  
+  data_filtered$response <- response
+  
+  # count up number of pixels where there have been changes for each type
+  
+  proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% 
+    nrow()
+  cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
+  
+  proj_rate <- 100*(proj_changes/proj_1s)/period_length
+  cf_rate <- 100*(cf_changes/cf_1s)/period_length
+  
+  # make df
+  
+  df <- data.frame(matrix(ncol=2,nrow=1))
+  colnames(df) <- c('Project','Counterfactual')
+  df[1,1] <- proj_rate
+  df[1,2] <- cf_rate
+  
+  return(df)
+  
+}
+
+
+
+def_rate_seperate <- function(data,t0,period_length){
+  
+  # get name of column for start year
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  # filter down to pixels with undisturbed forest (JRC class 1)
+  
+  data_filtered <- data[data[,t0_index]==1,] 
+  
+  # count 1s at t0 in project and cf
+  
+  proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() 
+  cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() 
+  
+  # identify where there have been changes during the evaluation period
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered %>% 
+    select(paste0('luc_',tend))
+  
+  # measuring responses
+  
+  def_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 1,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  deg_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 1,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  ref_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 1,
+    luc_tend>4 ~ 0)
+  
+  data_filtered$def_response <- def_response
+  data_filtered$deg_response <- deg_response
+  data_filtered$ref_response <- ref_response
+  
+  # count up number of pixels where there have been changes for each type
+  
+  proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% 
+    nrow()
+  cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% 
+    nrow()
+  cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% 
+    nrow()
+  cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
+  
+  proj_def <- 100*(proj_def_changes/proj_1s)/period_length
+  cf_def <- 100*(cf_def_changes/cf_1s)/period_length
+  
+  proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length
+  cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length
+  
+  proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length
+  cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length
+  
+  # adding the degraded-to-deforested transition
+  
+  data_filtered_2 <- data[data[,t0_index]==2,]
+  
+  # count 2s at t0 in project and cf
+  
+  proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow()
+  cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() 
+  
+  # identify where there have been changes during the evaluation period
+  
+  luc_tend_2 <- data_filtered_2 %>% 
+    select(paste0('luc_',tend))
+  
+  def_response_2 <- case_when(
+    luc_tend_2==1 ~ 0,
+    luc_tend_2==2 ~ 0,
+    luc_tend_2==3 ~ 1,
+    luc_tend_2==4 ~ 0,
+    luc_tend_2>4 ~ 0)
+  
+  data_filtered_2$def_response_2 <- def_response_2
+  
+  proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% 
+    nrow()
+  cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length
+  cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length
+  
+  # make df
+  
+  df <- data.frame(matrix(ncol=4,nrow=8))
+  
+  colnames(df) <- c('Process','Forest type','Location','Rate (%/year)')
+  
+  df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2))
+  df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2))
+  df[3] <- c(rep(c('Project','Counterfactual'),times=4))
+  df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref)
+  
+  return(df)
+  
+}
+
+get_prop_class <- function(data,t0,class){
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  data_filtered <- data[data[,t0_index]==class,]
+  
+  total_count <- data %>% nrow()
+  class_count <- data_filtered %>% nrow()
+  prop <- class_count/total_count
+  
+  return(prop)
+  
+}
+
+
+def_rate_single <- function(data,t0,period_length){
+  
+  # get name of column for start year
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  # filter down to pixels with undisturbed forest (JRC class 1)
+  
+  data_filtered <- data[data[,t0_index]==1,] 
+  
+  # count 1s at t0 in project and cf
+  
+  no_1s <- nrow(data_filtered)
+  
+  # identify where there have been changes during the evaluation period
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered %>% 
+    select(paste0('luc_',tend))
+  
+  # measuring responses
+  
+  def_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 1,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  deg_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 1,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  ref_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 1,
+    luc_tend>4 ~ 0)
+  
+  data_filtered$def_response <- def_response
+  data_filtered$deg_response <- deg_response
+  data_filtered$ref_response <- ref_response
+  
+  # count up number of pixels where there have been changes for each type
+  
+  def_changes <- data_filtered %>% filter(def_response==1) %>% 
+    nrow()
+  
+  deg_changes <- data_filtered %>% filter(deg_response==1) %>% 
+    nrow()
+  
+  ref_changes <- data_filtered %>% filter(ref_response==1) %>% 
+    nrow()
+  
+  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
+  
+  def <- 100*(def_changes/no_1s)/period_length
+  
+  deg <- 100*(deg_changes/no_1s)/period_length
+  
+  ref <- 100*(ref_changes/no_1s)/period_length
+  
+  # adding the degraded-to-deforested transition
+  
+  data_filtered_2 <- data[data[,t0_index]==2,]
+  
+  # count 2s at t0 in project and cf
+  
+  no_2s <- data_filtered_2 %>% nrow()
+  
+  # identify where there have been changes during the evaluation period
+  
+  luc_tend_2 <- data_filtered_2 %>% 
+    select(paste0('luc_',tend))
+  
+  def_response_2 <- case_when(
+    luc_tend_2==1 ~ 0,
+    luc_tend_2==2 ~ 0,
+    luc_tend_2==3 ~ 1,
+    luc_tend_2==4 ~ 0,
+    luc_tend_2>4 ~ 0)
+  
+  data_filtered_2$def_response_2 <- def_response_2
+  
+  def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% 
+    nrow()
+
+  deg_to_def <- 100*(def_changes_2/no_2s)/period_length
+  
+  # make df
+  
+  df <- data.frame(matrix(ncol=3,nrow=4))
+  
+  colnames(df) <- c('Process','Forest type','Rate (%/year)')
+  
+  df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation')
+  df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest')
+  df[3] <- c(deg,def,deg_to_def,ref)
+  
+  return(df)
+  
+}
\ No newline at end of file
diff --git a/scripts/scripts/land_cover_timeseries.R b/scripts/scripts/land_cover_timeseries.R
new file mode 100644
index 0000000..6490bf1
--- /dev/null
+++ b/scripts/scripts/land_cover_timeseries.R
@@ -0,0 +1,111 @@
+
+get_luc_timeseries <- function(data,t0,tend,type='both'){
+  
+  years_list <- seq(t0,tend)
+
+  if(type=='both'){
+
+    df <- data.frame(matrix(ncol=4,nrow=8*length(years_list)))
+  
+    colnames(df) <- c('year','type','luc','percentage')
+    
+    counter <- 1
+    
+    for(year in years_list) {
+      
+      for(i in seq (1:4)) {
+        
+        for(type_value in c('Project','Counterfactual')) {
+          
+          total <- data %>% filter(type == type_value) %>% nrow()
+          
+          no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow()
+          
+          prop <- no_class_i/total
+          
+          df[counter,1] <- year
+          df[counter,2] <- type_value
+          df[counter,3] <- i
+          df[counter,4] <- prop*100
+          
+          counter <- counter+1
+        
+        }
+        
+      }
+      
+    }
+
+  } else if(type=='single'){
+
+    df <- data.frame(matrix(ncol=3,nrow=4*length(years_list)))
+  
+    colnames(df) <- c('year','luc','percentage')
+    
+    counter <- 1
+    
+    for(year in years_list) {
+      
+      for(i in seq (1:4)) {
+          
+          total <- data %>% nrow()
+          
+          no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow()
+          
+          prop <- no_class_i/total
+          
+          df[counter,1] <- year
+          df[counter,2] <- i
+          df[counter,3] <- prop*100
+          
+          counter <- counter+1
+        
+        }
+      
+    }
+
+  }
+
+  return(drop_na(df))
+  
+}
+
+luc_class1_uncertainty <- function(data,t0,tend) {
+
+  years_list <- seq(t0-10,tend)
+  
+  df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list)))
+  
+  colnames(df) <- c('year','type','pair','percent_class1')
+  
+  counter <- 1
+  
+  for(year in years_list) {
+    
+    for(type_value in c('Project','Counterfactual')) {
+      
+      for(pair_id in unique(data$pair)) {
+        
+        total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow()
+        
+        no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow()
+        
+        prop <- no_class_i/total
+        
+        df[counter,1] <- year
+        df[counter,2] <- type_value
+        df[counter,3] <- pair_id
+        df[counter,4] <- prop*100
+        
+        counter <- counter+1
+        
+      }
+      
+    }
+    
+  }
+  
+  return(drop_na(df))
+
+}
+
diff --git a/scripts/scripts/plot_matchingvars.R b/scripts/scripts/plot_matchingvars.R
new file mode 100644
index 0000000..ec47f01
--- /dev/null
+++ b/scripts/scripts/plot_matchingvars.R
@@ -0,0 +1,42 @@
+plot_matching_variables <- function(data, ex_ante = 'false') {
+  
+  cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc'))
+  cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages
+  cont_data <- melt(cont_data)
+  
+  # rename labels
+  cont_data$variable <- factor(cont_data$variable, 
+                  levels = c('access', 'cpc0_u', 'cpc0_d', 
+                  'slope', 'cpc5_u', 'cpc5_d', 
+                  'elevation', 'cpc10_u', 'cpc10_d'))
+  
+  levels(cont_data$variable) <- c('Inaccessibility',
+                                  'Forest~cover~t[0]',
+                                  'Deforestation~t[0]',
+                                  'Slope',
+                                  'Forest~cover~t[-5]',
+                                  'Deforestation~t[-5]',
+                                  'Elevation',
+                                  'Forest~cover~t[-10]',
+                                  'Deforestation~t[-10]')
+  
+  # determine labels based on ex_ante
+  if (ex_ante == 'false') {
+    plot_labels <- c('Counterfactual', 'Project')
+  } else if (ex_ante == 'true') {
+    plot_labels <- c('Matched points', 'Project')}
+  
+  # plot
+  matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) +
+    geom_density(adjust = 10, size = 1) +
+    facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) +
+    ylab('Density') +
+    scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) +
+    theme_classic() +
+    theme(legend.title = element_blank(),
+          axis.title.x = element_blank(),
+          axis.text.y = element_blank(),
+          axis.ticks.y = element_blank())
+  
+  return(matchingvars)
+}
\ No newline at end of file
diff --git a/scripts/scripts/plot_transitions.R b/scripts/scripts/plot_transitions.R
new file mode 100644
index 0000000..2931a60
--- /dev/null
+++ b/scripts/scripts/plot_transitions.R
@@ -0,0 +1,63 @@
+library(ggspatial)
+
+plot_transitions <- function(data,t0,period_length,shapefile){
+  
+  # count number of 1s at project start
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  data_filtered <- data[data[,t0_index]==1,]
+  
+  # identify where there have been changes
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered[[paste0('luc_', tend)]]
+  
+  response <- case_when(
+    luc_tend==1 ~ NA,
+    luc_tend==2 ~ 'deg',
+    luc_tend==3 ~ 'def',
+    luc_tend==4 ~ 'ref',
+    luc_tend>4 ~ NA)
+  
+  data_filtered$response <- as.factor(response)
+  data_filtered <- data_filtered %>% filter(!is.na(response))
+  
+  # adding deg --> def transition
+  
+  # count number of 2s at project start
+  
+  data_filtered_2s <- data[data[,t0_index]==2,]
+
+  # identify where there have been changes
+  
+  luc_tend <- data_filtered_2s[[paste0('luc_', tend)]]
+  
+  response <- case_when(
+    luc_tend==1 ~ NA,
+    luc_tend==2 ~ NA,
+    luc_tend==3 ~ 'deg_to_def',
+    luc_tend==4 ~ NA,
+    luc_tend>4 ~ NA)
+  
+  data_filtered_2s$response <- as.factor(response)
+  data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response))
+  
+  combined_dat <- bind_rows(data_filtered, data_filtered_2s)
+  combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref'))
+  
+  # plotting 
+  
+  plot <- combined_dat %>% 
+    filter(response != 0) %>% 
+    ggplot(aes(x=lng,y=lat,colour=response))+
+    geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+
+    geom_point(alpha=0.5,size=0.5)+
+    scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+
+    annotation_scale(text_cex = 1.3)+
+    theme_void()
+  
+  return(plot)
+  
+}
diff --git a/scripts/scripts/std_mean_diff.R b/scripts/scripts/std_mean_diff.R
new file mode 100644
index 0000000..63d81ba
--- /dev/null
+++ b/scripts/scripts/std_mean_diff.R
@@ -0,0 +1,57 @@
+
+std_mean_diff <- function(path_to_pairs) {
+  
+  # clean data
+
+  files_full_raw <- list.files(path_to_pairs,
+                               pattern='*.parquet',full.names=T,recursive=F)
+  files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
+  files_short_raw <- list.files(path=path_to_pairs,
+                                pattern='*.parquet',full.names=F,recursive=F)
+  files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
+  
+  # initialise dfs
+  
+  vars <- c(colnames(read_parquet(files_full[1])),'pair')
+  df <- data.frame(matrix(ncol=length(vars),nrow=0))
+  colnames(df) <- vars
+  
+  for(j in 1:length(files_full)){
+    
+    # read in all parquet files for a given project
+    
+    f <- data.frame(read_parquet(files_full[j]))
+    
+    # append data to bottom of df
+    
+    df <- bind_rows(df,f)
+    
+  }
+
+  # calculate smd
+
+  smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE)
+  
+  variables <- c('cpc10_d','cpc5_d','cpc0_d',
+                 'cpc10_u','cpc5_u','cpc0_u',
+                 'access','slope','elevation')
+    
+    for (var in variables) {
+      k_var <- df[[paste0("k_", var)]]
+      s_var <- df[[paste0("s_", var)]]
+      
+      k_mean <- mean(k_var, na.rm = TRUE)
+      s_mean <- mean(s_var, na.rm = TRUE)
+      k_sd <- sd(k_var, na.rm = TRUE)
+      s_sd <- sd(s_var, na.rm = TRUE)
+      
+      pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2)
+      smd <- (k_mean - s_mean) / pooled_sd
+      
+      smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE))
+    }
+    
+  return(smd_results)
+}
+  
+  
\ No newline at end of file
diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index c30367e..7361eb3 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -8,8 +8,6 @@
 #e: evaluation year (default: 2022)
 #v: verbose - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false).
 
-#NB running evaluations requires the evaluations code
-
 # Check which branch is currently checked out
 branch=$(git rev-parse --abbrev-ref HEAD)
 
@@ -198,7 +196,6 @@ deactivate
 
 # Run ex-ante evaluation
 if [ "$verbose" == "true" ]; then
-evaluations_dir="~/evaluations"
-ea_output_file="${evaluations_dir}/${proj}_ex_ante_evaluation.html"
-Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))"
+ea_output_file="${output_dir}/${proj}_ex_ante_evaluation.html"
+Rscript -e "rmarkdown::render(input='scripts/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))"
 fi
\ No newline at end of file

From 9d92d17f388e0257c4123d26a842132d7bc7d915 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Wed, 28 Aug 2024 09:34:09 +0000
Subject: [PATCH 15/19] Moved things around

---
 evaluations/ex_ante_evaluation_template.Rmd | 1001 +++++++++++++++++++
 evaluations/methods_diagram.png             |  Bin 0 -> 37672 bytes
 evaluations/scripts/def_rate.R              |  328 ++++++
 evaluations/scripts/land_cover_timeseries.R |  111 ++
 evaluations/scripts/plot_matchingvars.R     |   42 +
 evaluations/scripts/plot_transitions.R      |   63 ++
 evaluations/scripts/std_mean_diff.R         |   57 ++
 7 files changed, 1602 insertions(+)
 create mode 100644 evaluations/ex_ante_evaluation_template.Rmd
 create mode 100644 evaluations/methods_diagram.png
 create mode 100644 evaluations/scripts/def_rate.R
 create mode 100644 evaluations/scripts/land_cover_timeseries.R
 create mode 100644 evaluations/scripts/plot_matchingvars.R
 create mode 100644 evaluations/scripts/plot_transitions.R
 create mode 100644 evaluations/scripts/std_mean_diff.R

diff --git a/evaluations/ex_ante_evaluation_template.Rmd b/evaluations/ex_ante_evaluation_template.Rmd
new file mode 100644
index 0000000..be95ce1
--- /dev/null
+++ b/evaluations/ex_ante_evaluation_template.Rmd
@@ -0,0 +1,1001 @@
+---
+output:
+  html_document:
+    theme: spacelab
+    df_print: paged
+    toc: yes
+    toc_float: yes
+  pdf_document:
+    toc: yes
+params:
+  proj: null
+  t0: null
+  input_dir: null
+  output_dir: null
+  fullname: null
+  country_path: null
+  shapefile_path: null
+  pairs_path: null
+  carbon_density_path: null
+  branch: null
+---
+
+```{r include=FALSE}
+
+# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A SHELL TERMINAL:
+
+# Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd', output_file='~/evaluations/R/example_project.html', params=list(proj='example_project', t0=2010, ...))"
+
+# Mandatory args: proj, t0
+# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path
+# You must either specify input dir and output dir OR provide absolute paths to each of the objects required
+
+```
+
+```{r settings, include=FALSE}
+knitr::opts_chunk$set(
+  echo = FALSE, warning=FALSE,message=FALSE)
+
+library(tidyverse)
+library(sf)
+library(reshape2)
+library(maps)
+library(mapdata)
+library(ggspatial)
+library(arrow)
+library(rnaturalearth)
+library(rnaturalearthdata)
+library(rnaturalearthhires)
+library(stringr)
+library(jsonlite)
+library(countrycode)
+library(scales)
+library(here)
+library(patchwork)
+library(knitr)
+library(kableExtra)
+
+```
+
+```{r read_inputs, echo=FALSE,warning=FALSE, message=FALSE}
+
+project_name <- params$proj
+start_year <- as.numeric(params$t0)
+branch <- params$branch
+
+```
+
+---
+title: "`r paste0('4C Ex-Ante Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`"
+subtitle: "`r format(Sys.Date(), "%B %Y")`"
+---
+
+```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE}
+
+# get output format
+
+output_format <- ifelse(knitr::is_latex_output(), "latex", "html")
+
+# get script path
+
+script_path <- here('scripts')
+
+# get explainer diagram path 
+
+diagram_path <- here('methods_diagram.png')
+
+# get data path
+
+if (!is.null(params$output_dir)) {
+  data_path <- paste0(params$output_dir,'/',project_name)
+}
+
+# get path to pairs
+
+if (!is.null(params$pairs_path)) {
+  pairs_path <- params$pairs_path
+} else { pairs_path <- file.path(data_path,'pairs') }
+
+# read shapefile
+
+if (!is.null(params$input_dir)) {
+  input_dir <- params$input_dir 
+}
+
+if (!is.null(params$shapefile_path)) {
+  shapefile_path <- params$shapefile_path
+} else { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') }
+shapefile <- read_sf(shapefile_path)
+
+# read carbon density
+
+if (!is.null(params$carbon_density_path)) {
+  carbon_density_path <- params$carbon_density_path
+} else { carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] }
+carbon_density <- read.csv(carbon_density_path)
+
+# read country path
+
+if (!is.null(params$country_path)) {
+  country_path <- params$country_path
+} else { country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)}
+
+```
+
+```{r read_pairs, echo=FALSE}
+  
+# get filenames and filter for matched points
+
+files_full_raw <- list.files(pairs_path,
+                          pattern='*.parquet',full.names=T,recursive=F)
+files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
+files_short_raw <- list.files(path=pairs_path,
+                          pattern='*.parquet',full.names=F,recursive=F)
+files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
+
+# initialise dfs
+
+vars <- c(colnames(read_parquet(files_full[1])),'pair')
+paired_data_raw <- data.frame(matrix(ncol = length(vars), nrow = 0)) %>%
+  setNames(vars) %>%
+  mutate(
+    pair = as.factor(pair),
+    k_trt = as.factor(k_trt),
+    s_trt = as.factor(s_trt)
+  )
+
+for(j in 1:length(files_full)){
+  
+  # read parquet file
+  
+  f <- data.frame(read_parquet(files_full[j]),check.names = FALSE)
+  
+  # add identity column
+  
+  f$pair <- as.factor(c(replicate(nrow(f),str_remove(files_short[j], "\\.parquet$"))))
+  
+  # append data to bottom of df
+  
+  paired_data_raw <- bind_rows(paired_data_raw,f)
+  
+}
+
+# generate separate datasets for project and counterfactual 
+
+project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair)
+cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair)
+
+# create project-counterfactual merged dataset
+
+colnames(cf) <- colnames(project)
+pair_merged <- bind_rows(project,cf)
+names(pair_merged) <- str_sub(names(pair_merged),3)
+names(pair_merged)[names(pair_merged) == "ir"] <- "pair"
+
+# add type column and remove excess cols
+
+data <- pair_merged %>%
+        mutate(type=c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual'))) %>% 
+        select(-c(contains('trt'),ID))
+
+```
+
+```{r get_shapefile_area, echo=FALSE}
+  
+project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000)
+
+```
+
+```{r get_country_names}
+
+# define function for extracting country names 
+
+get_country_names <- function(country_codes_path) {
+    codes <- as.character(fromJSON(country_codes_path))
+    country_names <- countrycode(codes, 'iso2c', 'country.name')
+    country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo'
+    return(country_names)
+  }
+
+# get country names
+
+country_vec <- get_country_names(country_path)
+
+  # define function for printing the country names if there are multiple
+
+  if (length(country_vec) > 1) {
+    country_string <- paste(country_vec[-length(country_vec)], collapse = ", ")
+    country_string <- paste(country_string, "and", country_vec[length(country_vec)])
+  } else {
+    country_string <- country_vec[1]
+  }
+
+
+```
+
+\
+
+# Introduction
+
+This Report has been prepared by researchers at the Cambridge Centre for Carbon Credits (4C) and has been funded by a charitable grant from the Tezos Foundation.  4C utilises innovative, evidence-based approaches to examine the scientific basis of nature-based carbon conservation initiatives and, insodoing, provides a way for different stakeholders to assess the quality of carbon credits (ex post and/or ex ante).  
+
+**Disclaimer:  Nothing in this Report constitutes formal advice or recommendations, an endorsement of proposed action, or intention to collaborate; instead, it sets out the details of an evaluation using a method which is still under development. The Report is considered complete as of the publication date shown, though methods are likely to change in future.**
+
+\
+
+# About the project
+
+`r if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() }` is located in `r country_string`. The proposed area measures `r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. 
+
+For the purposes of this evaluation, we have set the proposed start date to `r start_year`.
+
+```{r echo=FALSE}
+
+# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________
+
+# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document.
+
+```
+
+
+
+\
+
+# Introduction to the 4C method
+
+*Our method for forecasting ex-ante additionality remains under development.*
+
+The 4C approach to forecasting additionality involves identifying places that experienced similar deforestation levels in the past as the project area does today. We start by analyzing forest cover changes in the project area between 10 years ago and the present day. Using pixel-matching techniques, we then identify comparable places outside the project that experienced similar deforestation trends between 20 and 10 years ago (the *matching period*). This allows us to match the deforestation trajectory of the project with that of the matched pixels, but offset in time. This concept is illustrated by the left-hand diagonal arrow in the figure below.
+
+We can consider the matched pixels as a historical representation of the project as it is today. By examining deforestation in the matched pixels over the subsequent 10 years (the *baseline period*), we estimate a *baseline prediction* — the deforestation expected in the project area under the counterfactual (business-as-usual) scenario. This rate is then projected forward over the next 10 years, as illustrated by the right-hand diagonal arrow in the figure below. We convert the deforestation rate to carbon dioxide emissions using best estimates of carbon density.
+
+```{r, echo=FALSE, fig.align='center', fig.width=6}
+
+knitr::include_graphics(diagram_path)
+
+```
+
+
+Making predictions about future deforestation is challenging, and there are multiple sources of uncertainty at play. These include: the quantification of carbon, the choice of matching pixels, the effect of leakage and impermanence, future political changes and market forces. We are constantly improving our method in order to minimise these uncertainties. Due to the inherent uncertainty associated with ex-ante (before-the-fact) predictions, carbon credits should only ever be quantified and issued ex-post (after the fact).
+
+More information about 4C's approach to impact evaluation can be found below.
+
+[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1)
+
+[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence)
+
+[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae)
+
+[Our paper on the social value of impermanent carbon credits](https://www.nature.com/articles/s41558-023-01815-0)
+
+[The PACT methodology for ex-post evaluations](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f)
+
+\
+
+
+# Methods
+
+The following sections will detail how we arrived at a forecast of future deforestation and the potential to generate additionality by reducing this deforestation. This includes the location and quality of the matched points, the deforestation rates in each set of points, and the carbon density values used to convert these deforestation rates to carbon emissions reductions.
+
+\
+
+### Location of matched points
+
+We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. We used these matched points to make a prediction of the counterfactual scenario for deforestation.
+
+Below we show the location of the matching points (shown in blue) relative to the project (shown in red), both at the country and project scale.
+
+`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}`
+
+```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE}
+
+# downsample no. of points by 90%
+
+if(nrow(data) > 20000){
+  data_forplot <- data %>% sample_frac(0.1)
+} else {
+  data_forplot <- data
+}
+
+# plot location of matching points
+
+country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large")
+
+# transform crs
+
+shapefile <- st_transform(shapefile, st_crs(country_map))
+
+ggplot(data=country_map) + 
+  geom_sf(colour=NA,fill='grey80')+
+  geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + 
+  scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+
+  coord_sf()+
+  theme_void()+
+  annotation_scale(text_cex=1.5,location='bl')+
+  theme(legend.title = element_blank(),
+        text=element_text(size=16),
+        legend.position='none')
+
+xmin <- filter(data, type=='Project') %>% select(lng) %>% min()
+xmax <- filter(data, type=='Project') %>% select(lng) %>% max()
+ymin <- filter(data, type=='Project') %>% select(lat) %>% min()
+ymax <- filter(data, type=='Project') %>% select(lat) %>% max()
+
+ggplot(data=country_map) + 
+  geom_sf(colour=NA,fill='grey80')+
+  geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + 
+  scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+
+  coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+
+  theme_void()+
+  annotation_scale(text_cex=1.5,location='bl')+
+  theme(legend.title = element_blank(),
+        text=element_text(size=16),
+        legend.position='none')
+
+```
+
+### Quality of matches
+
+Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the matched points (shown in blue) indicates that the the matched points are composed of places that are similar to the project in terms of the drivers of deforestation and are expected to exhibit similar deforestation trends.
+
+-   Inaccessibility (motorized travel time to healthcare, minutes)
+
+-   Slope ($^\circ$)
+
+-   Elevation (meters)
+
+-   Forest cover at t0 (start year, %)
+
+-   Deforestation at t0 (%)
+
+-   Forest cover at t-5 (5 years prior to start year, %)
+
+-   Deforestation at t-5 (%)
+
+-   Forest cover at t-10 (10 years prior to start year, %)
+
+-   Deforestation at t-10 (%)
+
+Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset.
+
+More information about the datasets we use can be found below:
+
+[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/)
+
+[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm)
+
+[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF)
+
+\
+
+```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE}
+
+# plot matches
+
+source(file.path(script_path,'plot_matchingvars.R'))
+
+plot_matching_variables(data,ex_ante='true')
+
+```
+
+\
+
+### Standardised mean differences
+
+We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). The SMD allows us to quantify the similarity between the project and the matched points in a way that is comparable across variables. 
+
+In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and matched points, in standard deviations) for each variable. Values further from zero indicate a larger difference. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our SMDs would ideally fall in order for the project and matched points to be considered well-matched.
+
+\
+
+```{r smd}
+
+std_mean_diff <- function(pairs_path) {
+  
+  # clean data
+
+  files_full_raw <- list.files(pairs_path,
+                               pattern='*.parquet',full.names=T,recursive=F)
+  files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
+  files_short_raw <- list.files(path=pairs_path,
+                                pattern='*.parquet',full.names=F,recursive=F)
+  files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
+  
+  # initialise dfs
+  
+  vars <- c(colnames(read_parquet(files_full[1])),'pair')
+  df <- data.frame(matrix(ncol=length(vars),nrow=0))  %>% 
+  setNames(vars) %>% 
+  mutate(k_trt=as.factor(k_trt),
+         s_trt=as.factor(s_trt))
+  
+  for(j in 1:length(files_full)){
+    
+    # read in all parquet files for a given project
+    
+    f <- data.frame(read_parquet(files_full[j])) %>% 
+         mutate(k_trt=as.factor(k_trt),
+                s_trt=as.factor(s_trt))
+    
+    # append data to bottom of df
+    
+    df <- bind_rows(df,f)
+    
+  }
+
+  # calculate smd
+
+  smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE)
+  
+  variables <- c('cpc10_d','cpc5_d','cpc0_d',
+                 'cpc10_u','cpc5_u','cpc0_u',
+                 'access','slope','elevation')
+    
+    for (var in variables) {
+      k_var <- df[[paste0("k_", var)]]
+      s_var <- df[[paste0("s_", var)]]
+      
+      k_mean <- mean(k_var, na.rm = TRUE)
+      s_mean <- mean(s_var, na.rm = TRUE)
+      k_sd <- sd(k_var, na.rm = TRUE)
+      s_sd <- sd(s_var, na.rm = TRUE)
+      
+      pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2)
+      smd <- (k_mean - s_mean) / pooled_sd
+      
+      smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE))
+    }
+    
+  return(smd_results)
+}
+
+results <- std_mean_diff(pairs_path)
+
+# changing sign for interpretation
+
+results$smd <- (-1)*results$smd
+
+# changing order of variables
+
+variables <- c('cpc10_d','cpc5_d','cpc0_d',
+                 'cpc10_u','cpc5_u','cpc0_u',
+                 'access','slope','elevation')
+
+results$variable <- factor(results$variable, levels=variables)
+
+# plotting
+
+ ggplot(results,aes(x=smd,y=variable))+
+  #geom_boxplot(outlier.shape=NA,colour='blue')+
+  geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+
+  geom_vline(xintercept=0)+
+  geom_vline(xintercept=0.25,lty=2,colour='grey30')+
+  geom_vline(xintercept=-0.25,lty=2,colour='grey30')+
+  scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")),
+                            bquote(Deforestation~t[-5]~("%")),
+                            bquote(Deforestation~t[0]~("%")),
+                            bquote(Forest~cover~t[-10]~("%")),
+                            bquote(Forest~cover~t[-5]~("%")),
+                            bquote(Forest~cover~t[0]~("%")),
+                            'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+
+  xlab('Standardised mean difference')+
+  xlim(-1,1)+
+  theme_classic()+
+  theme(axis.title.y=element_blank(),
+        legend.title=element_blank(),
+        legend.box.background=element_rect(),
+        legend.position='none',
+        text=element_text(size=14),
+        axis.text.y=element_text(size=14))
+
+
+```
+
+\
+
+### Deforestation within the project
+
+Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes:
+
+-   Undisturbed forest to degraded forest 
+
+-   Degraded forest to deforested land 
+
+-   Undisturbed forest to deforested land 
+
+-   Undisturbed land to reforested land (which indicates that regrowth occurred after a deforestation event)
+
+\
+
+These transitions are shown in the plot below for the 10-year period between `r start_year-10` and `r start_year`. They are overlaid on the project area which is shown in grey. If a transition is not shown, it did not occur in the period examined.
+
+Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF).
+
+\
+
+```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE}
+
+# plot deforestation within project
+
+source(file.path(script_path,'plot_transitions.R'))
+
+proj_coords <- data %>% 
+  filter(type=='Project') %>% 
+  select(lat,lng)
+
+proj_input_defplot <- data %>% 
+  filter(type=='Project') %>% 
+  select(contains('luc')) %>% 
+  setNames(paste0("luc_", (start_year-10):start_year)) %>%
+  cbind(proj_coords)
+
+proj_input_defplot <- proj_input_defplot[, !is.na(colnames(proj_input_defplot))]
+
+plot_transitions(data=proj_input_defplot,t0=start_year-10,period_length=10,shapefile=shapefile)
+
+```
+
+\
+
+### Land cover changes within project and matched pixels
+
+In the below plots, we show the changes in land classes over time for both the project (red) and the matched points (blue).
+
+Note the following:
+
+-   The vertical grey dashed line represents the start year of the project (`r start_year`). The timings shown on the x-axis are relative to this start year.
+
+-   As explained in the 'Methods' section, the matched points are offset in time relative to the project by 10 years. This means that all changes observed in the matched points happened 10 years prior to the equivalent time point in the project. This time offset allows us to use the last 10 years in the matched points as a prediction of the next 10 years for the project.
+
+-   Solid lines represent ex-post observed changes, whereas the dotted line represents the prediction for the future of the project. 
+
+```{r make_inputs, echo=FALSE}
+
+# preparing inputs
+
+proj_input <- data %>% 
+  filter(type=='Project') %>% 
+  select(contains('luc')) %>% 
+  setNames(paste0("luc_", (start_year-10):start_year))
+proj_input <- proj_input[, !is.na(colnames(proj_input))]
+
+
+cf_input <- data %>% 
+  filter(type=='Counterfactual') %>% 
+  select(contains('luc')) %>% 
+  setNames(paste0("luc_", (start_year-20):(start_year))) %>%
+  select(where(~ all(!is.na(.))))
+
+```
+
+```{r luc_timeseries_all, echo=FALSE}
+
+source(file.path(script_path,'land_cover_timeseries.R'))
+
+# getting results
+
+proj_results <- get_luc_timeseries(proj_input,t0=start_year-10,tend=start_year,type='single') %>% 
+  mutate(type='Project')
+
+cf_results <- get_luc_timeseries(cf_input,t0=start_year-20,tend=start_year,type='single') %>% 
+  mutate(type='Counterfactual')
+
+# combining results
+
+results <- bind_rows(proj_results, cf_results)
+
+```
+
+Showing the trend for undisturbed, degraded, deforested and regrowth in turn:
+
+```{r undisturbed_timeseries, fig.width=8,fig.height=13}
+
+# add prediction from the matched pixels:
+
+prediction <- cf_results %>% 
+                filter(year >= (start_year-10))  %>% 
+                mutate(type='Project',
+                year=year+10)
+
+results <- bind_rows(results,prediction)
+
+# make a custom function for plotting the results
+
+plot_timeseries <- function(luc_value, title_str) {
+  
+  #remove gap between solid and dotted project line
+  percent_val <- results %>%
+  filter(year == start_year 
+          & type == "Project"
+          & luc == luc_value) %>%
+  pull(percentage)
+
+  # df wrangling
+  extended_results <- results %>%
+    mutate(
+      luc = as.numeric(luc),
+      year = as.numeric(year),
+      line_type = ifelse(type == "Project" & year > start_year, "dotted", "solid"),
+      type = case_when(
+      type == "Counterfactual" ~ "Matched points",
+      TRUE ~ type
+    )
+    ) %>%
+    bind_rows(data.frame(
+      year = start_year,
+      luc = luc_value,   
+      percentage = percent_val,
+      type = 'Project',
+      line_type = 'dotted'
+    ))
+
+   extended_results %>%
+    filter(luc == luc_value) %>%
+    ggplot(aes(x = year, y = percentage, color = type, linetype = line_type)) +
+    geom_line(linewidth = 1.5) +
+    geom_vline(xintercept = start_year, linetype = 2, color = 'grey30') +
+    #geom_vline(xintercept = start_year-10, linetype = 2, color = 'grey30') +
+    scale_colour_manual(name = 'Location',
+                        values = c('red','blue'),
+                        breaks = c('Project', 'Matched points'),
+                        labels = c('Project', 'Matched points'))+
+    xlab('Year') +
+    ylab('% cover') +
+    ggtitle(title_str) +
+    guides(linetype = "none") +
+    theme_classic() +
+    scale_linetype_manual(values = c("solid" = "solid", "dotted" = "dotted"))+
+    facet_wrap(~type)+
+    xlim(start_year-20,start_year+10)
+
+}
+
+plot_1 <- plot_timeseries(luc_value=1, title_str='Undisturbed forest') + theme(legend.position='none',axis.title.x = element_blank())
+plot_2 <- plot_timeseries(luc_value=2, title_str='Degraded forest') + theme(legend.position='none', axis.title.x = element_blank())
+plot_3 <- plot_timeseries(luc_value=3, title_str='Deforested land') + theme(legend.position='none', axis.title.x = element_blank())
+plot_4 <- plot_timeseries(luc_value=4, title_str='Regrowth') + theme(legend.position='none', axis.title.x = element_text(size=14))
+
+plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(ncol=1)
+
+```
+
+### Deforestation rates in the matched points during the baseline period
+
+```{r proportions_undisturbed_degraded, echo=FALSE}
+
+# obtaining the area of undisturbed and degraded forest at t0, for use later
+
+source(file.path(script_path,'def_rate.R'))
+
+prop_und <- get_prop_class(data=proj_input,t0=start_year-10,class=1)
+prop_deg <- get_prop_class(data=proj_input,t0=start_year-10,class=2)
+
+```
+
+Here we present the deforestation rates observed in the matched pixels over the past 10 years (the baseline period). 
+
+Forest loss transitions can be broken down into the following processes: 
+
+-   degradation of undisturbed forest
+
+-   deforestation of undisturbed forest
+
+-   deforestation of degraded forest
+
+-   regrowth of undisturbed forest (implies previous deforestation)
+
+We calculate the rate at which these processes occur in the matched pixels using the following method:
+
+1. Calculate the percentage of matched pixels which have undergone one of the above processes (according to the JRC classification) during the baseline period. 
+2. Divide this percentage by 10 to give an annual rate of change, in %/year, relative to the amount of (undisturbed or degraded) forest present at the beginning of the project.
+3. Multiply this rate by the area of the (undisturbed or degraded) forest  10 years prior to the project start to give an annual rate, in hectares per year.
+
+The amounts of forest in the project area 10 years prior to project start are as follows:
+
+-   Undisturbed forest: `r format(100*prop_und, big.mark = ",", scientific = FALSE, digits = 3)`%
+
+-   Degraded forest: `r format(100*prop_deg, big.mark = ",", scientific = FALSE, digits = 3)`%
+
+The rates are given below.
+
+```{r rate_of_forest_loss_ha, echo=FALSE}
+
+source(file.path(script_path,'def_rate.R'))
+
+df_rate_percent <- def_rate_single(data=cf_input,t0=start_year-10,period_length=10)
+
+df_rate_ha <- df_rate_percent
+
+df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3]/100)*project_area_ha*prop_und
+
+df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3]/100)*project_area_ha*prop_deg
+
+knitr::kable(
+  df_rate_ha %>% 
+  rename('Rate (ha/year)' = 3) %>%
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", .)))
+)
+
+
+```
+
+\
+
+### Carbon stock changes in the matched points during the baseline period
+
+Here we present the carbon density calculations for this project.
+
+In order to convert land cover changes to carbon emissions, we use regional aboveground (AGB) carbon density values generated through NASA GEDI data. 
+
+More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi).
+
+Note that, in calculating carbon stock changes, we assume the following:
+
+- Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997)
+
+- Deadwod biomass is 11% of AGB (based on IPCC 2003)
+
+- Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011)
+
+
+\
+```{r additionality_forecast}
+
+baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8))
+colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea')
+luc_counter <- 1
+row_counter <- 1
+
+carbon_density <- filter(carbon_density, land.use.class %in% c(1:6))
+
+for(i in carbon_density$land.use.class){
+
+  for(j in c("Start","End")) {
+
+    # get agb
+
+    agb <- carbon_density$carbon.density[luc_counter]
+
+    # get other values
+
+    bgb <- agb*0.2
+    dw <- agb*0.11
+    total <- agb + bgb + dw
+    #total_co2 <- total*0.47 # we're doing this step later
+
+    # get area of class i
+
+    if (j == "Start") {
+      area_of_forest <- get_prop_class(cf_input,t0=start_year-10,class=i)*project_area_ha
+    } else if (j == "End") {
+      area_of_forest <- get_prop_class(cf_input,t0=start_year,class=i)*project_area_ha }
+
+    # multiply total by area
+    
+    total_byarea <- total*area_of_forest
+
+    # adding to df
+
+    baseline_stocks[row_counter,1] <- j
+    baseline_stocks[row_counter,2] <- i
+    baseline_stocks[row_counter,3] <- agb
+    baseline_stocks[row_counter,4] <- bgb
+    baseline_stocks[row_counter,5] <- dw
+    baseline_stocks[row_counter,6] <- total
+    baseline_stocks[row_counter,7] <- area_of_forest
+    baseline_stocks[row_counter,8] <- total_byarea
+
+    row_counter <- row_counter+1
+
+  }
+
+  # advance counter
+
+  luc_counter <- luc_counter + 1
+
+}
+
+# formatting bits
+
+baseline_stocks_format <- baseline_stocks
+baseline_stocks_format <- baseline_stocks_format %>% filter(time == 'Start')
+baseline_stocks_format <- baseline_stocks_format[2:6]
+
+colnames(baseline_stocks_format) <- c(
+                                 'Land use class',
+                                 'AGB density (t C / ha)',
+                                 'BGB density (t C / ha)',
+                                 'Deadwood biomass density (t C / ha)', 
+                                 'Total biomass density (t C / ha)', 
+                                 'Total biomass (t C)')
+
+
+# renaming classes
+
+baseline_stocks_format <- baseline_stocks_format %>%
+  mutate(`Land use class` = case_when(
+    `Land use class` == "1" ~ 'Undisturbed',
+    `Land use class` == "2" ~ 'Degraded',
+    `Land use class` == "3" ~ 'Deforested',
+    `Land use class` == "4" ~ 'Reforested',
+    `Land use class` == "5" ~ 'Water',
+    `Land use class` == "6" ~ 'Other',
+    TRUE ~ as.character(`Land use class`)  # ensure no unexpected values
+  ))
+
+
+baseline_stocks_format[2:6] <- lapply(baseline_stocks_format[, 2:6], function(x) {
+  if (is.numeric(x)) comma(x) else x
+})
+
+# Print only carbon calculations at this stage
+
+baseline_stocks_format %>% 
+  drop_na() %>% 
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) %>% 
+  kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>%
+  kable_styling(bootstrap_options = "striped")
+
+```
+
+# Results: baseline rate of carbon emissions
+
+In this section we present the annual rate of carbon loss due to deforestation in the matched points during the baseline period. We can take this to be a prediction of the counterfactual scenario for the project (the *baseline*). 
+
+First we present the carbon stock changes observed in the matched points during the baseline period:
+
+```{r results}
+
+baseline_stock_changes <- baseline_stocks[c(1:2,7:8)]
+
+# reshape
+
+reshaped_data <- baseline_stock_changes %>%
+  mutate(luc = as.character(luc)) %>%
+  group_by(luc) %>%
+  summarize(
+    area_start = area[time == "Start"],
+    area_end = area[time == "End"],
+    area_diff = area_start - area_end,
+    c_start = total_byarea[time == "Start"],
+    c_end = total_byarea[time == "End"],
+    c_diff = c_start - c_end,
+    .groups = 'drop'
+  )
+
+# get totals
+
+total_row <- reshaped_data %>%
+  summarize(
+    luc = "Total",
+    area_start = sum(area_start, na.rm = TRUE),
+    area_end = sum(area_end, na.rm = TRUE),
+    area_diff = sum(area_diff, na.rm = TRUE),
+    c_start = sum(c_start, na.rm = TRUE),
+    c_end = sum(c_end, na.rm = TRUE),
+    c_diff = sum(c_diff, na.rm = TRUE)
+  ) %>% 
+  mutate(luc = as.character(luc))
+
+baseline_stock_changes <- bind_rows(reshaped_data, total_row)
+
+# add in conversion to CO2
+
+baseline_stock_changes <- baseline_stock_changes  %>% 
+    mutate(co2_diff = 0.47*c_diff)
+
+# formatting bits
+
+baseline_stock_changes_format <- baseline_stock_changes %>%
+  mutate(across(where(is.numeric), ~ comma(.))) %>%
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", as.numeric(.))))
+
+if (knitr::is_html_output()) {
+  colnames(baseline_stock_changes_format) <- c(
+                                 'Land use class',
+                                 'Area at start (ha)',
+                                 'Area at end (ha)',
+                                 'Area loss (ha)',
+                                 'Biomass at start (t)',
+                                 'Biomass at end (t)',
+                                 'Biomass loss (t)'
+                                 'CO<sub>2</sub> loss (t)')
+} else if (knitr::is_latex_output()) {
+  colnames(baseline_stock_changes_format) <- c(
+                                 'Land use class',
+                                 'Area at start (ha)',
+                                 'Area at end (ha)',
+                                 'Area loss (ha)',
+                                 'Biomass at start (t)',
+                                 'Biomass at end (t)',
+                                 'Biomass loss (t)'
+                                 'CO$_{2}$ loss (t)')
+}
+
+baseline_stock_changes_format <- baseline_stock_changes_format %>%
+  mutate(`Land use class` = case_when(
+    `Land use class` == "1" ~ 'Undisturbed',
+    `Land use class` == "2" ~ 'Degraded',
+    `Land use class` == "3" ~ 'Deforested',
+    `Land use class` == "4" ~ 'Reforested',
+    `Land use class` == "5" ~ 'Water',
+    `Land use class` == "6" ~ 'Other',
+    TRUE ~ as.character(`Land use class`)  # ensure no unexpected values
+  ))
+
+baseline_stock_changes_format[nrow(baseline_stock_changes_format), 1] <- 'Total'
+
+filtered_data <- baseline_stock_changes_format %>%
+  drop_na() %>%
+  mutate(across(where(is.numeric), ~ sprintf("%.2f", .)))
+
+last_row_index <- nrow(filtered_data)
+
+filtered_data %>%
+  kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>%
+  kable_styling(bootstrap_options = "striped") %>%
+  row_spec(last_row_index, bold = TRUE) 
+
+```
+
+```{r results_summary}
+
+# find the difference
+
+delta_c <- as.numeric(baseline_stock_changes[nrow(baseline_stock_changes), ncol(baseline_stock_changes)])
+delta_c_annual <- delta_c/10
+
+```
+
+To calculate the baseline annual rate of carbon emissions, we sum the the differences in carbon stocks between the start and end of the baseline period, then divide the total by the length of the baseline period (10 years).
+
+**For this project, the baseline annual rate of carbon emissions, in tonnes of carbon dioxide per year, is `r format(delta_c_annual, big.mark = ",", scientific = FALSE, digits = 3)`.** This should be interpreted as the maximum estimated number of carbon credits that could be generated if the project mitigated 100% of the baseline level of deforestation, assuming this is confirmed by ex post observations. We present alternative mitigation scenarios below.
+
+### Expected additionality under different mitigation scenarios
+
+Additionality depends not only on baseline deforestation rate but the ability of the project to mitigate that deforestation. Therefore, we present the expected additionality under different mitigation scenarios (100% to 10% mitigation success). The 100% mitigation scenario occurs where the project is able to mitigate all of the deforestation that would have occurred under the counterfactual scenario. This scenario is unlikely to be realistic, but gives a sense of the deforestation risk faced by each area. It is more likely that only a proportion of this baseline deforestation is mitigated - for example, under the 50% mitigation scenario, the project is able to reduce the deforestation rate by half. The higher the proportion of the baseline deforestation that is mitigated, the greater the additionality of a project.
+
+Note that we present the raw additionality, without accounting for leakage and impermenance (discussed later).
+
+We are in the process of producing confidence intervals that reflect the uncertainty associated with the baseline, which will be added to future revisions of this document.
+
+```{r}
+
+scenarios <- data.frame(matrix(ncol=2,nrow=5))
+scenarios[1] <- c("10%","25%","50%","75%","100%")
+scenarios[2] <- delta_c_annual*c(0.1,0.25,0.5,0.75,1)
+
+if (knitr::is_html_output()) {
+  colnames(scenarios) <- c('Scenario', 
+                            'Additionality (t CO<sub>2</sub> / year)')
+} else if (knitr::is_latex_output()) {
+  colnames(scenarios) <- c('Scenario', 
+                            'Additionality (t CO$_{2}$ / year)')
+}
+
+scenarios <- scenarios %>% 
+  mutate(across(where(is.numeric), comma))
+
+knitr::kable(
+  scenarios
+)
+
+```
+
+\
+
+# Accounting for leakage and impermanence
+
+Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**.
+
+**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage is likely to be lower if the processes leading deforestation and degradation do not result in high yielding land uses, or if the carbon densities within the project are high compared with those in other areas where these activities are taking place. Leakage can also be reduced by interventions which improve yields in areas already under production. We can provide guidance on how this could be achieved.
+
+**Impermanence** occurs when the additionality generated by a project is reversed. Additional carbon stocks in forests are inherently vulnerable to these reversals. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. In future revisions of this document we aim to include indicative estimates of the equivalent permanence (the relative value of a impermanent credit relative to a permanent credit) for this project.
+
+You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence).
+
+---
+
+### Reproducibility
+
+This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation).
diff --git a/evaluations/methods_diagram.png b/evaluations/methods_diagram.png
new file mode 100644
index 0000000000000000000000000000000000000000..0e544b378e8f6fab3384fcbb5afd0cea6a64b5cb
GIT binary patch
literal 37672
zcmb5WXIN8R*DV}6f*=Zr5Slj%3J4+rq=P6Z5Cmxg0-_K?7ij`grPwGI5Q20Fp^FK<
zNw*{rx}g`P_g<54<$j*`yx))W=lt-}Wbf>?_9}CZImTGNGBnU(W8q_gKp<>)Zr{8I
zfzZJq5ZYO0M(~YoDeVpL7me3Fog0vnPX2lDf!<LQt_gvZ#jzgQF@Vn}p4_(Zf<V|?
zsDEfWJTUeU$ou*`H#JS3*(@KkCUN&C?}m)y3)@ee)8x3r$f@!6`VAx(tx`Ob3cU&=
z`BB78rk8w8-OL}DS(G%mxaDZFPQO%&@6It=4>O|Y;y$Bf#5FJf=-W$;f2JW}p{Mz?
z)uwAhObby93!}63f#&_>;F7`;8`GebG2-^{uH~-tx>FPl=$8iVacbZR@DIo<tqkfH
z@%QQZ{{49C|Nar@W{|NwTxpeOkUGN4p<dcZu7M_R&vm!Ngg%>CKLMG)5h-fNg$USQ
z_*+juwQwqA?}gh)RkyO*!JLR6_si&svV1y7{BwF#M6#@7?DBZyxTt>8L<zOYwl^9d
zgk1)T%1Btp!0BAQPXSEeTSGiE?8LR818;>5qg>EKjWQA9g@^)~0%C-d0HHaOb(EKS
z*K=@};V_EVIu8sx1oE0YOq2%ajz$x+zHp|#rcM(VD=i#<`8x({;RahS%F1d^rM_?5
z3k^=Mf(8tW2ptnUZlKsI@Nu{QLwu5qd&HYy)V}}GNNP?W+ufv8)nBfQf_^lw3{S?v
zC&7y%4d_wlj737%vMBk+c{T4mn!*{}?QJ5_nQXM)y`E2Jv}_JSYYkr<4(@s6rIfW3
z;e-}xx#b!3`ScJSQOdIFgu%UqyCE`%znE4oM2B}o1lxM%mP!YFR%$uL(ik*cZ*+2Q
zv8PM6=$v56J$&3<h{mP}vGrrYwaI+MbVpq2y?3WgU7FA5mAQQmscboQg>)(6Xv4v;
z=XN9Mk?$c3#lh?M)wZP{A)aE2Iq7Z2jb365J9DPwT~cmpt)cx;1*~>LB04rrfM7G^
z?7N)xEE0uzt|~u`{w<Ua37xr>@#?avRk%EXMQYGCq+TKR9&x@e|ARtFG1StHOlDTc
z*%xNuj}u5tLbV_MK|4luH4ZL?m9NmUEF|e?R3A=hb%Yxs#+Tf7)P<6-*eQZJx|Q3O
z$Lsz!$?)p@NVV7BAzo8EG(2S{W*FyW>>o7V5M=YayANmA66HT=KV0FKa6V7I=8o--
zGKF_#BG;XrE&zS_nsR;XfOFV9#MeS-x(z?PtU8xY(Q?63N<We0N|JKs&l`2eaUP$u
zoln~TtcsUfolSqE7CW|DK!b=Y+AP$~s;n2;ERIe;!9FIx$B#<OVtLmP*Xc$edsLG`
z9D*ZQo;rHV)OzGld}&PWn$xw$d!v&LcZ*9=0wHfnk;!qK+((jP=_&q=lxu;(*buK`
z-S9E#J%9QwxhsM73$x6Wkqdh@9iQ!XX6eWsDupQ&<({qDO5YT>*RU$%+$($J3&S03
z5g{uFN?l5doHnwyUTZ-SA!N34_fe(8xokYsuessMQUTAKGtsFc-Qym@M}h3>vDuAx
zkH<{AtN$Jq#HGv)&pSRnIAN-uI5*x#=YS}67?au)a12xqO6@Mq&5wht&S!}6hL2mp
z$MsCsp7|drVXe}43N6kYrMy-{_mcM>)b0>vIq4YfgCUU6o7p!wwI;P8ETRR?eTX??
zrrEuFQ=Qio&at_2tR;k8UHnw--xG7=SxslE#4+@hC#A#sWmQFAh|nm0o>8mAQC8vp
zZpn^^@6Ew2eDz#zrw<)Bv4$z=?fdED!TFinE+@USxt_oMW_wLRURKt&x2*UGsxhiE
zAEl>&+%_VMHJ<5Td_z85L>MU8`KeBi32Tj0`tpuSCrj`#$?TEmMos2j@MjUt!Q?Db
ziZVglzF}~YF<HRBin7Ukcb?Uq{+37fs#Us{M_!k}_8%AWU_SK0afx%*s+Lf;+stfs
zT%p!uYu1psGe;}r%ovI<&o=&fES$_{FLkMPPTNRFszu+8>%zq&Sr&@gi}do~Xcm!a
zAs>&AnHd~G%Bm&XvOQhm8I`@b3P17`CH6&S?`mF2IkG&z>#^~cTwv3AFR#+B;?l`t
zYXs$Jl_XJQG#>OjB!AV2GJr$tjQj142g#_Yz333NV}$gCX>iJRw;f{<gn}MYk*Hpx
zq^HOxx**@Vmm?s`ylY3i^`-l_?URKCta7TGhr`9ek$+b3@W!+&{W6F5C_al7`_ewe
zi)G~d(msL_w4>MOlOK(Kn@>j8yH7pD3fcT=RNFg80}o1pwT4&)8Pxa5)-_=X+v`H(
z>*MCp;>sJ&Gl**$^SS{8$Bv&4ZS&aSjy+vyHJ4Z3FVK$T+d*>{jzRqOrD+N5m$v4j
zOczgX$saUqD{f)&71}zP$o2l{U~SSI!ngaNK*&70y#_tVpio!i-_M5JbKBltaQwi<
zQW?Ogem(qQQ9P8)tGXFT+4VoRhz=g;mEQ7b*xq9g7PTDDtvozX7YtBV)#lpTzJBWH
zSpwx=qxmsDys~%t=48!fm!gFLS4N7cn0*6J(5Zuw1HLVf8(~&)g*8XU=9IFtN1fYc
zTS3u6!zyYtkULr#uXrQhWhiC;d=q<Mc+qidu6sq&%XnimYCFml*$q#2Pug<LcYoro
zm6`GiHHVlxkR5%+86o7OSfIyoKT1Gc%D?k^Q+h!bH0Mif^4gh*OpQu*cf(TwJIef>
zFf-vEHfs}D>%7<CY+#Q`_0=GL_=z4a=;)Vu;{7I_FsbL5n%vTh0lnvrYp@p8-67-T
zWat3qeBNHE)lor+&3Y{5+Cjwue}VjkF!u@Whv8b8X8kuKJ0{xiGdj}h8YZ0R9lk@v
zOwseyq!Z{fp2#g1kjc|3U;Oh(GU?mn(nqzdMU-qP%rHnWrIF&H@SQni<+vmzsiC6g
z5_9C`(7D;Ob<)(laOO1fk!X?<I`oWN?*krFuPt)H+*>}mpVr0dqf2w5&P^W2&~|8a
zm3g0rf|f%vR9RKIL~-+Ybz_jZ&zysU>mi>+m5iI@Xig-<>}1NL81AixXK!nX8q3!d
zqO(#aCKxRhCQt*MYT>%ZPhJdh4DTFBtB2qy*Oo9M$Db@`h0E*R?p<<~*kX(xh<euh
zUS0@gb}_*9auMcrb_a%CRhv7!dCM0So@>^EqRp-B=DMcZ&|=Coy0rL`ydr3|+GyjV
z^7P`t?~~;Eo9dP9C!*7%3|}w=-EE%cgv8&ZM<p4#Ahw+@NlTZ0l|rj=*>43qvlx#5
zH0Z<h9)ChHnpqKv@CNzLUXyo;$xvz4Ujf^LM~*EY^v4@-ymx=St&fXMN!%NR9ZQ*S
zZL*EWN5`Si%NdA(9h=_wCa<~){)P@l<wh}Aa3yyVBJ>P4g|bn)XLEPW3+;+Y#J+OM
z&}{VKoOV+8Rc<wywdDBMARB>h(G&0{YVR(9-W7D!B%@$|gM6@ADTy`ic$n%<g;;a)
z2X#5gB$x5{l+=u=(>C$^MA<(9^7S0BdVNu~Ng>NiE5)@)`-@I-fio8B&|<sz3Ckq(
zHs*c&@~jb$&-eF~;I0tf9da~TPtG`gXiR=SR{3X0^<oeu_iRz&0w(47;pnT_4Ck}N
zt_SKN-H$Ee=D)ro*PJ#L4R<m-xNq6WnY%l<Dz$fR-*C8*Cun7^`QV^uH#=VR-+gm4
zLo=JcS1Ww}pRghKf^g_3jqy(MuepZvP4LV#BM*1WtzsuF%Pj#Fk>inw1(&PNXWZ;F
zbQ6`F=e6&f+#wROOCs~iUyUg!aThd1dG8k9M*YKGzQIQEX}P=E)n5$H@+d6T8LfD~
zWbUyq9T@T>BtQQ{cfJ*=;jY{%BCcTWAYS+;eQ(iLQeVegMxNqB;;K{tXWEr>Ff%yC
zX6ty)%cG!!54Cn6+`XH*Dz>NQ|C|=`{-wqX?fs!wWIdmoJ@EJLZ((veZwj)*83q@&
zaw}Qg`L^||(`W|a@5juAblmJ5RaeI+=zGUey%WYqK3qdLjZL#A49hWUU7>{{i32us
zf!)I{3#bA4h{jJDZsj&SM@w7y%E8!5t7&n~n$z2sdn;;N`##7bmw?nU?XC4~|7MTE
z^jb@bb7Sg-m%fU<nQ{*nU})%uaOlE%?y7O=V3$QBXHdG4vtUNY$Y=ax>AloBw{2yk
zb8O;q&UPhUs@mHJ6U%}}F(KBy1GGx!SBZ<+lNmBwa`emnFIIJI3k7u!hWfu{NF6Bk
zpRrhlk}G#gXR8#xQ!({zK3X=uTMBP2T{562uVf9xyc_7jd_A2D{rIq{JdNShYDH;9
z5vsJb9=7Gl<=e--)Nnf5{Ryg9bnuR@?S}!&Ip&@2?f{n2&jZ@VYbK|OG1+4_TV5>g
zFK?+U!$VkhCWu`(4_)TGw3Tb+BHYp2)v#LzLgqSON_F?Nt+xE@(gSDEMjm;pYB=pn
z`PQA@n4m8Wd25zu%GKF#ga&uAj94ym7nA2@_>K&=YI}#BgOUW_Om^S|KPV0Q)Hk3j
zIm=(0>A+Ok!`pV?xjR1&1h=1c&kn2_*m?!!-CS5UvF<-Cco){3UJ^+uUf-jKT!6p1
zTTUzRsvF?LpB0^lJIgLT0lX%D3vcl;b~B9EhhtL2<!Wf~6?>Ll4;RrEG?(~oUiYVX
zG|X!w#8SQALo*Fc>`bl`xpdd|=tFUtH|Qez`VQoiuH`ru8&$?Y6^5eNpmo#PaZi8$
zI`=*7e@YTQ;hWs&in*BwF2N9~w4gRXoeE=QP7ZSFmt#@Fp{Mp9<E;jJlNh?erSC!m
z#0&bfovCi1cVqXVNZ*CNA5c2ajaJUPCI)h~dsA<gUy8)8wQKNyOiZkKNBaH_KIaXA
zykrmug(C!V=8Xm&_)EBq`Tz0b(~_XWodhNh1+bHatu3FUCfjd^cBU#1?<^13uUEJY
z7a8XnjGY6|zNBq}q&9WoY#6ot(Rg$kdGF7+@e%5Gm(CPr-bq^HL$NhdJ`^8ruJ5FC
zBNi#hirG*<l2V0?%izzV?eP>J3KRB2tnA|8MOkt<>0R`p2es2VG-0B4bsC$tf%pJ;
zS=UZ7Qt0`}mw&7{e7%v90&0#sxfO`|#y>*TM(=ST&!CMzmF#qce0JP9<yb3G-G6{Z
zXzAe_x_e!}Up4__0^#rg<=}pF>1U7CG%`;4SE*V0dJAD+72l~wDqR3KKs;|`+)0uS
z-Zu%%qm&ZZ)jvv^+~NJKs2i&=ObH`(HX>qo@#H-o>~bJpz9A491UE*$Mxk@ImyiSb
zZZpW)XnuzD&~|!imtZ`!@aAVONHzS0*Y4)O)#HvROHr(9vsZRXe<hA0Qv6WMze6r9
z&HarQa=byL*oSx7sGpSUw+nIfy~otZ+cYk;5)vzL$nNvd__xuyg<(a`)|$f@mUuO*
zN_QU-tdw<4)oG4X|IQ95NjX=Zf;o|2q}EQIk)N)GOs5aK?zA#uU;B-;sTgNeAGjbs
zZEp{*3Jat?n>qoCfVjH#HXd779e&RU1;(ii9Ih`0uN})juMG*!c_hSVa?fQprAt<#
zk+f87ML^q3+<p8@3?5zU{>~y!2z8~C{xW#SjPuN}?*%S_SSOx?$(iHyq+)UdNuUL<
zy(P0aaDKO+3$9md-$1HP?#i9_0QYMIUHvO*2WoZ)Zf@?es=B%nU0vNe&-2LR7(R{&
zy6=aS$2%LoA;qysuda$>yi$ey;hzu!ImzG3jn=Z_FR=!732bqnIVRiP!9T+ONl&A$
zKmN%J$zcTxWr_=lx-mKP?h_tSv<5wEpS>-2>cPt0NUg6NNDfCpHr(b(qiAblHm)6<
z#12vjrS0~^-;5Vs4APzCS9q%Dzk$UYx^<qIk-=q;Gt+6(PVqy?t_~u^#vbi_SMsCQ
z@K2awFB#;{-<KOJ+S0|cr+e{e;bi9iQrvwzmr!{aa7)G>XiQ=utHP;<^t;r-gL!R4
zw6SgT3T0m5BK7?}MTo+_eP;qzDhUH)4+jqQRvx_!$GiPHeu&**^E(Z0x%Eop#qWE5
zSRM~p$~Ix?a16PSyi?4(pV1h{1}?t_p`r}!<T%~6k-~^nvXp;gBctqJKw6qT_9cSr
zmjvngF5Kvl4{m|Tu^ZBbU2^MDoRq=ZRHEu}{nj^|a313g8<44}4lyulP!~~sOZfp;
zmg3B6uUrz|(b@gv6NiwH0?Wd#Dpv5c+ZV-Qw?D6q$!QsQ@`wmZ$}Y-`x*iLSQkkZl
zGhvLca`PP}8fNd_>ivkrEC&)8M+i}$!8IrC49zKqz(I_^hW9ET{7!GwXJ=}tppLIF
zH_lAk1!0ryI#B$g_FPsJO#~o<{=H(V8@sDTAWGJs;c{N_A4mni5awov-4-x)5o{cC
z-OFFO@ty7NUK2IU%xh$5iaT9_&zq98V$f+$)@&(HKrFo=6vmj(9f2yd_2RkFx9|G<
zL5P+CxcveT_#SO2yE`1*5{hwXlwI^mA$g^YF#N#-E=J;%u=}&;v>G)WNJFl^9{(oH
zf?I0klDx8G1?l1@L43@Ur`!JT-@g^0eqP-JduPN{STMW;%?{DnIE^%fX{S#rup0J;
zV%nfb&V&rWL>v{9$frgmK+7C1^n6_0<z6;m)fij(oqt3S!nYrd6cOmvC*B@LlCsP}
zC|UIA0pwWdGbccoK}Bf(XNTPH4bav7>RJ`9lus-BIFb}p2t8}U6){wPZSe_?vZ2?(
z&*!)I*`h-pp(D5w_lCrISH(!Ne`)HCbcIOl^=6PK3@t4ev=JgYvECveNS;QXK@c-E
zD-Z-ekGg<uXXy1O84B9dNHzlA@u<XcAM=eqA)|#HlKiE2sHp!PVpJ(9b;BRKLYaue
z!o&2GXxXOTf_yUH{85&xykwamST(ZLxVoX7=|c$*ycO#@z9__Efa*kdI`671-gQ~@
zswEkX%POy*#afYjT{jJO4K6>~n~bh~N9qugH9rM``~l1PJMtJnnxREwLWM(klv0)x
z1@(S<(^k{ycePtJ=|^%`vAYY41{g_<5G@-=rqiQ^m*``cpC0mo6fwl=h*QN>U@$Q`
zAWX<ND&2XYv5>N3sqleyp<>{14do2@na^W3c=MMi(S|<cLkcBnS@EIoga;Y*Id!#q
z(4k~;+hb)z;u^{ZU6~(){+93j@#bB%{Tu<6XGEy2clWJ<uMT=1CtGP;kPu;O3=yK3
z<>XQ@utk1P`X-&irr1`xHD`D=+1GAHdqEd#E_!I1PH7k(=W)E5zQmn&B5wamHV6xb
zvb1c5!p_S@PA)8prIZvBNvzg9;=%S+7iZ8xXTXR)(ik1UY0v&7uY_kiIXymG!wD{!
zP%lh<!8dBl{X_IFzgiv``~{zS;bw}woB02%SR$9O9g-zJ81)Ek;5dn<u}lZQ7pSIy
zy?_a4VdIc>mVDB2)o%);HB@E$pQrvqgvfX=XDk?ZB_79?ID&?Z==p@vXWz=ZZ1eg}
zjo?~K2f&x#bU^F6#$E2mQOcgU21N7p3O)E5+}a7-E;{5Hd4*l8)K>+WnwbL`dd^s4
zmPu;YHp;1m5VJ~cDD)p~@PHCq9&A2O4X4hP)M!HS&=4T#GR~bwhSFE<t6<OPRdG80
za>uf+iu8dWOo1PqnVMYPi=k~EIBHrNNe8nvA=TO%E1>*;UKq3$fWcs%WH&nv{#;rw
zEG*5tJp4$0!X29$;=}XkVM`QSh3~TE>h))98MQ2}T5G@iXdt)MzyzGq`y^Ga`GS!|
zK0X{xXr4R17!ra)Vn}tpMP18eJ<x$iH!^4zoCqknfoiYWaB~-_MHjOG(`~qd-;ykJ
zO9Z<e;>`nw;58(S5eYFxQnxAAvCG$-<?&uVLZZQ3wGTOzd?N;(3Fe%Oj+Twc%qgn>
z1yGIE*yg$M{-ri3LFsv?^^?AB{Y=C<BQ>cAw}I^4TjS%Ruu;fxiNCnes2m+{OsolM
za$RNv8?cJCQ;gywzd?ibZ}g=$e($vg9W1Q*?Tvpq>4cfV@_N$3YDxnCs?>EBx#?g#
z-&x&%9~{XH(@?G39Sd~-IGQB$xXuX~ndRhI(1+x+?1Ss!ORWAd@q>59ou@8pZJL4s
z*YSo?YbP6Ljtt!xKE|7)l|$*_=xsUBrV|*Kr?-uXCi$294n&vlW+!$IvOfg@_QJ$}
z=Lp=C14iZhyD&PYq4yW{TmegMic)|nM4|$9k_ZQgZMN(Eo&MiO5yQJR)YNhUtS%G=
zV8{v(hgQ!ic_TGj^ndSRLHv9dUCY`zlZwb2cQRX#tpzv7Kcwf&cm-<OOwm|e8lwLh
zb|Bq{hu`g6zAga-@ko9b(pQJSG_7oarnt>Ro$5(B8I+}&jg1ZOqpg}76>cSODrsgf
zU}N1z7<AZZap;+*YkudWJCznCYv;1w)V)a2ZiWBvO1Y~U-`)51{rfX1axRI1F{ICm
zUL%*%aO*P?AC6<LxAgtoFNh|zAE|KS)t>NxNjwvwLD%=9S&ze>)>4|Yq(?tRo&fDg
z*eUQ|Z`Z$i__dbf>slp3H5mC{Bq0oNlZI*^ju^7gm+#I+5syKZqD(pQQj3dH5lg=v
zefJwvwrdC;>v(ODPK8f^yE)$qs@CWxXs5Wer&=ni9|>Ts8jb3Uv6w?@hEfZ8q0a5r
zErAdUUa|q1ghQ4(V9lne`QE%*qodL1X`~^2WcLBe6-SeY&Wd?XT>8D5V!dbF7zTlQ
z(LzG6gH-_k*eVXs0Q+@N)_*DNS7D_uyRi<=p>Y$+%pa0XdHTzJR~IZ8S`a@J<NH9L
zhe!#>1oEL2)?ysC*f&t0#fS<<>k?j7Cb6abVg%_s;|u6H`^`d_=h!E-sE(k!JXsQ3
z>AtBjebhAQk`{aRzU8FCX+v2WIc_k^NOurhbU>$1aEewLZD1aG?fgLa*}7IZ1&B_J
zIQw>7#lD{&h1zHOc1>1v)%q?MsDxHgC6amS^1M(Z4L?%|BYC@d_)c?RH&TuXiaJ|A
z#suI{uY!ACEu0AHdR(W?vq^@3Lqj0z|1X1w@^{<FIpz2C<?MdK>z2ZXeQVWK;k|uX
z0EM`o;stmE#!1T-ujN$SbJNFlT7iT35eC2eMrlt$0rdMji0<)P1wB+QEHxLa%ZZLf
zItypW$%AW=8lY>gM|<nHjB-!(@<HLZepOV=#08FmYr?0%*3V|q&`|jGG98&9>U7ux
zOGGiK90~ZHxpwIP@1sEwiDxS4p|9{5x8XA7Gs5r&7bttC1rS_kZAZaGYs7+Bs$!TR
z%J2L-S68GmxP|RAWjDW(Mhsk21F=%&MCB8+1jKw#?uz7SjSpt5bKgAV2?*Mu{31k%
z|2@1Zsxgip1rX8U+G9VEHR83vEX09Xc>VfyR({V-2^bR@Q325D|G8$8%c~lIQ<g{*
zCpfZuy@-zvo~9x)#hV#48?FOI7Vdw{oxF3be<fa7o^Et;3L$^Vt)057T==Xb6~d6y
z;rIkdl5rYze=KmivIzV+1>I6?r_!&6;FVkQ^ZjoMB2bHGt7!~1c&xpR{{AO)6HmS_
z)9sz~A~(S79IWtyl3w8TAc|mYQJQ5Xxz&%8EC6EVbZmYM>Z*Tpk0@BAgbNRHjVfiA
z;U9bpdM@=v<WG~T)n$hF&8gurgSx3?Uzs^kWkKzE-(}(dWTQhHtfu-u)NF(X<3qSq
zCk-z%ar!g4ngk}^fFG_5eWH-s1qQbl3@)M%NOD;Z^m<DYNCoSNV)aF->sG!gscOBh
zW2ID-9Lh-Thvb7?i>fDf-6l>oK3A<gQyWV}ms;Wo?Z?fo_^x9a@Gvdv!_eXcVy4!p
z#kz@84%mhL<GU^JzUJ|Re$;>S&O(?59hXMcv;|)w?BC^|#YM%@zD?#MksQT&ipK_x
zkFc|$wakC+yHN?-4e(Uj+kpuPF!4LhtkTaqnqDxC_G{r32*#90zKaTY_BTh;TR$iV
zF3tvZZLUMc^w2*x{~HE{+iDU;mu5EcW^l|H4SXjg{zSkpJcu>akdt`ac}f|~f*r#t
zn1h4EK-MkR)pOVEU)HkZ*xtGG%8&jvK<B>!NUA4$ARafG)mw_N$h=WV0zn5mEtWjG
zk-lULmLWS}O}L2R46GxVXN9mV-%hVaw~8G`^@+ZlnllxD5vgJ+qqv9?z%U3ihM_|!
zWOw@=ylhw1X+wTYj|sQ8X0Objk4xN8qj*rlP)k#{=Y!S&r`)=HmK)_c-Cq5Uly&RP
z2=gWO$#NI~09S^@;YkHBi3Z1t<sE$2Z1|FkYwK{*rU5p=gIe=s%(ebQ90^z~9ze&0
zbUfW8C=8F)2iD6JxQy16LzAgOa!kDag6~-qr!D3FKG{7kd$jc&6V&GxGys!iI{$4P
zFICrN>j+-$^<7C?I8Yefwio(sF#d<jmGv`_2tKY4PZAKFpA}7<{()`l9}Y<JI#xK>
zS>#o$aL4j7y?q{PNC(l7p$g|vo>owtxUEmMjqTO%j$r{P8EObRT#Dc{_)=nB8!!l+
z!u1+pd1Vn**sbKNcHx9H)nTZMSNr<3te@ZQNI=(^Gx9QgcIrj@PhSYw_#q~-n_dm_
zU6I4D#Ift<ef%25pMO<UvBvIHcAJ!w*(jxSw{oaVz%|Whe{L(#LN1s#(ZvkOpE`H-
zTdLgXMAHkmzdsXJB!ie?^SSwm$L(*|+fkO7hv_E!?69nS#4I_&@I@5ZHySmqk3eCL
z?atPrD)@C}13ee2Z}jpzvq)(Y0pPL*3n%|<;bq6_wKnPCZtb_{COdngS>QFfxw^gl
z+$arr0?J(gZ<D;lBqaQng$<IU*F?w07p3tb7);}xV*!^TM-F7)ACdGhZD$s_?5?X;
zSAV8LIza(6sg2;VeO?BofxLgM@!={xYD=%z5;cRP(19vA*)0xs+r%7)z3U4ih_KIn
zExJ~H?<;JFJ$-i<yHKh}0*!56zpHFfL3?G&AWGy@5jcneF$GUCLUPPWPt%k=E0*L_
zc3r69@Sa#jm^OhnQQFqsZ`9|LpQI)3dwqLIYDxZI*?4UeC%)D8^TAr7Uy>#Adq7|{
zD`SPRNCh2it;-LnP>wd;dUEcJoo>hs+#U6+cNuD&NqQVxw!b-BI|!NAOh9|as*Jey
zJoKAFiWn3C_AbY`h?XEX_+Yx6_@FVkzV@ENl_WuB-=f_H4b({irH|rp5$l1jB9}nF
zsUz<gk{D$X6uyL+taMbW$~Mt6WQH=0ezxFes-716Xmd?(o=k2TPiYveX%!`2ipQra
z52o(p7DGU7F+w`8BjEFKx8`bnDS)z0ne2|>U36k-(4Q9#_=|HbEGNN}88&}?B2=t+
zip8vbvsAbgKi;rBZog<)bz`#aA0qOKJ_my9;S3?gF>q&Tedp`mf_dYD^{GV?Hg8v{
zo`6r59W})&uh#YMti)`&GMsrU4xi4ahWM4%WHLx}kW09FCc+H)XDTedK-pv2SB&9n
zPV>?ZH`!?J34^U9Eg@)kbXJ?pK5cFrR9@~jJYb!lJcgKReU*@@-P>hWZ231P3}%>s
z=)-6!Yu(GuS2uMMW%Cs4Z1<C;j2D5~4-L22sf{vMFzGk)VS4%cR)+spw@&@1>Gl{b
zs1E|CqpGG}ON@M9+8(s^6A>1``4@!8T*%eU7UK!ej&Bbx4oH|uq!cymi@w%-AegRE
z=7RLmG8HiO3&n&pp`I-MEe*QjwcE{H(6_ZRIt*<*SSTFR6TZTQC}|`I_#xz}Mw8d2
zK8pkXy*GTgHqb@_T`Phs2A}CdWT7qi`mD1m@;(rx&2dfA@$qT$edCUmfOet5TZ?8U
z{U_#t_vXF{l2$<3*E>6^u!~DhINv1q1Qow0N?9*-8h-k9fU;iW_M0l&*AneJ8gZpC
zt<XX;EO0IkzGe!3@5_)u$9(i7ya8lQO5Wl$?Bm&WTPw@@#i4xJBNyZJf_;}0jd!o#
zzx**$Juw!V_N25ZtbzTQO)1c<f46s*IGDFKaR1<7dp~>p@!GLmL_@5rdMVj&-!>$u
zChiu)c&(cD*0kK;!&uV{Cl(ZqFadNfns1;7h9B{82#!kk5$gnLjq+}+;xk-<CPQ&F
zsLWzGk-Qh8cJ0tp;8%J}3}h>KnjEKIB0c)Y_8_$JC1sz`pTaWdLM-pNHtD=~gsw~|
zdDXxkNf#s_?Iw3vdJJM`17lE-`BAjc1wP$=si0$)2i>bOdt|Px1x{h^*IPdJhyG4O
z=?%d_LI=SMo9kQhELWO#1=EzyJy!{8sA+x(XAwj;tVK#!y(2+A>L#_>uwZh2*1H%e
zFT++Z#>-Eh?F733{TPZ3)T#Qw;Z~I|Cr3r}Bcgz}i}y2+HshT`S64970Gk8PHf*t{
zGqtbRPe}f7l-=)rreTiwJ8@N|hBGMPoF|(T9t9z)wkqvQtvlq?5i;Fd)e`|v_)~HN
za-ac00+UZTg2o?;r_DKLeKE9Uk!1~D3_0F7ReUOF<Z2|QhBhZ$r;3I9*gsMo-5?ZX
z>l2Auk2}b`D7kmwO%KN2<M9o66CM1Ml(qOS%73kiss0n2PnYU$mC;Olj?O6s+MbJ+
zxp$f^QP;cs0CL}|O~2(7hw?(C#2hlXrGn{rFjndukIB~zb$5T^xal*kT(%$|;nNcn
zTy<uHFotm8B&<N^&0l+~1ene&|6`i1Gq67$I!68xvL4ra_X&@z5+YpdiR$&QK`}L%
zalrymv`J87o4L6)P`?ewogT5sW=*UU)l*azCW#$E2TJM_^uZ}7&dM=xnC68~WX5}r
zW-O>n2k9Tzg~#Qu>W$mNx0FvZDISk)%Q4!fvmZ-t$)1ixMZIqOh&?TGlxJY-kgOik
zpiok}7WJ7$d-k(etO8@tr3bnB7)*w4uf}xOHLJiE7C+WkZnW0fI))P@v8qfX3=oa6
z2O%?=jx#<jP&bYYIqm`z`VzhgMz*!3)lp$BJG+C(l?zkNrAa{&xa~}FrR`*0Q=x-~
zW`(WCo!`kmm<`-Rlm3j|=c<mU=qI(Brr#P)oCz9PlntTKvbahp)O>jPL~iM4$j=eS
zQ{G~y5B!hUvI-g-Q3bA(<C!<|Y3;XMxQ_Kjb>!PhO7GL7gpMYd5=>ELk2?ZrX3Ga{
zHaoH1T(S)5Aj_LY@m5dADkIrLO^g!|RO<nVm5=96S?&`MdhN+Rgrg3=+$fs7F4*wD
zdG$}EyR(Z6$JN<LEo01<Z_O)r%oEc;_zxo)jNK`_<F1}BL<6?(y2l%(i+@0H(VL_e
zbUoTXk^JqR@gB$VQnT^j3I~gTk_CN3HskHwia4=@<L#4?rH)^V_`5q!21h@=BOsw=
zQVqN+Q6Oe^<FJRTjpM2Nhdm!Ze!TzY?3JZt$D=Cp#xH5BbhV&57qQPz!g-#L1s@VU
z?g#!xYIWk259YE*zmI>QZ*%;gS|MXHDly6J=bt7}(gyLN&>7p+qpp|LEg|0};C3u`
zw^;`KFi^8EE4NHTL)`w*e|{Jf6&td!ASExtfLl9AC2#NXWi>4?tJi8?;i6x7s(*=S
zzj;XUeeu@D*87Fh*8Y2_{n5O%JI5Jv(t+yW>|)>JtW2O%-84<kOAoI3dXF==UNStA
zV)Q}mZpoRKp|Ce*PIJ!&p0Qk0NS`%#u?%Xr_Qu%~zEmftuRbm+qzQfgb3phZxpXx0
z?df{x7_%^kO<wo!xNfd)@va}~elL{fPE<}{?lOOmd?M`cmDjkFTQZ%XUgP(j`EDU=
zg!;^@j%rOokafTN&!#$h)NdyeJ}dSXHuq^a>6r4GIw9~$lO?om=RwhcMX;HoMDO_@
zwn2~Gkw(~b1?63_B?~&p{u!`lBiG<g^oG#Li|`jcglAtmboaiO@2Y##<;nEUpnfrB
z=G(D|S$?)1JiYwj>^9GC;K%kQ0TbIY7Fd?tRZcMj+!d>eXLb(DL*-_GB#z`4P#Po*
z+mbhUO2xD1oi)nruE19SuS=5nfL&E9PVBTa&(23eLPuB?n33O6Zw{E{U-mJgIAt|w
z#`?*Gssha#&r?FxiEhQZ4<ZMle|0(ZH?R1HiR#<s0@Oa{4WqWO3i5v-YsRz4LfC>Y
zy55Nh6HuG=)2#J;S<8_nN<+6IZzUvNN(Xr=G9;|AMQ?1JXO-XbyZp`3+8O$b3}U9i
z;X$T-782Jddb|EnSuSqS$>dTYeoRR4e{<Ylu0Q$<(s#<mX+vw#dF32pr?1h();v4r
zI@9q>b|fVJlwUL-{Uz^_rw!`vl5pua?(+Eq?M-CqgXc<IsBn=%B{9st`?FHi`0*zw
z%xLfNjIpFR%yOx;#KBTQ*8V$(6%P4sc{C-1Pry)kGRSGlB)7Y9`kjbQfDSxHZOWi)
z$ce==@4JwQ9IJ5O1uJS5C2WIxm7O+kK8IZRhfJ|rVdQ1eRj;jEF>9^%?R)1w(U-^I
zmHB)CzW-qj*NA2g!9QPFKU;g*(Yd!s{Ti5;O?kqDhUe9RY@rx~RHd>+(YP9)wG@Uq
zha_=19&%sE3CPR5O5bYDb_+2c^e+pXskepdZiQlCBI$?mu@?Lcs^v~*qYw9Lm8L+S
zjq;w7&&{Cqa|+*|iRoF*KDhiWLvjDl2e+|0e~Vg_Q!jqAOMN_6aps)#NM8j>)UXIp
z0FkUbUlT<;a3zTC-NI--Ip!*RwJWJK!X?eDJ+*?1jO7@oyu+ag<rfaw9J$*{VF&bl
z%hsGW8Mf)Asyq$8Sy1~q$Gk81K3&OFefkwH&C0|Vba#87*JadLi(7?=DNL4cH8Gd<
zk?u{q*8L<~70Km}BOwM$tlfToyj)aC!*A*o#V)YWiRt;H1<%wDNy6P_g9ryVEBs!p
zsMcij&bA)c{ccWfV!O7`%PU!zj8@9;_i0hvHFuBCEWb%8Kaa80jcyt2vfXMibxRxn
z!^qp+`0m;fPHS}WPguFTAzkF9;y`E2eYm3W?zQF1##U8E60^e|jia@KFwTedY_A1O
zQAfs@ZpP2uY6rW;@>ZKn0QSF(nbztL8+Rwz2oTw7cXEuB9gCqS#-9)ViMZXCQzD!b
z<lU^Psjx&E6Wg=wyQh@V=|NC5l^PMXvRke_<#<Z2Mt){Cy5QkKb<koV`MSPsU{zo$
zZR(ixfy|55;~nmm8+7G+u$Gv|dd397z$KYLDlXOdK*;@Mv!JRM)Tuc8%rqT`l6SNG
zE4x<ns$WqUP~Mw-bT0?`CGUUv1{J9^Ynb%F@uJ&l{wk!DlZya@ipgP<c%f?3_vBV;
z8%jK2I=$H8OC<%LRlqt@9_DzJO}F;<;v$QU`pn734Z-Kd&eji%XR&L$7bd@Dz1H`Z
zH5~mS!5<-^#6lyWWtt<9JQ&$%W}4LZUDzta=%UE;P22RuAKS;Om~gJ*_kBBFoX1Pf
z!w^%KaOcCZ7Ii%a;c^mt-%cQNYsqB|`N^Mz-n;5<0B08tDBiKtA<#D*FMHHVXn1Do
z_NqjEe&oC9ztT!-7<BztaNuAa8b;h)EinBwzI+|`tG>PFvN;oM6gKFqCFs*13QU3u
zf11{3B}ePHbh!+gn{?8ZkAf#dGayFBNk0^h`TNFdO43v2+!k15?+xxs9o!$sA2qB%
z{V_p)K^OlaMx^?U`TJ6icd3!@6JHCP;agn0zD{U4Y+rV}8Q*pI%x_BasMxli2GSbs
zj$4IJsxJ!J7-y$AMlzO)%a%<B5|Q<<)Jo(jMXQ9$=2tt7M$dW<Z}D_5M%miVTJxl+
z1^|bzzQ%GfkL@zk$sp^=C)s5s^F9G72lP!7frDLE>bq+ly<M8)mBDGfvLUvAlNcFM
zH}w^NX~Sti72|M9Z7-uT4*qa$qN(EPUtPaMHGM@bH{4E<d8rNXgLou<`V<JKiw_)%
zMi07C>0c>(Z^98%>DNa2SwMpjM2tdwxgYh>YY-rPTpB5!Q7v!1=ik4o(|E^X?zFQy
zZu>=?09YZ3INUvjPx(AmoI1syKbci=wweJ;DNROFs~QpWf=Zz{v&qz`dClmT+J&OC
zY1$;~61Cy9EM<uZ5U6(ke7GABJyep{4+#7fM?2X>tO9(5Ar3A83H|fNA>+gi{L9lK
zMb@>xV~;4vm)$kI#p&HKA5aYXpRd1LI2oVm9?}GpHFfF}hU|O(nM8bu0{y6!P`?*P
z_zu`HcaSXYS!EU;oXy?A=jP_x7(+}SAMCEK+RzDKbs3OUxRxU8$S%;efm|B=YT{&O
zVr>QJmyNuMFzv-h3oJ7Afx8v{n=_;|)j)BrQ=urMTqiYB0nAd*%P}DE17OP~0Ee~O
zaOgh3k?kV`P{7VWTiwP_3E%C0^Spo-ec{1o7blZkP4WZf?;dA3(B&}bxDS;-@OV@n
zC3>GH``7^|rJdsRRf-Y$8BiYP`8{|l6C<+QhxQ*(7=4x94iOQ0xB#@T{j+a0IE&u+
zFBVtU`N#~ZaBB2|T<D5Rw$(C_fd(LLjpM{&gIAtXpDqn55*$YTK2OWXYr>V79#|<u
zHeTr#85;Nnzm!PvrrtCANbawsK7p3Y8?B%GNGs}Oz6#g#m6}B<D{A8SHI+3wPPbaW
zn+Bzk%hv6EZW$ua>8yQdOl)@lb>t^^=uf3Fo{Ia1%}_z-jW;M-%pz&Ia@@U3WmEre
zCL`?!QTW&YO$-nW;B2)zut$7*4%pi0+7Iz3;!Xnz^LzAt20l}JVCMMhtNKe@IbLF9
zYreN*xPwu{cT$`Xae{M%dH)(&vT@Vm;|;=J%8Ew=;I5x$Q;qr$3|V9gzTadMRQVm~
zKa=bjkV5Ayj#A3VPxVULDX^%1>aZ|4iDbL^L`y07Xipq^iMIS-Uq53C>siS)afSV?
zkT(I%nlMunfs=2#t89}9hvmwz=92c`d8-bjM)2>YkkaT@E@WFvgjgGlcOs5!5#-WE
zlOSCvpEevxOCqlTI{M<-kh9$PN~&l0{AN})v(>Nvw5*Zok@1|mSxSd$*UU&(^g;)o
z!fr1P4#$NYJ@#W1ek~5)tX1$TRXH;8J5wg|VRzst^Pr3z2lOpLCGUo3FG$qG9BKQ~
zt3YUh2mYj%;B+()!#|IVGP|2YiW5#RYk7fHFi_i*C8zL$k*(ml{C!O^Lg1ibrsiX(
zWBpQ_dQ`D4UHVS8(I?sB^dEoIV9pv(mNl>;y!HctC;10(oR<K{`H=7yoeR5wzulFl
zy4()+@ODHLniW44f!Hm?`yFo&*2)&Z2)BA$03tFA#!?j<z$?{fy)wx(+QoH#IDQ?2
zE&%;{%VLw+?JWzJ`S9a|*zj9k**Z@x%y|t!GuM@gtDsPcW+q4?mx&Y0T~j_e1|m8I
z<(n8YFnEc1`KXW-NSSU7A|(!Ky&0Qbc|*|N@&rodr85kr#JWTGnbMlxY*2SPpELCi
zH_Qp87gjcP(vIQ#+xhzwn!_Y>ez2`{K)?3tVdh|rTFVlk%zPZ%cJVs|c`Law57yL}
z=I-xIgw7vzqcf@hzP0dK<IewbAB#ifg-{wyjCdf8sq^noqP!+$iih@8>O2l;%jP##
zDt{E|h~_yfp$1j~1Lq6CZzISyqnJtDyl7}mzAop=GwlRXf0JCJer@g)ATPDWXPj<Q
z#5$)9FhVpe0lM5;9UqUu%e(%b&}PdvaHLr_r_1o|7AN%N>W%WgzsU`igFc&h+PG-(
zaY$o#FO$^60_25IuGhCBwNoCwu1QS*=yW<N3Fh=3n(VVWHW~xzyJ?Dgg2|k=`07L;
zSXClCm^0jwEi(rx&wEa=LE^Q6S8tp`-W~nEl=VjHE0qw$4&~E)0})g_FC|~*8z>uQ
zV`%!MRF**{xKZxnU>1-|Y8jWIs`oPXW2FRx#@^K=(0x?vqsMUGYxcJje6F&HeX<G2
zAKaH5lAMT982i#+Xl}k@%lYhJKF?SPi5!0WfKL{o?_eAXR8I+M>D_j?&)S5h!Zo0*
z=0Y>8fwjX=CGfIdua1v}@hooLv6}VPOR9-T&y_Z2=e-fef=WsDT%|_ma?xdA3%j_i
zm(Auz7m)RE;#=5C(Yjg51E<5C{lf0;Riz_EqDtSnhX=+8zY_e|vi_pS*|!_JrI+Oo
z0#Bb)e40ZgZ^h-UJ8_@hx`*Rr5L$EEcM_5xG0SR{08!!DWB5Q%PV|t<_yHTBYXNco
z=;e^|ZuKYt#2WA0M2RnN#Q5%q0KVOA<Ij%-0&N!8BbWfZ>ude?fK^cn4NCaI-$D##
zjXg()QW7kNb7iCRCHDij!Ja7)#NUgXg^3b#^>|Wb9!J%KEP&?qWV!A#ShCHYO8F}t
zQg-XjS$|XBNMT3}PjSx|Y4`yq7t6<psrSNBGvg?(bu~hp2L|l&ZP$o5FHayFQEO>(
zkf&n*ictly`JA23QvJ>b-n^ZHZY~@_nW__=`R8;Pqeie%?qxyXxv9VY!*z>y<;Kfq
zd}ytOIUVHoSz0#8U7T)qy;Hn_^Jo#fBPty2`8$m+dZ6EaF&0eqNUIbPiPd*2%HNhT
zM(cZ;7m7<rV5uU(^272(MvdO?^`bo_%g_+Cz)zEPpdVw|!Ug&`(k4#41dghaewRJ2
zhs)XqZJX|*7Wyt}Ks}vL(L|;U+5{5nMMu4Rd5MDmXh^|RmHJ~aJzQ_bY_A2bYh#10
zf$-#pCp``j9Z^f%r_9g+E-W<Z6+FWz$teDW{Im;mWD0Ox`1cDyVg~b##Rht~GTGB*
zt&U59Bs3!xfw7c5^v8aXwb*UCMs-b0iJcA8*1Tyanh}J+H6ixe#g_oI*xAWA;nAEg
z><#GeDp^PjcS(=rgQ|FO_&cH{s(M^L@W>hKTRhUnv25Yh-$Ip)r~&uTW|sv5VUau=
zHj@^(LOlfY*$Ir$7mMB%%21$oY40$mzKAv+$--yKmjTi{uaY-LIa}A*71)S&0z`We
z!!M0}mwu^;Chj+B_ja#Xx$R7LtRw(uhX&AfzI?H5vIk-a+mIx%fTa0O$0*duVoeLp
zs!xZuXX$d<63ToHjET6@d3=oCl<?Z5wst>4$*-f`pyxA<6OoNRz&C(I0Nhc4p<hJ6
zB8y~{DYl+%-0t?bdif#W^WMLIFA4xr*cvc%?ZhTtv8pLVvM?kADaQi1fm5zL+*x%t
z3x1={eP!sx75(D04#4vMd4XUe&q%f3|Br;%0ElS6@u0yNe5~35Qo%Xt2{RC7;-T>3
z9)8c((+Nx{12-JEB)2OHo4aF|+<3<M?3ZO<Y##X{r`g^0QU~lDH_$>i28UaD<KU3n
zpYH1`(s8I4^vp4TNko5{BHlX;FHI`mOCzPrX8xUAwNH<&0Qrw+?9*FaP^Jk0^b)JI
za7p1>AOIjE7`vSn)joHzy1T5QXp1io!=Po-@XT9Q&OA@2dk50xYKuz8RVKzx2S?+s
zMxwkzZU8$aEH0dh@5;g{q%5JGS%LQYcAxPmvxYGMOsIvM3Ft3brhGpBk~kM82l6RF
zkAVf*m2l&VNFbjf1<R72`p)Nm3Xm?>L}=~4H{uLkJ3I*>MhxW74Suvz6VG7G-F_7f
z<8Ed-Na&51w=$}ChFnI!yRM8>j|s|dTa1+Yc2Se^(tGq(&XPR{QprcTHwK5%&1R&z
z^!d7(Qv_({(#Nyuh$>Ib{GVA@Ini+W!>xH^e_8I|U+i9}9xh=MG_`TjgoXY>5MaG!
z6@k?4QRL{{;=&*LygOzS*n)s8mhU!P5wq>gkVByhf0+NR+WyBG4}K<=s|(&cKPWY<
zu}Zo5$FdMe_1P2+5z-BK5SL!kX<9axi*=M&1@uFl43U}RFM@@_Ha0fI1?E|b(FJ(m
z1!6n;3dAd6ok#r@`{cvbN`QU+X>uE21lQw(ZGAbq4M~-LC!jzAV~~zekcq$W?i3Ju
zGv*!j+D*qxXLO<uD!I6*R+>-uO_9x1c?)Ryn*%+rq_1r*=Y*TGD6(BDy-}xx+^uW8
z*E`=ZaPx89tJ0MPP#mfrT&EBlK;EBq+g%wQyH7ETQ(n35@pj}@V1hPP@CLg0ohaJQ
zka4$xl7w1AVURG~?d6~TS8Xg3huO`h`lXqd0pOR*gZx<QJ@=ra^%dtEDLf?dY)KEX
zx%$hLP42tPv(;hQK5`P{PXG%3bK8}UEp)G&&AV!vv_!(z%dm))2z1d33@2@q-z$3t
zW7~jt_B$B3P{vGMPN6%Po|7s+*gk)%s0JFJB5;oog}cRp2*{N<aE)Q=ey-x*P4|hV
z>wRuQkkHS<L<sT=kZhYHPPj+hAO>rAmh>QpEI{<7mRmpx`%XiBaJDX|G`QmJyvTUo
zio66I5r2RoN+a7a_f_0=bEhnBK2k#mwHwsCwL!4CWQscX$|YgudrRS}vT<3<f3Crs
z=aPSiy8jb)qx-REAI6Bv_GY$K>-IJlLa*c886kgw?^Yu_Mo2wP9R9YoT-VD?LE*{I
zYhh9Cz;5sW*m#gQWI|Ms6N`jpE>MQwLb(Tk+c3;)!UM4xybQXF%mQ(N8rY*w@=gf=
z%J!)_6rAAsH)}v(Or}n>!<8O+y2cMD#bd-_o|qo*MZ<c<ZxJ2%&SW>=AJ4Icg7PUl
zA4yYeOU7WURsClV01}QAN<&w-4x~LdAj6GSWm@8pllA;6&%P8tsP2L0y+gk{36;k~
zU5s{sL99;3@OCd6URcx#vJyB_;UpBY)OA_uXszWx=NXWK1$g?VuQ@rbKY#tI_dKt9
ze}?JP!5ldfPw}{jFyHDL+o{fH7gU+kMgSi3Uw5m_-;)|2ZWY3QeUjdF=knn|pltaI
zVdjYDA1~Eba@e8ca<y*N7PCJPEi)*$v3Z+!B2AA!Tm^R4_}3a(zf~IrT0>^b+SBZE
zMY)%Ys!DSG_mmOF(rZ?VgM&T#XBXf3Umsngj{!wB>7o+Rb+o2GQ1rfwBW+%)FA7S7
zG?<K$H$6?<y3J((T8(xfPwCeZU5A$uyRlcHhjQ3%$Grq7T(<BBu=DwLVG&}Ef{)^g
zyg(Xwf(l|ISo@7SfMV;ZV|68PFD{NZHY)3Zvme5pQ&qm%Nf(!QGglnD+^z+B*OR8q
zkA*+LSEct8(pVd&q=3=DvS5Jt7OEiCZ^!tu_PQCvYm1&>p<=aZ1-eW1J+8gp*k1=V
z<~PA9KT@Q+9I)dy%JM-ns&fA!Qng84J-uZRCH%}GTsIyJx^op?ps3YU><*3;k%PAr
zyTY#29#?cZ^M9clOCXn&2x@=z7j4FZcCGyoJD}e9O@$6+;*#SB1C@22LHs^_Mgv&K
zyMAW;z@A?<+!)eW?&LMcZqoZ1HhLoRUDS9qie{v8$hG}7es?ph_FU#pC$NyO`h##P
zl~0OB-{(Na(@&N|RSa0AuehXeAy%v&@AT?dk$`VUCZ}cg=rwyLBjmLncxA}T=(QoX
z$R_|*m{|_U1FDNenBBMTSvhA`e~i5mJ222^(nbS>^Cmh1Ps18R>9y?Xx3Zm2%xNx=
z3QHTDZ4ZYAM|i=uw;URfzx;jVdh1f#o&Lc=q&30k&Bz@#+|0G>5XcYnHH}_?2R%^9
z>`EVL<~_l=iQ0Fzxuk}mT4k;CIc0R#Kh_bY`a5~3NT*U1IB;<SBD{S@5e$<%E<J`l
zhNiWvtV2*QK~%eED9pfaCg0^4uG|978bg7FK-w$}u{u^S@9>k0TFjIqyPK_5|4^%O
zn><n4q0h$P46ODSnkfJuyR*CtV3FL#J(#D7j6+B-Z~OMI1u!``JUF@a={G%JWCE~1
zbh|em`vwB+3w%CX2LO9H1f`Z-{ypag_^SfgY+7Im72_bg#o?%6p=<*`I-SEF$-`8T
zFLEp;X=B7?StP(oBQNs;V>!{I15ShMO^l}mmpY%D+xX`~X<oh&BCkce-%Jo?@3G9C
zAMYQ3E4!4eFal~TXHd4BQr$9ixNl@=G@D%RG8izM_460V6w{qY>~dBWu4M@l=0kvk
zDe_w#8`}o8v~e!YqnW+wxZRzs6NX^V!OWeaw0Bso?_|(~W^ugH*u<tE;{)?Yiw29W
z97VL>VtUq>z({(DY!+M#2+9Dda|d|wYh(-0JcqQl058_)OK~`VPPwk^p3vwwDut*+
zhzx;f=z=m1?AP+A*7z#V>AKJ}+e_H4ns=uD2$_t5F3ki4AW+BjAnTc<^Z7lkI+lg7
z<C-eX?+5BNAd8j*97A303SY(FMAKQ?X6M_7d37pw#1ycE6M<8p{u5*>QXF2F0~~4#
zvWU^f34_n(Ct#5H0DwWvQmBaVo%*Px6+>RqTyj5TlI>2GcuXwB6xFQj0_>b_QEn<*
zpLrp|xsQa9!N-fXz#O0Km^QE7pZ}gtc&yo!#*`i)ZZlx{QdO=A1AdgyEs6h7_W-W6
zz2<iRV|?2JNE6DN5-<`{z~JhY;tC>N#ur9*{`(fWMv?Et;dZ%Yx}0vKbq;<rWjkTi
zslEqnJ?MA_)&qbgt#X$)u2hcP%T4tH6hggc-=7~JM!y#Y?yk7gXBgtTmv8Y{{}EWQ
zId|4ZPb&ev)?=2-yXT<5{Hnr(T`)xnY!IF|mmQm;l31hzc6`6bpC8D(D<nNZ?_M>@
zsIYN)Ptvww#AT8n=VmsixLUVaCllJNIQ3HGUk_5IXHre75A{e};eU`7&eX{=WT423
z$!|*KxCVN?>9H{rM0g*lbpUSISmd5wzR0=aiXOBM+U{S$D>(IBPzFw#!X?ek59vp(
zF_2b^D)Fn1o%{+nGINS`y>>E_@oBl-|4r2SoPd0Djpb#Rqp@a<PDCMxc>8&Ud9zG`
z{DJm=%%we40Q;m({z&DFzTQq?gv1MiMV&>EvGxDU9SACAw;6o$SSFRfB_Z9(fRJ|v
zbzwQxe^m&1FI<-xB@Um=1vZw(LW{iH(G=SvFJR?bz){)!cM$6TI6|$39{aqttPdzx
zV0@49n(a)0(|BvC1|L21c-wpqRN#M6qW@oEZypb2`~DA)J-eb5iBc&-F^nh%B`ssg
zQe>$x_N6RSWJ@Yp(p|#Ho_$MW385rf3}cDxku2G>MV8-jPWR{YeZJ50yq@Qe`}Mlx
zGIL$mc^=1kEbsUG$g@3!67bU=pZ&?bB|Y)hI~PgBxbzFtw^=dTRbPQ^Cy?kCb$1)^
zyZ|y-`lmc_?&Aj8oMZ<@7yJW0_;pyj^PP^rHOSP%DUN?Jjvt|VA-)2I)yX(PR|uN|
z+U|QMHP?{=yw|t6FLD1X9<k-jmfEwg&ZLe7oX&6Q0aOMZRoS3Hfh-lA+SaRrc17bG
zPYxR7!gIIho2$46sfswX?)+M0|Cp2qiE*DQcHjk*m<dGYi^!mj?3GJCG7oe1grr8;
zMmH>3GGbbGgU4%5IoUTHQ`+xWQMtC*J>@Xj)?yNRVWA2$IIujDSlZHZe)?;%V|0Zn
z#Vu<8M;y)m5j^Q{%38kdmB-a)4!UcW8;Gx31zwNkALzlAQ|`9@k+Iz6V9z9WB;`*F
z-J3xA)cYTz)}G8Zz(XQr64^wl!OnHZq<U&ZOAs5c*9G1&IBWrwMEJeX|EF)MXA}OP
zQC2dt=&UoTStus9)D;ITdWlscn9knwfIT|C)bm}7I5$k4%T`$ET(ESdKxtkU6Ox_<
z(lP&p{QX;T!0sIPE4D0T@BW={QqbM7ZM_|EZ+mr6_PBoX%hQ~K#?|-sVtHlkPM};9
zj@cXHl5`5y#b6cG$=O)BApuHDPS5EN5NTTZxp)oeQlU-(;KsdLV7|&vCc8&E{@mjh
z9<FrC1UldiD)UgVpv8;@qjtjV_j-f0kj=s8)^r%{H^siw^$a=$fX&=`kBxm8v7^YP
z`EU5*<Wzn<G3d!0XdB*ve+iM(#94zI1H}aU%$By3UxbVt_8%jnoOcgKto<rPR~W{D
z2LUcgS%2Npm9ATJAKvwz{xC5@1rl9}O(`^*?&m>pnHr(|DZIgJ;wwVYKw#wqW!=|>
z>4^;5`J>!e@O)?8qlVcmaXxGPtLY5-Tc=|d%7PBsVfaXp4<Gkg^bmKP7ybA?RIoz-
z%R@G?*ayyJgPyd?Gp#^%=n`&l**OR(`HeS;w-qN3Qgqi5-m~`1q4Z}g&e6r={w5}|
zxO`bpZDGL1smQAUAf)F&yoc3hT8h0G6fbIZyEN0gj`kmEa55Uc679_2IQF?gOBJ8V
z5TFEzhQE2>|5V<{St?xsx-+oRY=pw7{0}bekQr`|lP7`Qz<AatYQGy`)PIy(C*CfB
zlJ8MU1J5cI7P(df&Qh=S3)?pryBWX#fq;FxHSd1?EL%Nc-v81jln^WTz9Kz>xNqM+
zMbebnv}a2N$@Nc%|Jns8EsuFHiS&I1zdZm;V(<Jli6cBc$-J}Qe3Qu>8^zs!Q1>Jg
zrdBglSH9=fH-U>aan|k8H4UGlr1IcIoI2w@7W>np()*@WaQUxa*MYnA5&AomsSj0u
z)j_0OL??#fo}?R<*tgcj$1dII_`WMR^-}}Nn+g}Xx=Nhma6127o|b1<t9ohj`x1ov
z`Kn*QBSTfSN%c54hV~jt*zBNO^bS0LBi)1?GN{<I5JpB_usHbOFw@>(jW7tnv5F1a
z|3bXM8$;rCOmUn;9?>sba#j@-uK)#>du1~~j>LGb8YQ!LmwV>mdM9-KR~|2&G8nlI
zo2IMzl&CZ0tWuvMh=z80PH6Rnva#HNJr(S`;JX$kiOp`yyb7UX*jH8|>r+YUN6y>q
z&twKTWZTn2CWGPn+nXcHF2B7k8C6sy**#e57LlK*pLrANMeF8Jcf(U8e?O6TevoBM
zRc<Te!lV$X<R4n=en#0{DQ*vn^GX;$hd$0Oh=bU(T0el17n8IMeQT$#qK~Xx+rNoC
z-fy#h9)myOAx$v7#42&R(BoJr;N9%x8xIps0;36k1p*Hg5CbCK5hE5iL*M*T?ULaK
z+A{OpJ$iw}14wGGoen9i7&}(j{H9+;8{S(~`yEea#T%R6YOKP}p3gNBVka}4=%CuI
zbbesVDZL3fpe{Ioo*Uo80Sryk<SpBbG2B`e*iMxP+d3b$e^upRNo1BDWzdJXufBpa
zB0au$E*E^)kU97+NTw^PM}9P_Og}%03CM-YcQge>Bb>S_QYDh%MJr*Q|6C(ZnwD`k
z&f84rGhwLvpe9$;TCLdD#?uHj5dg=tr*mIV1RC*QCruU}hya8rhir%u8946Zg384T
zaetgigSUPcb`m@U*#e0AGx*eRasZ<Evsk)T&1$Nfs+W4)7!t(hZ<?Z<_yJ$&are#6
z*2k(;?1bW(+UcAYX6PQ4$*mevAji>p2E<D4qYRj3qZ#}hBrDwzRvO$q(15`^Up&h%
z`@ZVw{=?&2DDU?88~K-aK_-2ZKklF{M$a<)+s{pK7`(n}c~-nDjySOa5YU$$YmK&;
zfK@u#Qt(a}*3q46)?B6Nz!Gl^yyW(r3$m`c!v7jiNIOtv3f~Dkw)OxE&HK9<$Ht{d
zu(O-3&3Z~Gf#r%`F&`yuIrTq<j8EK?>a91ge<`6-s$$u1*_QER@Qtu<lCA)k_4Y7C
z6#kqmxZPc;L?B7^Z4WL4e9R}`_a~|!&h)8Yd(?y%XPR2Pgkxj%>+GjM5F!e7x5$eF
zp27fMw~;0Uo`iHX=J4C_2cPL@e~Ti8$sY(20nY$<t$oN-g%ea8l=Z^K6$kgfewu#K
zU&5B;&U8xpnPt~cNdJ!l@hZ}8;K3UpH9=sx-AJhVXux7RyU&UzFiwWdoS3N4?VTVy
z<`=5^t{8UOB3Sb5A~{yhh9l0EaV8t9E|1B^&f=r?|4G2YYrJosgaCMgrbT-0P(Ca1
zTk~ELN?gop_ESIyJnqYh0phr^!!rH|;Hva48rnBhZMHATnz9|=7)AH9H3y&ipaHT-
z;i9ti8UiFkq3Ncn!odLv%go<MN}Lz$+f=s)8%-FI10^7>_@VPmBC`d9w}7Uw9Y?IB
z+!b&^B_Q$!4d^%7U()Lkf*0j2l~I<(JFs?qQ9?=g_0vM4uNJiIe7ZcYJR|_I<;zCc
zBuBS~`r3W^^yzh_BqQpM;L>v86;GY<@>V-8|7Wq=)n|kQFLfb*zRo_4r72#8v{-ko
z?-kGcNdUroeS5>;+LLW0g&HKdM%-HWCNg6KSs~|46o#rB6{|FLmg3kU14r!CtM5Gr
zmWo`4|5=`@y=n8(WLg&O2%Xv#jk(X3>n-680Fxe3V9u0k(B4>Ia|W_T)E6OwTB<%q
z?*_-=a{abuvnyIPTLUlGdkg58<zU}h$tPaXk{Q+a?dp8l4<#%yTZoSZE}wWi^fRYm
zv2Ni@iSwn3$&&7#SWyRzK*p%4kaEJ-gly~gD=J5S&HQX_Ojxdj-1B-mG|0C`C9%ID
zRsS)cw$TjVexwGqx-Ng&r`PbHvckA0^NhSh+X0gCt?I`cb2d0P$-Y@!7c*u^2Qq2^
z{!MEUdn-t!)Vl^5^3C!Am<NJJ<lhH}B?;9ZChUWEbm5rrF7FW$U}%dG`3CyMlB{*r
zMpHTe2;gs^78df~XhG4XXH<(@(CN*Nz6=)pH6eO^Eau=($?*P|mn|s`i8*Vcui&*k
z21A;kxM`>Iqh8d%CtQfQ{OgB{wb#$AjPlI^SB)lol4|vjK)G$)PIaYrVxRI96TT6+
z2(gRtn~h9{n;v2(3*G^tW74PpPlC$aQVIjw%Lchd<WXgzgZ`*MM}QW+`($wkh-skG
zM!pS-@Fo)=E52l7MRj}6hM$#OoHD&;m|M+HIO;f17x={$62+vCbdd}e1<2$pAgJw)
z)s6qW0u|xqFbjNMY@+XC=O?eGl-^7LZp5$PQ_X3`!sVkQFP(+ww(+TT%_x2iH`i4j
z@?m;M&g4k)m-Rk)tuw1CW{lJf{%Cqb?qKco&_{_x4{=JUO!<fKUpaF_4IBMFy{GF0
zP2<)&&Os;N?jMMU%ZAC0sEj?8yZ&Hb!=^yi(=VW))Lm$G54Y>^<p9r#ug$`OHAVKV
zPH=O5G5+$ieCzR<4*Y)Cvyc8qLPWgxK=tt2C#N3S-Vg&n3IfkEV`z4T4&Ao^D&ymC
zr=v8$OiFVh6Vr=vP*9ExZ+B*%zw?IP6Xi(T-q8}K`Y!i}ZBNJ?^B=tULjZbyKYo+X
zP%{u4YLv7&FA?aPp~u(eTNQ8qvX*^uhbf}*%ZbZ?MC<E{g(87zw2X<d86*C#BxXSG
zohi9Ss;(G|*n_#X@k*SQSP+HiWslV4I0C29FUe%`m$w@}+7(|}$fz}H3!JhU#!$ux
zV%-?6jLcvLnIiKJ%)%Czfpby&cL9-XE?hgQv#`m@K3D}<t-3<Y8&%ee6P<{`RDOI<
z@l|1<dS8H~@bVffN5=LXtfqV*z`jX?hbOs~ll(0~Q7x<8%?bh_3j-d)DW;a^ozMae
zJt!qL7lF#E<}3ljoAX-?4Gp368+bJbd7aQdYwkTE1|{#g19nOsJUCmvN%HHR9EFt)
z-PUFSmy4D`MRp~4ihb+;g6>BVryuVx%fZl&&4=0i?mPn}pN#&~N_N{cqlhO&ac+zL
z-&pYpc8{-klAZxOwPLW9e^YLOx@qn8PqEob<nb&kNWrHhF<8QH-tvUBw!oSes__v>
z#BMcCOgx6|^5$_5P!^3&eA#ncnVT@~DBuL&zt@VJJ0N9qUdv2_^}trEDB{}#l(?q3
zhxh7Vh2rID)i{DC>I$bnXTdHUx@h?hi`}bFfo@}VXnI_Tscg<A6N1pMlJ{C0&CJ4%
zjMo+lHdYGSA*OX*{VuxkwOfkgekcH}XgtxVOb96!{>!sbpe;0J()NZWn2)vR#i;;f
z=Yups)3G%@9LFu7%?R=-2WWOy`cT%swH*l+NE4QJ;<p@zDsLmA=kx4T`WAkv*ue&(
z+WER`Da(Uz_gs`dcfrv>;HjVJO67f*=S2ZWMZG4werOTvMM%$t8QyFDp+S8*!}O0g
z%MOA9XF&nOKLH%v0nZEqn!{5fl<w|emp#!T>bb3En}xgQG0t6acXkK}uT`A?9u6dK
zBX)UjMs;;9-3dM-2!;t^PM*d!PC)oko+VilVT*U)lpv;OusiP|dwU)8#&D5txDdnP
z_gtqz;Z(1v2G_J=r3>Yg2hNs4V*S`kClF%j^Jo<Na6dgd&!VymG>Eh!)dx32`Yzq8
zG|sAwhl}B$D@&HJo1Z>)W&i7t7wJ#z9b#*^T0ci_Lth^*Nm$iOV<2m7xD@GVwSH#w
zeu*Eq7O<|-9m3e_MatMB^tJC&MgKxw=jh$5sJHbhzchShP_Wt#%4u#8AeGKAn&jm)
z>vSDB6dW@=dV}@(BiPK3*blrp(7>!KfU!4-#6Q+8y2(n@BS#=lL?ZSvnyk+VK6Uvp
zufi@#<cJ;E&x3KukYKU!nN$!~a1b!~9O(@ai{AWvz3M7kVlg(j-V_?F^UN5ZPf1YS
z#fXU?qlN*1Hn14NhZVaL2-F_bmwS#&lB}ko^>9`3aGO9zk}#j6{orm<NBe|ZzfR-i
z5KX&h6DHCS${#~L(6+K><J~nM@lb93t9f|!jDQ5V@ubVK<C6bF<HjhRyY>KD&VLma
z0{Em;xMA_{NfNk*QqMqBofn$wcfdyBPcZ>xbYpvN&E-mRb3Byf#w6{BH+=to-!Z6(
zbqlAyfR(_!gzgG{+d!PHVpka3?fn=rVfX~=w^a9Io3RW?L;;h@fNPr7q%mVoz&a+e
z9y;+*0$qnhGm0<o_C~9PxwB7_Hql$rfdUUEA_SZ+%wm?1+3*AF37>=@Q9D&Y<EI$%
z-|p`Z)m+9q3P=}ngrJW?dUxb%?Iu#1(r8^aoFXOAECgqA?Ly#<ucfYubjgZRNa{>W
zoW{Z*pV<vS@*{n(fW^|cbgyQ-r;OqZ1R>!jvcl)OhoaNr<1el1d2n*$Ukg)(5#FMN
zUY7BK9(&uLx&g#7lt{Qy<GnaBmj4{IQ(Ir#?qlQGX_s@M!0Q6Tkmd}thTGih%WoN{
zLj(l2sq=%z6c?6!Uh(o942g(L|IbQCwF@j%5x6ModrFh4qigkA2`S(9cE9(yv}1Qk
z9x1_irOMMiE$)zh6cPpP+m83Mi`RN__io(CZ#$>I1vvOnRaEBnX7~ZeUlCMXD>|%k
z{_D3;eveDQxh9}mV4qSRE4Pp})Yz`s_LwnH10@N^kc@Bb%Yy;gGS}hM>exX%!9WB+
zNT1SVYXR`2O!qb1cPYZMaO2lUi8tx3w&2lg>h<!F7P1OS|LV|>{<fp!?xA+B^i-DC
z7cJVLb&At>A8yFHH4Ha;CqsU$Us$ARFfan_sE4coSf{CqzQMJMsq)X}A9W0<dwW9C
zW!_R>C!8cm9iO{skTcv>!xPX$+mP)!R?mw#ZZ0gb*A=0L36+1x@IN1!rDMMQIM>_g
z05}hU$l=EL@%RkSYp)sOvJV$GQ{GPyn1(I@2*f_=z%kfW0tG~8j-;7cNC0mJ?stQ3
zcNgT&t&;H{ktMng_#9q1XC%UsgnG3q(Vzp-WC8FFpXDFXQvEu;G{ED;ev+|;(s$15
zYv*%_EYX*};~RrK6}ld$gD{gY(+6HiC9*NuTu$93RImVW4w&~@V&;tk7X=&=ZVvC5
zHZy2|;5I_C_hU+Ox{N)KJck?ymO(+4c9Rbp-f3z>3|&f0%W3{TZ`&>o^qU={&2nKL
z7^njdWA1G5LsaBT$NPJ<dEJB)%OaA!Ps``isI%dF_WonchGjc&F3(Viu=q);;JIAG
ziH^F_9jA<}Yv%8%te&%;VG}v<1hMBf>O7y%rk~#!u91$59TebgF2*W&`g8p)OkbGs
zwZn2xKE33UWyT~fZ`)WTmH%qbg0OY_y>&4*+}ZMw^9owBTW4-_5ZvBv?-SRY`)L0o
zOv9wY%h|?&*ZH6B$cW(6a46bkXQbasdGXUAt(o5@5Y-zuW5lvJs(8aSLKowjxg#ZI
zQpW5+Fs=B<=V+gi_mW+k@Vig?C+*uLpW>)o6MEJDJJ(S3Iu(!$hFaf~tB|F8Lv4V$
zC~z2{9j||ey@rU<3%ce$B27OjB!W<u96z%lL7ub!m`!R2TNB^-T9Mlh{JP!1O?<fW
zsN8nmW(g9AAOS3ZKSt?ViXahnY2wv@Thn)Y10Z8?6B^-|nz+T_9Qk7Q{>x)lZb88s
zN>FzPN+Z;QuBXle_Z@O)UPF6m1p<0V8wg{s-;+%zL|uev8M=v|(i!ZRaxB+Ag}=lx
zW`^|1^f-qcmkU@OJbmXz3b@br;!k1M_1zq~8Q-{HH(?^az_Pk8?mP;A3u5tP-|xcx
zGZsJpaBp16f5^hQ%}`b4;~pEa>VF;Gfn6-q*NrVv?U~FMETsQq_|d6dvl_bOG2Uw>
zSwv~mA~$gg*)Le3QZ~}np?hbpuR##6k$LO6jrJOmrnd0IuVS?Z+S+ErWNa~!GAuc?
zQR1nnZ3OXF>dpC4=%bmtQxz;CH!E<{EaV-8_|9n>v|=XB9}lN95ZL$L*^zW}v|k8v
zm1OiF$zp-S`4v;YOtJlbe<q{0MjudMrS)OE-f6@)-abfl5K3BVCdT~&*7*$r)BF*J
zB)u8~O`{!2&fdpfJO#zwg3gR*S(jqB{bm!;s#;$t=!UA*Wd3g?H6feJEO2?7wCPae
zt*(q=lyeCBoe(|C>0v}YtzuNj$R4Y3f6sX*z%!Ej1Uw`RJ@Mn6dN&}`%zrsHb6O&P
zC_ETW$H-e$z|sIU!?ir7=ANlshgWK9b_4Wm28|p}^addp9nRz(C%W;w-lF(a+mwk&
zOG^YkNJ*+4pBhK(J;sLC84RPV0ZAqV3^}#~$E0lMp-;Jxa(0(ESE?N&C+wH4od0J@
z?wqT<RUMb?#DTqAnW!whFLYS%^9Dz|b$hh8)M|Q2QxU^DoB=aN%MBb}Tk0o?`xnHn
zoDag>1dz|`=+4aRsp$;3{#%p^#w0zDxy(;t5kOvRG?(VdO$*7GnYQ_oZ4^b<CQxup
zI{Gs+wdVuc2hbgywb1J5v^L-xw+(n@aKHKSHf3~<<!@t~T!VI_AFxrX;oUBZZp@0F
zl;64q(~@t(6rO$Htp&H~kmZHU=9TTmE<D@Tmc~1pKxqCNpgI$f#~L%7k+C(c_3yIJ
zu$jER2*k7t`DGv5f8<^a#?7`0k-yn8i>ZQ;&yT!Ge&vMsr;5Js@M(osfr3xc=E)uM
z;G9XuxZv2t_4y17CJYEF?A43W&)mGGA}uSM@!f>$gi6A!54=;U4|U|Ood&HRz!uD`
zY~?%^g~5U4nOC&uH1H9jw%^_|%6FXottt7&eXnDji`+o2zo0IOwG)!W;^hvRm$!jd
z<ZyTvYb_gi2kq$F5a%Rq&+OjuC4jz5LF44K*ct=RXy-}HtO5eP|59-b-dIa<OA%=r
zJ;8?g2w`Txu?NQX#HNJFaDAw=V8o7t>f`k(U@(`CB>MNMpinq}o9v=mNc6YDn;-W@
zzmh^%eWYI%#H;!Ol6jxM8WDqA^H@<61?qVvUFf1^xQND8V<cp%RDa4N90e=dyCD3*
zGC|;VLPD7?u>>+H=wEPf{}T=&5A{x?>Hp<lz{x}+1;WWRRKK|n5z<iK#vgx|$RqCQ
zCW)%V`dyCgDZ&0bJ)ihH_bAC97p}5>=<|IZfHk`44$43+<+)l(AV?xqY~~X$sQSse
zBMujrXKATW1gU^!9InZ!VLPSCzt3q%oAQ8TiFFKZ0`y&S^zZTqVpm(ML}HAzsdal!
zv}G}S@FF%vh)=h<pEFz-Beny*hXQw7oE@`~v(B;RiASe;g6H*ZJpYB^JiC*F6eQBX
zXRw1EMfzASKM=OQ^<9{3Wur*S@60wL>K?Z#xg6N}dzQGiQigj#n=kq-ixb9vNPPKJ
z<GXyn+P^O6BeZcNef$Mi3ii1N*|ny}hKo3QEp`$G&61*Ky(N1l-V0Luyg&Q*Ub$CB
z+GIkhOT6p#SsJ>SqcZ85MQ7fHyWfIHGQj$;T-<I+NlCe-8?kY(!~O%_#hXoZ6@NzX
zm~e!@qSvVMqsYu=r1;LWvSpl`%`3>gJ_VhSDQf)6Vx*-Lgh@mcPZnqdtad>m9<a(|
zE$_p#e}aU*O{BhUIZL)ABj#!dJz2Vsk|pop9b>)SPLrno4yGkjzL!d3;(hY_xt;5I
zNJl!Ms$5S2DJXAU&;cNyn)#Z0uwfJ0GCY!4_7lOZS*7k7#-IMgH@(#V)?EI~v4E;-
z#QlK*0`W*L*!Z=&_jrhDX@2|+1clJ9>VDp18HpBi0t!hX^woq=y*8S>>GgWKW%$uw
z+N2{V#$5y)9$cOl!m24JFyQ4t3{>0yA`>}?_c{4?So!mVUk`k<=ASUv9Jd;Rf+yy_
z8ep)jX>IZZ^;SbH;dE$ApMq6g4EOfPu1ZJRDlq#@Pokq?79?#V4~c8VICmM&^(olc
z5RZGu^m;t*3}l+xL%Z?wgkTc73a#cbvP0Hlqr+UTSCf9?t*c%m@qRB%%392sh3-K%
zQ7a6ft?LWpwu^(IrlXhJv!g)y(6*(R6yE*~L%@T<41!Rr*ocMynx_wh0hLz3f}3(k
z0EwmFv+LNRfp@*{HyuySzYM5dYg8FuIQiedQT_dmsk_prlmt*KnmLscAwu2d-!qYw
z(O{O-D~VkQ(+dg%Gfv%&-rX^<gy)}G1b;(&&pD;-aDZNr7CO!8PpaBgp*8McSQz5+
zAxkfnszjRd5DZv>0_-*!Gt>^h8UXiLS(L8j({VzW)v4!$pMu>Jw<s<2!2_TzsBIv^
znD?b>8=ktdgu7Ij;++YAK_{9R7$b=xQxsO0PZpA-u?xVdE0Q;IfM8uPpd7;VfE$om
z9pK?mD0qFfJvpf2Tc-M(qa!hd7(4~jCX5yga<zcL=DfJ-%^4PVeSd`j%#SFMFj~Dj
zqq)Q(rh-0N6h8X>NuRb{3+MKNMBtMiw99VW>66>(*|99rEZ`Mzi2edle_WKHVz30_
zB)!3#2*)d?RsDSlaf<7G3Z*?)OY9!?p9sjZ|3F_^Fmt_UZDNq+RGadbAI#Glu=#O@
z((NKIe6%I^f-EI9LT<dLQ`B*d8gqn*>$wce?w#E?Q8u!w#pGS*SI@H)7^%fMzx_J;
z>MQ80O9PpxpFUI1oTZkNNP^?)xcuz)1FsL%_P9UnB;b|@w0578PTuMrq1KmsGF0<k
zqpoV_#6M?OOZ|s71N|gxW$zP7EO^7D7k?#r@5A_#E;^Jn{n#Osm$e}ISO~rOPiy~l
zmAbk*W1XcALgiVo`tYUKU!9tEkqUS0^or>Zne6UYteB9ah(Giwz~@&k563|bwdJ%#
znf`Yp>)Xz<M&r+*$6+Fo_k|II<pi6pts~#BKqq;He%qtW!{QuU-ABl4%T2^M#bv9!
zVtrd|aKtVBIR=73#ZSrVm#Lq)f9<*9G`xpy^#_1R%l`akmlZZ|pOwe3mejF(!uVYW
zJ)AlRMYH?td>i^Iy`R1Xu{lSf5r^IeS~8JW&CFL*ua*av{=iN~0wWR(q**9BFy$2=
z;ZRn_^5i>qwGLC)^}DNZ+1v9js%M_Yun4gT4{Q)IcB;%|W{_Pv2t6hp2j1h>CD&5H
z+Ui&w<l~zTy4uOXX3^W8o6HcP(Yo?%_WPQPBZX*GP-o{H-lZkpe(@|5CRiQL(pP#c
zT5tDv3xK}P>KUG^;lA+Jm%)6*T#8(@qR598_R<UuDo;3}{W}azl>nvkUuP(sGHw!$
zXJuol36nZ}7tl)GM1PZC#HQm>kafxB%1@I9`d`3t9qHZ`;tTY-K3Fmt(pK*FLaT)=
zTZh?WS8l7QTOl)F)&R2udFcIti;mw_Gd$-zFNv|vZ|{X_?q731nHsjs-1ld!=`q<$
z!#XrQz{{MfLTAG;IHiDB6-#|T&711N4X%4g+!u!jryYYsj*ry!?Aj-2gJhPwE&rHe
zzJ^Y0G8@P@|MQJajVAI^<r;R<sS8WGt%Q8TGJ3eZK>0~a>%{f6k%emik;H1=<MD*1
zA4W_RJ}^?Cw><Xv>I=|`Y#!bh9TOE*#(6&ujjP!VEBJv8;)J~+36s^!MAG0+rORGZ
zWge=b2jO-iEx~BDg^qvsU+kQ!1$8sU2B+pdB!+9~xqsb5E|EuV)eR^22Vi2on~w75
zW&mg6FhLFM&uP&KE>!>bLd@y5{P@mLS@Od4w9K7zF_*pGal@0*e;tH>jPD9Q?gmZ9
zip>Ejt>wj?54OT#`%UH!GCQLS%SOVrjOb2LNoa3umy)c>RmH1!D)@eV3tAVz<`t+P
zLhrN(8f+iRKmhv^V4Ev7t%giqq!U+fkak<oUae*lN=`oy!29MNWb0EX;xMa^5Xn3B
za91A)j0=;t-o!aR?aV7Jf6O<j-<t6d=!jDq*PzCz%n=Aj#1N9tZaLx8*5gwEp-K1a
zPNz2*iu9!(76A@)YtnFQ&#A>7D+V-BJd&*(l1b8|e-kQPh2Q1HT`K-GeP>&VdqyX?
zPHFR>GD&>@{e2!Q&-_yMSldU?o)r7`&sk_WxeMxTLhFY^>jw&9%gO1_Yt7w?)Tpnn
z5plin@nvYA_K4B;(g49fqD3ZbiPd4m=!+=F<e57vg3=e?^^y2>3k9b)CVZ5(*hX|>
zhNEqFHHlj((fiZHC#l}x$djR~l@3K!tki4}dqnmS0$-?isBK{)knQEP(Oi!^vC?~4
zwmBEwEQqQ>bq^Y?0N3;EOFnW{%Lho!dX;(ZGzRB}7{d#Wpt>8OgnTmrox1K7r0J5p
zOQqiQsMV<U(`YmC?9<-{%_-Ytu;7^|%EucNT|5L^#cw$Myw8aY;0Elt1IB`8myIo4
z+AAwyjR)=#mwIj6Q}1<-WyJM*2@78zM%&iz7`$--;LuPlZoZ#jQ_ECkkxmkG|19nI
z;^^W<3Oj$pr%ydnk5CAmbPYYzM8vU{iK&V;hj7L3xgf<wq}axBZ~uk^P%zfWon{TB
zF=m!i7<@cjj7dyS$g`N7HgWH?@uOuCP6qem@#D_eea;(xOB`6^S7#_5W9N~l;)VOg
z8?F6`1N-V!nf@(%DvTymH5Fx&DGtix@i$UB2j5Ta5ekAWgx!f8#k~X;XY+0XD+YVA
z!pko$y{K)X$zKt6htui}3Xe#S@CiHRE-+F^Hy$=0Kz9#)X$+9zuDt2A9e3$=>Q<!q
zOfLv?SU)afFIDwzP;~VYr1Aat!n@5~WXA%9?xK$qv-OlxUt=Tn<hw3*;mBlvg03IN
zZl{s+UEZaN)U30nMU!)ZOvI6dxKGrhbK!80Z6MH$p<y}Xy6Uz5bb3sT$|!Rvm>SG|
zl$c`t8Hz3dNnlK*@V<S1nPVMI=5CM~b}Of<qusHFc83`>;_?8T2)pCKedQ!+laz{U
zq_?s15{iG?kjuPw3f%`N$ws<^K2p)H<K8z;AyF$M0q$TjE&SD~E5rc}()o+XQ}$$?
zk@udrBWb~(pyDp(x~@T5ia}F?Ul%ux5B94k@|ls`@LIoK<j3zZ^Ue}hau`@0xHrX2
zkH8$jHDZS9Sp;7NrXHp^#j2=lB1b_6>&GlN7w7ojIX-(wIEfy5QPEpoZLW*EayFVg
z*)yg?f2U$9u*<~!^Iz3)gScSa(bo9CS9_slU$^^Y0y+2Vo5VO=1wZyg(HR4M{eebI
z(hd3-C;+EW33MlgN8qzycLC?NcM~*WA=Yf11keIxN+TjdsUH@mEq$dy)t1<rUiNr+
zD=16tMH@U5lw`$5yr%n{q1?O>0d4g1oC6LNqW$+n=x6{w2=x${jrJb24Y#6I@fT4c
zQ{%^VH1aDNoo26+Zm*#8;T|jthH6#2i8A}WHw0Fa7;Sd`#hC4B6r=L&2LAy?+g@#X
zr9B2p7_r8ws)`kFp=R#oU_tU^=XaST8Q5A#Y#4)0e8(d%C9NQ=+y+>ul7@--R%&|z
zNl;H1?%*3ZUko`~gZlkpoViaPNUGX|iaNNIe@u+HYf3#&VSzY1;3}M|7_1tI&Q2>_
zdhurBgLv=BQ|iNwDpc=znt6G}eJAW>47v_|oj`<3-RIBrI7~y)RS~!e5YnlA)hqn?
z;J=p`m%0#TtL(gfL``YjaiMK>QYI}{Bo~@EZiSl`Kx>yYV_Gcd^NoSBRjR0~*E~!w
zvYEOvTmRSc1L!ven-No6AGw=YP@SJpD|!4?sL7M4W#@!H^T|@CtgsUAQG?*VpphfC
z7_3k2E0~4Fqu`pb@|0e$xC*`3u=x)dLsSEF&RTQW|C7%j>-^%`m1>7Q?sfa}T${oA
z-Dk}ab}2*#h+q|6%qoT0kSW-2$@_~G!HD(#8!iGc%t&<(gmGN}dIsYg&g+ib?Ydbz
zaY{|Dn^JGir9awS7w^DnerNv91!Ofl{KYW>Y?82ZhiLN@VOOlrxHRRn3@G1klU0O6
zI9>phOHrf2g&8x3UjE*P>8Z$5^^z($cXKG5Jk~1-N&#PPK^sP!zSYb8W|(^40(HET
z5~|n+)}rTx48vMjt!h{=HJ$O~ef08ylY`l7Nk}HvaZ#eAdQZ=S%TJ0y8*MC7x-Bz&
z&gg>8^!=yT(JF8v3py^~P#57T5n+1Vk`%9UaF?Nd0P$C&3b-D}_Uh_RUjI><l%s{b
z1V+!Po=#9xoeD2??k2`7d;Y4!1cdRbVO7>VqW1N$*uLkW?AGi#eeJTPP|}VA4R<n1
z@=a|ZIiF&t268~F5Ova>zrJ|;x0=F89r_@?Mpamln@Ep)Er^^-mwuZ1q0H+5ab1f5
zZ9^xp3;zw$QX~63NHGTt!;}3zwxCMZsxr@Ygf>y`IhT<boOPArVgCa@;K*fZ?ky)<
zOLzIUO+1iVb%P^vYU`23wdpt!|JQ2kQzWWP)mKr8D8FH-#fh@t9E3Otqn$BcP+*Z3
ztJ=%`04hj(pVoxe1iIvYKiB$seO^L7Z!ku!-~8O+;tEmK9yPz^v!o+^=(T(Nq$PU?
z1?<Vw(MwZzT5VNZ#;q%enEG$A7+>tDCi{-4gk%XM9Y&YVld&IjjigBDVLOT)F`U$6
zdg#XK6-=&}x-NnDMDEHv>x?@X%JWz722$*}pJg9q2;P~*ihfmY|JDUya+_1m1l2CD
zbb!K^5!o&%0CcbPNZ4>DitUBTTw{KMRmIBBpXOhMy6g##w`!&>MZHCRBc^(DstOtx
z6cmUL%nAJuRnt{3I6FS7aovdPe-*kzZAJe}$JqS=@_w`pdX>Gf6^o{%^F7eOhuo-K
zUhJ;U(f8cX%DZ;wgao(@OPtH}Hb@9(q}gZ0u1(l}=~wKrCtMMWj&HwBP%8drkqAW2
z9;u!2<BlyI>2BvOy}QXEvUbi_##d+U*1W=VF60OO4AOtMHl@J97&OQSHYQt>@2Y)-
zCAj={Sf*+6C(Uo}iE|mHS!AEFG^3y=H37<bKlZdSLkpKZLp55pfT#EtkH1SWzi*yr
zbN8t8yZJTdQ?&D@^K<&zp?AqM-Qy#hX@BloW_$;wEUE87XmzU_!e2wdMuq+8$WF>j
zInsGhKNUM7bpG^>i9*dUJi-6G59RgL;WV((`tl&)_8m#J75{GuJ`LNX<+@)r`)`Pu
z*_Fp>5DcV*OMAUW9oy=SPvPK^EJPfZbLZ~uQWlV$FTR`6dh%qosPDAH?!0LUC68Tx
zzcl(E))-v;J?S$KvTjw&)a-)R<T~ynnn|aVX0(F6OSmCCd0WMCMa2thCzrhjf<a?>
zNG2UJ_PuvCPF{2tHkNccz<glufhX2Jmnx}DyssW}^|ncATc<8<Jls$cI3&R6^6IT9
z?rwW2vylM2<q7ibz%Ijk6|aM<hY$NUxTT%GqSf#7Y7hVKQvVzw2g7|14a)H`klbM&
zoC2`op-aPTao4`j=RT^4u`smf^8x&1f@*r)Lf3)4c7@)~UxHXTS1<4J{uAfASg&w&
z!H|-Ze_dk7&!YCPl7t)Y2!ze8_4(l%1io|g3bu}VNgJh%D}GZ|-SKVs<o%oMegPXp
z9R@tW^iJ_^W%3&+9RgBIxAbClpZPiH8sC@BGiv6p-nPi*!LNEPMI|e6y|-Oq>(5SG
z*x!yOFOB3<@~iu}<pd5HGB|{}DRGiqbHc0Y-c0*Xv+gDn1v96Y_!D%}IXusxz~-6D
z({hioL6LKzCwgu?q&A4I%lIr>yN=|l{R(0=N}T#f&d^?uue|VK2?LL)X~#AT{yiK$
zo<`W8;yWeRXGMFHcsSSxf>%0kpY&yRv>lqQUn`dN6d73hsJKsYb!Ej*&6J5hS329i
z+#J$r^mW_eBpDY$$OzEM4_f*<(ciyfL5f%NwVt?0dB@{k;K{RdCWHL^#>x&Oq1uF+
zI|&zQ`{M{(<0ZH89Q0&YJJJzT?A6?q_a>eGMVM`0o=i)C>O2Vfe}P=ArwpB(?Zr72
zqX!zE-@^IA3lj<6o)_J0WzbvZ;K)ql12{BmIIZmk?MajQ<6E~Hn9&(et0)z9sBxcB
z@%)8LK6@k^Yv<Kfzc=RBlS#e%`Wg9VhBkwe-Am>j$yrx3fi(})3yQVO!-_^PH1R}7
zUr2oLRKwHAzR2i}ljk$We<pG)J%hA16&TrvjgA=OO41!ZEj`u%kAz|5I1E(g&VfDl
zcKQZUTIz`339|pTASDw);?~Z!V|#J8$dacZ4H^pELSidr#gA+aK-BW|Zm35T7D87)
z8e9w-BdV?y6+6?vQ>+==E3Mc6evN};fP+vU;X=a?G>N!~nhSd>bCy@z#H|ZibQ|<f
zkNnKxg{^&Q^bUGjy&>RA5LCoJh<Vohscs`ZZWx_^QKxv&u3?0SEz2pK{VvzFps*Fc
z2Dg!-6|Bk)K<(ehfv3ZqJQQQ9bx;0z9|WK3cNIV8I!dN7*0g0!?bYn<NHM7V>DUmX
zSusEE-tQKqmjCfCH&N<B`QxxE-(InyOa0g3GjE*>rnqS<uX*6^gEF1t&y{2%I(Cyb
z1$TTnT+DS+J<i>4;}3kEPo<sd1;z5^jwgp$SoF7-{c-W{+0LQS)h(G+X$LqGiOn4H
z8>>xlWz>`DTlMjGPE;p;vm)lSWfq?LZDq${M);CGQ>1L<k>Jx9UwZ)o*SE7bYs~Eu
z@$<c(3aUVzQ@s_H*I+w#n(-I@S{W~>fGI<}j`w8#2A)5yZq($v_b5y@f|3+9o`L)M
zg6LaAiC*oIY?i{B`x+tGq3=J_EpZzR!CTiXlRm+;2%<2~IPcu(0*BH-$<!6^-kAXq
zfGP2zI3-?sdvMccQ%?SbZN?oVPe<#Y%#cNjYk2NGD_2eLEAd&%z9O^g|D(vh*#9^S
zJ{aQYgAGm%tLn7P%)(+aDEHj2;)dtcSrxxhL_}?JPAR;8vnsBCEnnp$ExbbGPR*m+
z>K>D4%mY()&zHS3+Uo1#tK;h;GH^IGTgO+GH2-I_@3U{4?!~U$Qd@aFa6IdFqORLe
z%Gpbx;||o^Tuxam#Gz_6t*5u70QZ$s0o7S91nfEDUj>xK6?HK)Dqk<n@oi=K##qQh
zvkT%=Wf^{nm!hJ;Z*^6Pf_ZJ+98F&FS9370zEQYQoT<U@e(va5vGixVr?6=fHx?@(
zbvk)Gk8RfAzms87wP;qyR=)bJ&So3vZaFb-SCqbAIQ<zzvEKXCDuc#IL8!=M?801n
z8%&F@OGIR2$w7&D7!7R8Aw=6gaPT5~Ch<8k#DF@bi5J<@<|pH8emcYpS)Eg75*k>l
zCIxX3_X;q>h^886h2qV?JPp7$-TH<HVdFp-U(#g6?lotVm)0TFmYsf>;?~=LfZ->x
zW1%y53xX<Rk_rveO#X)QEVgxAIBl8qB)Byx2N4I?ol|?fKJJ5{3hMNjq=O#s)PVAF
z?Wy!xkEs1`1h7f0>YU#%7i`nNS7=5$p;?K-M?1;9v=GHjug3P{v=s^JRx3O7m=qZF
z9Ic8^7z1{S89hgO3%L%8!yy!6c&B?4fOWG{+8dPH2Am)^g#BH~h*bSCVuI=CvZ|gA
zLy19kl@<?zQy|Zd!OM7zURJaR);XjU6+gBCEECng0-hmU8|ta3U|9$ELR$5q45I1r
zt}pRQOFi$}<S`#o5s~`H7W^L?Gw!}=W|?chsw)@fK6`Mqd*d~D?YJL5U`PpGM?d-W
zcExt)fEa`BcE<wH(rt|Q`*76ljX9vVIneE!Rs3;r9t6;QxG=Txcr*wV97P$3SV!9=
zWbD+gVt2u3FXhzQ?qpBGv!b*&lGlOfh4twLZroM}@A(-OxSEVo5`5MC^t5ZaAh&{w
z*Y7Cu%BwjYN~`YD!<>A%ORs27f4tB@2=m4*M4#b@7lBGxc@4zA0WRVUnFXI!_NUB)
z_#2N2gtLDV0e7!(vFzpWx7Lc%>evxlGUyQtNI+hJ8G;#mpheT0x*vor;-KCo7P>G_
zy~`~>{^he%ryK~1JmB3*z5_Dx81}`F5`Lxfm4?A-GGtRlJH55^819{RijKw$Qx2HO
z>oDC7=1Z|do6`pBLW^}UKfDyxtf44ALw73e!%J?d+gq{4Cqo<hZoEq@PyZCgs87%e
zo9WgfK`8>_R74_2MvrER-@eRLdlE`|k2aY)%6k`yp0~KwjA|)y%3pFd*_K#0bEruq
zPT=@6rGEp-6q-&%f4k}1^!_9!$!>IG9aI$!ldBhuH;$c&BlUS0tYzdd;BO;K*WP2~
z;!@S=)ViNiXhr2wX{g+D!l11Udl|B7222Zz=<)y9|Aa%-bmtJsAoRS=22|~cO)vsR
z%p8tb%yV_fOdUHwvit?WRX_nJJqeO&oxeR_%=3>n(Xmm{aq+$-6?`>aXE6tng@`$~
zy}Su~SY>J^<9V^ekB5{z3PS+>7>d;5kj3@cE%t9rihi%JxBzbgS&O!c5mXn`&=>x%
zS%T~o%RlT&d1J-^+6>qOW*X`4+*MSF7!s#IE28(`@mSV{ewCe2PUI9ML(c~2<Fj7|
zTe)yrjJeR$1iaQz_`is<MG!B*b`DdC@IH0qyq6TA3IMiYj)CSkEIj5Gpya+~nzrUd
zL*vKL5mQT_HTe53*Ad5##&{(*9_ST)V~c~pcr#`Y1<aUR4vLT$6GdQv&Te5G8HK0I
zKi1o@U{L7vRC;cyM(MD`ReWP4?H+t|YSmMb1Lv?aiGv3tz=whg!!$H22NbXfz2C5F
zsIhGIP@!UI3nkj(dI5cKIY>uzhHt-8V*j|4ii;Du`MhHLER+&M1>@rV%3%&8QN`;O
zyvI4vj{tZf;8eZF^XRR-zsns8h0$RK$95Px5a|y)w-cE1!uFFyM;s$Ytf@#4)j2BX
zB05D#F`nbY7MlxgA3|aGw~;U~g&=-8S_(Y_9~e(iX$2$ArzH7!5mi=5U4-WHx;gPd
zy4C9MpRLVBwxGe>4D3F6xQh>E(BJPyBPp%e=EgS5qdy>P8<9;Yt7CTi_D!Xv7AwHO
z1b!GgiC&SN5aHCNiuPQ8f6`mvawLO>eIzPIVrU)?yIx!k?gUvUjXGNLIx!v-c?$g^
zQkVlwIj-_|%EoyDGdu;_H)5qL3pu7)fK5PQJ8Y>#=)*AqFlJ#d7hr|q|LFy!|204o
zUHEsNB$hXisrOZLJg5MoAlVFdR~`VZp!&xrL4SZ|_oi6SDJLl3A`C&|tw9The`~`a
zPEv>_i~Psq;-S{3U#%fN-Vt~Fcaj=w02&ekCAsdhR_<s0t94Mzv8UhhhO*~%rh9-+
zy$*AIz^@m$S_1V!l82zq6tExa;SK9F|8=N3dx>K(SEnW@y<#WAL0%mdwlQKG${vU5
zZe8A%ETZ&H=WlTs+CW^QQ>+o?@mW!GN$TrIu0QXTE{(}EJ56KIbEUu4WBJt<b7^qu
zz(SK1)(h<(bvl>ELQ~x&PLU{R0kea|r??sD^+FK1A$u8{I(Q8|n$nPlxkN8*h|!iV
zidSxfjeSAUeGklua2jvV`|o?=xcq^S@+F6Q=1fnpcg*o_cvu+c2IUOs9K)kDs=)Zj
z;e5tfKBx{R;a3*qrDT_asJ)~(LcGIbkmzqmr<6Zmv~!GswSm3>x|v)%N7+D+X!Y{k
z@9#wKEkk*w4K9^mTo-anO$uu&nJ|Xv^v2AlEH>i$ok|>{EZ>jy_g6AvvQgMe-v@-6
z2{caQuRIaj2oLy?={>K1D($`46puN8Haei6x=Pcb*UGPpRav&Cj3l@hfRfwa-#k2a
z)mVSvA!!PW8bVFGHg@8(>38=lOWri5rs1LAkPhbg4)m#e6BWn9N=SY)>i&{K0fy)%
z0)|v34pTrjAS6|i+tEItjs8Oqr9V#4M2AXdsO2k$fFjr=AIbowXA1EtG=0Q$APnSr
zPP2(F3xtZVFh*Mnkm~d~Th{Gpju+!#npJ&-SG$Sr&3Xjk`uC-Z1y{{-<J^iQs4ELo
z%y=RCZF<j7IRh9#<r=gSF#q{k=FvA`TD3P9jBua^M}JV6efwO#F$=peYVSkVqaEF<
z<IZF|M_f#0IFAl$OiS7&kbw~(LbF2x({h*oqmzOAyS4}D*y0`N-?ai~Icp<~PJw4a
zhkn5PSa%2avylWV5$?$I$O9E{*X&emKVb&vW)qA${3#;DbZral2eJ2!s@>c>6~8i{
zr=t{5$r$$5Tm%H<WjXj`sMe@`gm5&p#8vf|Hr(`n`c3Pkr2#!C&hw|WBFSYTfC<fV
z1OthOFl3>FYIty$O1n98q$0W&g9$;!Ls;yK|3Cjkw;3N1|M!put(zz*s@D`e^FQ=w
zet_UTFqI8DjKsu^*PSn}(7iCDHV1lb2XtgBtzp0D$UT)&7;k<@Z#Ck#{MWAl2%zs8
znos!eGaa}N^=@$N&<-v30Zb8Kc~l_IV&=1zR;9XG?R^7{cc-ib^-mWeeB}ntsW1~J
zK<m}CDl~^y$87cEm1Coxk{8BDWH9%|QLK}611i^@QUx(lPvBVUE=5J+d%G}&^k;uh
zxT2LjJRFCP2Xha@va%^<$`LCRTf#R=&|eEn>?KzP=;T;l;xBefw#-ZB6yj4szX^aH
zL;UgMZ%;GVa+;(rCFL%NsbxSrUo6*Lg_4mV2mSk>2DCO7!k>0Nhk&ohk@>Bi`dQ@C
z+Z7H5-Pn6075bunZ`ch~M%){Z0Aj!#qo+;(c=9vf5;c~*OE}V7{UFTb1<gzbjtI2L
z^n?}IZ_J*Akr}-#r$lH14)hR3fRWyUfNg|G;39z3_W+RkCTMjAc(-C*32FxK_n%AF
z5_UH>gyWBlz%h-JeU;rnC>|Gaty)h1&)_~RcpSgX9jZO%n{JNo#$jkZXbWK$((-L#
zb6M4wbL#Y0xnPpY&<@84X(SV;%qaDuID~Cr+jUs@NKNGyygS|DVDF|Upu%+21>^n#
zRy+ln!2dpt8t82bk-Y#fgWu>qqkq-^ZG!UhOv_o~Qje9f@vzL_#x@3@Z)6w~?;g>~
zjf(*YdWlo44@-a<{X2)c;BbON;Gx=bt<&&1e=e~R_|WT9Gw*my!J~NJtHVD-#)I$M
zdGooj64?v7w<-*gXa8<5JV=1%YS>JK0gW4xhU#D`wH7Cis4QLt1|vKQxFCK^nMN>L
z<BqOk`SITFc%|x<C}GTpec+Q`K#>B&e#9+FSP2W=^xJ5?B6<6N-xXFe)zsv)rQZzn
zsc6;JCii%zYL}ZfJec6~kRg7E1|S2I?bvy+W-0m>xK%cP9tYjHD_uJxt^WQ5)c=k1
zdP3e6$%|#sU)FL8x8glsk_1m{W56@I--M=87pM@v{*T{GGBJbMJm^WS?x`@)4B>;0
zmWt2aTS3YN5vU|?&N8?g(jyExI6eMkVFc@k9msU?s`la&Q@wCHy2L{SBngj4KRW_`
zBgrU`$^@na9LEQ)g0M}36c%Z1fTl+$^y+)Jid6z~u-&xjH`vMVapdzJa8P*FJS57}
z_~h+qVsWaz<)|xwCV^QnJ*S^RV*M#w$-1gXE>-u%4?RtonSk<xihSI0pAw|d19+&>
zeEQfqcvGd&Ay)QqlhE14L`9Pi(TIizuFw*p6N#0D=-v>o54S`3!GO)Nivq3P9<e15
z%SA&HEncu+a0CF~>oGeH9O7~Ps}{1tv=hq;v<}g6JHE3ZSC|)<21A<wA#w_m!jsO;
zdnk{@fy~A0ciC&HcWQ4ncJffSDH(Jhx*@ji*%5h;o(#c)Uxp4y$@Mfy5{QsyxsE(L
zRqn5s8Iqr-&tePH2fw6eGMrXVI=vmeW%djn+iWwbHi3Zjc>7PW7I*E*rXCN3BAMaC
z*x8VuF|!g90$2O^u(#F^&_8)eg~p^5SW8*j4l&64toCo(5-4NKo*J<a5o+n2t~|g2
zn%=y5^a^W;I@+p9_w7W<N8N|NBDUbx#M@!%WP54u$i4JEwI@m8RcnUsQu(vsj_=u9
z_&tuMN(SwvM~=^9YC^M*ICSphmg}(euk2tB5DVH%H?!wY(xVT4IB^LpU&&>!VF0%i
z@C2zd30gyrVYsh>*@v17YPyGm+LBN*5xIwKyp3Bf_pv&?Pm4K!B2Y{C8rxlvnkUGj
z3ECBmFi{#Al}+2n36Nr<=%;EODdx(e2Q+X*VbT2K|L0Mao2<SSsw>5CjFicvAwm_K
z^(Zf2$^i!$g!bv;3|#P`)tj)H8n|>ORxUSN;<00TqJP2eAegk}4g+vct^i+FthlYj
zShsETD?l+TI565UeGq=OpFkcmiuXKZ0iKx!^PC;sBYocbWi)=OlvU%@jkPf5jvzV%
zP~iZPT*dEac4zE}fS>vN0$f^#l0bCh9!G=VOFy7(9R8)`CMYcQT|@)|t*TZX6)GA)
z(WsYOL^E~dX9OYWFyYt*DA1X94+A%|d~>8fcNRxW6fYX$7bFSFv2~mJ@z;ayYrtCI
zuhH+`$k2o*LeaVFRSpp-Xc|!+V@e3>?Z|*ReKMHU2q_%He`Z;aP9NTP9W-?mL1-Da
zwKgw0B>z{U`TzY-yKNv}_p2)-yVBJy49Lrl{N}=w+6AR5z*&F2)cn~ozpM@gGJTBp
z>Ggz}8(9w#IfqWlRTUbyf_*Z-hbA^HfQP~7CS`&rz5#N7Vexg=*j;y^6&s&FKUmod
zfizvP>N8AV^R@~WdET+QJXcvy@ICnY9CXCm3oNIdlQOxtDzw+rtoR^$1^-dpRY58c
zR*0y1z<A$929z->!<B=q5cjBi^sc<=wdRq^iI<;W&w?726Y$Fr#pQ-<aYgBEDvV^G
zsN6OCtD}1TOSq6uHHpQ<Xs#|oWIVr}5Va#%@OWcU;RqZZ6SDrxbOvqT_`>$*=Yv*K
z<hU>&HIme~HjDJ(oV$j!!YOV88q+^O>vyzQ><-`)-{lm27<?aE869|Um-h!wY47wk
zBn}C2zjw#NFTv{H!KCjiKzet7EX2<d8yGH(E7yNFw&aIcDPNAQO^rbB!@@v5I0=pI
z5*{zy;>VQ;;`x13GiXYlk8EI<-VWA)A()^p)T_-p1+Yq-!?Vle&K<VT1}ekKhyFSH
zI$BuKX~u$em;dUu=^xy<o^O?02AS+N&EN?tqYvbQhz(3%YVup8>^EAJshmGwK+cf-
zd@$qLV17Ft5X7CK;ey3}o*8&o8O`ygCv00OqBVuTOhpfWB1v7Le{G7?`@wP~rI?+}
z_}mJ{NK7d#@Sl?nHTGkC%Y8T9<Bt7GWSfVk&s!G38?wO3HksbXF@C9k<*D%U+hmxH
z9`$gC$btgW{r>q>I;L%J<|f~T9Log@t%FW1QE$Mfee?+#yav)Fzzzoqxu05{`tZ=!
zduT~ypcbY4V$~A>I77332e-QB!NaGKpjPyjsawVdd@DU8sw0(cR5lLXYKh~iA~b^K
zObmOr$MP>UnNju~E?@s`xV?E3P29KY_j}p$N0vwO^Uj>I*^c=KM#4MH-}3>r*}kqI
zw-+B3j(^S)wmd$*;~Zyp0bOiomF4w;W5-kTvaSv+V6Qbp8ZvF;;UL~J{mi{_cKsnc
zmk>7d%);T-4YsH&7;Wg3zC54VJTFQW<t6PmpnYWNJ;9vkF%4X<PqwsiDYNFAK$z>@
z6r67RVqrVG8qq_bD=Q=6XJrr6D3-Ro*W4&Q;KXM5h5G_;(1uzj7ZU>W(<TIn^M|rr
zIIx5<kRk9Y$f};9WDhuSM-ud6*(DVY$Z&KyjO5+rA8;UOqL+39UYh>A6Y+(x&wM5U
z+i)lGsPla{qy1)i>aXn|=$6!nEk62~ktD}&=%9DY(6bqPVNzEhe^<L<_{Y1S4r6M;
z&Zy~CSN7iu+MK|;Wt&sq*C)1@9`UHl2P7TEE2;~R0nMq)haJlbk9=Q(kz43wviRyD
z+ut9wn!;GGXmQq$gTf?rQ{Rp6+|xE~FKs@*j<(!b>8Om>zUwsZ=wVlBY=!i?eU^bU
z;v<_{RN?dDYlUUCU*FlJBj}j{N*zh!kcZjoxm@xKomP9ly3N6^U$nS8*133D^nSAD
zJ#Sh4*F_`YlAhkTSRRJH07j>eY;ox(TyiEmS_c_Mps#O|FK5VfjoCaIoVuic$<|Yo
zGo5SfM``t%^YpM0E^}e+hPJF{Vfk#GaAn~xse!m=pqO<(KmGXitaEAaa3!;Y2HGN7
z?=#5L3`2p#anVnnyrPt6aq9mzIo}(W;J>~+)ll8JDV^uw1Udw5dkl02tltDM02O#6
zo9+sK>wWz)=*nVge};)efL9?y57YkL7V0ch*}A&v&u$EcaA#wg03W(lXb9lF7&I}%
zSYKbi)4>Rr<DC^}5Y8^YsWsvBTjGoCiLp9^?<={TjCWv#_VRCqkYT6fJ51y;)IxAW
vZ|#>}^4~KE{p3GI9{M5n|Mt%(og1R!O8O2LRk~%-?@yo9Igzh%`S$+-mUzbb

literal 0
HcmV?d00001

diff --git a/evaluations/scripts/def_rate.R b/evaluations/scripts/def_rate.R
new file mode 100644
index 0000000..6a4c417
--- /dev/null
+++ b/evaluations/scripts/def_rate.R
@@ -0,0 +1,328 @@
+
+
+
+def_rate <- function(data,t0,period_length,process='all'){
+  
+  # get name of column for start year
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  # filter down to pixels with undisturbed forest (JRC class 1)
+  
+  data_filtered <- data[data[,t0_index]==1,] 
+  
+  # count 1s at t0 in project and match
+  
+  proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() 
+  cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() 
+  
+  # identify where there have been changes during the evaluation period
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered %>% 
+    select(paste0('luc_',tend))
+  
+  # choosing processes to measure
+  
+  if(process=='def_only'){
+    
+    response <- case_when(
+      luc_tend==1 ~ 0,
+      luc_tend==2 ~ 0,
+      luc_tend==3 ~ 1,
+      luc_tend==4 ~ 0,
+      luc_tend>4 ~ 0)
+    
+  } else if(process=='deg_only'){
+      
+      response <- case_when(
+        luc_tend==1 ~ 0,
+        luc_tend==2 ~ 1,
+        luc_tend==3 ~ 0,
+        luc_tend==4 ~ 0,
+        luc_tend>4 ~ 0)
+      
+    } else {
+    
+    response <- case_when(
+      luc_tend==1 ~ 0,
+      luc_tend==2 ~ 1,
+      luc_tend==3 ~ 1,
+      luc_tend==4 ~ 1,
+      luc_tend>4 ~ 0)
+    
+    }
+  
+  
+  data_filtered$response <- response
+  
+  # count up number of pixels where there have been changes for each type
+  
+  proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% 
+    nrow()
+  cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
+  
+  proj_rate <- 100*(proj_changes/proj_1s)/period_length
+  cf_rate <- 100*(cf_changes/cf_1s)/period_length
+  
+  # make df
+  
+  df <- data.frame(matrix(ncol=2,nrow=1))
+  colnames(df) <- c('Project','Counterfactual')
+  df[1,1] <- proj_rate
+  df[1,2] <- cf_rate
+  
+  return(df)
+  
+}
+
+
+
+def_rate_seperate <- function(data,t0,period_length){
+  
+  # get name of column for start year
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  # filter down to pixels with undisturbed forest (JRC class 1)
+  
+  data_filtered <- data[data[,t0_index]==1,] 
+  
+  # count 1s at t0 in project and cf
+  
+  proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() 
+  cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() 
+  
+  # identify where there have been changes during the evaluation period
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered %>% 
+    select(paste0('luc_',tend))
+  
+  # measuring responses
+  
+  def_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 1,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  deg_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 1,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  ref_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 1,
+    luc_tend>4 ~ 0)
+  
+  data_filtered$def_response <- def_response
+  data_filtered$deg_response <- deg_response
+  data_filtered$ref_response <- ref_response
+  
+  # count up number of pixels where there have been changes for each type
+  
+  proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% 
+    nrow()
+  cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% 
+    nrow()
+  cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% 
+    nrow()
+  cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
+  
+  proj_def <- 100*(proj_def_changes/proj_1s)/period_length
+  cf_def <- 100*(cf_def_changes/cf_1s)/period_length
+  
+  proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length
+  cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length
+  
+  proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length
+  cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length
+  
+  # adding the degraded-to-deforested transition
+  
+  data_filtered_2 <- data[data[,t0_index]==2,]
+  
+  # count 2s at t0 in project and cf
+  
+  proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow()
+  cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() 
+  
+  # identify where there have been changes during the evaluation period
+  
+  luc_tend_2 <- data_filtered_2 %>% 
+    select(paste0('luc_',tend))
+  
+  def_response_2 <- case_when(
+    luc_tend_2==1 ~ 0,
+    luc_tend_2==2 ~ 0,
+    luc_tend_2==3 ~ 1,
+    luc_tend_2==4 ~ 0,
+    luc_tend_2>4 ~ 0)
+  
+  data_filtered_2$def_response_2 <- def_response_2
+  
+  proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% 
+    nrow()
+  cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% 
+    nrow()
+  
+  proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length
+  cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length
+  
+  # make df
+  
+  df <- data.frame(matrix(ncol=4,nrow=8))
+  
+  colnames(df) <- c('Process','Forest type','Location','Rate (%/year)')
+  
+  df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2))
+  df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2))
+  df[3] <- c(rep(c('Project','Counterfactual'),times=4))
+  df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref)
+  
+  return(df)
+  
+}
+
+get_prop_class <- function(data,t0,class){
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  data_filtered <- data[data[,t0_index]==class,]
+  
+  total_count <- data %>% nrow()
+  class_count <- data_filtered %>% nrow()
+  prop <- class_count/total_count
+  
+  return(prop)
+  
+}
+
+
+def_rate_single <- function(data,t0,period_length){
+  
+  # get name of column for start year
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  # filter down to pixels with undisturbed forest (JRC class 1)
+  
+  data_filtered <- data[data[,t0_index]==1,] 
+  
+  # count 1s at t0 in project and cf
+  
+  no_1s <- nrow(data_filtered)
+  
+  # identify where there have been changes during the evaluation period
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered %>% 
+    select(paste0('luc_',tend))
+  
+  # measuring responses
+  
+  def_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 1,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  deg_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 1,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 0,
+    luc_tend>4 ~ 0)
+  
+  ref_response <- case_when(
+    luc_tend==1 ~ 0,
+    luc_tend==2 ~ 0,
+    luc_tend==3 ~ 0,
+    luc_tend==4 ~ 1,
+    luc_tend>4 ~ 0)
+  
+  data_filtered$def_response <- def_response
+  data_filtered$deg_response <- deg_response
+  data_filtered$ref_response <- ref_response
+  
+  # count up number of pixels where there have been changes for each type
+  
+  def_changes <- data_filtered %>% filter(def_response==1) %>% 
+    nrow()
+  
+  deg_changes <- data_filtered %>% filter(deg_response==1) %>% 
+    nrow()
+  
+  ref_changes <- data_filtered %>% filter(ref_response==1) %>% 
+    nrow()
+  
+  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
+  
+  def <- 100*(def_changes/no_1s)/period_length
+  
+  deg <- 100*(deg_changes/no_1s)/period_length
+  
+  ref <- 100*(ref_changes/no_1s)/period_length
+  
+  # adding the degraded-to-deforested transition
+  
+  data_filtered_2 <- data[data[,t0_index]==2,]
+  
+  # count 2s at t0 in project and cf
+  
+  no_2s <- data_filtered_2 %>% nrow()
+  
+  # identify where there have been changes during the evaluation period
+  
+  luc_tend_2 <- data_filtered_2 %>% 
+    select(paste0('luc_',tend))
+  
+  def_response_2 <- case_when(
+    luc_tend_2==1 ~ 0,
+    luc_tend_2==2 ~ 0,
+    luc_tend_2==3 ~ 1,
+    luc_tend_2==4 ~ 0,
+    luc_tend_2>4 ~ 0)
+  
+  data_filtered_2$def_response_2 <- def_response_2
+  
+  def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% 
+    nrow()
+
+  deg_to_def <- 100*(def_changes_2/no_2s)/period_length
+  
+  # make df
+  
+  df <- data.frame(matrix(ncol=3,nrow=4))
+  
+  colnames(df) <- c('Process','Forest type','Rate (%/year)')
+  
+  df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation')
+  df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest')
+  df[3] <- c(deg,def,deg_to_def,ref)
+  
+  return(df)
+  
+}
\ No newline at end of file
diff --git a/evaluations/scripts/land_cover_timeseries.R b/evaluations/scripts/land_cover_timeseries.R
new file mode 100644
index 0000000..6490bf1
--- /dev/null
+++ b/evaluations/scripts/land_cover_timeseries.R
@@ -0,0 +1,111 @@
+
+get_luc_timeseries <- function(data,t0,tend,type='both'){
+  
+  years_list <- seq(t0,tend)
+
+  if(type=='both'){
+
+    df <- data.frame(matrix(ncol=4,nrow=8*length(years_list)))
+  
+    colnames(df) <- c('year','type','luc','percentage')
+    
+    counter <- 1
+    
+    for(year in years_list) {
+      
+      for(i in seq (1:4)) {
+        
+        for(type_value in c('Project','Counterfactual')) {
+          
+          total <- data %>% filter(type == type_value) %>% nrow()
+          
+          no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow()
+          
+          prop <- no_class_i/total
+          
+          df[counter,1] <- year
+          df[counter,2] <- type_value
+          df[counter,3] <- i
+          df[counter,4] <- prop*100
+          
+          counter <- counter+1
+        
+        }
+        
+      }
+      
+    }
+
+  } else if(type=='single'){
+
+    df <- data.frame(matrix(ncol=3,nrow=4*length(years_list)))
+  
+    colnames(df) <- c('year','luc','percentage')
+    
+    counter <- 1
+    
+    for(year in years_list) {
+      
+      for(i in seq (1:4)) {
+          
+          total <- data %>% nrow()
+          
+          no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow()
+          
+          prop <- no_class_i/total
+          
+          df[counter,1] <- year
+          df[counter,2] <- i
+          df[counter,3] <- prop*100
+          
+          counter <- counter+1
+        
+        }
+      
+    }
+
+  }
+
+  return(drop_na(df))
+  
+}
+
+luc_class1_uncertainty <- function(data,t0,tend) {
+
+  years_list <- seq(t0-10,tend)
+  
+  df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list)))
+  
+  colnames(df) <- c('year','type','pair','percent_class1')
+  
+  counter <- 1
+  
+  for(year in years_list) {
+    
+    for(type_value in c('Project','Counterfactual')) {
+      
+      for(pair_id in unique(data$pair)) {
+        
+        total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow()
+        
+        no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow()
+        
+        prop <- no_class_i/total
+        
+        df[counter,1] <- year
+        df[counter,2] <- type_value
+        df[counter,3] <- pair_id
+        df[counter,4] <- prop*100
+        
+        counter <- counter+1
+        
+      }
+      
+    }
+    
+  }
+  
+  return(drop_na(df))
+
+}
+
diff --git a/evaluations/scripts/plot_matchingvars.R b/evaluations/scripts/plot_matchingvars.R
new file mode 100644
index 0000000..ec47f01
--- /dev/null
+++ b/evaluations/scripts/plot_matchingvars.R
@@ -0,0 +1,42 @@
+plot_matching_variables <- function(data, ex_ante = 'false') {
+  
+  cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc'))
+  cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages
+  cont_data <- melt(cont_data)
+  
+  # rename labels
+  cont_data$variable <- factor(cont_data$variable, 
+                  levels = c('access', 'cpc0_u', 'cpc0_d', 
+                  'slope', 'cpc5_u', 'cpc5_d', 
+                  'elevation', 'cpc10_u', 'cpc10_d'))
+  
+  levels(cont_data$variable) <- c('Inaccessibility',
+                                  'Forest~cover~t[0]',
+                                  'Deforestation~t[0]',
+                                  'Slope',
+                                  'Forest~cover~t[-5]',
+                                  'Deforestation~t[-5]',
+                                  'Elevation',
+                                  'Forest~cover~t[-10]',
+                                  'Deforestation~t[-10]')
+  
+  # determine labels based on ex_ante
+  if (ex_ante == 'false') {
+    plot_labels <- c('Counterfactual', 'Project')
+  } else if (ex_ante == 'true') {
+    plot_labels <- c('Matched points', 'Project')}
+  
+  # plot
+  matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) +
+    geom_density(adjust = 10, size = 1) +
+    facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) +
+    ylab('Density') +
+    scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) +
+    theme_classic() +
+    theme(legend.title = element_blank(),
+          axis.title.x = element_blank(),
+          axis.text.y = element_blank(),
+          axis.ticks.y = element_blank())
+  
+  return(matchingvars)
+}
\ No newline at end of file
diff --git a/evaluations/scripts/plot_transitions.R b/evaluations/scripts/plot_transitions.R
new file mode 100644
index 0000000..2931a60
--- /dev/null
+++ b/evaluations/scripts/plot_transitions.R
@@ -0,0 +1,63 @@
+library(ggspatial)
+
+plot_transitions <- function(data,t0,period_length,shapefile){
+  
+  # count number of 1s at project start
+  
+  t0_index <- grep(paste0('luc_',t0),colnames(data))
+  
+  data_filtered <- data[data[,t0_index]==1,]
+  
+  # identify where there have been changes
+  
+  tend <- t0 + period_length
+  
+  luc_tend <- data_filtered[[paste0('luc_', tend)]]
+  
+  response <- case_when(
+    luc_tend==1 ~ NA,
+    luc_tend==2 ~ 'deg',
+    luc_tend==3 ~ 'def',
+    luc_tend==4 ~ 'ref',
+    luc_tend>4 ~ NA)
+  
+  data_filtered$response <- as.factor(response)
+  data_filtered <- data_filtered %>% filter(!is.na(response))
+  
+  # adding deg --> def transition
+  
+  # count number of 2s at project start
+  
+  data_filtered_2s <- data[data[,t0_index]==2,]
+
+  # identify where there have been changes
+  
+  luc_tend <- data_filtered_2s[[paste0('luc_', tend)]]
+  
+  response <- case_when(
+    luc_tend==1 ~ NA,
+    luc_tend==2 ~ NA,
+    luc_tend==3 ~ 'deg_to_def',
+    luc_tend==4 ~ NA,
+    luc_tend>4 ~ NA)
+  
+  data_filtered_2s$response <- as.factor(response)
+  data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response))
+  
+  combined_dat <- bind_rows(data_filtered, data_filtered_2s)
+  combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref'))
+  
+  # plotting 
+  
+  plot <- combined_dat %>% 
+    filter(response != 0) %>% 
+    ggplot(aes(x=lng,y=lat,colour=response))+
+    geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+
+    geom_point(alpha=0.5,size=0.5)+
+    scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+
+    annotation_scale(text_cex = 1.3)+
+    theme_void()
+  
+  return(plot)
+  
+}
diff --git a/evaluations/scripts/std_mean_diff.R b/evaluations/scripts/std_mean_diff.R
new file mode 100644
index 0000000..63d81ba
--- /dev/null
+++ b/evaluations/scripts/std_mean_diff.R
@@ -0,0 +1,57 @@
+
+std_mean_diff <- function(path_to_pairs) {
+  
+  # clean data
+
+  files_full_raw <- list.files(path_to_pairs,
+                               pattern='*.parquet',full.names=T,recursive=F)
+  files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
+  files_short_raw <- list.files(path=path_to_pairs,
+                                pattern='*.parquet',full.names=F,recursive=F)
+  files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
+  
+  # initialise dfs
+  
+  vars <- c(colnames(read_parquet(files_full[1])),'pair')
+  df <- data.frame(matrix(ncol=length(vars),nrow=0))
+  colnames(df) <- vars
+  
+  for(j in 1:length(files_full)){
+    
+    # read in all parquet files for a given project
+    
+    f <- data.frame(read_parquet(files_full[j]))
+    
+    # append data to bottom of df
+    
+    df <- bind_rows(df,f)
+    
+  }
+
+  # calculate smd
+
+  smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE)
+  
+  variables <- c('cpc10_d','cpc5_d','cpc0_d',
+                 'cpc10_u','cpc5_u','cpc0_u',
+                 'access','slope','elevation')
+    
+    for (var in variables) {
+      k_var <- df[[paste0("k_", var)]]
+      s_var <- df[[paste0("s_", var)]]
+      
+      k_mean <- mean(k_var, na.rm = TRUE)
+      s_mean <- mean(s_var, na.rm = TRUE)
+      k_sd <- sd(k_var, na.rm = TRUE)
+      s_sd <- sd(s_var, na.rm = TRUE)
+      
+      pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2)
+      smd <- (k_mean - s_mean) / pooled_sd
+      
+      smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE))
+    }
+    
+  return(smd_results)
+}
+  
+  
\ No newline at end of file

From 435ebfbcba15eb2b0f93261ebf03d806ffd201c4 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Wed, 28 Aug 2024 09:46:28 +0000
Subject: [PATCH 16/19] Deleted unnecessary stuff

---
 scripts/ex_ante_evaluation_template.Rmd | 1001 -----------------------
 scripts/scripts/def_rate.R              |  328 --------
 scripts/scripts/land_cover_timeseries.R |  111 ---
 scripts/scripts/plot_matchingvars.R     |   42 -
 scripts/scripts/plot_transitions.R      |   63 --
 scripts/scripts/std_mean_diff.R         |   57 --
 6 files changed, 1602 deletions(-)
 delete mode 100644 scripts/ex_ante_evaluation_template.Rmd
 delete mode 100644 scripts/scripts/def_rate.R
 delete mode 100644 scripts/scripts/land_cover_timeseries.R
 delete mode 100644 scripts/scripts/plot_matchingvars.R
 delete mode 100644 scripts/scripts/plot_transitions.R
 delete mode 100644 scripts/scripts/std_mean_diff.R

diff --git a/scripts/ex_ante_evaluation_template.Rmd b/scripts/ex_ante_evaluation_template.Rmd
deleted file mode 100644
index be95ce1..0000000
--- a/scripts/ex_ante_evaluation_template.Rmd
+++ /dev/null
@@ -1,1001 +0,0 @@
----
-output:
-  html_document:
-    theme: spacelab
-    df_print: paged
-    toc: yes
-    toc_float: yes
-  pdf_document:
-    toc: yes
-params:
-  proj: null
-  t0: null
-  input_dir: null
-  output_dir: null
-  fullname: null
-  country_path: null
-  shapefile_path: null
-  pairs_path: null
-  carbon_density_path: null
-  branch: null
----
-
-```{r include=FALSE}
-
-# TO KNIT THIS NOTEBOOK, RUN THE FOLLOWING LINE IN A SHELL TERMINAL:
-
-# Rscript -e "rmarkdown::render(input='~/evaluations/R/ex_ante_evaluation_template.Rmd', output_file='~/evaluations/R/example_project.html', params=list(proj='example_project', t0=2010, ...))"
-
-# Mandatory args: proj, t0
-# Optional args: input dir, output dir, fullname, country_path, shapefile path, pairs_path, carbon density path
-# You must either specify input dir and output dir OR provide absolute paths to each of the objects required
-
-```
-
-```{r settings, include=FALSE}
-knitr::opts_chunk$set(
-  echo = FALSE, warning=FALSE,message=FALSE)
-
-library(tidyverse)
-library(sf)
-library(reshape2)
-library(maps)
-library(mapdata)
-library(ggspatial)
-library(arrow)
-library(rnaturalearth)
-library(rnaturalearthdata)
-library(rnaturalearthhires)
-library(stringr)
-library(jsonlite)
-library(countrycode)
-library(scales)
-library(here)
-library(patchwork)
-library(knitr)
-library(kableExtra)
-
-```
-
-```{r read_inputs, echo=FALSE,warning=FALSE, message=FALSE}
-
-project_name <- params$proj
-start_year <- as.numeric(params$t0)
-branch <- params$branch
-
-```
-
----
-title: "`r paste0('4C Ex-Ante Evaluation: ', if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() })`"
-subtitle: "`r format(Sys.Date(), "%B %Y")`"
----
-
-```{r set_paths, echo=FALSE,warning=FALSE, message=FALSE}
-
-# get output format
-
-output_format <- ifelse(knitr::is_latex_output(), "latex", "html")
-
-# get script path
-
-script_path <- here('scripts')
-
-# get explainer diagram path 
-
-diagram_path <- here('methods_diagram.png')
-
-# get data path
-
-if (!is.null(params$output_dir)) {
-  data_path <- paste0(params$output_dir,'/',project_name)
-}
-
-# get path to pairs
-
-if (!is.null(params$pairs_path)) {
-  pairs_path <- params$pairs_path
-} else { pairs_path <- file.path(data_path,'pairs') }
-
-# read shapefile
-
-if (!is.null(params$input_dir)) {
-  input_dir <- params$input_dir 
-}
-
-if (!is.null(params$shapefile_path)) {
-  shapefile_path <- params$shapefile_path
-} else { shapefile_path <- paste0(input_dir,'/',project_name,'.geojson') }
-shapefile <- read_sf(shapefile_path)
-
-# read carbon density
-
-if (!is.null(params$carbon_density_path)) {
-  carbon_density_path <- params$carbon_density_path
-} else { carbon_density_path <- list.files(data_path,pattern='carbon',full.names=T)[1] }
-carbon_density <- read.csv(carbon_density_path)
-
-# read country path
-
-if (!is.null(params$country_path)) {
-  country_path <- params$country_path
-} else { country_path <- list.files(path = data_path, pattern = "country-list", full.names = TRUE)}
-
-```
-
-```{r read_pairs, echo=FALSE}
-  
-# get filenames and filter for matched points
-
-files_full_raw <- list.files(pairs_path,
-                          pattern='*.parquet',full.names=T,recursive=F)
-files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
-files_short_raw <- list.files(path=pairs_path,
-                          pattern='*.parquet',full.names=F,recursive=F)
-files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
-
-# initialise dfs
-
-vars <- c(colnames(read_parquet(files_full[1])),'pair')
-paired_data_raw <- data.frame(matrix(ncol = length(vars), nrow = 0)) %>%
-  setNames(vars) %>%
-  mutate(
-    pair = as.factor(pair),
-    k_trt = as.factor(k_trt),
-    s_trt = as.factor(s_trt)
-  )
-
-for(j in 1:length(files_full)){
-  
-  # read parquet file
-  
-  f <- data.frame(read_parquet(files_full[j]),check.names = FALSE)
-  
-  # add identity column
-  
-  f$pair <- as.factor(c(replicate(nrow(f),str_remove(files_short[j], "\\.parquet$"))))
-  
-  # append data to bottom of df
-  
-  paired_data_raw <- bind_rows(paired_data_raw,f)
-  
-}
-
-# generate separate datasets for project and counterfactual 
-
-project <- paired_data_raw %>% dplyr::select(starts_with('k'),pair)
-cf <- paired_data_raw %>% dplyr::select(starts_with('s'),pair)
-
-# create project-counterfactual merged dataset
-
-colnames(cf) <- colnames(project)
-pair_merged <- bind_rows(project,cf)
-names(pair_merged) <- str_sub(names(pair_merged),3)
-names(pair_merged)[names(pair_merged) == "ir"] <- "pair"
-
-# add type column and remove excess cols
-
-data <- pair_merged %>%
-        mutate(type=c(replicate(nrow(project),'Project'),replicate(nrow(cf),'Counterfactual'))) %>% 
-        select(-c(contains('trt'),ID))
-
-```
-
-```{r get_shapefile_area, echo=FALSE}
-  
-project_area_ha <- as.numeric(sum(st_area(st_make_valid(shapefile)))/10000)
-
-```
-
-```{r get_country_names}
-
-# define function for extracting country names 
-
-get_country_names <- function(country_codes_path) {
-    codes <- as.character(fromJSON(country_codes_path))
-    country_names <- countrycode(codes, 'iso2c', 'country.name')
-    country_names[country_names == 'Congo - Kinshasa'] <- 'Democratic Republic of the Congo'
-    return(country_names)
-  }
-
-# get country names
-
-country_vec <- get_country_names(country_path)
-
-  # define function for printing the country names if there are multiple
-
-  if (length(country_vec) > 1) {
-    country_string <- paste(country_vec[-length(country_vec)], collapse = ", ")
-    country_string <- paste(country_string, "and", country_vec[length(country_vec)])
-  } else {
-    country_string <- country_vec[1]
-  }
-
-
-```
-
-\
-
-# Introduction
-
-This Report has been prepared by researchers at the Cambridge Centre for Carbon Credits (4C) and has been funded by a charitable grant from the Tezos Foundation.  4C utilises innovative, evidence-based approaches to examine the scientific basis of nature-based carbon conservation initiatives and, insodoing, provides a way for different stakeholders to assess the quality of carbon credits (ex post and/or ex ante).  
-
-**Disclaimer:  Nothing in this Report constitutes formal advice or recommendations, an endorsement of proposed action, or intention to collaborate; instead, it sets out the details of an evaluation using a method which is still under development. The Report is considered complete as of the publication date shown, though methods are likely to change in future.**
-
-\
-
-# About the project
-
-`r if (!is.null(params$fullname)) { params$fullname } else { gsub('_', ' ', project_name) %>% stringr::str_to_title() }` is located in `r country_string`. The proposed area measures `r format(project_area_ha, big.mark = ",", scientific = FALSE, digits = 3)` hectares. 
-
-For the purposes of this evaluation, we have set the proposed start date to `r start_year`.
-
-```{r echo=FALSE}
-
-# ________________ FILL IN PROJECT-SPECIFIC INFORMATION __________________
-
-# Replace this chunk with a short narrative about the context of the project and why it was chosen for evaluation by 4C. Include any other details relevant to the interpretation of this document.
-
-```
-
-
-
-\
-
-# Introduction to the 4C method
-
-*Our method for forecasting ex-ante additionality remains under development.*
-
-The 4C approach to forecasting additionality involves identifying places that experienced similar deforestation levels in the past as the project area does today. We start by analyzing forest cover changes in the project area between 10 years ago and the present day. Using pixel-matching techniques, we then identify comparable places outside the project that experienced similar deforestation trends between 20 and 10 years ago (the *matching period*). This allows us to match the deforestation trajectory of the project with that of the matched pixels, but offset in time. This concept is illustrated by the left-hand diagonal arrow in the figure below.
-
-We can consider the matched pixels as a historical representation of the project as it is today. By examining deforestation in the matched pixels over the subsequent 10 years (the *baseline period*), we estimate a *baseline prediction* — the deforestation expected in the project area under the counterfactual (business-as-usual) scenario. This rate is then projected forward over the next 10 years, as illustrated by the right-hand diagonal arrow in the figure below. We convert the deforestation rate to carbon dioxide emissions using best estimates of carbon density.
-
-```{r, echo=FALSE, fig.align='center', fig.width=6}
-
-knitr::include_graphics(diagram_path)
-
-```
-
-
-Making predictions about future deforestation is challenging, and there are multiple sources of uncertainty at play. These include: the quantification of carbon, the choice of matching pixels, the effect of leakage and impermanence, future political changes and market forces. We are constantly improving our method in order to minimise these uncertainties. Due to the inherent uncertainty associated with ex-ante (before-the-fact) predictions, carbon credits should only ever be quantified and issued ex-post (after the fact).
-
-More information about 4C's approach to impact evaluation can be found below.
-
-[Quantify Earth, an interactive platform which demonstrates our pixel-matching approach in action](https://quantify.earth/#/globe/1)
-
-[4C explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence)
-
-[Cambridge Carbon Impact: an evaluation of some existing carbon projects](https://www.cambridge.org/engage/coe/article-details/6409c345cc600523a3e778ae)
-
-[Our paper on the social value of impermanent carbon credits](https://www.nature.com/articles/s41558-023-01815-0)
-
-[The PACT methodology for ex-post evaluations](https://www.cambridge.org/engage/coe/article-details/657c8b819138d23161bb055f)
-
-\
-
-
-# Methods
-
-The following sections will detail how we arrived at a forecast of future deforestation and the potential to generate additionality by reducing this deforestation. This includes the location and quality of the matched points, the deforestation rates in each set of points, and the carbon density values used to convert these deforestation rates to carbon emissions reductions.
-
-\
-
-### Location of matched points
-
-We sampled `r format((nrow(data)/2), big.mark = ",", scientific = FALSE, digits = 3)` points within the project and an equal number of matching points from outside of the project. We used these matched points to make a prediction of the counterfactual scenario for deforestation.
-
-Below we show the location of the matching points (shown in blue) relative to the project (shown in red), both at the country and project scale.
-
-`r if(nrow(data) > 20000){"Note that, for clarity and computational efficiency, we show only 10% of the points."}`
-
-```{r match_locations, warning=FALSE, echo=FALSE, message=FALSE}
-
-# downsample no. of points by 90%
-
-if(nrow(data) > 20000){
-  data_forplot <- data %>% sample_frac(0.1)
-} else {
-  data_forplot <- data
-}
-
-# plot location of matching points
-
-country_map <- ne_countries(country = country_vec, returnclass = "sf", scale= "large")
-
-# transform crs
-
-shapefile <- st_transform(shapefile, st_crs(country_map))
-
-ggplot(data=country_map) + 
-  geom_sf(colour=NA,fill='grey80')+
-  geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + 
-  scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+
-  coord_sf()+
-  theme_void()+
-  annotation_scale(text_cex=1.5,location='bl')+
-  theme(legend.title = element_blank(),
-        text=element_text(size=16),
-        legend.position='none')
-
-xmin <- filter(data, type=='Project') %>% select(lng) %>% min()
-xmax <- filter(data, type=='Project') %>% select(lng) %>% max()
-ymin <- filter(data, type=='Project') %>% select(lat) %>% min()
-ymax <- filter(data, type=='Project') %>% select(lat) %>% max()
-
-ggplot(data=country_map) + 
-  geom_sf(colour=NA,fill='grey80')+
-  geom_point(data=data_forplot,mapping=aes(x=lng,y=lat,colour=type),alpha=0.5,size=0.5) + 
-  scale_color_manual(values=c('blue','red'),labels=c('Matched points','Project'))+
-  coord_sf(xlim=c(xmin-0.5,xmax+0.5),ylim=c(ymin-0.5,ymax+0.5))+
-  theme_void()+
-  annotation_scale(text_cex=1.5,location='bl')+
-  theme(legend.title = element_blank(),
-        text=element_text(size=16),
-        legend.position='none')
-
-```
-
-### Quality of matches
-
-Here we show how well the matching points align with the project in terms of our matching variables. Correspondence between the project (shown in red in the plots below) and the matched points (shown in blue) indicates that the the matched points are composed of places that are similar to the project in terms of the drivers of deforestation and are expected to exhibit similar deforestation trends.
-
--   Inaccessibility (motorized travel time to healthcare, minutes)
-
--   Slope ($^\circ$)
-
--   Elevation (meters)
-
--   Forest cover at t0 (start year, %)
-
--   Deforestation at t0 (%)
-
--   Forest cover at t-5 (5 years prior to start year, %)
-
--   Deforestation at t-5 (%)
-
--   Forest cover at t-10 (10 years prior to start year, %)
-
--   Deforestation at t-10 (%)
-
-Forest cover and deforestation are measured as the proportion of pixels within a 1km radius of a particular point which are classified either as undisturbed forest (in the case of forest cover) or deforested (in the case of deforestation) by the JRC Tropical Moist Forest dataset.
-
-More information about the datasets we use can be found below:
-
-[MAP access to healthcare](https://malariaatlas.org/project-resources/accessibility-to-healthcare/)
-
-[SRTM elevation data](https://www.earthdata.nasa.gov/sensors/srtm)
-
-[JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF)
-
-\
-
-```{r match_quality, warning=FALSE,message=FALSE, echo=FALSE}
-
-# plot matches
-
-source(file.path(script_path,'plot_matchingvars.R'))
-
-plot_matching_variables(data,ex_ante='true')
-
-```
-
-\
-
-### Standardised mean differences
-
-We can quantify the similarity in matching variables in terms of their standardised mean difference (SMD). The SMD allows us to quantify the similarity between the project and the matched points in a way that is comparable across variables. 
-
-In the below plot, the blue points indicate the SMD value (i.e. the amount of difference between the project and matched points, in standard deviations) for each variable. Values further from zero indicate a larger difference. The grey dotted lines at (-0.25, +0.25) represent the boundary within which our SMDs would ideally fall in order for the project and matched points to be considered well-matched.
-
-\
-
-```{r smd}
-
-std_mean_diff <- function(pairs_path) {
-  
-  # clean data
-
-  files_full_raw <- list.files(pairs_path,
-                               pattern='*.parquet',full.names=T,recursive=F)
-  files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
-  files_short_raw <- list.files(path=pairs_path,
-                                pattern='*.parquet',full.names=F,recursive=F)
-  files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
-  
-  # initialise dfs
-  
-  vars <- c(colnames(read_parquet(files_full[1])),'pair')
-  df <- data.frame(matrix(ncol=length(vars),nrow=0))  %>% 
-  setNames(vars) %>% 
-  mutate(k_trt=as.factor(k_trt),
-         s_trt=as.factor(s_trt))
-  
-  for(j in 1:length(files_full)){
-    
-    # read in all parquet files for a given project
-    
-    f <- data.frame(read_parquet(files_full[j])) %>% 
-         mutate(k_trt=as.factor(k_trt),
-                s_trt=as.factor(s_trt))
-    
-    # append data to bottom of df
-    
-    df <- bind_rows(df,f)
-    
-  }
-
-  # calculate smd
-
-  smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE)
-  
-  variables <- c('cpc10_d','cpc5_d','cpc0_d',
-                 'cpc10_u','cpc5_u','cpc0_u',
-                 'access','slope','elevation')
-    
-    for (var in variables) {
-      k_var <- df[[paste0("k_", var)]]
-      s_var <- df[[paste0("s_", var)]]
-      
-      k_mean <- mean(k_var, na.rm = TRUE)
-      s_mean <- mean(s_var, na.rm = TRUE)
-      k_sd <- sd(k_var, na.rm = TRUE)
-      s_sd <- sd(s_var, na.rm = TRUE)
-      
-      pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2)
-      smd <- (k_mean - s_mean) / pooled_sd
-      
-      smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE))
-    }
-    
-  return(smd_results)
-}
-
-results <- std_mean_diff(pairs_path)
-
-# changing sign for interpretation
-
-results$smd <- (-1)*results$smd
-
-# changing order of variables
-
-variables <- c('cpc10_d','cpc5_d','cpc0_d',
-                 'cpc10_u','cpc5_u','cpc0_u',
-                 'access','slope','elevation')
-
-results$variable <- factor(results$variable, levels=variables)
-
-# plotting
-
- ggplot(results,aes(x=smd,y=variable))+
-  #geom_boxplot(outlier.shape=NA,colour='blue')+
-  geom_point(colour='blue',fill='blue',alpha=0.3,size=4)+
-  geom_vline(xintercept=0)+
-  geom_vline(xintercept=0.25,lty=2,colour='grey30')+
-  geom_vline(xintercept=-0.25,lty=2,colour='grey30')+
-  scale_y_discrete(labels=c(bquote(Deforestation~t[-10]~("%")),
-                            bquote(Deforestation~t[-5]~("%")),
-                            bquote(Deforestation~t[0]~("%")),
-                            bquote(Forest~cover~t[-10]~("%")),
-                            bquote(Forest~cover~t[-5]~("%")),
-                            bquote(Forest~cover~t[0]~("%")),
-                            'Inaccessibility (mins)',paste0('Slope (',intToUtf8(176),')'),'Elevation (m)'))+
-  xlab('Standardised mean difference')+
-  xlim(-1,1)+
-  theme_classic()+
-  theme(axis.title.y=element_blank(),
-        legend.title=element_blank(),
-        legend.box.background=element_rect(),
-        legend.position='none',
-        text=element_text(size=14),
-        axis.text.y=element_text(size=14))
-
-
-```
-
-\
-
-### Deforestation within the project
-
-Now focusing on deforestation within the project, we can examine the spatial distribution of the following 4 processes pertinent to forest carbon stock changes:
-
--   Undisturbed forest to degraded forest 
-
--   Degraded forest to deforested land 
-
--   Undisturbed forest to deforested land 
-
--   Undisturbed land to reforested land (which indicates that regrowth occurred after a deforestation event)
-
-\
-
-These transitions are shown in the plot below for the 10-year period between `r start_year-10` and `r start_year`. They are overlaid on the project area which is shown in grey. If a transition is not shown, it did not occur in the period examined.
-
-Data from [JRC tropical moist forest dataset](https://forobs.jrc.ec.europa.eu/TMF).
-
-\
-
-```{r deforestation_spatial_distribution, echo=FALSE, warning=FALSE}
-
-# plot deforestation within project
-
-source(file.path(script_path,'plot_transitions.R'))
-
-proj_coords <- data %>% 
-  filter(type=='Project') %>% 
-  select(lat,lng)
-
-proj_input_defplot <- data %>% 
-  filter(type=='Project') %>% 
-  select(contains('luc')) %>% 
-  setNames(paste0("luc_", (start_year-10):start_year)) %>%
-  cbind(proj_coords)
-
-proj_input_defplot <- proj_input_defplot[, !is.na(colnames(proj_input_defplot))]
-
-plot_transitions(data=proj_input_defplot,t0=start_year-10,period_length=10,shapefile=shapefile)
-
-```
-
-\
-
-### Land cover changes within project and matched pixels
-
-In the below plots, we show the changes in land classes over time for both the project (red) and the matched points (blue).
-
-Note the following:
-
--   The vertical grey dashed line represents the start year of the project (`r start_year`). The timings shown on the x-axis are relative to this start year.
-
--   As explained in the 'Methods' section, the matched points are offset in time relative to the project by 10 years. This means that all changes observed in the matched points happened 10 years prior to the equivalent time point in the project. This time offset allows us to use the last 10 years in the matched points as a prediction of the next 10 years for the project.
-
--   Solid lines represent ex-post observed changes, whereas the dotted line represents the prediction for the future of the project. 
-
-```{r make_inputs, echo=FALSE}
-
-# preparing inputs
-
-proj_input <- data %>% 
-  filter(type=='Project') %>% 
-  select(contains('luc')) %>% 
-  setNames(paste0("luc_", (start_year-10):start_year))
-proj_input <- proj_input[, !is.na(colnames(proj_input))]
-
-
-cf_input <- data %>% 
-  filter(type=='Counterfactual') %>% 
-  select(contains('luc')) %>% 
-  setNames(paste0("luc_", (start_year-20):(start_year))) %>%
-  select(where(~ all(!is.na(.))))
-
-```
-
-```{r luc_timeseries_all, echo=FALSE}
-
-source(file.path(script_path,'land_cover_timeseries.R'))
-
-# getting results
-
-proj_results <- get_luc_timeseries(proj_input,t0=start_year-10,tend=start_year,type='single') %>% 
-  mutate(type='Project')
-
-cf_results <- get_luc_timeseries(cf_input,t0=start_year-20,tend=start_year,type='single') %>% 
-  mutate(type='Counterfactual')
-
-# combining results
-
-results <- bind_rows(proj_results, cf_results)
-
-```
-
-Showing the trend for undisturbed, degraded, deforested and regrowth in turn:
-
-```{r undisturbed_timeseries, fig.width=8,fig.height=13}
-
-# add prediction from the matched pixels:
-
-prediction <- cf_results %>% 
-                filter(year >= (start_year-10))  %>% 
-                mutate(type='Project',
-                year=year+10)
-
-results <- bind_rows(results,prediction)
-
-# make a custom function for plotting the results
-
-plot_timeseries <- function(luc_value, title_str) {
-  
-  #remove gap between solid and dotted project line
-  percent_val <- results %>%
-  filter(year == start_year 
-          & type == "Project"
-          & luc == luc_value) %>%
-  pull(percentage)
-
-  # df wrangling
-  extended_results <- results %>%
-    mutate(
-      luc = as.numeric(luc),
-      year = as.numeric(year),
-      line_type = ifelse(type == "Project" & year > start_year, "dotted", "solid"),
-      type = case_when(
-      type == "Counterfactual" ~ "Matched points",
-      TRUE ~ type
-    )
-    ) %>%
-    bind_rows(data.frame(
-      year = start_year,
-      luc = luc_value,   
-      percentage = percent_val,
-      type = 'Project',
-      line_type = 'dotted'
-    ))
-
-   extended_results %>%
-    filter(luc == luc_value) %>%
-    ggplot(aes(x = year, y = percentage, color = type, linetype = line_type)) +
-    geom_line(linewidth = 1.5) +
-    geom_vline(xintercept = start_year, linetype = 2, color = 'grey30') +
-    #geom_vline(xintercept = start_year-10, linetype = 2, color = 'grey30') +
-    scale_colour_manual(name = 'Location',
-                        values = c('red','blue'),
-                        breaks = c('Project', 'Matched points'),
-                        labels = c('Project', 'Matched points'))+
-    xlab('Year') +
-    ylab('% cover') +
-    ggtitle(title_str) +
-    guides(linetype = "none") +
-    theme_classic() +
-    scale_linetype_manual(values = c("solid" = "solid", "dotted" = "dotted"))+
-    facet_wrap(~type)+
-    xlim(start_year-20,start_year+10)
-
-}
-
-plot_1 <- plot_timeseries(luc_value=1, title_str='Undisturbed forest') + theme(legend.position='none',axis.title.x = element_blank())
-plot_2 <- plot_timeseries(luc_value=2, title_str='Degraded forest') + theme(legend.position='none', axis.title.x = element_blank())
-plot_3 <- plot_timeseries(luc_value=3, title_str='Deforested land') + theme(legend.position='none', axis.title.x = element_blank())
-plot_4 <- plot_timeseries(luc_value=4, title_str='Regrowth') + theme(legend.position='none', axis.title.x = element_text(size=14))
-
-plot_1 + plot_2 + plot_3 + plot_4 + plot_layout(ncol=1)
-
-```
-
-### Deforestation rates in the matched points during the baseline period
-
-```{r proportions_undisturbed_degraded, echo=FALSE}
-
-# obtaining the area of undisturbed and degraded forest at t0, for use later
-
-source(file.path(script_path,'def_rate.R'))
-
-prop_und <- get_prop_class(data=proj_input,t0=start_year-10,class=1)
-prop_deg <- get_prop_class(data=proj_input,t0=start_year-10,class=2)
-
-```
-
-Here we present the deforestation rates observed in the matched pixels over the past 10 years (the baseline period). 
-
-Forest loss transitions can be broken down into the following processes: 
-
--   degradation of undisturbed forest
-
--   deforestation of undisturbed forest
-
--   deforestation of degraded forest
-
--   regrowth of undisturbed forest (implies previous deforestation)
-
-We calculate the rate at which these processes occur in the matched pixels using the following method:
-
-1. Calculate the percentage of matched pixels which have undergone one of the above processes (according to the JRC classification) during the baseline period. 
-2. Divide this percentage by 10 to give an annual rate of change, in %/year, relative to the amount of (undisturbed or degraded) forest present at the beginning of the project.
-3. Multiply this rate by the area of the (undisturbed or degraded) forest  10 years prior to the project start to give an annual rate, in hectares per year.
-
-The amounts of forest in the project area 10 years prior to project start are as follows:
-
--   Undisturbed forest: `r format(100*prop_und, big.mark = ",", scientific = FALSE, digits = 3)`%
-
--   Degraded forest: `r format(100*prop_deg, big.mark = ",", scientific = FALSE, digits = 3)`%
-
-The rates are given below.
-
-```{r rate_of_forest_loss_ha, echo=FALSE}
-
-source(file.path(script_path,'def_rate.R'))
-
-df_rate_percent <- def_rate_single(data=cf_input,t0=start_year-10,period_length=10)
-
-df_rate_ha <- df_rate_percent
-
-df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Undisturbed forest',3]/100)*project_area_ha*prop_und
-
-df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3] <- (df_rate_ha[df_rate_ha$`Forest type`=='Disturbed forest',3]/100)*project_area_ha*prop_deg
-
-knitr::kable(
-  df_rate_ha %>% 
-  rename('Rate (ha/year)' = 3) %>%
-  mutate(across(where(is.numeric), ~ sprintf("%.2f", .)))
-)
-
-
-```
-
-\
-
-### Carbon stock changes in the matched points during the baseline period
-
-Here we present the carbon density calculations for this project.
-
-In order to convert land cover changes to carbon emissions, we use regional aboveground (AGB) carbon density values generated through NASA GEDI data. 
-
-More information on GEDI data is available [here](https://www.earthdata.nasa.gov/sensors/gedi).
-
-Note that, in calculating carbon stock changes, we assume the following:
-
-- Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997)
-
-- Deadwod biomass is 11% of AGB (based on IPCC 2003)
-
-- Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011)
-
-
-\
-```{r additionality_forecast}
-
-baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8))
-colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea')
-luc_counter <- 1
-row_counter <- 1
-
-carbon_density <- filter(carbon_density, land.use.class %in% c(1:6))
-
-for(i in carbon_density$land.use.class){
-
-  for(j in c("Start","End")) {
-
-    # get agb
-
-    agb <- carbon_density$carbon.density[luc_counter]
-
-    # get other values
-
-    bgb <- agb*0.2
-    dw <- agb*0.11
-    total <- agb + bgb + dw
-    #total_co2 <- total*0.47 # we're doing this step later
-
-    # get area of class i
-
-    if (j == "Start") {
-      area_of_forest <- get_prop_class(cf_input,t0=start_year-10,class=i)*project_area_ha
-    } else if (j == "End") {
-      area_of_forest <- get_prop_class(cf_input,t0=start_year,class=i)*project_area_ha }
-
-    # multiply total by area
-    
-    total_byarea <- total*area_of_forest
-
-    # adding to df
-
-    baseline_stocks[row_counter,1] <- j
-    baseline_stocks[row_counter,2] <- i
-    baseline_stocks[row_counter,3] <- agb
-    baseline_stocks[row_counter,4] <- bgb
-    baseline_stocks[row_counter,5] <- dw
-    baseline_stocks[row_counter,6] <- total
-    baseline_stocks[row_counter,7] <- area_of_forest
-    baseline_stocks[row_counter,8] <- total_byarea
-
-    row_counter <- row_counter+1
-
-  }
-
-  # advance counter
-
-  luc_counter <- luc_counter + 1
-
-}
-
-# formatting bits
-
-baseline_stocks_format <- baseline_stocks
-baseline_stocks_format <- baseline_stocks_format %>% filter(time == 'Start')
-baseline_stocks_format <- baseline_stocks_format[2:6]
-
-colnames(baseline_stocks_format) <- c(
-                                 'Land use class',
-                                 'AGB density (t C / ha)',
-                                 'BGB density (t C / ha)',
-                                 'Deadwood biomass density (t C / ha)', 
-                                 'Total biomass density (t C / ha)', 
-                                 'Total biomass (t C)')
-
-
-# renaming classes
-
-baseline_stocks_format <- baseline_stocks_format %>%
-  mutate(`Land use class` = case_when(
-    `Land use class` == "1" ~ 'Undisturbed',
-    `Land use class` == "2" ~ 'Degraded',
-    `Land use class` == "3" ~ 'Deforested',
-    `Land use class` == "4" ~ 'Reforested',
-    `Land use class` == "5" ~ 'Water',
-    `Land use class` == "6" ~ 'Other',
-    TRUE ~ as.character(`Land use class`)  # ensure no unexpected values
-  ))
-
-
-baseline_stocks_format[2:6] <- lapply(baseline_stocks_format[, 2:6], function(x) {
-  if (is.numeric(x)) comma(x) else x
-})
-
-# Print only carbon calculations at this stage
-
-baseline_stocks_format %>% 
-  drop_na() %>% 
-  mutate(across(where(is.numeric), ~ sprintf("%.2f", .))) %>% 
-  kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>%
-  kable_styling(bootstrap_options = "striped")
-
-```
-
-# Results: baseline rate of carbon emissions
-
-In this section we present the annual rate of carbon loss due to deforestation in the matched points during the baseline period. We can take this to be a prediction of the counterfactual scenario for the project (the *baseline*). 
-
-First we present the carbon stock changes observed in the matched points during the baseline period:
-
-```{r results}
-
-baseline_stock_changes <- baseline_stocks[c(1:2,7:8)]
-
-# reshape
-
-reshaped_data <- baseline_stock_changes %>%
-  mutate(luc = as.character(luc)) %>%
-  group_by(luc) %>%
-  summarize(
-    area_start = area[time == "Start"],
-    area_end = area[time == "End"],
-    area_diff = area_start - area_end,
-    c_start = total_byarea[time == "Start"],
-    c_end = total_byarea[time == "End"],
-    c_diff = c_start - c_end,
-    .groups = 'drop'
-  )
-
-# get totals
-
-total_row <- reshaped_data %>%
-  summarize(
-    luc = "Total",
-    area_start = sum(area_start, na.rm = TRUE),
-    area_end = sum(area_end, na.rm = TRUE),
-    area_diff = sum(area_diff, na.rm = TRUE),
-    c_start = sum(c_start, na.rm = TRUE),
-    c_end = sum(c_end, na.rm = TRUE),
-    c_diff = sum(c_diff, na.rm = TRUE)
-  ) %>% 
-  mutate(luc = as.character(luc))
-
-baseline_stock_changes <- bind_rows(reshaped_data, total_row)
-
-# add in conversion to CO2
-
-baseline_stock_changes <- baseline_stock_changes  %>% 
-    mutate(co2_diff = 0.47*c_diff)
-
-# formatting bits
-
-baseline_stock_changes_format <- baseline_stock_changes %>%
-  mutate(across(where(is.numeric), ~ comma(.))) %>%
-  mutate(across(where(is.numeric), ~ sprintf("%.2f", as.numeric(.))))
-
-if (knitr::is_html_output()) {
-  colnames(baseline_stock_changes_format) <- c(
-                                 'Land use class',
-                                 'Area at start (ha)',
-                                 'Area at end (ha)',
-                                 'Area loss (ha)',
-                                 'Biomass at start (t)',
-                                 'Biomass at end (t)',
-                                 'Biomass loss (t)'
-                                 'CO<sub>2</sub> loss (t)')
-} else if (knitr::is_latex_output()) {
-  colnames(baseline_stock_changes_format) <- c(
-                                 'Land use class',
-                                 'Area at start (ha)',
-                                 'Area at end (ha)',
-                                 'Area loss (ha)',
-                                 'Biomass at start (t)',
-                                 'Biomass at end (t)',
-                                 'Biomass loss (t)'
-                                 'CO$_{2}$ loss (t)')
-}
-
-baseline_stock_changes_format <- baseline_stock_changes_format %>%
-  mutate(`Land use class` = case_when(
-    `Land use class` == "1" ~ 'Undisturbed',
-    `Land use class` == "2" ~ 'Degraded',
-    `Land use class` == "3" ~ 'Deforested',
-    `Land use class` == "4" ~ 'Reforested',
-    `Land use class` == "5" ~ 'Water',
-    `Land use class` == "6" ~ 'Other',
-    TRUE ~ as.character(`Land use class`)  # ensure no unexpected values
-  ))
-
-baseline_stock_changes_format[nrow(baseline_stock_changes_format), 1] <- 'Total'
-
-filtered_data <- baseline_stock_changes_format %>%
-  drop_na() %>%
-  mutate(across(where(is.numeric), ~ sprintf("%.2f", .)))
-
-last_row_index <- nrow(filtered_data)
-
-filtered_data %>%
-  kable(escape = FALSE, format = ifelse(knitr::is_latex_output(), "latex", "html")) %>%
-  kable_styling(bootstrap_options = "striped") %>%
-  row_spec(last_row_index, bold = TRUE) 
-
-```
-
-```{r results_summary}
-
-# find the difference
-
-delta_c <- as.numeric(baseline_stock_changes[nrow(baseline_stock_changes), ncol(baseline_stock_changes)])
-delta_c_annual <- delta_c/10
-
-```
-
-To calculate the baseline annual rate of carbon emissions, we sum the the differences in carbon stocks between the start and end of the baseline period, then divide the total by the length of the baseline period (10 years).
-
-**For this project, the baseline annual rate of carbon emissions, in tonnes of carbon dioxide per year, is `r format(delta_c_annual, big.mark = ",", scientific = FALSE, digits = 3)`.** This should be interpreted as the maximum estimated number of carbon credits that could be generated if the project mitigated 100% of the baseline level of deforestation, assuming this is confirmed by ex post observations. We present alternative mitigation scenarios below.
-
-### Expected additionality under different mitigation scenarios
-
-Additionality depends not only on baseline deforestation rate but the ability of the project to mitigate that deforestation. Therefore, we present the expected additionality under different mitigation scenarios (100% to 10% mitigation success). The 100% mitigation scenario occurs where the project is able to mitigate all of the deforestation that would have occurred under the counterfactual scenario. This scenario is unlikely to be realistic, but gives a sense of the deforestation risk faced by each area. It is more likely that only a proportion of this baseline deforestation is mitigated - for example, under the 50% mitigation scenario, the project is able to reduce the deforestation rate by half. The higher the proportion of the baseline deforestation that is mitigated, the greater the additionality of a project.
-
-Note that we present the raw additionality, without accounting for leakage and impermenance (discussed later).
-
-We are in the process of producing confidence intervals that reflect the uncertainty associated with the baseline, which will be added to future revisions of this document.
-
-```{r}
-
-scenarios <- data.frame(matrix(ncol=2,nrow=5))
-scenarios[1] <- c("10%","25%","50%","75%","100%")
-scenarios[2] <- delta_c_annual*c(0.1,0.25,0.5,0.75,1)
-
-if (knitr::is_html_output()) {
-  colnames(scenarios) <- c('Scenario', 
-                            'Additionality (t CO<sub>2</sub> / year)')
-} else if (knitr::is_latex_output()) {
-  colnames(scenarios) <- c('Scenario', 
-                            'Additionality (t CO$_{2}$ / year)')
-}
-
-scenarios <- scenarios %>% 
-  mutate(across(where(is.numeric), comma))
-
-knitr::kable(
-  scenarios
-)
-
-```
-
-\
-
-# Accounting for leakage and impermanence
-
-Leakage and permanence are two factors that affect the long-term emissions reductions contributed by a project but **have not been included in this evaluation**.
-
-**Leakage** is the displacement of activities which deplete forest carbon stocks from the project to other areas due to the implementation of the project. In the worst case scenario, 100% of these activities are displaced, effectively nullifying the additionality of the project. Leakage is likely to be lower if the processes leading deforestation and degradation do not result in high yielding land uses, or if the carbon densities within the project are high compared with those in other areas where these activities are taking place. Leakage can also be reduced by interventions which improve yields in areas already under production. We can provide guidance on how this could be achieved.
-
-**Impermanence** occurs when the additionality generated by a project is reversed. Additional carbon stocks in forests are inherently vulnerable to these reversals. The estimates given in this evaluation assume that all carbon stored is permanent, but in reality this is unlikely to be the case. In future revisions of this document we aim to include indicative estimates of the equivalent permanence (the relative value of a impermanent credit relative to a permanent credit) for this project.
-
-You can find out more about our plans to deal with leakage and permanence in our [explainer page](https://4c.cst.cam.ac.uk/about/additionality-leakage-and-permanence).
-
----
-
-### Reproducibility
-
-This report was generated on `r format(Sys.Date(), "%B %d, %Y")` using the `r branch` of the [tmf-implementation code](https://github.com/quantifyearth/tmf-implementation).
diff --git a/scripts/scripts/def_rate.R b/scripts/scripts/def_rate.R
deleted file mode 100644
index 6a4c417..0000000
--- a/scripts/scripts/def_rate.R
+++ /dev/null
@@ -1,328 +0,0 @@
-
-
-
-def_rate <- function(data,t0,period_length,process='all'){
-  
-  # get name of column for start year
-  
-  t0_index <- grep(paste0('luc_',t0),colnames(data))
-  
-  # filter down to pixels with undisturbed forest (JRC class 1)
-  
-  data_filtered <- data[data[,t0_index]==1,] 
-  
-  # count 1s at t0 in project and match
-  
-  proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() 
-  cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() 
-  
-  # identify where there have been changes during the evaluation period
-  
-  tend <- t0 + period_length
-  
-  luc_tend <- data_filtered %>% 
-    select(paste0('luc_',tend))
-  
-  # choosing processes to measure
-  
-  if(process=='def_only'){
-    
-    response <- case_when(
-      luc_tend==1 ~ 0,
-      luc_tend==2 ~ 0,
-      luc_tend==3 ~ 1,
-      luc_tend==4 ~ 0,
-      luc_tend>4 ~ 0)
-    
-  } else if(process=='deg_only'){
-      
-      response <- case_when(
-        luc_tend==1 ~ 0,
-        luc_tend==2 ~ 1,
-        luc_tend==3 ~ 0,
-        luc_tend==4 ~ 0,
-        luc_tend>4 ~ 0)
-      
-    } else {
-    
-    response <- case_when(
-      luc_tend==1 ~ 0,
-      luc_tend==2 ~ 1,
-      luc_tend==3 ~ 1,
-      luc_tend==4 ~ 1,
-      luc_tend>4 ~ 0)
-    
-    }
-  
-  
-  data_filtered$response <- response
-  
-  # count up number of pixels where there have been changes for each type
-  
-  proj_changes <- data_filtered %>% filter(response==1 & type=='Project') %>% 
-    nrow()
-  cf_changes <- data_filtered %>% filter(response==1 & type=='Counterfactual') %>% 
-    nrow()
-  
-  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
-  
-  proj_rate <- 100*(proj_changes/proj_1s)/period_length
-  cf_rate <- 100*(cf_changes/cf_1s)/period_length
-  
-  # make df
-  
-  df <- data.frame(matrix(ncol=2,nrow=1))
-  colnames(df) <- c('Project','Counterfactual')
-  df[1,1] <- proj_rate
-  df[1,2] <- cf_rate
-  
-  return(df)
-  
-}
-
-
-
-def_rate_seperate <- function(data,t0,period_length){
-  
-  # get name of column for start year
-  
-  t0_index <- grep(paste0('luc_',t0),colnames(data))
-  
-  # filter down to pixels with undisturbed forest (JRC class 1)
-  
-  data_filtered <- data[data[,t0_index]==1,] 
-  
-  # count 1s at t0 in project and cf
-  
-  proj_1s <- data_filtered %>% filter(type=='Project') %>% nrow() 
-  cf_1s <- data_filtered %>% filter(type=='Counterfactual') %>% nrow() 
-  
-  # identify where there have been changes during the evaluation period
-  
-  tend <- t0 + period_length
-  
-  luc_tend <- data_filtered %>% 
-    select(paste0('luc_',tend))
-  
-  # measuring responses
-  
-  def_response <- case_when(
-    luc_tend==1 ~ 0,
-    luc_tend==2 ~ 0,
-    luc_tend==3 ~ 1,
-    luc_tend==4 ~ 0,
-    luc_tend>4 ~ 0)
-  
-  deg_response <- case_when(
-    luc_tend==1 ~ 0,
-    luc_tend==2 ~ 1,
-    luc_tend==3 ~ 0,
-    luc_tend==4 ~ 0,
-    luc_tend>4 ~ 0)
-  
-  ref_response <- case_when(
-    luc_tend==1 ~ 0,
-    luc_tend==2 ~ 0,
-    luc_tend==3 ~ 0,
-    luc_tend==4 ~ 1,
-    luc_tend>4 ~ 0)
-  
-  data_filtered$def_response <- def_response
-  data_filtered$deg_response <- deg_response
-  data_filtered$ref_response <- ref_response
-  
-  # count up number of pixels where there have been changes for each type
-  
-  proj_def_changes <- data_filtered %>% filter(def_response==1 & type=='Project') %>% 
-    nrow()
-  cf_def_changes <- data_filtered %>% filter(def_response==1 & type=='Counterfactual') %>% 
-    nrow()
-  
-  proj_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Project') %>% 
-    nrow()
-  cf_deg_changes <- data_filtered %>% filter(deg_response==1 & type=='Counterfactual') %>% 
-    nrow()
-  
-  proj_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Project') %>% 
-    nrow()
-  cf_ref_changes <- data_filtered %>% filter(ref_response==1 & type=='Counterfactual') %>% 
-    nrow()
-  
-  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
-  
-  proj_def <- 100*(proj_def_changes/proj_1s)/period_length
-  cf_def <- 100*(cf_def_changes/cf_1s)/period_length
-  
-  proj_deg <- 100*(proj_deg_changes/proj_1s)/period_length
-  cf_deg <- 100*(cf_deg_changes/cf_1s)/period_length
-  
-  proj_ref <- 100*(proj_ref_changes/proj_1s)/period_length
-  cf_ref <- 100*(cf_ref_changes/cf_1s)/period_length
-  
-  # adding the degraded-to-deforested transition
-  
-  data_filtered_2 <- data[data[,t0_index]==2,]
-  
-  # count 2s at t0 in project and cf
-  
-  proj_2s <- data_filtered_2 %>% filter(type=='Project') %>% nrow()
-  cf_2s <- data_filtered_2 %>% filter(type=='Counterfactual') %>% nrow() 
-  
-  # identify where there have been changes during the evaluation period
-  
-  luc_tend_2 <- data_filtered_2 %>% 
-    select(paste0('luc_',tend))
-  
-  def_response_2 <- case_when(
-    luc_tend_2==1 ~ 0,
-    luc_tend_2==2 ~ 0,
-    luc_tend_2==3 ~ 1,
-    luc_tend_2==4 ~ 0,
-    luc_tend_2>4 ~ 0)
-  
-  data_filtered_2$def_response_2 <- def_response_2
-  
-  proj_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Project') %>% 
-    nrow()
-  cf_def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1 & type=='Counterfactual') %>% 
-    nrow()
-  
-  proj_deg_to_def <- 100*(proj_def_changes_2/proj_2s)/period_length
-  cf_deg_to_def <- 100*(cf_def_changes_2/cf_2s)/period_length
-  
-  # make df
-  
-  df <- data.frame(matrix(ncol=4,nrow=8))
-  
-  colnames(df) <- c('Process','Forest type','Location','Rate (%/year)')
-  
-  df[1] <- c(rep(c('Degradation','Deforestation','Deforestation','Reforestation'),each=2))
-  df[2] <- c(rep(c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest'),each=2))
-  df[3] <- c(rep(c('Project','Counterfactual'),times=4))
-  df[4] <- c(proj_deg,cf_deg,proj_def,cf_def,proj_deg_to_def,cf_deg_to_def,proj_ref,cf_ref)
-  
-  return(df)
-  
-}
-
-get_prop_class <- function(data,t0,class){
-  
-  t0_index <- grep(paste0('luc_',t0),colnames(data))
-  data_filtered <- data[data[,t0_index]==class,]
-  
-  total_count <- data %>% nrow()
-  class_count <- data_filtered %>% nrow()
-  prop <- class_count/total_count
-  
-  return(prop)
-  
-}
-
-
-def_rate_single <- function(data,t0,period_length){
-  
-  # get name of column for start year
-  
-  t0_index <- grep(paste0('luc_',t0),colnames(data))
-  
-  # filter down to pixels with undisturbed forest (JRC class 1)
-  
-  data_filtered <- data[data[,t0_index]==1,] 
-  
-  # count 1s at t0 in project and cf
-  
-  no_1s <- nrow(data_filtered)
-  
-  # identify where there have been changes during the evaluation period
-  
-  tend <- t0 + period_length
-  
-  luc_tend <- data_filtered %>% 
-    select(paste0('luc_',tend))
-  
-  # measuring responses
-  
-  def_response <- case_when(
-    luc_tend==1 ~ 0,
-    luc_tend==2 ~ 0,
-    luc_tend==3 ~ 1,
-    luc_tend==4 ~ 0,
-    luc_tend>4 ~ 0)
-  
-  deg_response <- case_when(
-    luc_tend==1 ~ 0,
-    luc_tend==2 ~ 1,
-    luc_tend==3 ~ 0,
-    luc_tend==4 ~ 0,
-    luc_tend>4 ~ 0)
-  
-  ref_response <- case_when(
-    luc_tend==1 ~ 0,
-    luc_tend==2 ~ 0,
-    luc_tend==3 ~ 0,
-    luc_tend==4 ~ 1,
-    luc_tend>4 ~ 0)
-  
-  data_filtered$def_response <- def_response
-  data_filtered$deg_response <- deg_response
-  data_filtered$ref_response <- ref_response
-  
-  # count up number of pixels where there have been changes for each type
-  
-  def_changes <- data_filtered %>% filter(def_response==1) %>% 
-    nrow()
-  
-  deg_changes <- data_filtered %>% filter(deg_response==1) %>% 
-    nrow()
-  
-  ref_changes <- data_filtered %>% filter(ref_response==1) %>% 
-    nrow()
-  
-  # calculate deforestation rate (= the rate of loss of undisturbed forest) as a percentage
-  
-  def <- 100*(def_changes/no_1s)/period_length
-  
-  deg <- 100*(deg_changes/no_1s)/period_length
-  
-  ref <- 100*(ref_changes/no_1s)/period_length
-  
-  # adding the degraded-to-deforested transition
-  
-  data_filtered_2 <- data[data[,t0_index]==2,]
-  
-  # count 2s at t0 in project and cf
-  
-  no_2s <- data_filtered_2 %>% nrow()
-  
-  # identify where there have been changes during the evaluation period
-  
-  luc_tend_2 <- data_filtered_2 %>% 
-    select(paste0('luc_',tend))
-  
-  def_response_2 <- case_when(
-    luc_tend_2==1 ~ 0,
-    luc_tend_2==2 ~ 0,
-    luc_tend_2==3 ~ 1,
-    luc_tend_2==4 ~ 0,
-    luc_tend_2>4 ~ 0)
-  
-  data_filtered_2$def_response_2 <- def_response_2
-  
-  def_changes_2 <- data_filtered_2 %>% filter(def_response_2==1) %>% 
-    nrow()
-
-  deg_to_def <- 100*(def_changes_2/no_2s)/period_length
-  
-  # make df
-  
-  df <- data.frame(matrix(ncol=3,nrow=4))
-  
-  colnames(df) <- c('Process','Forest type','Rate (%/year)')
-  
-  df[1] <- c('Degradation','Deforestation','Deforestation','Reforestation')
-  df[2] <- c('Undisturbed forest','Undisturbed forest','Disturbed forest','Undisturbed forest')
-  df[3] <- c(deg,def,deg_to_def,ref)
-  
-  return(df)
-  
-}
\ No newline at end of file
diff --git a/scripts/scripts/land_cover_timeseries.R b/scripts/scripts/land_cover_timeseries.R
deleted file mode 100644
index 6490bf1..0000000
--- a/scripts/scripts/land_cover_timeseries.R
+++ /dev/null
@@ -1,111 +0,0 @@
-
-get_luc_timeseries <- function(data,t0,tend,type='both'){
-  
-  years_list <- seq(t0,tend)
-
-  if(type=='both'){
-
-    df <- data.frame(matrix(ncol=4,nrow=8*length(years_list)))
-  
-    colnames(df) <- c('year','type','luc','percentage')
-    
-    counter <- 1
-    
-    for(year in years_list) {
-      
-      for(i in seq (1:4)) {
-        
-        for(type_value in c('Project','Counterfactual')) {
-          
-          total <- data %>% filter(type == type_value) %>% nrow()
-          
-          no_class_i <- data %>% filter(type == type_value & .data[[paste0('luc_',year)]]==i) %>% nrow()
-          
-          prop <- no_class_i/total
-          
-          df[counter,1] <- year
-          df[counter,2] <- type_value
-          df[counter,3] <- i
-          df[counter,4] <- prop*100
-          
-          counter <- counter+1
-        
-        }
-        
-      }
-      
-    }
-
-  } else if(type=='single'){
-
-    df <- data.frame(matrix(ncol=3,nrow=4*length(years_list)))
-  
-    colnames(df) <- c('year','luc','percentage')
-    
-    counter <- 1
-    
-    for(year in years_list) {
-      
-      for(i in seq (1:4)) {
-          
-          total <- data %>% nrow()
-          
-          no_class_i <- data %>% filter(.data[[paste0('luc_',year)]]==i) %>% nrow()
-          
-          prop <- no_class_i/total
-          
-          df[counter,1] <- year
-          df[counter,2] <- i
-          df[counter,3] <- prop*100
-          
-          counter <- counter+1
-        
-        }
-      
-    }
-
-  }
-
-  return(drop_na(df))
-  
-}
-
-luc_class1_uncertainty <- function(data,t0,tend) {
-
-  years_list <- seq(t0-10,tend)
-  
-  df <- data.frame(matrix(ncol=4,nrow=2*length(unique(data$pair))*length(years_list)))
-  
-  colnames(df) <- c('year','type','pair','percent_class1')
-  
-  counter <- 1
-  
-  for(year in years_list) {
-    
-    for(type_value in c('Project','Counterfactual')) {
-      
-      for(pair_id in unique(data$pair)) {
-        
-        total <- data %>% filter(type == type_value & pair == pair_id) %>% nrow()
-        
-        no_class_i <- data %>% filter(type == type_value & pair == pair_id & .data[[paste0('luc_',year)]]==1) %>% nrow()
-        
-        prop <- no_class_i/total
-        
-        df[counter,1] <- year
-        df[counter,2] <- type_value
-        df[counter,3] <- pair_id
-        df[counter,4] <- prop*100
-        
-        counter <- counter+1
-        
-      }
-      
-    }
-    
-  }
-  
-  return(drop_na(df))
-
-}
-
diff --git a/scripts/scripts/plot_matchingvars.R b/scripts/scripts/plot_matchingvars.R
deleted file mode 100644
index ec47f01..0000000
--- a/scripts/scripts/plot_matchingvars.R
+++ /dev/null
@@ -1,42 +0,0 @@
-plot_matching_variables <- function(data, ex_ante = 'false') {
-  
-  cont_data <- data %>% dplyr::select(type, elevation, slope, access, starts_with('cpc'))
-  cont_data[, 5:length(cont_data)] <- 100 * cont_data[, 5:length(cont_data)] # cpcs as percentages
-  cont_data <- melt(cont_data)
-  
-  # rename labels
-  cont_data$variable <- factor(cont_data$variable, 
-                  levels = c('access', 'cpc0_u', 'cpc0_d', 
-                  'slope', 'cpc5_u', 'cpc5_d', 
-                  'elevation', 'cpc10_u', 'cpc10_d'))
-  
-  levels(cont_data$variable) <- c('Inaccessibility',
-                                  'Forest~cover~t[0]',
-                                  'Deforestation~t[0]',
-                                  'Slope',
-                                  'Forest~cover~t[-5]',
-                                  'Deforestation~t[-5]',
-                                  'Elevation',
-                                  'Forest~cover~t[-10]',
-                                  'Deforestation~t[-10]')
-  
-  # determine labels based on ex_ante
-  if (ex_ante == 'false') {
-    plot_labels <- c('Counterfactual', 'Project')
-  } else if (ex_ante == 'true') {
-    plot_labels <- c('Matched points', 'Project')}
-  
-  # plot
-  matchingvars <- ggplot(data = cont_data, mapping = aes(x = value, colour = type)) +
-    geom_density(adjust = 10, size = 1) +
-    facet_wrap(~variable, scales = 'free', nrow = 3, labeller = label_parsed) +
-    ylab('Density') +
-    scale_colour_manual(values = c('blue', 'red'), labels = plot_labels) +
-    theme_classic() +
-    theme(legend.title = element_blank(),
-          axis.title.x = element_blank(),
-          axis.text.y = element_blank(),
-          axis.ticks.y = element_blank())
-  
-  return(matchingvars)
-}
\ No newline at end of file
diff --git a/scripts/scripts/plot_transitions.R b/scripts/scripts/plot_transitions.R
deleted file mode 100644
index 2931a60..0000000
--- a/scripts/scripts/plot_transitions.R
+++ /dev/null
@@ -1,63 +0,0 @@
-library(ggspatial)
-
-plot_transitions <- function(data,t0,period_length,shapefile){
-  
-  # count number of 1s at project start
-  
-  t0_index <- grep(paste0('luc_',t0),colnames(data))
-  
-  data_filtered <- data[data[,t0_index]==1,]
-  
-  # identify where there have been changes
-  
-  tend <- t0 + period_length
-  
-  luc_tend <- data_filtered[[paste0('luc_', tend)]]
-  
-  response <- case_when(
-    luc_tend==1 ~ NA,
-    luc_tend==2 ~ 'deg',
-    luc_tend==3 ~ 'def',
-    luc_tend==4 ~ 'ref',
-    luc_tend>4 ~ NA)
-  
-  data_filtered$response <- as.factor(response)
-  data_filtered <- data_filtered %>% filter(!is.na(response))
-  
-  # adding deg --> def transition
-  
-  # count number of 2s at project start
-  
-  data_filtered_2s <- data[data[,t0_index]==2,]
-
-  # identify where there have been changes
-  
-  luc_tend <- data_filtered_2s[[paste0('luc_', tend)]]
-  
-  response <- case_when(
-    luc_tend==1 ~ NA,
-    luc_tend==2 ~ NA,
-    luc_tend==3 ~ 'deg_to_def',
-    luc_tend==4 ~ NA,
-    luc_tend>4 ~ NA)
-  
-  data_filtered_2s$response <- as.factor(response)
-  data_filtered_2s <- data_filtered_2s %>% filter(!is.na(response))
-  
-  combined_dat <- bind_rows(data_filtered, data_filtered_2s)
-  combined_dat$response <- factor(combined_dat$response, levels=c('deg','deg_to_def','def','ref'))
-  
-  # plotting 
-  
-  plot <- combined_dat %>% 
-    filter(response != 0) %>% 
-    ggplot(aes(x=lng,y=lat,colour=response))+
-    geom_sf(data=shapefile,inherit.aes=F,fill='grey80',colour=NA)+
-    geom_point(alpha=0.5,size=0.5)+
-    scale_colour_manual(values=c('yellow','orange','red','green'),name='Transition',labels=c('Undisturbed to degraded','Degraded to deforested','Undisturbed to deforested','Undisturbed to reforested'))+
-    annotation_scale(text_cex = 1.3)+
-    theme_void()
-  
-  return(plot)
-  
-}
diff --git a/scripts/scripts/std_mean_diff.R b/scripts/scripts/std_mean_diff.R
deleted file mode 100644
index 63d81ba..0000000
--- a/scripts/scripts/std_mean_diff.R
+++ /dev/null
@@ -1,57 +0,0 @@
-
-std_mean_diff <- function(path_to_pairs) {
-  
-  # clean data
-
-  files_full_raw <- list.files(path_to_pairs,
-                               pattern='*.parquet',full.names=T,recursive=F)
-  files_full <- files_full_raw[!grepl('matchless',files_full_raw)]
-  files_short_raw <- list.files(path=path_to_pairs,
-                                pattern='*.parquet',full.names=F,recursive=F)
-  files_short <- files_short_raw[!grepl('matchless',files_short_raw)]
-  
-  # initialise dfs
-  
-  vars <- c(colnames(read_parquet(files_full[1])),'pair')
-  df <- data.frame(matrix(ncol=length(vars),nrow=0))
-  colnames(df) <- vars
-  
-  for(j in 1:length(files_full)){
-    
-    # read in all parquet files for a given project
-    
-    f <- data.frame(read_parquet(files_full[j]))
-    
-    # append data to bottom of df
-    
-    df <- bind_rows(df,f)
-    
-  }
-
-  # calculate smd
-
-  smd_results <- data.frame(variable = character(), smd = numeric(), stringsAsFactors = FALSE)
-  
-  variables <- c('cpc10_d','cpc5_d','cpc0_d',
-                 'cpc10_u','cpc5_u','cpc0_u',
-                 'access','slope','elevation')
-    
-    for (var in variables) {
-      k_var <- df[[paste0("k_", var)]]
-      s_var <- df[[paste0("s_", var)]]
-      
-      k_mean <- mean(k_var, na.rm = TRUE)
-      s_mean <- mean(s_var, na.rm = TRUE)
-      k_sd <- sd(k_var, na.rm = TRUE)
-      s_sd <- sd(s_var, na.rm = TRUE)
-      
-      pooled_sd <- sqrt((k_sd^2 + s_sd^2) / 2)
-      smd <- (k_mean - s_mean) / pooled_sd
-      
-      smd_results <- rbind(smd_results, data.frame(variable = var, smd = smd, stringsAsFactors = FALSE))
-    }
-    
-  return(smd_results)
-}
-  
-  
\ No newline at end of file

From 62c0b38ee086863c3d05cff49319d29622ab0d7d Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Wed, 28 Aug 2024 09:48:59 +0000
Subject: [PATCH 17/19] Update tmfpython.sh

---
 scripts/tmfpython.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index 7361eb3..f2ece30 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -197,5 +197,5 @@ deactivate
 # Run ex-ante evaluation
 if [ "$verbose" == "true" ]; then
 ea_output_file="${output_dir}/${proj}_ex_ante_evaluation.html"
-Rscript -e "rmarkdown::render(input='scripts/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))"
+Rscript -e "rmarkdown::render(input='evaluations/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))"
 fi
\ No newline at end of file

From 7823ebf1b15e16f5874e7962d045d17cd30774c8 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Thu, 29 Aug 2024 16:04:28 +0000
Subject: [PATCH 18/19] Updated tmfpython.sh to mirror ex post evaluations code

---
 scripts/tmfpython.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/scripts/tmfpython.sh b/scripts/tmfpython.sh
index f2ece30..fdecea6 100755
--- a/scripts/tmfpython.sh
+++ b/scripts/tmfpython.sh
@@ -6,7 +6,7 @@
 #p: project name/ID - must match name of shapefile
 #t: year of project start (t0)
 #e: evaluation year (default: 2022)
-#v: verbose - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: false).
+#v: report - whether to run an ex-ante evaluation and knit the results in an R notebook (true/false, default: true).
 
 # Check which branch is currently checked out
 branch=$(git rev-parse --abbrev-ref HEAD)
@@ -18,7 +18,7 @@ set -e
 input_dir=""
 output_dir=""
 eval_year=2022
-verbose=true
+report=true
 
 #####################################
 
@@ -31,7 +31,7 @@ function display_help() {
     echo "  -p <proj>           Project name"
     echo "  -t <t0>             Start year"
     echo "  -e <year>           Evaluation year"
-    echo "  -v <verbose>        Knit ex ante evaluation as .Rmd? (true/false)"
+    echo "  -r <report>        Knit ex ante evaluation as .Rmd? (true/false)"
     echo "  -h                  Display this help message"
     echo "Example:"
     echo "  $0 -i '/maps/aew85/projects' -o '/maps/aew85/tmf_pipe_out -p 1201 -t 2012"
@@ -46,7 +46,7 @@ do
         p) proj=${OPTARG};;
         t) t0=${OPTARG};;
         e) eval_year=${OPTARG};;
-        r) verbose=${OPTARG};;
+        r) report=${OPTARG};;
         h) display_help; exit 0;;
         *) echo "Invalid option: -${OPTARG}" >&2; display_help; exit 1;;
     esac
@@ -57,7 +57,7 @@ echo "Output directory: $output_dir"
 echo "Project: $proj"
 echo "t0: $t0"
 echo "Evaluation year: $eval_year"
-echo "Ex-ante evaluation: $verbose"
+echo "Ex-ante evaluation: $report"
 
 if [ $# -eq 0 ]; then
     display_help
@@ -195,7 +195,7 @@ echo "--Pairs matched.--"
 deactivate
 
 # Run ex-ante evaluation
-if [ "$verbose" == "true" ]; then
+if [ "$report" == "true" ]; then
 ea_output_file="${output_dir}/${proj}_ex_ante_evaluation.html"
 Rscript -e "rmarkdown::render(input='evaluations/ex_ante_evaluation_template.Rmd',output_file='${ea_output_file}',params=list(proj='${proj}',t0='${t0}',input_dir='${input_dir}',output_dir='${output_dir}',branch='${branch}'))"
 fi
\ No newline at end of file

From 8d391fa259774e46fa26f941cb4ddb7feb4fb761 Mon Sep 17 00:00:00 2001
From: Abby Williams <abbyevewilliams@gmail.com>
Date: Fri, 30 Aug 2024 14:57:11 +0000
Subject: [PATCH 19/19] Update ex_ante_evaluations_template.Rmd

---
 evaluations/ex_ante_evaluation_template.Rmd | 30 +++++++++++----------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/evaluations/ex_ante_evaluation_template.Rmd b/evaluations/ex_ante_evaluation_template.Rmd
index be95ce1..3a92a4c 100644
--- a/evaluations/ex_ante_evaluation_template.Rmd
+++ b/evaluations/ex_ante_evaluation_template.Rmd
@@ -733,7 +733,7 @@ Note that, in calculating carbon stock changes, we assume the following:
 
 - Belowground biomass (BGB) is 20% of AGB (based on Cairns et al. 1997)
 
-- Deadwod biomass is 11% of AGB (based on IPCC 2003)
+- Deadwood biomass is 11% of AGB (based on IPCC 2003)
 
 - Emitted carbon dioxide is 47% of total biomass (based on Martin and Thomas 2011)
 
@@ -741,8 +741,8 @@ Note that, in calculating carbon stock changes, we assume the following:
 \
 ```{r additionality_forecast}
 
-baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=8))
-colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total_c','area','total_byarea')
+baseline_stocks <- data.frame(matrix(nrow=2*nrow(carbon_density),ncol=9))
+colnames(baseline_stocks) <- c('time','luc','agb','bgb','dwb','total','total_c','area','total_byarea')
 luc_counter <- 1
 row_counter <- 1
 
@@ -761,7 +761,7 @@ for(i in carbon_density$land.use.class){
     bgb <- agb*0.2
     dw <- agb*0.11
     total <- agb + bgb + dw
-    #total_co2 <- total*0.47 # we're doing this step later
+    total_c <- total*0.47
 
     # get area of class i
 
@@ -772,7 +772,7 @@ for(i in carbon_density$land.use.class){
 
     # multiply total by area
     
-    total_byarea <- total*area_of_forest
+    total_byarea <- total_c*area_of_forest
 
     # adding to df
 
@@ -782,8 +782,9 @@ for(i in carbon_density$land.use.class){
     baseline_stocks[row_counter,4] <- bgb
     baseline_stocks[row_counter,5] <- dw
     baseline_stocks[row_counter,6] <- total
-    baseline_stocks[row_counter,7] <- area_of_forest
-    baseline_stocks[row_counter,8] <- total_byarea
+    baseline_stocks[row_counter,7] <- total_c
+    baseline_stocks[row_counter,8] <- area_of_forest
+    baseline_stocks[row_counter,9] <- total_byarea
 
     row_counter <- row_counter+1
 
@@ -803,11 +804,11 @@ baseline_stocks_format <- baseline_stocks_format[2:6]
 
 colnames(baseline_stocks_format) <- c(
                                  'Land use class',
-                                 'AGB density (t C / ha)',
-                                 'BGB density (t C / ha)',
-                                 'Deadwood biomass density (t C / ha)', 
-                                 'Total biomass density (t C / ha)', 
-                                 'Total biomass (t C)')
+                                 'AGB (t / ha)',
+                                 'BGB (t / ha)',
+                                 'Deadwood biomass (t / ha)', 
+                                 'Total biomass (t / ha)', 
+                                 'Total carbon (t / ha)')
 
 
 # renaming classes
@@ -846,7 +847,7 @@ First we present the carbon stock changes observed in the matched points during
 
 ```{r results}
 
-baseline_stock_changes <- baseline_stocks[c(1:2,7:8)]
+baseline_stock_changes <- baseline_stocks[c(1:2,8:9)]
 
 # reshape
 
@@ -882,7 +883,7 @@ baseline_stock_changes <- bind_rows(reshaped_data, total_row)
 # add in conversion to CO2
 
 baseline_stock_changes <- baseline_stock_changes  %>% 
-    mutate(co2_diff = 0.47*c_diff)
+    mutate(co2_diff = (44/12)*c_diff)
 
 # formatting bits
 
@@ -938,6 +939,7 @@ filtered_data %>%
 
 ```
 
+
 ```{r results_summary}
 
 # find the difference