From fa1361d0d662bf1cbff15588cdd2ca9aa083de36 Mon Sep 17 00:00:00 2001
From: ypwang19 <yaping.wang@users.noreply.github.com>
Date: Fri, 17 May 2024 18:26:43 +0000
Subject: [PATCH 1/6] Add inverse distance weighted average to the superob
 function

---
 utils/obsproc/superob.h | 116 +++++++++++++++++++++++++++++++++-------
 1 file changed, 98 insertions(+), 18 deletions(-)
diff --git a/utils/obsproc/superob.h b/utils/obsproc/superob.h
index 413527ea2..76f73e1d3 100644
--- a/utils/obsproc/superob.h
+++ b/utils/obsproc/superob.h
@@ -8,11 +8,47 @@
 
 namespace gdasapp {
   namespace superobutils {
+    // Function to compute distance between two points
+    struct GeoPoint {
+        double latitude;
+        double longitude;
+    };
+    const double EARTH_RADIUS_KM = 6371.0;
+
+    // Function to convert degrees to radians
+    double toRadians(double degree) {
+        return degree * (M_PI / 180.0);
+    } 
+
+    // Function to calculate the Haversine distance between two points
+    double haversineDistance(const GeoPoint& point1, const GeoPoint& point2) {
+        double lat1 = toRadians(point1.latitude);
+        double lon1 = toRadians(point1.longitude);
+        double lat2 = toRadians(point2.latitude);
+        double lon2 = toRadians(point2.longitude);
+
+        // Haversine formula
+        double dlat = lat2 - lat1;
+        double dlon = lon2 - lon1;
+        double a = std::sin(dlat / 2.0) * std::sin(dlat / 2.0) +
+                   std::cos(lat1) * std::cos(lat2) *
+                   std::sin(dlon / 2.0) * std::sin(dlon / 2.0);
+        double c = 2.0 * std::atan2(std::sqrt(a), std::sqrt(1.0 - a));
+        double distance = EARTH_RADIUS_KM * c;
+        return distance;
+    }
+
     // Function to perform subsampling/binning of gridded data with a given stride
     template <typename T>
     std::vector<std::vector<T>> subsample2D(const std::vector<std::vector<T>>& inputArray,
                                             const std::vector<std::vector<int>>& mask,
-                                            const eckit::Configuration & fullConfig) {
+                                            const eckit::Configuration & fullConfig,
+                                            bool useCressman = false,
+                                            const std::vector<std::vector<T>>& inputlat={},
+                                            const std::vector<std::vector<T>>& inputlon={},
+                                            const std::vector<std::vector<T>>& targetlat={},
+                                            const std::vector<std::vector<T>>& targetlon={}
+) {
       // Get the binning configuration
       int stride;
       int minNumObs;
@@ -31,30 +67,74 @@ namespace gdasapp {
       // Perform subsampling
       T sum;
       int count;
-      for (int i = 0; i < subsampledRows; ++i) {
-        for (int j = 0; j < subsampledCols; ++j) {
-          count = 0;
-          sum = static_cast<T>(0);
-          // Compute the average within the stride
-          for (int si = 0; si < stride; ++si) {
-            for (int sj = 0; sj < stride; ++sj) {
-              int row = i * stride + si;
-              int col = j * stride + sj;
-              if (row < numRows && col < numCols && mask[row][col] == 1) {
-                sum += inputArray[row][col];
-                count++;
+      if (not useCressman) {
+        for (int i = 0; i < subsampledRows; ++i) {
+          for (int j = 0; j < subsampledCols; ++j) {
+            count = 0;
+            sum = static_cast<T>(0);
+            // Compute the average within the stride
+            for (int si = 0; si < stride; ++si) {
+              for (int sj = 0; sj < stride; ++sj) {
+                int row = i * stride + si;
+                int col = j * stride + sj;
+                if (row < numRows && col < numCols && mask[row][col] == 1) {
+                  sum += inputArray[row][col];
+                  count++;
+                }
               }
             }
+
+            // Calculate the average and store it in the subsampled array
+            if ( count < minNumObs ) {
+              subsampled[i][j] = static_cast<T>(-9999);
+            } else {
+              subsampled[i][j] = sum / static_cast<T>(count);
+            }
           }
+        }
+      } else {
+      // Apply Cressman interpolation if selected
+        // Perform Cressman interpolation
+        double cressmanRadius;
+        fullConfig.get("binning.cressman radius", cressmanRadius);
+        for (int i = 0; i < subsampledRows; ++i) {
+          for (int j = 0; j < subsampledCols; ++j) {
+            // Initialize sum and sumWeights for Cressman interpolation
+            count = 0;
+            sum = static_cast<T>(0);
+            T sumWeights = static_cast<T>(0);
+            // Loop through neighboring points for interpolation
+            for (int si = 0; si < stride; ++si) {
+              for (int sj = 0; sj < stride; ++sj) {
+                int row = i * stride + si;
+                int col = j * stride + sj;
+                if (row < numRows && col < numCols && mask[row][col] == 1) {
+                  GeoPoint point1 = {inputlat[row][col], inputlon[row][col]};  
+                  GeoPoint point2 = {targetlat[i][j], targetlon[i][j]};
+                  double distance = haversineDistance(point1, point2);
+                  double distance_sq = distance * distance;
+                  double cressmanRadius_sq = cressmanRadius * cressmanRadius;
+                  double weight = (distance <= cressmanRadius) ? (cressmanRadius_sq - distance_sq ) / 
+                                  (cressmanRadius_sq + distance_sq) : 0.0;
+                  //double weight = (distance <= cressmanRadius) ? 1.0 / (distance * distance) : 0.0;
+                  sum += inputArray[row][col] * weight;
+                  sumWeights += weight;
+                  count ++;
+                }
+              }
+            }
 
-          // Calculate the average and store it in the subsampled array
-          if ( count < minNumObs ) {
-            subsampled[i][j] = static_cast<T>(-9999);
-          } else {
-            subsampled[i][j] = sum / static_cast<T>(count);
+           // std::cout << " YPW" << sumWeights << count << std::endl;
+            // Update subsampled value with Cressman interpolation
+            if (count < minNumObs || sumWeights == 0.0) {
+              subsampled[i][j] = static_cast<T>(-9999);
+            } else {
+              subsampled[i][j] = sum / static_cast<T>(sumWeights);
+            }
           }
         }
       }
+
       std::cout << " done subsampling" << std::endl;
       return subsampled;
     }

From 873b91c5631280b53ae983b676856b40f64504d4 Mon Sep 17 00:00:00 2001
From: ypwang19 <yaping.wang@users.noreply.github.com>
Date: Fri, 17 May 2024 18:30:42 +0000
Subject: [PATCH 2/6] Enable viirs converter to use the new option in superob

---
 utils/obsproc/Viirsaod2Ioda.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/utils/obsproc/Viirsaod2Ioda.h b/utils/obsproc/Viirsaod2Ioda.h
index 3bcdb26b0..e2bd00840 100644
--- a/utils/obsproc/Viirsaod2Ioda.h
+++ b/utils/obsproc/Viirsaod2Ioda.h
@@ -133,7 +133,8 @@ namespace gdasapp {
       std::vector<std::vector<int>> mask_s;
 
       if ( fullConfig_.has("binning") ) {
-        // deal with longitude when points cross the international date line
+        // Do superobbing
+        // Deal with longitude when points cross the international date line
         float minLon = std::numeric_limits<float>::max();
         float maxLon = std::numeric_limits<float>::min();
 
@@ -158,19 +159,18 @@ namespace gdasapp {
             }
         }
 
-
+        
         lat2d_s = gdasapp::superobutils::subsample2D(lat, mask, fullConfig_);
         mask_s = gdasapp::superobutils::subsample2D(mask, mask, fullConfig_);
         if (fullConfig_.has("binning.cressman radius")) {
-        // Weighted-average (cressman) does not work yet. Need to develop subsample2D_cressman
-        // function to superob.h or incorporate weighted average to subsample2D function.
-        // obsvalue_s = gdasapp::superobutils::subsample2D_cressman(obsvalue, lat, lon,
-        //              lat2d_s, lon2d_s, mask, fullConfig_);
-        // obserror_s = gdasapp::superobutils::subsample2D_cressman(obserror, lat, lon,
-        // lat2d_s, lon2d_s, mask, fullConfig_);
-          obsvalue_s = gdasapp::superobutils::subsample2D(obsvalue, mask, fullConfig_);
-          obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
+        // Weighted-average (cressman) superob
+          bool useCressman = true;
+          obsvalue_s = gdasapp::superobutils::subsample2D(obsvalue, mask, fullConfig_, 
+                       useCressman, lat, lon, lat2d_s, lon2d_s);
+          obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_, 
+                       useCressman, lat, lon, lat2d_s, lon2d_s);
         } else {
+        // Simple-average superob
           obsvalue_s = gdasapp::superobutils::subsample2D(obsvalue, mask, fullConfig_);
           obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_);
         }

From 3db201b57fef62958bb39a23ae82ea7fe6db24b0 Mon Sep 17 00:00:00 2001
From: ypwang19 <yaping.wang@users.noreply.github.com>
Date: Fri, 17 May 2024 18:49:46 +0000
Subject: [PATCH 3/6] fix coding norms

---
 utils/obsproc/Viirsaod2Ioda.h |  5 ++---
 utils/obsproc/superob.h       | 16 +++++++---------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/utils/obsproc/Viirsaod2Ioda.h b/utils/obsproc/Viirsaod2Ioda.h
index e2bd00840..086caf185 100644
--- a/utils/obsproc/Viirsaod2Ioda.h
+++ b/utils/obsproc/Viirsaod2Ioda.h
@@ -159,15 +159,14 @@ namespace gdasapp {
             }
         }
 
-        
         lat2d_s = gdasapp::superobutils::subsample2D(lat, mask, fullConfig_);
         mask_s = gdasapp::superobutils::subsample2D(mask, mask, fullConfig_);
         if (fullConfig_.has("binning.cressman radius")) {
         // Weighted-average (cressman) superob
           bool useCressman = true;
-          obsvalue_s = gdasapp::superobutils::subsample2D(obsvalue, mask, fullConfig_, 
+          obsvalue_s = gdasapp::superobutils::subsample2D(obsvalue, mask, fullConfig_,
                        useCressman, lat, lon, lat2d_s, lon2d_s);
-          obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_, 
+          obserror_s = gdasapp::superobutils::subsample2D(obserror, mask, fullConfig_,
                        useCressman, lat, lon, lat2d_s, lon2d_s);
         } else {
         // Simple-average superob
diff --git a/utils/obsproc/superob.h b/utils/obsproc/superob.h
index 76f73e1d3..f7a5df33c 100644
--- a/utils/obsproc/superob.h
+++ b/utils/obsproc/superob.h
@@ -44,10 +44,10 @@ namespace gdasapp {
                                             const std::vector<std::vector<int>>& mask,
                                             const eckit::Configuration & fullConfig,
                                             bool useCressman = false,
-                                            const std::vector<std::vector<T>>& inputlat={},
-                                            const std::vector<std::vector<T>>& inputlon={},
-                                            const std::vector<std::vector<T>>& targetlat={},
-                                            const std::vector<std::vector<T>>& targetlon={}
+                                            const std::vector<std::vector<T>>& inputlat = {},
+                                            const std::vector<std::vector<T>>& inputlon = {},
+                                            const std::vector<std::vector<T>>& targetlat = {},
+                                            const std::vector<std::vector<T>>& targetlon = {}
 ) {
       // Get the binning configuration
       int stride;
@@ -67,7 +67,7 @@ namespace gdasapp {
       // Perform subsampling
       T sum;
       int count;
-      if (not useCressman) {
+      if (! useCressman) {
         for (int i = 0; i < subsampledRows; ++i) {
           for (int j = 0; j < subsampledCols; ++j) {
             count = 0;
@@ -109,14 +109,13 @@ namespace gdasapp {
                 int row = i * stride + si;
                 int col = j * stride + sj;
                 if (row < numRows && col < numCols && mask[row][col] == 1) {
-                  GeoPoint point1 = {inputlat[row][col], inputlon[row][col]};  
+                  GeoPoint point1 = {inputlat[row][col], inputlon[row][col]};
                   GeoPoint point2 = {targetlat[i][j], targetlon[i][j]};
                   double distance = haversineDistance(point1, point2);
                   double distance_sq = distance * distance;
                   double cressmanRadius_sq = cressmanRadius * cressmanRadius;
-                  double weight = (distance <= cressmanRadius) ? (cressmanRadius_sq - distance_sq ) / 
+                  double weight = (distance <= cressmanRadius) ? (cressmanRadius_sq - distance_sq ) /
                                   (cressmanRadius_sq + distance_sq) : 0.0;
-                  //double weight = (distance <= cressmanRadius) ? 1.0 / (distance * distance) : 0.0;
                   sum += inputArray[row][col] * weight;
                   sumWeights += weight;
                   count ++;
@@ -124,7 +123,6 @@ namespace gdasapp {
               }
             }
 
-           // std::cout << " YPW" << sumWeights << count << std::endl;
             // Update subsampled value with Cressman interpolation
             if (count < minNumObs || sumWeights == 0.0) {
               subsampled[i][j] = static_cast<T>(-9999);

From 17bbf7859bc3b34a3c7469e5c3094ed68c5e4696 Mon Sep 17 00:00:00 2001
From: ypwang19 <yaping.wang@users.noreply.github.com>
Date: Fri, 17 May 2024 21:32:58 +0000
Subject: [PATCH 4/6] fix coding norms

---
 utils/obsproc/superob.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/utils/obsproc/superob.h b/utils/obsproc/superob.h
index f7a5df33c..2c5a6c9dc 100644
--- a/utils/obsproc/superob.h
+++ b/utils/obsproc/superob.h
@@ -18,7 +18,7 @@ namespace gdasapp {
     // Function to convert degrees to radians
     double toRadians(double degree) {
         return degree * (M_PI / 180.0);
-    } 
+    }
 
     // Function to calculate the Haversine distance between two points
     double haversineDistance(const GeoPoint& point1, const GeoPoint& point2) {
@@ -67,7 +67,7 @@ namespace gdasapp {
       // Perform subsampling
       T sum;
       int count;
-      if (! useCressman) {
+      if (!useCressman) {
         for (int i = 0; i < subsampledRows; ++i) {
           for (int j = 0; j < subsampledCols; ++j) {
             count = 0;
@@ -114,11 +114,11 @@ namespace gdasapp {
                   double distance = haversineDistance(point1, point2);
                   double distance_sq = distance * distance;
                   double cressmanRadius_sq = cressmanRadius * cressmanRadius;
-                  double weight = (distance <= cressmanRadius) ? (cressmanRadius_sq - distance_sq ) /
-                                  (cressmanRadius_sq + distance_sq) : 0.0;
+                  double weight = (distance <= cressmanRadius) ? (cressmanRadius_sq - distance_sq )
+                                  / (cressmanRadius_sq + distance_sq) : 0.0;
                   sum += inputArray[row][col] * weight;
                   sumWeights += weight;
-                  count ++;
+                  count++;
                 }
               }
             }

From d39de445499fbba7176571ae9d6eeefbc8deab13 Mon Sep 17 00:00:00 2001
From: ypwang19 <yaping.wang@users.noreply.github.com>
Date: Tue, 21 May 2024 14:40:24 +0000
Subject: [PATCH 5/6] simplify calculation of distance using available
 functions

---
 utils/obsproc/superob.h | 43 ++++++++++-------------------------------
 1 file changed, 10 insertions(+), 33 deletions(-)

diff --git a/utils/obsproc/superob.h b/utils/obsproc/superob.h
index 2c5a6c9dc..70a780c4e 100644
--- a/utils/obsproc/superob.h
+++ b/utils/obsproc/superob.h
@@ -5,39 +5,14 @@
 #include <vector>
 
 #include "eckit/config/LocalConfiguration.h"
+#include "atlas/util/Earth.h"
+#include "atlas/util/Geometry.h"
+#include "atlas/util/Point.h"
 
-namespace gdasapp {
-  namespace superobutils {
-    // Function to compute distance between two points
-    struct GeoPoint {
-        double latitude;
-        double longitude;
-    };
-    const double EARTH_RADIUS_KM = 6371.0;
-
-    // Function to convert degrees to radians
-    double toRadians(double degree) {
-        return degree * (M_PI / 180.0);
-    }
-
-    // Function to calculate the Haversine distance between two points
-    double haversineDistance(const GeoPoint& point1, const GeoPoint& point2) {
-        double lat1 = toRadians(point1.latitude);
-        double lon1 = toRadians(point1.longitude);
-        double lat2 = toRadians(point2.latitude);
-        double lon2 = toRadians(point2.longitude);
 
-        // Haversine formula
-        double dlat = lat2 - lat1;
-        double dlon = lon2 - lon1;
-        double a = std::sin(dlat / 2.0) * std::sin(dlat / 2.0) +
-                   std::cos(lat1) * std::cos(lat2) *
-                   std::sin(dlon / 2.0) * std::sin(dlon / 2.0);
-        double c = 2.0 * std::atan2(std::sqrt(a), std::sqrt(1.0 - a));
-        double distance = EARTH_RADIUS_KM * c;
-        return distance;
-    }
 
+namespace gdasapp {
+  namespace superobutils {
     // Function to perform subsampling/binning of gridded data with a given stride
     template <typename T>
     std::vector<std::vector<T>> subsample2D(const std::vector<std::vector<T>>& inputArray,
@@ -109,9 +84,11 @@ namespace gdasapp {
                 int row = i * stride + si;
                 int col = j * stride + sj;
                 if (row < numRows && col < numCols && mask[row][col] == 1) {
-                  GeoPoint point1 = {inputlat[row][col], inputlon[row][col]};
-                  GeoPoint point2 = {targetlat[i][j], targetlon[i][j]};
-                  double distance = haversineDistance(point1, point2);
+
+                  atlas::PointLonLat p0(inputlon[row][col],inputlat[row][col]);
+                  atlas::PointLonLat p1(targetlon[i][j],targetlat[i][j]);
+                  double distance = atlas::util::Earth::distance(p0, p1)/1000.0;
+
                   double distance_sq = distance * distance;
                   double cressmanRadius_sq = cressmanRadius * cressmanRadius;
                   double weight = (distance <= cressmanRadius) ? (cressmanRadius_sq - distance_sq )

From d2aed7a266f80bce8906e827ef4cae45997efec2 Mon Sep 17 00:00:00 2001
From: ypwang19 <yaping.wang@users.noreply.github.com>
Date: Tue, 21 May 2024 15:05:21 +0000
Subject: [PATCH 6/6] fix coding norm error

---
 utils/obsproc/superob.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/utils/obsproc/superob.h b/utils/obsproc/superob.h
index 70a780c4e..759014f1a 100644
--- a/utils/obsproc/superob.h
+++ b/utils/obsproc/superob.h
@@ -4,10 +4,10 @@
 #include <string>
 #include <vector>
 
-#include "eckit/config/LocalConfiguration.h"
 #include "atlas/util/Earth.h"
 #include "atlas/util/Geometry.h"
 #include "atlas/util/Point.h"
+#include "eckit/config/LocalConfiguration.h"
 
 
 
@@ -84,9 +84,8 @@ namespace gdasapp {
                 int row = i * stride + si;
                 int col = j * stride + sj;
                 if (row < numRows && col < numCols && mask[row][col] == 1) {
-
-                  atlas::PointLonLat p0(inputlon[row][col],inputlat[row][col]);
-                  atlas::PointLonLat p1(targetlon[i][j],targetlat[i][j]);
+                  atlas::PointLonLat p0(inputlon[row][col], inputlat[row][col]);
+                  atlas::PointLonLat p1(targetlon[i][j], targetlat[i][j]);
                   double distance = atlas::util::Earth::distance(p0, p1)/1000.0;
 
                   double distance_sq = distance * distance;