From 671cc53c4c5c3b51daa7590fd2fafe258e65f510 Mon Sep 17 00:00:00 2001
From: Matthew Hasselfield <mhasself@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:03:57 +0000
Subject: [PATCH 1/6] hk: fix some whitespace

---
 python/hk/getdata.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/hk/getdata.py b/python/hk/getdata.py
index 58aec156..0ba5d508 100644
--- a/python/hk/getdata.py
+++ b/python/hk/getdata.py
@@ -775,13 +775,13 @@ def load_range(start, stop, fields=None, alias=None,
         dirs and pkl files in the pre_proc_dir. No chmod if None.
       folder_patterns:  List of patterns to search for in folders. If
         None, default pattern is ['{folder}', 'hk_{folder}_*']. If not
-        None, usage for .g3 folder: ['{folder}'], and example usage 
+        None, usage for .g3 folder: ['{folder}'], and example usage
         for HK books: ['hk_{folder}_lat'] where {folder} will be replaced
         with the first 5 digits of the unix timestamp when looping through
         files.
       strict: If False, log and skip missing fields rather than
         raising a KeyError.
-                
+
     Returns:
 
       Dictionary with structure::
@@ -791,7 +791,7 @@ def load_range(start, stop, fields=None, alias=None,
         }
 
       It will be masked to only have data between start and stop.
-        
+
     Notes:
 
       The "start" and "stop" argument accept a variety of formats,
@@ -858,10 +858,10 @@ def load_range(start, stop, fields=None, alias=None,
         bases = []
         for pattern in folder_patterns:
             extended_pattern = pattern.format(folder=folder)
-            
+
             base = glob.glob(os.path.join(data_dir, extended_pattern))
             bases.extend(base)
-        
+
         if len(bases) > 1:
             bases.sort
             base = bases[0]

From 9e9ff1776c94a62e9e49fecb692f0e0eac40f315 Mon Sep 17 00:00:00 2001
From: Matthew Hasselfield <mhasself@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:20:43 +0000
Subject: [PATCH 2/6] proj.wcs: Projectionist constructor include all
 attributes

And clean up interpol treatment a little bit
---
 python/proj/wcs.py | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/python/proj/wcs.py b/python/proj/wcs.py
index 587d1e80..49eac62f 100644
--- a/python/proj/wcs.py
+++ b/python/proj/wcs.py
@@ -33,6 +33,10 @@
 # called "q_native".  This is usually the thing to pass to C++ level.
 
 
+#: Valid settings for "interpol".  First entry is the default.
+INTERPOLS = ['nearest', 'bilinear']
+
+
 class Projectionist:
     """This class assists with analyzing WCS information to populate data
     structures needed for accelerated pointing routines.
@@ -152,6 +156,12 @@ def __init__(self):
         self._q_fp_to_native = None
         self._q_fp_to_celestial = None
         self.tile_shape = None
+        self.active_tiles = None
+        self.wcs = None
+        self.proj_name = None
+        self.q_celestial_to_native = None
+        self.interpol = INTERPOLS[0]
+
         self.naxis = np.array([0, 0])
         self.cdelt = np.array([0., 0.])
         self.crpix = np.array([0., 0.])
@@ -194,7 +204,8 @@ def for_geom(cls, shape, wcs, interpol=None):
         self.crpix = np.array(wcs.wcs.crpix)
 
         # Pixel interpolation mode
-        if interpol is None: interpol = "nearest"
+        if interpol is None:
+            interpol = INTERPOLS[0]
         self.interpol = interpol
 
         return self
@@ -213,7 +224,7 @@ def for_map(cls, emap, wcs=None, interpol=None):
         """
         if wcs is None:
             wcs = emap.wcs
-        return cls.for_geom(emap.shape, wcs)
+        return cls.for_geom(emap.shape, wcs, interpol=interpol)
 
     @classmethod
     def for_source_at(cls, alpha0, delta0, gamma0=0.,
@@ -286,11 +297,17 @@ def get_ProjEng(self, comps='TQU', proj_name=None, get=True,
         """
         if proj_name is None: proj_name = self.proj_name
         tile_suffix = 'Tiled' if self.tiling else 'NonTiled'
+
         # Interpolation mode
-        if interpol is None: interpol = self.interpol
-        if interpol in ["nn", "nearest"]: interpol_suffix = ""
-        elif interpol in ["lin", "bilinear"]: interpol_suffix = "_Bilinear"
-        else: raise ValueError("ProjEng interpol '%s' not recognized" % str(interpol))
+        if interpol is None:
+            interpol = self.interpol
+        if interpol in ["nn", "nearest"]:
+            interpol_suffix = ""
+        elif interpol in ["lin", "bilinear"]:
+            interpol_suffix = "_Bilinear"
+        else:
+            raise ValueError("ProjEng interpol '%s' not recognized" % str(interpol))
+
         projeng_name = f'ProjEng_{proj_name}_{comps}_{tile_suffix}{interpol_suffix}'
         if not get:
             return projeng_name

From 792e47b82dce96d9a85201b73ae2f9f23c680b9a Mon Sep 17 00:00:00 2001
From: Matthew Hasselfield <mhasself@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:27:56 +0000
Subject: [PATCH 3/6] array_ops.cxx: re-indent.  No change in substance.

---
 src/array_ops.cxx | 246 +++++++++++++++++++++++-----------------------
 1 file changed, 123 insertions(+), 123 deletions(-)

diff --git a/src/array_ops.cxx b/src/array_ops.cxx
index d924e13f..d353d2e3 100644
--- a/src/array_ops.cxx
+++ b/src/array_ops.cxx
@@ -6,11 +6,11 @@
 #include <vector>
 #include <string>
 extern "C" {
-  #include <cblas.h>
-  // Additional prototypes for Fortran LAPACK routines.
-  // sposv: solve Ax = b for A positive definite.
-  void sposv_(const char* uplo, int* n, int* nrhs, float* a, int* lda,
-              float* b, int* ldb, int* info );
+    #include <cblas.h>
+    // Additional prototypes for Fortran LAPACK routines.
+    // sposv: solve Ax = b for A positive definite.
+    void sposv_(const char* uplo, int* n, int* nrhs, float* a, int* lda,
+                float* b, int* ldb, int* info );
 }
 
 #include <boost/python.hpp>
@@ -403,97 +403,97 @@ void pcut_poly_translate_helper(const vector<RangesInt32> & iranges, const vecto
 // should be checked by get_gap_fill_poly.
 
 void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
-                             float *a, float *b,
-                             int buffer, int ncoeff,
-                             bool inplace, float *extract)
+                              float *a, float *b,
+                              int buffer, int ncoeff,
+                              bool inplace, float *extract)
 {
-        // Generate Ranges corresponding to samples near the edges of
-        // intervals we want to fill.
-        RangesInt32 rsegs = gaps.buffered((int32_t)buffer);
-        rsegs.intersect(gaps.complement());
-
-        // We are guaranteed that there is one or two rseg intervals
-        // between each of our gaps, and zero or one rseg intervals
-        // before the first gap and after the second gap.
-
-        // Index of the rseg we're working with.
-        int model_i = 0;
-
-        for (auto const &gap: gaps.segments) {
-                // std::cout << "GAP\n";
-                int contrib_samps = 0;
-                int contrib_segs = 0;
-                memset(a, 0, ncoeff*ncoeff*sizeof(*a));
-                memset(b, 0, ncoeff*sizeof(*b));
-                float x0 = gap.first;
-
-                for (; model_i < rsegs.segments.size(); model_i++) {
-                        // Advance until just before this gap.
-                        auto const &ival = rsegs.segments[model_i];
-                        if (ival.second + 1 < gap.first)
-                                continue;
-                        // Include this interval in the fit.
-                        for (int i=ival.first; i<ival.second; i++) {
-                                float xx = 1.;
-                                float yxx = data[i];
-                                float x = i - x0;
-                                for (int j=0; j<ncoeff; j++) {
-                                        b[j] += yxx;
-                                        yxx *= x;
-                                        a[j] += xx;
-                                        xx *= x;
-                                }
-                                // Now down...
-                                for (int k=2; k<ncoeff+1; k++) {
-                                        a[k*ncoeff - 1] += xx;
-                                        xx *= x;
-                                }
-                        }
-                        contrib_samps += ival.second - ival.first;
-                        contrib_segs += 1;
-                        // If this was the right side interval, bail
-                        // now (so we can possibly re-use this
-                        // interval for next gap).
-                        if (ival.first > gap.first)
-                                break;
+    // Generate Ranges corresponding to samples near the edges of
+    // intervals we want to fill.
+    RangesInt32 rsegs = gaps.buffered((int32_t)buffer);
+    rsegs.intersect(gaps.complement());
+
+    // We are guaranteed that there is one or two rseg intervals
+    // between each of our gaps, and zero or one rseg intervals
+    // before the first gap and after the second gap.
+
+    // Index of the rseg we're working with.
+    int model_i = 0;
+
+    for (auto const &gap: gaps.segments) {
+        // std::cout << "GAP\n";
+        int contrib_samps = 0;
+        int contrib_segs = 0;
+        memset(a, 0, ncoeff*ncoeff*sizeof(*a));
+        memset(b, 0, ncoeff*sizeof(*b));
+        float x0 = gap.first;
+
+        for (; model_i < rsegs.segments.size(); model_i++) {
+            // Advance until just before this gap.
+            auto const &ival = rsegs.segments[model_i];
+            if (ival.second + 1 < gap.first)
+                continue;
+            // Include this interval in the fit.
+            for (int i=ival.first; i<ival.second; i++) {
+                float xx = 1.;
+                float yxx = data[i];
+                float x = i - x0;
+                for (int j=0; j<ncoeff; j++) {
+                    b[j] += yxx;
+                    yxx *= x;
+                    a[j] += xx;
+                    xx *= x;
+                }
+                // Now down...
+                for (int k=2; k<ncoeff+1; k++) {
+                    a[k*ncoeff - 1] += xx;
+                    xx *= x;
                 }
+            }
+            contrib_samps += ival.second - ival.first;
+            contrib_segs += 1;
+            // If this was the right side interval, bail
+            // now (so we can possibly re-use this
+            // interval for next gap).
+            if (ival.first > gap.first)
+                break;
+        }
 
-                // Restrict order based on number of contributing samples.
-                int n_keep = std::min(contrib_samps / 10 + 1, ncoeff);
-                if (contrib_samps > 0) {
-                        // Fill in a's interior.
-                        for (int r=1; r<ncoeff; r++)
-                                for (int c=0; c<ncoeff-1; c++)
-                                        a[r*ncoeff + c] = a[r*ncoeff + c - ncoeff + 1];
-
-                        // Re-organize a if the order has changed...
-                        if (n_keep < ncoeff) {
-                                for (int r=1; r<n_keep; r++)
-                                        for (int c=0; c<n_keep; c++)
-                                                a[r*n_keep+c] = a[r*ncoeff+c];
-                        }
+        // Restrict order based on number of contributing samples.
+        int n_keep = std::min(contrib_samps / 10 + 1, ncoeff);
+        if (contrib_samps > 0) {
+            // Fill in a's interior.
+            for (int r=1; r<ncoeff; r++)
+                for (int c=0; c<ncoeff-1; c++)
+                    a[r*ncoeff + c] = a[r*ncoeff + c - ncoeff + 1];
+
+            // Re-organize a if the order has changed...
+            if (n_keep < ncoeff) {
+                for (int r=1; r<n_keep; r++)
+                    for (int c=0; c<n_keep; c++)
+                        a[r*n_keep+c] = a[r*ncoeff+c];
+            }
 
-                        // Solve the system...
-                        int one = 1;
-                        int err = 0;
-                        sposv_("Upper", &n_keep, &one, a, &n_keep, b, &n_keep, &err);
-                }
-                float *write_to = nullptr;
-                float *save_data = nullptr;
-                if (inplace) {
+            // Solve the system...
+            int one = 1;
+            int err = 0;
+            sposv_("Upper", &n_keep, &one, a, &n_keep, b, &n_keep, &err);
+        }
+        float *write_to = nullptr;
+        float *save_data = nullptr;
+        if (inplace) {
             // Copy original data to extract, write model to data.
             save_data = extract;
             write_to = data + gap.first;
-                } else {
+        } else {
             // Write results to extract, do not touch data.
             write_to = extract;
-                }
+        }
 
-                if (save_data != nullptr) {
+        if (save_data != nullptr) {
             for (int i=gap.first; i<gap.second; i++, save_data++)
                 *save_data = data[i];
-                }
-                if (write_to != nullptr) {
+        }
+        if (write_to != nullptr) {
             for (int i=gap.first; i<gap.second; i++, write_to++) {
                 float xx = 1.;
                 *write_to = 0.;
@@ -502,34 +502,34 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
                     xx *= (i - gap.first);
                 }
             }
-                }
-                if (extract != nullptr)
-            extract += (gap.second - gap.first);
         }
+        if (extract != nullptr)
+            extract += (gap.second - gap.first);
+    }
 }
 
 void get_gap_fill_poly(const bp::object ranges,
-               const bp::object tod,
-               int buffer,
-               int order,
-               bool inplace,
-               const bp::object ex)
+                       const bp::object tod,
+                       int buffer,
+                       int order,
+                       bool inplace,
+                       const bp::object ex)
 {
-        // As a test, copy data from rangemat into segment.
+    // As a test, copy data from rangemat into segment.
     auto rangemat = extract_ranges<int32_t>(ranges);
-        int ndet = rangemat.size();
+    int ndet = rangemat.size();
 
-        BufferWrapper<float> tod_buf  ("tod",  tod,  false, std::vector<int>{ndet,-1});
-        int nsamp = tod_buf->shape[1];
+    BufferWrapper<float> tod_buf  ("tod",  tod,  false, std::vector<int>{ndet,-1});
+    int nsamp = tod_buf->shape[1];
 
-        int ncoeff = order + 1; // Let us not speak of order again.
-        float *a = (float*)malloc(ncoeff*(ncoeff+1)*sizeof(*a));
-        float *b = a + ncoeff*ncoeff;
+    int ncoeff = order + 1; // Let us not speak of order again.
+    float *a = (float*)malloc(ncoeff*(ncoeff+1)*sizeof(*a));
+    float *b = a + ncoeff*ncoeff;
 
-        float *ex_data = nullptr;
-        std::vector<int> ex_offsets;
+    float *ex_data = nullptr;
+    std::vector<int> ex_offsets;
 
-        if (ex.ptr() != Py_None) {
+    if (ex.ptr() != Py_None) {
         // Compute offsets of each detector into ex.
         int n = 0;
         for (auto const &r: rangemat) {
@@ -539,18 +539,18 @@ void get_gap_fill_poly(const bp::object ranges,
         }
         BufferWrapper<float> ex_buf("ex", ex, false, std::vector<int>{n});
         ex_data = (float*)ex_buf->buf;
-        }
+    }
 
-        for (int di=0; di < rangemat.size(); di++) {
-                float* data = (float*)((char*)tod_buf->buf + di*tod_buf->strides[0]);
-                float* _ex = ex_data;
-                if (_ex != nullptr)
+    for (int di=0; di < rangemat.size(); di++) {
+        float* data = (float*)((char*)tod_buf->buf + di*tod_buf->strides[0]);
+        float* _ex = ex_data;
+        if (_ex != nullptr)
             _ex += ex_offsets[di];
-                get_gap_fill_poly_single(rangemat[di], data, a, b, buffer, ncoeff,
-                     inplace, _ex);
-        }
+        get_gap_fill_poly_single(rangemat[di], data, a, b, buffer, ncoeff,
+                                 inplace, _ex);
+    }
 
-        free(a);
+    free(a);
 }
 
 PYBINDINGS("so3g")
@@ -559,16 +559,16 @@ PYBINDINGS("so3g")
     bp::def("process_cuts",  process_cuts);
     bp::def("translate_cuts", translate_cuts);
     bp::def("get_gap_fill_poly",  get_gap_fill_poly,
-                "get_gap_fill_poly(ranges, signal, buffer, order, extract)\n"
-                "\n"
-                "Do polynomial gap-filling.\n"
-                "\n"
-                "Args:\n"
-                "  ranges: RangesMatrix with shape (ndet, nsamp)\n"
-                "  signal: data array with shape (ndet, nsamp)\n"
-                "  buffer: integer stating max number of samples to use on each end\n"
-                "  order: order of polynomial to use (1 means linear)\n"
-        "  inplace: whether to overwrite data array with the model\n"
-        "  extract: array to write the original data samples (inplace)\n"
-        "    or the model (!inplace) into.\n");
+            "get_gap_fill_poly(ranges, signal, buffer, order, extract)\n"
+            "\n"
+            "Do polynomial gap-filling.\n"
+            "\n"
+            "Args:\n"
+            "  ranges: RangesMatrix with shape (ndet, nsamp)\n"
+            "  signal: data array with shape (ndet, nsamp)\n"
+            "  buffer: integer stating max number of samples to use on each end\n"
+            "  order: order of polynomial to use (1 means linear)\n"
+            "  inplace: whether to overwrite data array with the model\n"
+            "  extract: array to write the original data samples (inplace)\n"
+            "    or the model (!inplace) into.\n");
 }

From f2cfbf79c379531a50804cc824610d82412e6fb6 Mon Sep 17 00:00:00 2001
From: Matthew Hasselfield <mhasself@users.noreply.github.com>
Date: Fri, 2 Feb 2024 17:08:09 +0000
Subject: [PATCH 4/6] get_gap_fill_poly64, for float64

---
 src/array_ops.cxx      | 70 +++++++++++++++++++++++++++++-------------
 test/test_array_ops.py | 48 +++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 22 deletions(-)
 create mode 100644 test/test_array_ops.py

diff --git a/src/array_ops.cxx b/src/array_ops.cxx
index d353d2e3..ada8afd8 100644
--- a/src/array_ops.cxx
+++ b/src/array_ops.cxx
@@ -11,6 +11,8 @@ extern "C" {
     // sposv: solve Ax = b for A positive definite.
     void sposv_(const char* uplo, int* n, int* nrhs, float* a, int* lda,
                 float* b, int* ldb, int* info );
+    void dposv_(const char* uplo, int* n, int* nrhs, double* a, int* lda,
+                double* b, int* ldb, int* info );
 }
 
 #include <boost/python.hpp>
@@ -402,10 +404,27 @@ void pcut_poly_translate_helper(const vector<RangesInt32> & iranges, const vecto
 // When extract is non-null, it needs to be the right size... this
 // should be checked by get_gap_fill_poly.
 
-void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
-                              float *a, float *b,
+template <typename T>
+inline void Xposv(const char* uplo, int* n, int* nrhs, T* a, int* lda,
+                  T* b, int* ldb, int* info );
+
+template <>
+void Xposv<float>(const char* uplo, int* n, int* nrhs, float* a, int* lda,
+                  float* b, int* ldb, int* info ) {
+    return sposv_(uplo, n, nrhs, a, lda, b, ldb, info);
+}
+
+template <>
+void Xposv<double>(const char* uplo, int* n, int* nrhs, double* a, int* lda,
+                     double* b, int* ldb, int* info ) {
+    return dposv_(uplo, n, nrhs, a, lda, b, ldb, info);
+}
+
+template <typename T>
+void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
+                              T *a, T *b,
                               int buffer, int ncoeff,
-                              bool inplace, float *extract)
+                              bool inplace, T *extract)
 {
     // Generate Ranges corresponding to samples near the edges of
     // intervals we want to fill.
@@ -425,7 +444,7 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
         int contrib_segs = 0;
         memset(a, 0, ncoeff*ncoeff*sizeof(*a));
         memset(b, 0, ncoeff*sizeof(*b));
-        float x0 = gap.first;
+        T x0 = gap.first;
 
         for (; model_i < rsegs.segments.size(); model_i++) {
             // Advance until just before this gap.
@@ -434,9 +453,9 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
                 continue;
             // Include this interval in the fit.
             for (int i=ival.first; i<ival.second; i++) {
-                float xx = 1.;
-                float yxx = data[i];
-                float x = i - x0;
+                T xx = 1.;
+                T yxx = data[i];
+                T x = i - x0;
                 for (int j=0; j<ncoeff; j++) {
                     b[j] += yxx;
                     yxx *= x;
@@ -476,10 +495,10 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
             // Solve the system...
             int one = 1;
             int err = 0;
-            sposv_("Upper", &n_keep, &one, a, &n_keep, b, &n_keep, &err);
+            Xposv("Upper", &n_keep, &one, a, &n_keep, b, &n_keep, &err);
         }
-        float *write_to = nullptr;
-        float *save_data = nullptr;
+        T *write_to = nullptr;
+        T *save_data = nullptr;
         if (inplace) {
             // Copy original data to extract, write model to data.
             save_data = extract;
@@ -495,7 +514,7 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
         }
         if (write_to != nullptr) {
             for (int i=gap.first; i<gap.second; i++, write_to++) {
-                float xx = 1.;
+                T xx = 1.;
                 *write_to = 0.;
                 for (int j=0; j<n_keep; j++) {
                     *write_to += xx * b[j];
@@ -508,6 +527,7 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, float *data,
     }
 }
 
+template <typename T>
 void get_gap_fill_poly(const bp::object ranges,
                        const bp::object tod,
                        int buffer,
@@ -519,14 +539,14 @@ void get_gap_fill_poly(const bp::object ranges,
     auto rangemat = extract_ranges<int32_t>(ranges);
     int ndet = rangemat.size();
 
-    BufferWrapper<float> tod_buf  ("tod",  tod,  false, std::vector<int>{ndet,-1});
+    BufferWrapper<T> tod_buf  ("tod",  tod,  false, std::vector<int>{ndet,-1});
     int nsamp = tod_buf->shape[1];
 
     int ncoeff = order + 1; // Let us not speak of order again.
-    float *a = (float*)malloc(ncoeff*(ncoeff+1)*sizeof(*a));
-    float *b = a + ncoeff*ncoeff;
+    T *a = (T*)malloc(ncoeff*(ncoeff+1)*sizeof(*a));
+    T *b = a + ncoeff*ncoeff;
 
-    float *ex_data = nullptr;
+    T *ex_data = nullptr;
     std::vector<int> ex_offsets;
 
     if (ex.ptr() != Py_None) {
@@ -537,13 +557,13 @@ void get_gap_fill_poly(const bp::object ranges,
             for (auto const &r: r.segments)
                 n += (r.second - r.first);
         }
-        BufferWrapper<float> ex_buf("ex", ex, false, std::vector<int>{n});
-        ex_data = (float*)ex_buf->buf;
+        BufferWrapper<T> ex_buf("ex", ex, false, std::vector<int>{n});
+        ex_data = (T*)ex_buf->buf;
     }
 
     for (int di=0; di < rangemat.size(); di++) {
-        float* data = (float*)((char*)tod_buf->buf + di*tod_buf->strides[0]);
-        float* _ex = ex_data;
+        T* data = (T*)((char*)tod_buf->buf + di*tod_buf->strides[0]);
+        T* _ex = ex_data;
         if (_ex != nullptr)
             _ex += ex_offsets[di];
         get_gap_fill_poly_single(rangemat[di], data, a, b, buffer, ncoeff,
@@ -558,17 +578,23 @@ PYBINDINGS("so3g")
     bp::def("nmat_detvecs_apply", nmat_detvecs_apply);
     bp::def("process_cuts",  process_cuts);
     bp::def("translate_cuts", translate_cuts);
-    bp::def("get_gap_fill_poly",  get_gap_fill_poly,
+    bp::def("get_gap_fill_poly",  get_gap_fill_poly<float>,
             "get_gap_fill_poly(ranges, signal, buffer, order, extract)\n"
             "\n"
-            "Do polynomial gap-filling.\n"
+            "Do polynomial gap-filling on a float32 array.\n"
             "\n"
             "Args:\n"
             "  ranges: RangesMatrix with shape (ndet, nsamp)\n"
-            "  signal: data array with shape (ndet, nsamp)\n"
+            "  signal: data array (float32) with shape (ndet, nsamp)\n"
             "  buffer: integer stating max number of samples to use on each end\n"
             "  order: order of polynomial to use (1 means linear)\n"
             "  inplace: whether to overwrite data array with the model\n"
             "  extract: array to write the original data samples (inplace)\n"
             "    or the model (!inplace) into.\n");
+    bp::def("get_gap_fill_poly64",  get_gap_fill_poly<double>,
+            "get_gap_fill_poly64(ranges, signal, buffer, order, extract)\n"
+            "\n"
+            "Do polynomial gap-filling for float64 data.\n"
+            "\n"
+            "See details in docstring for get_gap_fill_poly.\n");
 }
diff --git a/test/test_array_ops.py b/test/test_array_ops.py
new file mode 100644
index 00000000..170d0fd6
--- /dev/null
+++ b/test/test_array_ops.py
@@ -0,0 +1,48 @@
+import unittest
+
+import so3g
+import numpy as np
+
+
+class TestPolyFill(unittest.TestCase):
+    """Test the polynomial gap filling.
+
+    """
+    def test_00_basic(self):
+        n = 1000
+        p0 = [1., .2, -.14]
+        t = np.arange(1000) * .1
+        sig0 = np.polyval(p0, t)[None,:]
+
+        ix, nx = 200, 20
+        r = so3g.RangesInt32(n)
+        r.add_interval(ix, ix+nx)
+        s = r.mask()
+
+        BAD_DATA = -100
+
+        for dtype, fillfunc, tolerance in [
+                ('float32', so3g.get_gap_fill_poly, 1e-4),
+                ('float64', so3g.get_gap_fill_poly64, 1e-10)]:
+            sig1 = sig0.astype(dtype)
+
+            # Write the fill into ex array (inplace=False)
+            sig = sig1.copy()
+            sig[0, s] = BAD_DATA
+            ex = np.zeros((s.sum()), dtype=sig.dtype)
+            fillfunc([r], sig, 10, 2, False, ex)
+            np.testing.assert_allclose(ex, sig1[0, s], rtol=tolerance)
+            np.testing.assert_allclose(BAD_DATA, sig[0, s], rtol=tolerance)
+
+            # Write the fill and extract the filled (inplace=True)
+            sig = sig1.copy()
+            sig[0, s] = BAD_DATA
+            ex = np.zeros((s.sum()), dtype=sig.dtype)
+            fillfunc([r], sig, 10, 2, True, ex)
+            np.testing.assert_allclose(sig1[0, s], sig[0, s], rtol=tolerance)
+            np.testing.assert_allclose(BAD_DATA, ex, rtol=tolerance)
+
+
+
+if __name__ == '__main__':
+    unittest.main()

From 83a3d20567a7a171d48b7bfc061e3885be0642ce Mon Sep 17 00:00:00 2001
From: Matthew Hasselfield <mhasself@users.noreply.github.com>
Date: Fri, 2 Feb 2024 17:14:51 +0000
Subject: [PATCH 5/6] get_gap_fill_poly*: actually always do math in double

Even when back-filling into float32 signal arrays
---
 src/array_ops.cxx | 38 ++++++++++----------------------------
 1 file changed, 10 insertions(+), 28 deletions(-)

diff --git a/src/array_ops.cxx b/src/array_ops.cxx
index ada8afd8..58f4443d 100644
--- a/src/array_ops.cxx
+++ b/src/array_ops.cxx
@@ -8,9 +8,7 @@
 extern "C" {
     #include <cblas.h>
     // Additional prototypes for Fortran LAPACK routines.
-    // sposv: solve Ax = b for A positive definite.
-    void sposv_(const char* uplo, int* n, int* nrhs, float* a, int* lda,
-                float* b, int* ldb, int* info );
+    // dposv: solve Ax = b for A positive definite.
     void dposv_(const char* uplo, int* n, int* nrhs, double* a, int* lda,
                 double* b, int* ldb, int* info );
 }
@@ -404,25 +402,9 @@ void pcut_poly_translate_helper(const vector<RangesInt32> & iranges, const vecto
 // When extract is non-null, it needs to be the right size... this
 // should be checked by get_gap_fill_poly.
 
-template <typename T>
-inline void Xposv(const char* uplo, int* n, int* nrhs, T* a, int* lda,
-                  T* b, int* ldb, int* info );
-
-template <>
-void Xposv<float>(const char* uplo, int* n, int* nrhs, float* a, int* lda,
-                  float* b, int* ldb, int* info ) {
-    return sposv_(uplo, n, nrhs, a, lda, b, ldb, info);
-}
-
-template <>
-void Xposv<double>(const char* uplo, int* n, int* nrhs, double* a, int* lda,
-                     double* b, int* ldb, int* info ) {
-    return dposv_(uplo, n, nrhs, a, lda, b, ldb, info);
-}
-
 template <typename T>
 void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
-                              T *a, T *b,
+                              double *a, double *b,
                               int buffer, int ncoeff,
                               bool inplace, T *extract)
 {
@@ -444,7 +426,7 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
         int contrib_segs = 0;
         memset(a, 0, ncoeff*ncoeff*sizeof(*a));
         memset(b, 0, ncoeff*sizeof(*b));
-        T x0 = gap.first;
+        double x0 = gap.first;
 
         for (; model_i < rsegs.segments.size(); model_i++) {
             // Advance until just before this gap.
@@ -453,9 +435,9 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
                 continue;
             // Include this interval in the fit.
             for (int i=ival.first; i<ival.second; i++) {
-                T xx = 1.;
-                T yxx = data[i];
-                T x = i - x0;
+                double xx = 1.;
+                double yxx = data[i];
+                double x = i - x0;
                 for (int j=0; j<ncoeff; j++) {
                     b[j] += yxx;
                     yxx *= x;
@@ -495,7 +477,7 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
             // Solve the system...
             int one = 1;
             int err = 0;
-            Xposv("Upper", &n_keep, &one, a, &n_keep, b, &n_keep, &err);
+            dposv_("Upper", &n_keep, &one, a, &n_keep, b, &n_keep, &err);
         }
         T *write_to = nullptr;
         T *save_data = nullptr;
@@ -514,7 +496,7 @@ void get_gap_fill_poly_single(const RangesInt32 &gaps, T *data,
         }
         if (write_to != nullptr) {
             for (int i=gap.first; i<gap.second; i++, write_to++) {
-                T xx = 1.;
+                double xx = 1.;
                 *write_to = 0.;
                 for (int j=0; j<n_keep; j++) {
                     *write_to += xx * b[j];
@@ -543,8 +525,8 @@ void get_gap_fill_poly(const bp::object ranges,
     int nsamp = tod_buf->shape[1];
 
     int ncoeff = order + 1; // Let us not speak of order again.
-    T *a = (T*)malloc(ncoeff*(ncoeff+1)*sizeof(*a));
-    T *b = a + ncoeff*ncoeff;
+    double *a = (double*)malloc(ncoeff*(ncoeff+1)*sizeof(*a));
+    double *b = a + ncoeff*ncoeff;
 
     T *ex_data = nullptr;
     std::vector<int> ex_offsets;

From 50b184e53867f50d14fdede8b3779cd08e0c98d3 Mon Sep 17 00:00:00 2001
From: Matthew Hasselfield <mhasself@users.noreply.github.com>
Date: Fri, 2 Feb 2024 17:40:41 +0000
Subject: [PATCH 6/6] RangesMatrix: add close_gaps

---
 python/proj/ranges.py | 10 ++++++++++
 test/test_ranges.py   | 18 ++++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/python/proj/ranges.py b/python/proj/ranges.py
index f4f3e0bc..0b912638 100644
--- a/python/proj/ranges.py
+++ b/python/proj/ranges.py
@@ -190,6 +190,16 @@ def collect(items, join_depth):
             return RangesMatrix(ranges, child_shape=items[0].shape[1:])
         return collect(items, axis)
 
+    def close_gaps(self, gap_size=0):
+        """Call close_gaps(gap_size) on all children.  Any ranges that abutt
+        each other within the gap_size are merged into a single entry.
+        Usually a gap_size of 0 is not possible, but if a caller is
+        carefully using append_interval_no_check, then it can happen.
+
+        """
+        for r in self.ranges:
+            r.close_gaps(gap_size)
+
     def get_stats(self):
         samples, intervals = [], []
         for r in self.ranges:
diff --git a/test/test_ranges.py b/test/test_ranges.py
index 33ce382a..f3eca596 100644
--- a/test/test_ranges.py
+++ b/test/test_ranges.py
@@ -114,3 +114,21 @@ def test_int_args(self):
         with self.assertRaises(ValueError):
             r.add_interval(object(), object())
         self.assertEqual(len(r.ranges()), 2)
+
+    def test_close_gaps(self):
+        def _get_gapped(size=0):
+            r = Ranges(1000)
+            r.append_interval_no_check(10, 20)
+            r.append_interval_no_check(20+size, 30+size)
+            return r
+        r = _get_gapped()
+        assert(len(r.ranges()) == 2)
+        r.close_gaps(0)
+        assert(len(r.ranges()) == 1)
+
+        rr = RangesMatrix([_get_gapped(), _get_gapped(10)])
+        rr.close_gaps()
+        assert(len(rr[0].ranges()) == 1)
+        assert(len(rr[1].ranges()) == 2)
+        rr.close_gaps(10)
+        assert(len(rr[1].ranges()) == 1)