
Commit

Merge branch 'main' into NPI-3290-clearer-error-on-empty-sinex-header-extras
treefern committed Jun 3, 2024
2 parents 1f3b5f3 + 12c5a9d commit 6c5b9b3
Showing 5 changed files with 72 additions and 70 deletions.
34 changes: 17 additions & 17 deletions gnssanalysis/gn_datetime.py
@@ -161,8 +161,8 @@ def datetime2yydoysec(datetime: _Union[_np.ndarray, _pd.Series]) -> _np.ndarray:
) # .astype("datetime64[Y]") called on ndarray will return 4-digit year, not YYYY-MM-DD as in case of Series
datetime_Y = datetime.astype("datetime64[Y]")
datetime_D = datetime.astype("datetime64[D]")
doy = _pd.Series((datetime_D - datetime_Y).astype(int).astype(str))
seconds = _pd.Series((datetime - datetime_D).astype("timedelta64[s]").astype(int).astype(str))
doy = _pd.Series((datetime_D - datetime_Y).astype("int64").astype(str))
seconds = _pd.Series((datetime - datetime_D).astype("timedelta64[s]").astype("int64").astype(str))
yydoysec = (
_pd.Series(datetime_Y.astype(str)).str.slice(2).values
+ ":"
@@ -175,32 +175,32 @@ def datetime2yydoysec(datetime: _Union[_np.ndarray, _pd.Series]) -> _np.ndarray:

def gpsweeksec2datetime(gps_week: _np.ndarray, tow: _np.ndarray, as_j2000: bool = True) -> _np.ndarray:
"""trace file date (gps week, time_of_week) to datetime64 conversion"""
ORIGIN = (_gn_const.GPS_ORIGIN - _gn_const.J2000_ORIGIN).astype(int) if as_j2000 else _gn_const.GPS_ORIGIN
ORIGIN = (_gn_const.GPS_ORIGIN - _gn_const.J2000_ORIGIN).astype("int64") if as_j2000 else _gn_const.GPS_ORIGIN
datetime = ORIGIN + (gps_week * _gn_const.SEC_IN_WEEK + tow)
return datetime


def datetime2gpsweeksec(array: _np.ndarray, as_decimal=False) -> _Union[tuple, _np.ndarray]:
if array.dtype == int:
ORIGIN = _gn_const.J2000_ORIGIN.astype(int) - _gn_const.GPS_ORIGIN.astype(int)
ORIGIN = _gn_const.J2000_ORIGIN.astype("int64") - _gn_const.GPS_ORIGIN.astype("int64")
gps_time = array + ORIGIN # need int conversion for the case of datetime64
else:
ORIGIN = _gn_const.GPS_ORIGIN.astype(int)
gps_time = array.astype("datetime64[s]").astype(int) - ORIGIN # datetime64 converted to int seconds
ORIGIN = _gn_const.GPS_ORIGIN.astype("int64")
gps_time = array.astype("datetime64[s]").astype("int64") - ORIGIN # datetime64 converted to int seconds

weeks_int = (gps_time / _gn_const.SEC_IN_WEEK).astype(int)
weeks_int = (gps_time / _gn_const.SEC_IN_WEEK).astype("int64")
tow = gps_time - weeks_int * _gn_const.SEC_IN_WEEK # this eliminates rounding error problem
return weeks_int + (tow / 1000000) if as_decimal else (weeks_int, tow)
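As a worked example of the week / seconds-of-week split computed by datetime2gpsweeksec above (a standalone sketch with assumed constant values; _gn_const.GPS_ORIGIN and _gn_const.SEC_IN_WEEK are taken to be the conventional GPS epoch and week length):

import numpy as np

GPS_ORIGIN = np.datetime64("1980-01-06 00:00:00")  # assumed value of _gn_const.GPS_ORIGIN
SEC_IN_WEEK = 604800                               # assumed value of _gn_const.SEC_IN_WEEK

t = np.datetime64("2024-06-03 00:00:00")
gps_time = (t.astype("datetime64[s]") - GPS_ORIGIN).astype("int64")
week = gps_time // SEC_IN_WEEK        # 2317
tow = gps_time - week * SEC_IN_WEEK   # 86400, i.e. Monday 00:00 of that GPS week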


def datetime2j2000(datetime: _np.ndarray) -> _np.ndarray:
"""datetime64 conversion to int seconds after J2000 (2000-01-01 12:00:00)"""
return (datetime.astype("datetime64[s]") - _gn_const.J2000_ORIGIN).astype(int)
return (datetime.astype("datetime64[s]") - _gn_const.J2000_ORIGIN).astype("int64")


def j20002datetime(j2000secs: _np.ndarray, as_datetime: bool = False) -> _np.ndarray:
"""int64 seconds after J2000 (2000-01-01 12:00:00) conversion to datetime64, if as_datetime selected - will additionally convert to datetime.datetime"""
j2000secs = j2000secs if isinstance(j2000secs.dtype, int) else j2000secs.astype(int)
j2000secs = j2000secs if isinstance(j2000secs.dtype, int) else j2000secs.astype("int64")
datetime64 = _gn_const.J2000_ORIGIN + j2000secs
if as_datetime:
return datetime64.astype(_datetime)
Expand All @@ -223,7 +223,7 @@ def j20002yydoysec(j2000secs: _np.ndarray) -> _np.ndarray:


def datetime2mjd(array: _np.ndarray) -> tuple:
mjd_seconds = (array - _gn_const.MJD_ORIGIN).astype(int) # seconds
mjd_seconds = (array - _gn_const.MJD_ORIGIN).astype("int64") # seconds
return mjd_seconds // _gn_const.SEC_IN_DAY, (mjd_seconds % _gn_const.SEC_IN_DAY) / _gn_const.SEC_IN_DAY


@@ -238,7 +238,7 @@ def pydatetime_to_mjd(dt: _datetime) -> float:


def j20002mjd(array: _np.ndarray) -> tuple:
j2000_mjd_bias = (_gn_const.J2000_ORIGIN - _gn_const.MJD_ORIGIN).astype(int) # in seconds
j2000_mjd_bias = (_gn_const.J2000_ORIGIN - _gn_const.MJD_ORIGIN).astype("int64") # in seconds
mjd_seconds = j2000_mjd_bias + array
return mjd_seconds // _gn_const.SEC_IN_DAY, (mjd_seconds % _gn_const.SEC_IN_DAY) / _gn_const.SEC_IN_DAY

@@ -250,7 +250,7 @@ def j20002j2000days(array: _np.ndarray) -> _np.ndarray:

def mjd2datetime(mjd: _np.ndarray, seconds_frac: _np.ndarray, pea_partials=False) -> _np.ndarray:
seconds = (
(86400 * seconds_frac).astype(int) if not pea_partials else seconds_frac.astype(int)
(86400 * seconds_frac).astype("int64") if not pea_partials else seconds_frac.astype("int64")
) # pod orb_partials file has a custom mjd date format with frac being seconds
dt = _gn_const.MJD_ORIGIN + mjd.astype("timedelta64[D]") + seconds
return dt
@@ -284,12 +284,12 @@ def j20002rnxdt(j2000secs: _np.ndarray) -> _np.ndarray:
minute = datetime.astype("datetime64[m]")

date_y = "*" + _pd.Series(year.astype(str)).str.rjust(6).values
date_m = _pd.Series(((month - year).astype(int) + 1).astype(str)).str.rjust(3).values
date_d = _pd.Series(((day - month).astype(int) + 1).astype(str)).str.rjust(3).values
date_m = _pd.Series(((month - year).astype("int64") + 1).astype(str)).str.rjust(3).values
date_d = _pd.Series(((day - month).astype("int64") + 1).astype(str)).str.rjust(3).values

time_h = _pd.Series((hour - day).astype(int).astype(str)).str.rjust(3).values
time_m = _pd.Series((minute - hour).astype(int).astype(str)).str.rjust(3).values
time_s = (_pd.Series((datetime - minute)).view(int) / 1e9).apply("{:.8f}\n".format).str.rjust(13).values
time_h = _pd.Series((hour - day).astype("int64").astype(str)).str.rjust(3).values
time_m = _pd.Series((minute - hour).astype("int64").astype(str)).str.rjust(3).values
time_s = (_pd.Series((datetime - minute)).view("int64") / 1e9).apply("{:.8f}\n".format).str.rjust(13).values
return date_y + date_m + date_d + time_h + time_m + time_s


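Most of the edits to gnssanalysis/gn_datetime.py above (and the matching one in gnssanalysis/gn_io/pod.py below) swap .astype(int) for .astype("int64"). A brief illustrative sketch of why the explicit width matters; the 32-bit behaviour applies only on platforms where NumPy's default integer is a 32-bit C long (e.g. Windows with NumPy < 2.0):

import numpy as np

J2000_ORIGIN = np.datetime64("2000-01-01 12:00:00")   # assumed value of _gn_const.J2000_ORIGIN
t = np.array(["2070-01-01"], dtype="datetime64[s]")

seconds = (t - J2000_ORIGIN).astype("int64")  # ~2.2e9 seconds, well within int64
# (t - J2000_ORIGIN).astype(int) resolves to the platform default integer instead,
# which is 32-bit on some builds and would silently wrap for values this large.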
7 changes: 5 additions & 2 deletions gnssanalysis/gn_io/igslog.py
@@ -1,4 +1,5 @@
"""IGS log files parser"""

import glob as _glob
import re as _re
from multiprocessing import Pool as _Pool
@@ -297,7 +298,9 @@ def gather_metadata(logs_glob_path="/data/station_logs/station_logs_IGS/*/*.log"
xyz_norm = (xyz_array[valid_mask] ** 2).sum(axis=1) ** 0.5
valid_mask[valid_mask] = (xyz_norm > 6000000) & (xyz_norm < 6500000)

llh = _gn_transform.xyz2llh(xyz_array[valid_mask], method="heikkinen", ellipsoid=_gn_const.WGS84, latlon_as_deg=True)
llh = _gn_transform.xyz2llh(
xyz_array[valid_mask], method="heikkinen", ellipsoid=_gn_const.WGS84, latlon_as_deg=True
)
llh_snx = _gn_io.sinex.llh2snxdms(llh)

llh2 = id_loc_df[~valid_mask][["LAT", "LON", "HEI"]]
@@ -435,7 +438,7 @@ def meta2sting(id_loc_df, rec_df, ant_df):
+ " "
+ rec_df.END_SNX
+ " "
+ +rec_df.RECEIVER.str.ljust(20, " ")
+ rec_df.RECEIVER.str.ljust(20, " ")
+ " "
+ rec_df["S/N"].str.ljust(5, " ")
+ " "
4 changes: 2 additions & 2 deletions gnssanalysis/gn_io/nanu.py
@@ -99,7 +99,7 @@ def get_bad_sv_from_nanu_df(nanu_df: _pd.DataFrame, datetime: _Union[_np.datetim
"UNUSABLE START TIME ZULU",
"UNUSABLE START CALENDAR DATE",
"LAUNCH JDAY",
"LAUNCH TIME ZULU"
"LAUNCH TIME ZULU",
# 'DECOMMISSIONING TIME ZULU', 'DECOMMISSIONING CALENDAR DATE',
# 'DECOMMISSIONING START TIME ZULU',
# 'DECOMMISSIONING START CALENDAR DATE'
@@ -123,7 +123,7 @@ def get_bad_sv_from_nanu_df(nanu_df: _pd.DataFrame, datetime: _Union[_np.datetim
hhmm = np_time[na_time_mask].astype("U4").view("<U2").reshape(-1, 2)

nd = _np.ndarray(shape=np_time.shape, dtype="timedelta64[s]")
nd.fill(_np.nan)
nd.fill(_np.timedelta64("nat"))
nd[na_time_mask] = hhmm[:, 0].astype("timedelta64[h]") + hhmm[:, 1].astype("timedelta64[m]")

dt_df = _pd.concat([df.drop(labels=columns_date + columns_time, axis=1), dates + nd], axis=1)
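The nd.fill change above swaps a float NaN for an explicit NaT. A standalone sketch (not from the repository) of the distinction; filling a timedelta64 array from a float NaN relies on an implicit cast whose handling has varied across NumPy releases, whereas timedelta64("nat") states the intent directly:

import numpy as np

nd = np.empty(3, dtype="timedelta64[s]")
nd.fill(np.timedelta64("nat"))   # explicit "not a time" fill
print(np.isnat(nd))              # [ True  True  True]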
2 changes: 1 addition & 1 deletion gnssanalysis/gn_io/pod.py
@@ -14,7 +14,7 @@ def pod_get_IC_dt(pod_out: bytes) -> int:
end = pod_out.find(b"\n", begin)
date = _pd.Series(pod_out[begin:end].strip().decode()).str.split(pat=r"\s+")
year, month, day, frac = date.tolist()[0]
dt_value = (_np.datetime64("-".join([year, month.zfill(2), day])) - _gn_const.J2000_ORIGIN).astype(int)
dt_value = (_np.datetime64("-".join([year, month.zfill(2), day])) - _gn_const.J2000_ORIGIN).astype("int64")
return dt_value + int(86400 * float(frac))


95 changes: 47 additions & 48 deletions gnssanalysis/gn_io/sp3.py
@@ -36,11 +36,11 @@
_RE_SP3_HEAD_FDESCR = _re.compile(rb"\%c[ ]+(\w{1})[ ]+cc[ ](\w{3})")

# Nodata ie NaN constants for SP3 format
SP3_CLOCK_NODATA_STRING = " 999999.999999" # Not used for reading, as fractional components are optional
SP3_CLOCK_NODATA_STRING = " 999999.999999" # Not used for reading, as fractional components are optional
SP3_CLOCK_NODATA_NUMERIC = 999999
SP3_POS_NODATA_STRING = " 0.000000"
SP3_CLOCK_STD_NODATA = -1000
SP3_POS_STD_NODATA = -100
SP3_POS_NODATA_STRING = " 0.000000"
SP3_CLOCK_STD_NODATA = -1000
SP3_POS_STD_NODATA = -100


def sp3_clock_nodata_to_nan(sp3_df):
Expand Down Expand Up @@ -159,15 +159,15 @@ def read_sp3(sp3_path, pOnly=True):
sp3_df.attrs["path"] = sp3_path

# Check for duplicate epochs, dedupe and log warning
if sp3_df.index.has_duplicates: # a literaly free check
duplicated_indexes = sp3_df.index.duplicated() # Typically sub ms time. Marks all but first instance as duped.
if sp3_df.index.has_duplicates: # a literaly free check
duplicated_indexes = sp3_df.index.duplicated() # Typically sub ms time. Marks all but first instance as duped.
first_dupe = sp3_df.index.get_level_values(0)[duplicated_indexes][0]
logging.warning(
f"Duplicate epoch(s) found in SP3 ({duplicated_indexes.sum()} additional entries, potentially non-unique). "
f"First duplicate (as J2000): {first_dupe} (as date): {first_dupe + gn_const.J2000_ORIGIN} "
f"SP3 path is: '{str(sp3_path)}'. Duplicates will be removed, keeping first."
)
sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Now dedupe them, keeping the first of any clashes
sp3_df = sp3_df[~sp3_df.index.duplicated(keep="first")] # Now dedupe them, keeping the first of any clashes

return sp3_df
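A minimal sketch of the keep-first dedupe pattern used in read_sp3 above (made-up index and values, not data from the repository):

import pandas as pd

idx = pd.MultiIndex.from_tuples([(0, "G01"), (0, "G01"), (300, "G01")], names=["J2000", "PRN"])
df = pd.DataFrame({"X": [1.0, 2.0, 3.0]}, index=idx)
if df.index.has_duplicates:                      # cheap check before the heavier dedupe
    df = df[~df.index.duplicated(keep="first")]  # drops the second (0, "G01") row, keeps the first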

@@ -295,9 +295,7 @@ def gen_sp3_header(sp3_df):
return "".join(line1 + line2 + sats_header.tolist() + sv_orb_head.tolist() + head_c + head_fi + comment)


def gen_sp3_content(
sp3_df: _pd.DataFrame, sort_outputs: bool = False, buf: Union[None, _io.TextIOBase] = None
):
def gen_sp3_content(sp3_df: _pd.DataFrame, sort_outputs: bool = False, buf: Union[None, _io.TextIOBase] = None):
"""
Organises, formats (including nodata values), then writes out SP3 content to a buffer if provided, or returns
it otherwise.
@@ -316,25 +314,27 @@ def gen_sp3_content(
clk_base = 1.025

def pos_log(x):
return _np.minimum( # Cap value at 99
_np.nan_to_num( # If there is data, use the following formula. Else return NODATA value.
_np.rint(_np.log(x) / _np.log(pos_base)), # Rounded to nearest int
nan=SP3_POS_STD_NODATA
),
99
).astype(int)
return _np.minimum( # Cap value at 99
_np.nan_to_num( # If there is data, use the following formula. Else return NODATA value.
_np.rint(_np.log(x) / _np.log(pos_base)), nan=SP3_POS_STD_NODATA # Rounded to nearest int
),
99,
).astype(int)

def clk_log(x):
return _np.minimum(
_np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA),
999 # Cap at 999
).astype(int)

std_df = (
sp3_df["STD"]
.transform({"X": pos_log, "Y": pos_log, "Z": pos_log, "CLK": clk_log})
.rename(columns=lambda x: "STD" + x)
)
_np.nan_to_num(_np.rint(_np.log(x) / _np.log(clk_base)), nan=SP3_CLOCK_STD_NODATA), 999 # Cap at 999
).astype(int)
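pos_log and clk_log above encode standard deviations as base-b exponents for the SP3 standard-deviation columns. A worked sketch, assuming pos_base = 1.25 (the value and the millimetre interpretation are illustrative assumptions, not taken from the visible diff):

import numpy as np

pos_base = 1.25
std_value = 10.0                                               # hypothetical position std
exponent = int(np.rint(np.log(std_value) / np.log(pos_base)))  # -> 10, since 1.25**10 is about 9.31
# NaN inputs map to SP3_POS_STD_NODATA via nan_to_num, and results are capped at 99.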

std_df = sp3_df["STD"]
# Remove attribute data from this shallow copy of the Dataframe.
# This works around an apparent bug in Pandas, due to the fact calling == on two Series produces a list
# of element-wise comparisons, rather than a single boolean value. This list seems to break Pandas
# concat() when it is invoked within transform() and tries to check if attributes match between columns
# being concatenated.
std_df.attrs = {}
std_df = std_df.transform({"X": pos_log, "Y": pos_log, "Z": pos_log, "CLK": clk_log})
std_df = std_df.rename(columns=lambda x: "STD_" + x)
out_df = _pd.concat([out_df, std_df], axis="columns")
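A standalone sketch of the attrs workaround above (synthetic DataFrame; whether attrs propagate to the selection, and whether transform() then trips over them, is pandas-version-dependent):

import pandas as pd

df = pd.DataFrame({"X": [1.0, 2.0], "CLK": [3.0, 4.0]})
df.attrs = {"path": "example.sp3"}       # hypothetical metadata, like the path read_sp3 attaches
std = df[["X", "CLK"]]                   # shallow copy may carry attrs along
std.attrs = {}                           # clear metadata before transform(), as above
std = std.transform({"X": lambda v: v * 2, "CLK": lambda v: v + 1})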

def prn_formatter(x):
@@ -349,16 +349,16 @@ def prn_formatter(x):
# Longer term we should maybe reimplement this again, maybe just processing groups line by line to format them?

def pos_formatter(x):
if isinstance(x, str): # Presume an inf/NaN value, already formatted as nodata string. Pass through.
return x # Expected value " 0.000000"
return format(x, "13.6f") # Numeric value, format as usual
if isinstance(x, str): # Presume an inf/NaN value, already formatted as nodata string. Pass through.
return x # Expected value " 0.000000"
return format(x, "13.6f") # Numeric value, format as usual

def clk_formatter(x):
# If this value (nominally a numpy float64) is actually a string, moreover containing the mandated part of the
# clock nodata value (per the SP3 spec), we deduce nodata formatting has already been done, and return as is.
if isinstance(x, str) and x.strip(' ').startswith("999999."): # TODO performance: could do just type check
if isinstance(x, str) and x.strip(" ").startswith("999999."): # TODO performance: could do just type check
return x
return format(x, "13.6f") # Not infinite or NaN: proceed with normal formatting
return format(x, "13.6f") # Not infinite or NaN: proceed with normal formatting

# NOTE: the following formatters are fine, as the nodata value is actually a *numeric value*,
# so DataFrame.to_string() will invoke them for those values.
@@ -377,14 +377,14 @@ def clk_std_formatter(x):

formatters = {
"PRN": prn_formatter,
"X": pos_formatter, # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
"X": pos_formatter, # pos_formatter() can't handle nodata (Inf / NaN). Handled prior.
"Y": pos_formatter,
"Z": pos_formatter,
"CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
"STDX": pos_std_formatter, # Nodata is represented as an integer, so can be handled here.
"CLK": clk_formatter, # Can't handle CLK nodata (Inf or NaN). Handled prior to invoking DataFrame.to_string()
"STDX": pos_std_formatter, # Nodata is represented as an integer, so can be handled here.
"STDY": pos_std_formatter,
"STDZ": pos_std_formatter,
"STDCLK": clk_std_formatter, # ditto above
"STDCLK": clk_std_formatter, # ditto above
}
for epoch, epoch_vals in out_df.reset_index("PRN").groupby(axis=0, level="J2000"):
# Format and write out the epoch in the SP3 format
@@ -402,26 +402,25 @@ def clk_std_formatter(x):
# NOTE: DataFrame.to_string() as called below, takes formatter functions per column. It does not, however
# invoke them on NaN values!! As such, trying to handle NaNs in the formatter is a fool's errand.
# Instead, we do it here, and get the formatters to recognise and skip over the already processed nodata values

# POS nodata formatting
# Fill +/- infinity values with SP3 nodata value for POS columns
epoch_vals['X'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals['Y'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals['Z'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals["X"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals["Y"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals["Z"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_POS_NODATA_STRING, inplace=True)
# Now do the same for NaNs
epoch_vals['X'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals['Y'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals['Z'].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals["X"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals["Y"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
epoch_vals["Z"].fillna(value=SP3_POS_NODATA_STRING, inplace=True)
# NOTE: we could use replace() for all this, though fillna() might be faster in some
# cases: https://stackoverflow.com/a/76225227
# replace() will also handle other types of nodata constants: https://stackoverflow.com/a/54738894


# CLK nodata formatting
# Throw both +/- infinity, and NaN values to the SP3 clock nodata value.
# See https://stackoverflow.com/a/17478495
epoch_vals['CLK'].replace(to_replace=[_np.inf, _np.NINF], value=SP3_CLOCK_NODATA_STRING, inplace=True)
epoch_vals['CLK'].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)
epoch_vals["CLK"].replace(to_replace=[_np.inf, _np.NINF], value=SP3_CLOCK_NODATA_STRING, inplace=True)
epoch_vals["CLK"].fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)

# Now invoke DataFrame to_string() to write out the values, leveraging our formatting functions for the
# relevant columns.
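A small sketch of the nodata substitution pattern above: positive/negative infinity and NaN are replaced with the SP3 nodata strings before DataFrame.to_string() is invoked, so the column formatters only ever see real numbers or already-formatted strings (synthetic data; the nodata string matches the constant defined near the top of this file):

import numpy as np
import pandas as pd

SP3_CLOCK_NODATA_STRING = " 999999.999999"
vals = pd.Series([1.5, np.inf, np.nan], name="CLK")
vals.replace(to_replace=[np.inf, -np.inf], value=SP3_CLOCK_NODATA_STRING, inplace=True)
vals.fillna(value=SP3_CLOCK_NODATA_STRING, inplace=True)
# vals now holds [1.5, " 999999.999999", " 999999.999999"]; clk_formatter passes the
# string entries through unchanged and formats the numeric ones.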
@@ -519,9 +518,9 @@ def diff_sp3_rac(
nd_rac = diff_eci.values[:, _np.newaxis] @ _gn_transform.eci2rac_rot(sp3_baseline_eci_vel)
df_rac = _pd.DataFrame(
nd_rac.reshape(-1, 3),
index=sp3_baseline.index, # Note that if the test and baseline have different SVs, this index will refer to
# data which is missing in the 'test' dataframe (and so is likely to be missing in
# the diff too).
index=sp3_baseline.index, # Note that if the test and baseline have different SVs, this index will refer to
# data which is missing in the 'test' dataframe (and so is likely to be missing in
# the diff too).
columns=[["EST_RAC"] * 3, ["Radial", "Along-track", "Cross-track"]],
)

