From cf557cc178b8542a5f742965d8d26a46af11d7f7 Mon Sep 17 00:00:00 2001 From: davidalbo Date: Tue, 28 Feb 2023 13:46:02 -0700 Subject: [PATCH] Bugfix #2426 develop buoy (#2475) Co-authored-by: John Halley Gotway --- data/table_files/ndbc_stations.xml | 3981 +++++++++++------ docs/Users_Guide/config_options.rst | 28 + scripts/utility/Makefile.am | 3 +- scripts/utility/Makefile.in | 3 +- .../utility/build_ndbc_stations_from_web.py | 634 +++ src/tools/other/ascii2nc/ndbc_handler.cc | 103 +- 6 files changed, 3333 insertions(+), 1419 deletions(-) create mode 100755 scripts/utility/build_ndbc_stations_from_web.py diff --git a/data/table_files/ndbc_stations.xml b/data/table_files/ndbc_stations.xml index 7e73ef73a3..114957cd5a 100644 --- a/data/table_files/ndbc_stations.xml +++ b/data/table_files/ndbc_stations.xml @@ -1,1368 +1,2613 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/Users_Guide/config_options.rst b/docs/Users_Guide/config_options.rst index 025c633d29..297049cdb6 100644 --- a/docs/Users_Guide/config_options.rst +++ b/docs/Users_Guide/config_options.rst @@ -272,6 +272,34 @@ The default table can be found in the installed XML content for all stations that allows lookups of latitude, longitude, and, in some cases, elevation for all stations based on stationId. +This set of stations comes from 2 online sources +`The active stations website `_ +and `The complete stations website `_. +As these lists can change as a function of time, a script can be run to pull +down the contents of both websites and merge any changes with the existing stations +file content, creating an updated stations file locally. +The MET_NDBC_STATIONS environment variable can be then set to refer to this newer +stations file. Also, the MET development team will periodically +run this script and update *share/met/table_files/ndbc_stations.xml*. 
+ +To run this utility: + +build_ndbc_stations_from_web.py <-d> <-p> <-o OUTPUT_FILE> +Usage: build_ndbc_stations_from_web.py [options] +Options: + -h, --help show this help message and exit + -d, --diagnostic Rerun using downlaoded files, skipping download step + (optional, default: False) + -p, --prune Prune files that are no longer online (optional, + default:False) + -o OUT_FILE, --out=OUT_FILE + Save the text into the named file (default: + merged.txt ) + +NOTE: The downloaded files are written to a subdirectory ndbc_temp_data which +can be deleted once the final output file is created. + + MET_BASE ^^^^^^^^ diff --git a/scripts/utility/Makefile.am b/scripts/utility/Makefile.am index 6e22a33e2b..d807a69977 100644 --- a/scripts/utility/Makefile.am +++ b/scripts/utility/Makefile.am @@ -26,7 +26,8 @@ pythonutilitydir = $(pkgdatadir)/utility pythonutility_DATA = \ - print_pointnc2ascii.py + print_pointnc2ascii.py \ + build_ndbc_stations_from_web.py EXTRA_DIST = ${pythonutility_DATA} diff --git a/scripts/utility/Makefile.in b/scripts/utility/Makefile.in index a515a31201..bdaec7b3f9 100644 --- a/scripts/utility/Makefile.in +++ b/scripts/utility/Makefile.in @@ -298,7 +298,8 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ pythonutilitydir = $(pkgdatadir)/utility pythonutility_DATA = \ - print_pointnc2ascii.py + print_pointnc2ascii.py \ + build_ndbc_stations_from_web.py EXTRA_DIST = ${pythonutility_DATA} MAINTAINERCLEANFILES = Makefile.in diff --git a/scripts/utility/build_ndbc_stations_from_web.py b/scripts/utility/build_ndbc_stations_from_web.py new file mode 100755 index 0000000000..cc20b6d02d --- /dev/null +++ b/scripts/utility/build_ndbc_stations_from_web.py @@ -0,0 +1,634 @@ +#!/usr/bin/env python3 + +''' +Created on February 17, 2023 + +@author: davealbo + +The script reads NDBC station information from two NOAA websites and merges the contents into one local list. +The list contains latitude, longitude and elevation data for all known stations. 
+The local list can be read by ascii2nc for processing of NDBC data inputs. +Algorithm: + Read the current default ndbc_stations.xml file and create a list of default station information objects. + Pull down active station xml file from web and create a list of active information objects. + Write the list to an active stations text file. + Pull down complete index list from web. + for each file referred to in the complete index list contents: + pull down that station's web page data and append to list of complete station information objects. + Write the list of complete station info objects to a text file. + Save all the individual web page data that was pulled down into a subdirectory. + + Compare the complete stations information to the default station information objects. + If a station is on the complete list but not on the default list, add it to the default list. + If a station is on both lists, but has different location info, change the location info to that of the complete, + (unless the complete has no meaningful lat/lon information, typically 0,0). + + Compare the augmented default list to the active stations list. + If a station is on the active list but not on the default list, add it to the default list. + If a station is on both lists, but has different location info, keep the default list values + (unless the default has no meaningful lat/lon information, typically 0,0, then change to the active). + + Log Warnings about discrepancies. + Keep counts of everything. + + Write the final default list as a new output. + + Optionally prune the default list, removing all stations that are not active or complete. + +''' + +from optparse import OptionParser +import os +import shutil +import shlex +import errno +from subprocess import Popen, PIPE + +# this needs to change! 
+# hardwired location of current default stations file +DEFAULT_STATIONS_FILE = "../../data/table_files/ndbc_stations.xml" + +# hardwired NOAA top webpage +TOP_WEBSITE = "https://www.ndbc.noaa.gov" + +# hardwired website with active station xml +ACTIVE_WEBSITE = "https://www.ndbc.noaa.gov/activestations.xml" + +# hardwired data subdirectory +DATA_SUBDIR = "/ndbc_temp_data" + +#hardwired complete stations subdirecdtory +STATIONS_SUBDIR = "/ndbc_temp_data/stations" + +# hardwired result of a wget of ACTIVE_WEBSITE +ACTIVE_STATIONS_XML = "./ndbc_temp_data/activestations.xml" + +# hardwired website with index to a complete list of stations +COMPLETE_INDEX_WEBSITE = "https://www.ndbc.noaa.gov/to_station.shtml" + +# hardwired result of a wget of COMPLETE_INDEX_WEBSITE +COMPLETE_STATIONS_INDEX_INFO = "./ndbc_temp_data/to_station.shtml" + +# hardwired name of optionally saved active stations +ACTIVE_TEXT_FILE = "./ndbc_temp_data/active.txt" + +# hardwired name of optionally saved complete stations +COMPLETE_TEXT_FILE = "./ndbc_temp_data/complete.txt" + +# default output file name +DEFAULT_OUTPUT_FILE = "merged.txt" + +MISSING = -99.9 + +def usage(): + print(f'Usage: BuildNdbcStationsFromWeb.py , <--diagnostic> <--out=out_filename> <--prune>') + print(f' -d/--diagnostic: special mode to rerun using already downloaded files, skips all downloading if True (Downloaded files are in ./{DATA_SUBDIR}') + print(f' -o/--out=out_filename: save final text into the named file (default: file name is {DEFAULT_OUTPUT_FILE})"') + print(f' -p/--prune: Delete all stations from the local ndbc_stations file that are no longer online') + print(f' Note: <> indicates optional arguments') + +#---------------------------------------------- +def create_parser_options(parser): + parser.add_option("-d", "--diagnostic", dest="diagnostic", action="store_true", default=False, help="Rerun using downlaoded files, skipping download step (optional, default: False)") + parser.add_option("-p", "--prune", 
dest="prune", action="store_true", default=False, help="Prune files that are no longer online (optional, default:False)") + parser.add_option("-o", "--out", dest="out_file", + default=DEFAULT_OUTPUT_FILE, help=" Save the text into the named file (default: " + DEFAULT_OUTPUT_FILE +" )") + parser.add_option("-H", "--Help", dest="full_usage", action="store_true", default=False, help = " show more usage information (optional, default = False)") + return parser.parse_args() + +#---------------------------------------------- +class Station: + def __init__(self, name = "", idvalue="", lat=MISSING, lon=MISSING, elev=MISSING): + self._name = name + self._id = idvalue + self._lat = lat + self._lon = lon + self._elev = elev + + def empty(self): + return self._id == "" + + def textForLookups(self): + if self._elev == MISSING: + txt = ''.format(a=self._id,b=self._lat,c=self._lon) + else: + txt = ''.format(a=self._id,b=self._lat,c=self._lon,d=self._elev) + return txt + + def location_match(self, other): + if self.empty() or other.empty(): + # this method is used to print mismatches, so don't print mismatches to empty stations + return True + return self._lat == other._lat and self._lon == other._lon and self._elev == other._elev + + def location_string(self): + if self._elev == MISSING: + txt = '{a}({b},{c})'.format(a=self._name,b=self._lat,c=self._lon) + else: + txt = '{a}({b},{c},{d})'.format(a=self._name,b=self._lat,c=self._lon,d=self._elev) + return txt + + def equals(self, other): + return self._id == other._id and self._lat == other._lat and self._lon == other._lon and self._elev == other._elev + + def setName(self, name): + self._name = name + +#---------------------------------------------- +def replaceLatLonIfGood(header, name, stations, station): + if station._lat == 0 and station._lon == 0: + #print(header, ",No replacement using:", station.textForLookups()) + return False + for n in range(len(stations)): + if stations[n]._id == station._id: + print(header, 
"Replacing: ", stations[n].textForLookups(), " with ", station.textForLookups()) + s = station + s.setName(name) + stations[n] = station + return True + print("Warning:", header, "No match for replacment of station ", station._id) + return False + +#---------------------------------------------- +def replaceLatLonIfListIsBad(header, name, stations, station): + if station._lat == 0 and station._lon == 0: + #print(header, ",No replacement using:", station.textForLookups()) + return False + for n in range(len(stations)): + if stations[n]._id == station._id: + if stations[n]._lat == 0 and stations[n]._lon == 0: + print(header, "Replacing: ", stations[n].textForLookups(), " with ", station.textForLookups()) + s = station + s.setName(name) + stations[n] = station + return True + else: + return False + + print("Warning:", header, "No match for replacment of station ", station._id) + return False + +#---------------------------------------------- +def matchingId(id, stations): + for station in stations: + if station._id == id: + return station + return Station() + +#---------------------------------------------- +def doCmd(cmd, debug=False): + + #print(cmd) + my_env = os.environ.copy() + args = shlex.split(cmd) + proc = Popen(args, stdout=PIPE, stderr=PIPE, env=my_env) + out, err = proc.communicate() + exitcode = proc.returncode + if exitcode == 0: + return str(out) + else: + if debug: + print("Command failed ", cmd) + return "" + +#---------------------------------------------------------------------------- +def makeOrScrub(path, debug=False): + if (debug): + print("Recreating path " + path) + if (os.path.exists(path)): + try: + shutil.rmtree(path) + os.makedirs(path) + except: + print('WARNING: ' + path + ' not completely cleaned out.') + else: + os.makedirs(path) + + +#---------------------------------------------- +def main(diagnostic, out_file, prune): + + cwd = os.getcwd() + + if not diagnostic: + status = True + + dataDir = cwd + DATA_SUBDIR + print("cleanining out 
", dataDir) + makeOrScrub(dataDir) + + os.chdir(dataDir) + + # pull the active stations xml from the web + cmd = "wget " + ACTIVE_WEBSITE + print(cmd) + s = doCmd(cmd, True) + if not s: + status = False + # pull the complete stations html from the web + cmd = "wget " + COMPLETE_INDEX_WEBSITE + print(cmd) + s = doCmd(cmd, True) + if not s: + status = False + if not status: + print("ERROR reading web content") + os.exit(1) + + # move back to top directory + os.chdir(cwd) + + # prepare to compare to the default stations file to see what has changed + default_stations = parse("Default", DEFAULT_STATIONS_FILE) + numDefault = len(default_stations) + print("PARSED DEFAUILT STATIONS FILE NUM=", len(default_stations)) + + # make a copy of this as the final outputs + final_stations = default_stations + for f in final_stations: + f.setName("Final") + + # parse the active stations XML to create a list, which will become the final list + if diagnostic: + active_stations = parse("Active", ACTIVE_TEXT_FILE) + print("PARSED ACTIVE STATION FILES: num=", len(active_stations)) + else: + active_stations = processActive("Active") + print("BUILT ACTIVE STATION FILES: num=", len(active_stations)) + + # read the complete stations html, find all the individual stations web links, + # pull each stations data, parse that downloaded station content to create a list + if diagnostic: + complete_stations = parse("Complete", COMPLETE_TEXT_FILE) + print("PARSED COMPLETE STATIONS FILES: num=", len(complete_stations)) + else: + complete_stations = processComplete("Complete") + print("BUILT COMPLETE STATIONS FILES: num=", len(complete_stations)) + + # see which ids are not in complete from active, and which have different lat/lons + # note the one used if that happens is always the active one at this point + numNew = 0 + numNewComplete = 0 + numNewActive = 0 + numConflict = 0 + numConflictChanged = 0 + numComplete = 0 + numActive = 0 + numCompleteNotActive = 0 + numActiveNotComplete = 0 + + # compare 
complete stations to default stations + for complete in complete_stations: + numComplete = numComplete + 1 + id = complete._id + default = matchingId(id, default_stations) + active = matchingId(id, active_stations) + if active.empty(): + numCompleteNotActive = numCompleteNotActive + 1 + if default.empty(): + # station is on the complete list but not on the default list, add it + f = complete + f.setName("Final") + final_stations.append(f) + numNew = numNew+1 + numNewComplete = numNewComplete + 1 + else: + # compare complete and default + if not complete.location_match(default): + numConflict = numConflict + 1 + if replaceLatLonIfGood("Complete to Final", "Final", final_stations, complete): + numConflictChanged = numConflictChanged + 1 + + # compare active stations to final stations + for active in active_stations: + numActive = numActive + 1 + id = active._id + final = matchingId(id, final_stations) + complete = matchingId(id, complete_stations) + if complete.empty(): + numActiveNotComplete = numActiveNotComplete +1 + if final.empty(): + # station is on the active list but not on the final list, add it + a = active + a.setName("Final") + final_stations.append(a) + numNew = numNew+1 + numNewActive = numNewActive + 1 + else: + # compare complete and default + if not final.location_match(active): + numConflict = numConflict + 1 + if replaceLatLonIfListIsBad("Active to Final", "Final", final_stations, active): + numConflictChanged = numConflictChanged + 1 + + # see which id's have vanished from the current default list, to be used when prune is true + numVanished = 0 + purgeIds = [] + print("Comparing current default stations to final list") + for default in default_stations: + id = default._id + active = matchingId(id, active_stations) + complete = matchingId(id, complete_stations) + if active.empty() and complete.empty(): + #print("Station in the local table file but no longer on the webpages:", id) + numVanished = numVanished+1 + purgeIds.append(id) + + for f in 
final_stations: + id = f._id + default = matchingId(id, default_stations) + if default.empty(): + #print("New station on web not in local table file:", id) + numNew = numNew+1 + + #now write out the full meal deal by creating a string list + nout = 0 + nprune = 0 + txtAll = [] + for f in final_stations: + if prune and f.IdOnList(purgeIds): + print("Pruning station: ", f._id, " No longer on line") + nprune = nprune + 1 + else: + txt = f.textForLookups() + txtAll.append(txt) + nout = nout + 1 + + # sort for ease of use + txtAll.sort() + fout = open(out_file, "w") + for txt in txtAll: + fout.write(txt+"\n") + fout.close() + + print("Num complete: ", numComplete) + print("Num active: ", numActive) + print("Num default: ", numDefault) + print("Num final: ", nout) + print("Num pruned: ", nprune) + print("Num vanished: ", numVanished) + print("Num new complete: ", numNewComplete) + print("Num new active: ", numNewActive) + print("Num new total: ", numNew) + print("Num conflict no change: ", numConflict) + print("Num conflict with change:", numConflictChanged) + print("Num active not complete: ", numActiveNotComplete) + print("Num complete not active: ", numCompleteNotActive) + + return 0 + +#---------------------------------------------------- +def processComplete(name): + ''' + read the complete stations html, find all the individual stations web links, + pull each stations data, parse that downloaded station content to create a list + ''' + + # initialize return to empty + stations = [] + + # create the output location, which should be ./ndbc_temp_data/stations + cwd = os.getcwd() + outLoc = cwd + STATIONS_SUBDIR + if not makeDirIfNeeded(outLoc): + print("ERROR creating storage for individual station files ", outLoc) + return stations + + + # Open the file with the list of php pages online (or local files pulled down) + with open(COMPLETE_STATIONS_INDEX_INFO, 'r') as file: + data = file.read().replace('\n', '') + file.close() + + # start at the beginning + index = 0 + 
txtAll = [] + while index < len(data): + # pull down another stations info if you can, and parse it + [index, station] = createNextStationInfo(name, data, index) + if index == -1: + break + if not station.empty(): + # form a string and append that plus all the individual stuff to lists + txt = station.textForLookups() + txtAll.append(txt) + stations.append(station) + + # keep the subdirectory of individual stations information + # sort the list for ease of use, then write it + txtAll.sort() + fout = open(COMPLETE_TEXT_FILE, "w") + for txt in txtAll: + fout.write(txt+"\n") + fout.close() + return stations + +#---------------------------------------------- +def createNextStationInfo(name, data, i): + + s = Station() + + #data has entries like this: 45001 + #on entry i points to the starting location within data to start looking + index = data.find('href="station_page.php?', i) + if index == -1: + return [-1, s] + + # the stuff beyond 'href="' is the file name that you get via wget, followed by another '"' + index2 = index + 6 # point to 'station_page' + index3 = data.find('">', index2) # point to " at end (which is followed by >) + + index = index3 + 3 # set index for return to beyond this + + # what to go for online: + ref = TOP_WEBSITE + '/' + data[index2:index3] + + # name of returned file + filename = data[index2:index3] + + # temporarily change to the correct subdirectory + cwd = os.getcwd() + os.chdir(cwd + STATIONS_SUBDIR) + # go get it + cmd = 'wget "' + ref + '"' + print(cmd) + s = doCmd(cmd, True) + # move back + os.chdir(cwd) + if not s: + # note try to keep going forward as index has been updated + print("ERROR data not online: ", ref) + return [index, s] + + # parse the file and return the information, including the next index + return parseStationInfo(name, cwd + STATIONS_SUBDIR + "/" + filename, index) + +#---------------------------------------------------------------------------- +def makeDirIfNeeded(path, debug=False): + if (debug): + print("Making 
directory if needed " + path) + + try: + os.makedirs(path) + return True + except OSError as exception: + if exception.errno != errno.EEXIST: + print("ERROR creating", path) + return False + else: + return True + +#---------------------------------------------------------------------------- +def parseStationInfo(name, fname, index): + + s = Station() + + # the file is assumed already downloaded + # initialize station values + station = setStationId(fname) + if not station: + return [index, s] + elev = setElev(fname) + lat = setLat(fname) + lon = setLon(fname) + s = Station(name, station, lat, lon, elev) + return [index, s] + +#---------------------------------------------- +def setStationId(fname): + stationId = "" + cmd = 'grep "var currentstnid" ' + fname + s = doCmd(cmd, True) + if s: + index6 = s.find("'", 0) + index7 = s.find("'", index6+1) + stationId = s[index6+1:index7] + return stationId + +#---------------------------------------------- +def setElev(fname): + elev = MISSING + cmd = 'grep "Site elev" ' + fname + #print(cmd) + s = doCmd(cmd) + if s: + if "m above mean sea level" in s: + # scan to + index6 = s.find("") + index7 = s.find("m above") + elev = float(s[index6+4:index7]) + elif " sea level', index_all+1) + if indexend == -1: + print("UNexpected lack of />") + break + + data = data_all[index_all:indexend+2] + if debug: + print("Parsing this: ", data) + index = 0 + + # expect to see '" - << "Standard NDBC format is the only supported format\n\n"; + mlog << Warning << "\n" << method_name << "->" + << "Standard NDBC format is the only supported format: " + << filename << "\n\n"; return false; } @@ -229,11 +230,11 @@ bool NdbcHandler::_parseObservationLineStandard(DataLine &data_line, // Make sure that the line contains the correct number of tokens // if (data_line.n_items() != NUM_COLS_STANDARD) { - mlog << Error << "\n" << method_name << "-> " - << "line number " << data_line.line_number() - << " does not have the correct number of columns " << 
data_line.n_items() - << " (" << NUM_COLS_STANDARD << "). Skipping this line in \"" - << filename << "\".\n\n"; + mlog << Warning << "\n" << method_name << "-> " + << "Skipping line number " << data_line.line_number() + << " with an unexpected number of columns (" + << data_line.n_items() << " != " << NUM_COLS_STANDARD << "): " + << filename << "\n\n"; return false; } @@ -242,10 +243,10 @@ bool NdbcHandler::_parseObservationLineStandard(DataLine &data_line, // time_t valid_time = _getValidTime(data_line); if (valid_time == 0) { - mlog << Error << "\n" << method_name << "-> " - << "line number " << data_line.line_number() - << " time could not be parsed, skipping this line in \"" - << filename << "\".\n\n"; + mlog << Warning << "\n" << method_name << "-> " + << "Skipping line number " << data_line.line_number() + << " whose vaild time cannot not be parsed: " + << filename << "\n\n"; return false; } @@ -263,9 +264,9 @@ bool NdbcHandler::_parseObservationLineStandard(DataLine &data_line, name = column[i].name; grib_code = i; // it's not actually grib code, its obs_vid, according to howard _addObservations(Observation(header_type, stationId, valid_time, - stationLat, stationLon, stationAlt, - quality_flag, grib_code, pressure_level_hpa, - height_m, value, name)); + stationLat, stationLon, stationAlt, + quality_flag, grib_code, pressure_level_hpa, + height_m, value, name)); } return true; } @@ -287,15 +288,15 @@ bool NdbcHandler::_setStationInfo(const string &filename) // expect .txt as the name i0 = fname.find(".txt"); if (i0 == string::npos) { - mlog << Error << "\n" << "expect file name of format '.txt'\n" - << "Got " << fname << "\n\n"; + mlog << Warning << "\n" << "NDBC file name does not follow the " + << "expected '.txt' format: " << fname << "\n\n"; return false; } stationId = fname.substr(0, i0); if (!locations.lookupLatLonElev(stationId, stationLat, stationLon, - stationAlt)) { - mlog << Error << "\n" << "No location information found for station " - << 
stationId << " do not process file " << filename << "\n\n"; + stationAlt)) { + mlog << Warning << "\n" << "NDBC station " << stationId + << " location information not found: " << filename << "\n\n"; return false; } return true; @@ -323,8 +324,8 @@ bool NdbcHandler::_determineFileType(LineDataFile &ascii_file) } } format_version = NDBC_FORMAT_VERSION_UNKNOWN; - mlog << Error << "\nNdbcHandler::_determineFileType -> " - << "Unknown file type\n\n"; + mlog << Warning << "\nNdbcHandler::_determineFileType -> " + << "Unknown file type: " << ascii_file.filename() << "\n\n"; return false; } @@ -335,10 +336,10 @@ time_t NdbcHandler::_getValidTime(const DataLine &data_line) const // // Pull out the date information // - if (column_pointer_year < 0 || column_pointer_month < 0 || column_pointer_day < 0 || + if (column_pointer_year < 0 || column_pointer_month < 0 || column_pointer_day < 0 || column_pointer_hour < 0 || column_pointer_minute < 0) { - mlog << Error << "\nNdbcHandler::_getValidTime -> " - << "Not all time related column pointers are set\n\n"; + mlog << Warning << "\nNdbcHandler::_getValidTime -> " + << "Not all time related column pointers are set.\n\n"; return 0; } string year = _extractColumn(data_line, column_pointer_year); @@ -391,9 +392,9 @@ bool NdbcHandler::_readHeaderInfo(LineDataFile &ascii_file) // The first line of the file contains the headers // if (!(ascii_file >> data_line)) { - mlog << Error << "\nNdbcHandler::_readHeaderInfo() -> " - << "error reading header line from input ASCII file \"" - << ascii_file.filename() << "\"\n\n"; + mlog << Warning << "\nNdbcHandler::_readHeaderInfo() -> " + << "Problem reading header line from input ASCII file: " + << ascii_file.filename() << "\n\n"; return false; } @@ -401,9 +402,10 @@ bool NdbcHandler::_readHeaderInfo(LineDataFile &ascii_file) // Check for the correct number of columns in the header line // if (data_line.n_items() != NUM_COLS_STANDARD) { - mlog << Error << "\nNdbcHandler::_readHeaderInfo() -> " - 
<< "NDBC file has incorrect number of columns (" - << data_line.n_items() << ") in header line\n\n"; + mlog << Warning << "\nNdbcHandler::_readHeaderInfo() -> " + << "Unexpected number of header columns (" << data_line.n_items() + << " != " << NUM_COLS_STANDARD << "): " + << ascii_file.filename() << "\n\n"; return false; } @@ -426,30 +428,33 @@ bool NdbcHandler::_readHeaderInfo(LineDataFile &ascii_file) } else { bool found = false; for (size_t j=0; j " - << "NDBC file has unknown header item " << s << "\n\n"; - status = false; + mlog << Warning << "\nNdbcHandler::_readHeaderInfo() -> " + << "Unexpected header column (" << s << "): " + << ascii_file.filename() << "\n\n"; + status = false; } } } - if (column_pointer_year == -1 || column_pointer_month == -1 || - column_pointer_day == -1 || column_pointer_hour == -1 || + if (column_pointer_year == -1 || column_pointer_month == -1 || + column_pointer_day == -1 || column_pointer_hour == -1 || column_pointer_minute == -1) { - mlog << Error << "\nNdbcHandler::_readHeaderInfo() -> " - << "NDBC file did not have all time fields in header \n\n"; - status = false; + mlog << Warning << "\nNdbcHandler::_readHeaderInfo() -> " + << "NDBC file did not have all time fields in header: " + << ascii_file.filename() << "\n\n"; + status = false; } for (size_t j=0; j " - << "NDBC file did not have all expected fields in header \n\n"; + mlog << Warning << "\nNdbcHandler::_readHeaderInfo() -> " + << "NDBC file did not have all expected fields in header: " + << ascii_file.filename() << "\n\n"; status = false; break; }