From f4c1dc491a669d2ae7d0a5609000ca9fed875cf0 Mon Sep 17 00:00:00 2001 From: John Halley Gotway Date: Fri, 7 Jan 2022 14:04:15 -0700 Subject: [PATCH] Per #1810, lots and lots of changes to tc-gen to support genesis shapefile vx. Rather than processing each shape as its read, store them in the new GenShapeInfo and GenShapeInfo array classes. While reading them, check for duplicates. After storing all the unduplicated shapes in GenShapeInfo, apply each config file filter to subset and score them. --- met/src/tools/tc_utils/tc_gen/tc_gen.cc | 489 +++++++++++++----------- 1 file changed, 276 insertions(+), 213 deletions(-) diff --git a/met/src/tools/tc_utils/tc_gen/tc_gen.cc b/met/src/tools/tc_utils/tc_gen/tc_gen.cc index 1d9c9add7f..3325827b5c 100644 --- a/met/src/tools/tc_utils/tc_gen/tc_gen.cc +++ b/met/src/tools/tc_utils/tc_gen/tc_gen.cc @@ -80,6 +80,9 @@ static void process_tracks (const StringArray &, static void process_edecks (const StringArray &, const StringArray &, ProbInfoArray &); +static void process_shapes (const StringArray &, + GenShapeInfoArray &); + static void get_genesis_pairs (const TCGenVxOpt &, const ConcatString &, const GenesisInfoArray &, @@ -95,6 +98,10 @@ static void do_probgen_pct (const TCGenVxOpt &, const GenesisInfoArray &, const TrackInfoArray &, ProbGenPCTInfo &); +static void do_genshape_pct (const TCGenVxOpt &, + GenShapeInfoArray &, + const GenesisInfoArray &, + ProbGenPCTInfo &); static int find_genesis_match (const GenesisInfo &, const GenesisInfoArray &, @@ -104,9 +111,8 @@ static int find_probgen_match (const ProbGenInfo &, const GenesisInfoArray &, const TrackInfoArray &, bool, double, int, int); -static int find_shape_match (const ShpPolyRecord &, - const GenesisInfoArray &, - unixtime, unixtime); +static int find_genshape_match (const GenShapeInfo &, + const GenesisInfoArray &); static void setup_txt_files (int, int, int); static void setup_table (AsciiTable &); @@ -134,6 +140,8 @@ static void write_pct_genmpr_cols(ProbGenPCTInfo &, int, int, static void write_nc (GenCTCInfo &); static void finish_txt_files (); +static ConcatString string_to_basin_abbr(ConcatString); + static void usage (); static void set_source (const StringArray &, const char *, StringArray &, StringArray &); @@ -500,33 +508,12 @@ void score_genesis_prob(const GenesisInfoArray &best_ga, //////////////////////////////////////////////////////////////////////// void score_genesis_shape(const GenesisInfoArray &best_ga) { - int i, j, k, i_bga; - int n_rec, total_recs, total_probs; - StringArray shape_files, sa; - ConcatString shp_file_name, dbf_file_name, basin_cs, case_cs, cs; - NumArray probs; - unixtime issue_ut; - bool is_event; - - DbfFile dbf_file; - ShpFile shp_file; - ShpPolyRecord poly_rec; - const GenesisInfo *bgi; - + int i, j, total_probs; + StringArray shape_files; + GenShapeInfoArray shapes_all, shapes_subset; ProbGenPCTInfo probgen_pct; - map basin_map; - map::iterator it; - - // Initialize to the first filter - probgen_pct.clear(); - probgen_pct.set_vx_opt(&conf_info.VxOpt[0]); - // Setup output files based on the maximum number of basins - // and lead times possible - const int max_n_basin = 3; - setup_txt_files(max_n_basin, max_n_shape_prob, 0); - - // Get the list of shapefiles + // Get the list of input shapefiles for(i=0; i " - << "the specified shapefile (" << shp_file_name - << ") and corresponding database file (" << dbf_file_name - << ") must both exists!\n\n"; - } - - // Extract the issue time from the filename: - // gtwo_areas_YYYYMMDDHHMM.shp - sa = shp_file_name.basename().split("_."); - if(sa.n() >= 3 && sa[2].length() >= 12) { - cs << cs_erase - << sa[2].substr(0, 8).c_str() << "_" - << sa[2].substr(8, 4).c_str() << "00"; - issue_ut = yyyymmdd_hhmmss_to_unix(cs.c_str()); - } - else { - mlog << Error << "\nscore_genesis_shape() -> " - << "can't extract the issue time from \"" << shp_file_name - << "\" since it does not match the regular expression \"" - << gen_shp_reg_exp << "\"!\n\n"; - exit(1); - } - - // Round the file timestamp to the nearest synoptic time (00, 06, 12, 18) - int h3 = 3*sec_per_hour; - int h6 = 6*sec_per_hour; - issue_ut = ( (int) (issue_ut + h3) / h6 ) * h6; - - // Open the dbf and shp files - dbf_file.open(dbf_file_name.c_str()); - shp_file.open(shp_file_name.c_str()); - - // Store the number of records - n_rec = dbf_file.header()->n_records; - - // Update the running total - total_recs += n_rec; - - // Check expected shape types - const ShapeType shape_type = (ShapeType) (shp_file.header()->shape_type); - if(shape_type != shape_type_polygon) { - mlog << Error << "\nscore_genesis_shape() -> " - << "shapefile type \"" << shapetype_to_string(shape_type) - << "\" in file \"" << shp_file_name - << "\" is not supported!\n\n"; - exit(1); - } - - mlog << Debug(4) << "[File " << i+1 << " of " << shape_files.n() << "]: " - << "Found " << n_rec << " records with issue time " - << unix_to_yyyymmdd_hhmmss(issue_ut) << " in file \"" - << shp_file_name << "\".\n"; - - // Process each shape - for(j=0; j " - << "hit shp file EOF before reading all records!\n\n"; - exit(1); - } + // Setup output files based on the maximum number of filters + // and lead times possible + setup_txt_files(conf_info.n_vx(), max_n_shape_prob, 0); - // Read the current shape and metadata - shp_file >> poly_rec; - poly_rec.toggle_longitudes(); - sa = dbf_file.subrecord_values(j); - basin_cs = sa[0]; - basin_cs.replace(" ", "_", false); + // Process each verification filter + for(i=0; i max_n_shape_prob) { - mlog << Warning << "\nscore_genesis_shape() -> " - << "unexpected number of shapefile probabilities (" - << probs.n() << ") in record " << j+1 << " of file \"" - << dbf_file_name << "\"!\n\n"; - continue; + // Check filters to define subset + if(conf_info.VxOpt[i].is_keeper(shapes_all[j])) { + shapes_subset.add(shapes_all[j]); + total_probs += shapes_all[j].n_prob(); } - - // Search for the genesis match - i_bga = find_shape_match(poly_rec, best_ga, issue_ut, - issue_ut + shape_prob_search_sec); - - // Pointer to the matching BEST track - bgi = (is_bad_data(i_bga) ? - (const GenesisInfo *) 0 : - &best_ga[i_bga]); - - // Update the running total - total_probs += probs.n(); - - // Score each probability - for(k=0; kgenesis_time() - issue_ut) < - (shape_prob_lead_hr[k] * sec_per_hour); - - if(is_event) { - case_cs << "MATCHES " - << unix_to_yyyymmdd_hhmmss(bgi->genesis_time()) - << " BEST track " << bgi->storm_id() - << " genesis at (" << bgi->lat() << ", " - << bgi->lon() << ").\n"; - } - else { - case_cs << "has NO MATCH in the BEST track.\n"; - } - } - // No Best track match - else { - case_cs << "has NO MATCH in the BEST track.\n"; - } - - mlog << Debug(5) << case_cs; - - // Add a new basin map entry, if needed - if(basin_map.count(basin_cs) == 0) { - basin_map[basin_cs] = probgen_pct; - } - - // Store this probability - basin_map[basin_cs].add_shape(basin_cs, issue_ut, - shape_prob_lead_hr[k], - probs[k], bgi, is_event); - - } // end for k } // end for j - // Close files - dbf_file.close(); - shp_file.close(); - - } // end for i - - mlog << Debug(3) << "Found a total of " << total_probs - << " probabilities, for " << total_recs << " genesis shapes, for " - << basin_map.size() << " basins, from " << shape_files.n() - << " input files.\n"; + mlog << Debug(2) + << "[Filter " << i+1 << " (" << conf_info.VxOpt[i].Desc + << ")" << "] Verifying " << total_probs << " probabilities from " + << shapes_subset.n() << " of the " << shapes_all.n() + << " input genesis shapes against " << best_ga.n() << " " + << conf_info.BestEventInfo.Technique << " genesis events.\n"; - // Write the shape statistics for each basin - for(it=basin_map.begin(); it!=basin_map.end(); it++) { + // Do the probabilistic verification + do_genshape_pct(conf_info.VxOpt[i], shapes_subset, + best_ga, probgen_pct); - // Write the statistics output - write_pct_stats(it->second); + // Write the statistics output + write_pct_stats(probgen_pct); - } // end for it + } // end for i return; } @@ -1038,7 +876,7 @@ void do_probgen_pct(const TCGenVxOpt &vx_opt, } // Store pair info - pgi.add_pgi(model_pa.prob_gen(i), j, bgi, is_event); + pgi.add_probgen(model_pa.prob_gen(i), j, bgi, is_event); } // end for j } // end for i @@ -1046,6 +884,77 @@ void do_probgen_pct(const TCGenVxOpt &vx_opt, return; } +//////////////////////////////////////////////////////////////////////// + +void do_genshape_pct(const TCGenVxOpt &vx_opt, + GenShapeInfoArray &fcst_sa, + const GenesisInfoArray &best_ga, + ProbGenPCTInfo &pgi) { + int i, j, i_bga; + const GenesisInfo *bgi; + bool is_event; + ConcatString case_cs; + + // Initialize + pgi.clear(); + pgi.set_vx_opt(&vx_opt); + + // Loop over the array of shapes + for(i=0; igenesis_time() - fcst_sa[i].issue_time()) < + (fcst_sa[i].lead_sec(j)); + + if(is_event) { + case_cs << "MATCHES " + << unix_to_yyyymmdd_hhmmss(bgi->genesis_time()) + << " BEST track " << bgi->storm_id() + << " genesis at (" << bgi->lat() << ", " + << bgi->lon() << ").\n"; + } + else { + case_cs << "has NO MATCH in the BEST track.\n"; + } + } + // No Best track match + else { + case_cs << "has NO MATCH in the BEST track.\n"; + } + + mlog << Debug(5) << case_cs; + + // Store pair info + pgi.add_genshape(fcst_sa[i], j, bgi, is_event); + + } // end for j + } // end for i + + return; +} //////////////////////////////////////////////////////////////////////// @@ -1224,16 +1133,19 @@ int find_probgen_match(const ProbGenInfo &prob_gi, //////////////////////////////////////////////////////////////////////// -int find_shape_match(const ShpPolyRecord &shp_poly, - const GenesisInfoArray &bga, - unixtime beg_ut, unixtime end_ut) { +int find_genshape_match(const GenShapeInfo &gsi, + const GenesisInfoArray &bga) { int i, i_bga; double x, y; unixtime min_ut; + // Time window + unixtime beg_ut = gsi.issue_time(); + unixtime end_ut = beg_ut + shape_prob_search_sec; + // Load the shape GridClosedPolyArray p; - p.set(shp_poly, conf_info.DLandGrid); + p.set(gsi.poly(), conf_info.DLandGrid); // Search Best track genesis events for a match for(i=0,i_bga=bad_data_int; i " + << "the specified shapefile (" << shp_file_name + << ") and corresponding database file (" << dbf_file_name + << ") must both exists!\n\n"; + } + + // Extract the issue time from the filename: + // gtwo_areas_YYYYMMDDHHMM.shp + sa = shp_file_name.basename().split("_."); + if(sa.n() >= 3 && sa[2].length() >= 12) { + cs << cs_erase + << sa[2].substr(0, 8).c_str() << "_" + << sa[2].substr(8, 4).c_str() << "00"; + file_ut = yyyymmdd_hhmmss_to_unix(cs.c_str()); + } + else { + mlog << Error << "\nprocess_shapes() -> " + << "can't extract the issue time from \"" << shp_file_name + << "\" since it does not match the regular expression \"" + << gen_shp_reg_exp << "\"!\n\n"; + exit(1); + } + + // Open the dbf and shp files + dbf_file.open(dbf_file_name.c_str()); + shp_file.open(shp_file_name.c_str()); + + // Store the number of records + n_rec = dbf_file.header()->n_records; + + // Check expected shape types + const ShapeType shape_type = (ShapeType) (shp_file.header()->shape_type); + if(shape_type != shape_type_polygon) { + mlog << Error << "\nprocess_shapes() -> " + << "shapefile type \"" << shapetype_to_string(shape_type) + << "\" in file \"" << shp_file_name + << "\" is not supported!\n\n"; + exit(1); + } + + mlog << Debug(4) << "[File " << i+1 << " of " << files.n() << "]: " + << "Found " << n_rec << " records in file \"" << shp_file_name << "\".\n"; + + // Process each shape + for(j=0; j " + << "hit shp file EOF before reading all records!\n\n"; + exit(1); + } + + // Read the current shape and metadata + shp_file >> poly_rec; + poly_rec.toggle_longitudes(); + sa = dbf_file.subrecord_values(j); + + // Initialize GenShapeInfo + gsi.clear(); + gsi.set_time(file_ut); + gsi.set_basin(string_to_basin_abbr(sa[0]).c_str()); + gsi.set_poly(poly_rec); + + // Parse probabilities from the subrecord values + for(k=0; k= max_n_shape_prob) { + mlog << Warning << "\nprocess_shapes() -> " + << "unexpected number of shapefile probabilities (" + << gsi.n_prob() << ") in record " << j+1 + << " of file \"" << dbf_file_name + << "\"!\n\n"; + continue; + } + + // Store the probability info + gsi.add_prob(shape_prob_lead_hr[gsi.n_prob()]*sec_per_hour, + atoi(sa[k].c_str())/100.0); + } + } // end for k + + // Store this shape, check for duplicates + if(shapes.add(gsi, true)) { + mlog << Debug(5) << "Add new " << gsi.serialize() << "\n"; + total_probs += gsi.n_prob(); + } + + } // end for j + + // Close files + dbf_file.close(); + shp_file.close(); + + } // end for i + + mlog << Debug(3) << "Found a total of " << total_probs + << " probabilities in " << shapes.n() << " genesis shapes from " + << files.n() << " input files.\n"; + + return; +} + //////////////////////////////////////////////////////////////////////// // // Setup the output ASCII files @@ -2113,6 +2159,10 @@ void write_ctc_genmpr_row(StatHdrColumns &shc, //////////////////////////////////////////////////////////////////////// void write_pct_stats(ProbGenPCTInfo &pgi) { + + // Check for no data to write + if(pgi.PCTMap.size() == 0) return; + int i, lead_hr, lead_sec; // Setup header columns @@ -2203,10 +2253,10 @@ void write_pct_genmpr_row(StatHdrColumns &shc, shc.set_alpha(bad_data_double); // Write a line for each matched pair - for(i=0; i