Adjust scraper to collect total credit across all cpids
This commit adds the internal machinery to the scraper to
compute the total credit across ALL cpids for each project,
regardless of whether they are active beaconholders.

This is to support auto greylisting.

This commit does NOT include the network manifest changes.
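
Below is a minimal, standalone sketch (not part of the diff, with illustrative names only) of the accumulation pattern described above: total_credit is summed over every cpid in a project's user stats, whether or not the cpid has an active beacon, and that per-project sum is what the scraper records for greylisting purposes.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for one <user> record from a project's user stats file.
struct UserRecord {
    std::string cpid;
    uint64_t total_credit = 0;
    bool has_active_beacon = false; // irrelevant for the all-cpid sum
};

// Sum total_credit over every cpid in the project, regardless of beacon status.
// The real scraper accumulates this while streaming the project's user stats file.
uint64_t SumAllCpidTotalCredit(const std::vector<UserRecord>& users)
{
    uint64_t all_cpid_total_credit = 0;

    for (const auto& user : users) {
        all_cpid_total_credit += user.total_credit;
    }

    return all_cpid_total_credit;
}

int main()
{
    std::vector<UserRecord> users = {
        {"cpid_a", 1000, true},
        {"cpid_b", 2500, false}, // no active beacon, but still counted for greylisting
        {"cpid_c", 400, false},
    };

    // This per-project sum is what gets stored on the file manifest entry.
    std::cout << "all_cpid_total_credit = " << SumAllCpidTotalCredit(users) << "\n";

    return 0;
}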
jamescowens committed Jan 7, 2025
1 parent 666f2a8 commit 205988a
Showing 2 changed files with 53 additions and 13 deletions.
65 changes: 52 additions & 13 deletions src/gridcoin/scraper/scraper.cpp
@@ -372,9 +372,10 @@ bool MarkScraperFileManifestEntryNonCurrent(ScraperFileManifestEntry& entry);
* @param filetype
* @param sProject
* @param excludefromcsmanifest
* @param all_cpid_total_credit
*/
void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype, const std::string& sProject,
const bool& excludefromcsmanifest);
const bool& excludefromcsmanifest, const uint64_t& all_cpid_total_credit);
/**
* @brief Constructs the scraper statistics from the current state of the scraper, which is all of the in scope files at the
* time the function is called
@@ -527,11 +528,12 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist);
* @param Consensus
* @param GlobalVerifiedBeaconsCopy
* @param IncomingVerifiedBeacons
* @param all_cpid_total_credit
* @return bool true if successful
*/
bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& file, const std::string& etag,
BeaconConsensus& Consensus, ScraperVerifiedBeacons& GlobalVerifiedBeaconsCopy,
ScraperVerifiedBeacons& IncomingVerifiedBeacons);
ScraperVerifiedBeacons& IncomingVerifiedBeacons, uint64_t& all_cpid_total_credit);
/**
* @brief Clears the authentication ETag auth.dat file
*/
@@ -2111,7 +2113,7 @@ bool DownloadProjectHostFiles(const WhitelistSnapshot& projectWhitelist)
}

// Save host xml files to file manifest map with exclude from CSManifest flag set to true.
AlignScraperFileManifestEntries(host_file, "host", prjs.m_name, true);
AlignScraperFileManifestEntries(host_file, "host", prjs.m_name, true, 0);
}

return true;
@@ -2285,7 +2287,7 @@ bool DownloadProjectTeamFiles(const WhitelistSnapshot& projectWhitelist)
// If in explorer mode and new file downloaded, save team xml files to file manifest map with exclude from CSManifest
// flag set to true. If not in explorer mode, this is not necessary, because the team xml file is just temporary and
// can be discarded after processing.
if (explorer_mode() && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true);
if (explorer_mode() && bDownloadFlag) AlignScraperFileManifestEntries(team_file, "team", prjs.m_name, true, 0);

// If require team whitelist is set and bETagChanged is true, then process the file. This also populates/updates the
// team whitelist TeamIDs in the TeamIDMap and the ETag entries in the ProjTeamETags map.
@@ -2561,13 +2563,15 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist)
continue;
}

// If in explorer mode, save user (rac) source xml files to file manifest map with exclude from CSManifest flag set
// to true.
if (explorer_mode()) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true);
uint64_t all_cpid_total_credit = 0;

// Now that the source file is handled, process the file.
ProcessProjectRacFileByCPID(prjs.m_name, rac_file, sRacETag, Consensus,
GlobalVerifiedBeaconsCopy, IncomingVerifiedBeacons);
GlobalVerifiedBeaconsCopy, IncomingVerifiedBeacons, all_cpid_total_credit);

// If in explorer mode, save user (rac) source xml files to file manifest map with exclude from CSManifest flag set
// to true.
if (explorer_mode()) AlignScraperFileManifestEntries(rac_file, "user_source", prjs.m_name, true, all_cpid_total_credit);
} // for prjs : projectWhitelist

// Get the global verified beacons and copy the incoming verified beacons from the
@@ -2615,7 +2619,7 @@ bool DownloadProjectRacFilesByCPID(const WhitelistSnapshot& projectWhitelist)
// This version uses a consensus beacon map (and teamid, if team filtering is specified by policy) to filter statistics.
bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& file, const std::string& etag,
BeaconConsensus& Consensus, ScraperVerifiedBeacons& GlobalVerifiedBeaconsCopy,
ScraperVerifiedBeacons& IncomingVerifiedBeacons)
ScraperVerifiedBeacons& IncomingVerifiedBeacons, uint64_t& all_cpid_total_credit)
{
auto explorer_mode = []() { LOCK(cs_ScraperGlobals); return fExplorer; };
auto require_team_whitelist_membership = []() { LOCK(cs_ScraperGlobals); return REQUIRE_TEAM_WHITELIST_MEMBERSHIP; };
@@ -2721,6 +2725,16 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil
}
}

// We need to accumulate total credit across ALL project cpids, regardless of their beacon status, to get
// an "all cpid" total credit sum to be used for automatic greylisting purposes.
std::string s_cpid_total_credit = ExtractXML(data, "<total_credit>", "</total_credit>");
uint64_t cpid_total_credit = 0;

if (!ParseUInt64(s_cpid_total_credit, &cpid_total_credit)) {
_log(logattribute::ERR, __func__, "Bad team id in user stats file data.");
continue;
}

// We do NOT want to add a just verified CPID to the statistics this iteration, if it was
// not already active, because we may be halfway through processing the set of projects.
// Instead, add to the incoming verification map (above), which will be handled in the
@@ -2764,7 +2778,7 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil
}

// User beacon verified. Append its statistics to the CSV output.
out << ExtractXML(data, "<total_credit>", "</total_credit>") << ","
out << s_cpid_total_credit << ","
<< ExtractXML(data, "<expavg_time>", "</expavg_time>") << ","
<< ExtractXML(data, "<expavg_credit>", "</expavg_credit>") << ","
<< cpid
@@ -2835,7 +2849,7 @@ bool ProcessProjectRacFileByCPID(const std::string& project, const fs::path& fil

// Here, regardless of explorer mode, save processed rac files to file manifest map with exclude from CSManifest flag
// set to false.
AlignScraperFileManifestEntries(gzetagfile, "user", project, false);
AlignScraperFileManifestEntries(gzetagfile, "user", project, false, all_cpid_total_credit);

_log(logattribute::INFO, "ProcessProjectRacFileByCPID", "Complete Process");

@@ -3221,7 +3235,7 @@ EXCLUSIVE_LOCKS_REQUIRED(cs_StructScraperFileManifest)
}

void AlignScraperFileManifestEntries(const fs::path& file, const std::string& filetype,
const std::string& sProject, const bool& excludefromcsmanifest)
const std::string& sProject, const bool& excludefromcsmanifest, const uint64_t& all_cpid_total_credit)
{
ScraperFileManifestEntry NewRecord;

@@ -3238,6 +3252,7 @@ void AlignScraperFileManifestEntries(const fs::path& file, const std::string& fi
NewRecord.current = true;
NewRecord.excludefromcsmanifest = excludefromcsmanifest;
NewRecord.filetype = filetype;
NewRecord.all_cpid_total_credit = all_cpid_total_credit;

// Code block to lock StructScraperFileManifest during record insertion and delete because we want this atomic.
{
@@ -3381,6 +3396,27 @@ bool LoadScraperFileManifest(const fs::path& file)
LoadEntry.filetype = "user";
}

// This handles startup with a legacy manifest file without the all_cpid_total_credit column.
if (vline.size() >= 8) {
// In scraper for superblock v3 and autogreylist, we have to record total credit across all cpids, regardless
// of whether they are active beaconholders, to support auto greylisting.

uint64_t all_cpid_total_credit = 0;

if (!ParseUInt64(vline[7], &all_cpid_total_credit)) {
// This shouldn't happen given the conditional above, but to be thorough...
_log(logattribute::ERR, __func__, "The \"all_cpid_total_credit\" field not parsed correctly for a manifest "
"entry. Skipping.");
continue;
}

LoadEntry.all_cpid_total_credit = all_cpid_total_credit;
} else {
// The default if the all_cpid_total_credit field is not there (i.e. a legacy manifest file without the
// column) is zero.
LoadEntry.all_cpid_total_credit = 0;
}

// Lock cs_StructScraperFileManifest before updating
// global structure.
{
@@ -3425,6 +3461,7 @@ bool StoreScraperFileManifest(const fs::path& file)
<< "Filename,"
<< "ExcludeFromCSManifest,"
<< "Filetype"
<< "All_cpid_total_credit"
<< "\n";

for (auto const& entry : StructScraperFileManifest.mScraperFileManifest)
Expand All @@ -3437,7 +3474,9 @@ bool StoreScraperFileManifest(const fs::path& file)
+ entry.second.project + ","
+ entry.first + ","
+ ToString(entry.second.excludefromcsmanifest) + ","
+ entry.second.filetype + "\n";
+ entry.second.filetype + ","
+ ToString(entry.second.all_cpid_total_credit)
+ "\n";
stream << sScraperFileManifestEntry;
}
}
1 change: 1 addition & 0 deletions src/gridcoin/scraper/scraper.h
@@ -80,6 +80,7 @@ struct ScraperFileManifestEntry
bool current = true;
bool excludefromcsmanifest = true;
std::string filetype;
uint64_t all_cpid_total_credit = 0;
};

/**
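For reference, here is a small self-contained sketch (using assumed, simplified field names, not the scraper's actual manifest I/O code) of how the new trailing All_cpid_total_credit column round-trips through the manifest CSV, including the legacy case handled in LoadScraperFileManifest where an older line has fewer than eight fields and the value stays at its default of zero.

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Split a comma-separated manifest line into fields.
static std::vector<std::string> SplitCsv(const std::string& line)
{
    std::vector<std::string> fields;
    std::stringstream ss(line);
    std::string field;

    while (std::getline(ss, field, ',')) {
        fields.push_back(field);
    }

    return fields;
}

int main()
{
    // Writing: the new value is appended as the eighth comma-separated field.
    uint64_t all_cpid_total_credit = 123456789;
    std::string new_line = "hash,current,time,project,filename,0,user," + std::to_string(all_cpid_total_credit);

    // Reading: a legacy line with only seven fields leaves the value at its default of zero.
    std::string legacy_line = "hash,current,time,project,filename,0,user";

    for (const std::string& stored : std::vector<std::string>{new_line, legacy_line}) {
        std::vector<std::string> vline = SplitCsv(stored);

        uint64_t loaded_credit = 0;
        if (vline.size() >= 8) {
            loaded_credit = std::stoull(vline[7]);
        }

        std::cout << "loaded all_cpid_total_credit = " << loaded_credit << "\n";
    }

    return 0;
}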
