Skip to content

Commit 17bff6b

Browse files
authored
Add report of Pbench Agent version statistics (#3624)
* Add report of Pbench Agent version statistics

  Add a new `--agent` option and generator to report on the Pbench Agent versions present on the server. By default we report on the "main" versions, like "0.50" and "0.73"; by adding `--detail` it'll report the much longer list of full agent versions ("v0.73-2g6da0cfc8d") as well as "nonsense" version metadata like "system".

  This also recognizes the `--since` and `--until` options to report on agent versions that appear within a specific time range.

  The report is sorted by the last date a given version was seen, which makes it a bit easier to see that only 0.73 has been used since March 08 2024.

  ```console
  Dataset statistics by Pbench Agent version:
    Count           Version             First        Last
  ---------- ---------------------- ------------ ------------
           1         '0.37'          Apr 13 2012  Apr 13 2012
          68         '0.44'          Jan 04 2018  Feb 06 2018
          84         '0.46'          Jan 03 2018  Mar 09 2018
       1,341         '0.47'          Jan 02 2018  Apr 03 2018
       2,197         '0.49'          Mar 21 2018  Aug 04 2018
       1,388         '0.48'          Feb 06 2018  Aug 14 2018
         171         '0.51'          Aug 10 2018  Aug 31 2018
       4,962         '0.50'          May 11 2018  Sep 25 2018
         494         '0.52'          Aug 24 2018  Jan 02 2019
       1,942         '0.53'          Sep 13 2018  May 29 2019
         898         '0.58'          Apr 08 2019  May 30 2019
         246         '0.55'          Jan 28 2019  Jun 06 2019
       1,205         '0.54'          Nov 27 2018  Jul 01 2019
           1         '0.61'          Jul 08 2019  Jul 08 2019
         532         '0.57'          Mar 15 2019  Aug 28 2019
         382         '0.62'          Jul 17 2019  Sep 10 2019
       1,426         '0.56'          Feb 11 2019  Oct 16 2019
       1,067         '0.59'          Apr 30 2019  Nov 12 2019
       1,454         '0.63'          Jul 31 2019  Dec 18 2019
       2,151         '0.65'          Sep 27 2019  Feb 21 2020
       1,342         '0.64'          Aug 27 2019  Mar 26 2020
       1,587         '0.60'          May 25 2019  May 22 2020
       5,255         '0.66'          Nov 07 2019  Jul 10 2020
       4,596         '0.67'          Jan 16 2020  Nov 30 2020
          33         '0.70'          Nov 18 2020  Jan 12 2021
       7,427         '0.68'          Apr 01 2020  Apr 27 2021
      54,179         '0.69'          Jun 25 2020  Mar 08 2023
      44,870         '0.71'          Oct 17 2020       Feb 28
       7,073         '0.72'          Jun 24 2022       Mar 08
       3,977         '0.73'          Aug 14 2023        today
  ```

  I won't capture the full list here (it's much longer), but the "nonsense" version report is currently:

  ```console
  Datasets with nonsensical version metadata:
    Count           Version             First        Last
  ---------- ---------------------- ------------ ------------
          37        'system'         Mar 30 2019  Apr 01 2019
          54       'plugins:'        Jan 26 2018  Apr 27 2021
           5           ''            Oct 02 2018  Dec 20 2021
           3 'v(unknown)-g(unknown)' Dec 14 2020  Sep 30 2022
  ```

* Tweaks
1 parent a7419ce commit 17bff6b

File tree

2 files changed

+128
-3
lines changed

2 files changed

+128
-3
lines changed

lib/pbench/cli/server/report.py

Lines changed: 127 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from collections import defaultdict
2+
from dataclasses import dataclass
23
import datetime
34
from operator import and_
45
from pathlib import Path
@@ -384,9 +385,6 @@ def summarize_dates(base_query: Query, options: dict[str, Any]):
384385
since = options.get("since")
385386
until = options.get("until")
386387

387-
if since and until and since > until:
388-
raise Exception("The --until value must be later than the --since value")
389-
390388
by_year = defaultdict(int)
391389
by_month = defaultdict(int)
392390
by_day = defaultdict(int)
@@ -514,6 +512,118 @@ def report_uploads(options: dict[str, Any]):
514512
summarize_dates(rows, options)
515513

516514

515+
def report_agent(options: dict[str, Any]):
    """Report dataset statistics by Pbench Agent version.

    Streams the ("pbench", "date") and ("pbench", "rpm-version") values of
    every Metadata row whose key is "metalog", tallies the rows by full
    version string and by "major" (X.Y) version, and prints a table giving
    the count plus the first and last date on which each version was seen.
    Version strings that don't match the expected pattern are tallied
    separately as "nonsensical" versions.

    Args:
        options: The command options. This function reads:
            "since": if set, only rows with a date >= this value are counted
            "until": if set, only rows with a date <= this value are counted
            "detail": if set, also print the full-version table and the
                table of nonsensical version strings
    """

    # A recognized version contains "X.Y", optionally ".Z", followed by a
    # "-suffix"; the "X.Y" portion is captured as the "major" version.
    v_pattern = re.compile(r"(?P<major>\d+\.\d+)(?:\.\d+)?(?:-\w+)")

    @dataclass
    class Daterange:
        """Track the earliest and latest date recorded for one version."""

        first: Optional[datetime.datetime] = None
        last: Optional[datetime.datetime] = None

        def add(self, date: datetime.datetime):
            """Widen the range, if necessary, to include `date`."""
            # Naive and aware datetimes can't be ordered against each other
            # (TypeError), so store everything as timezone-aware; a naive
            # value is labeled as UTC without shifting its clock time.
            if date.tzinfo is None:
                date = date.replace(tzinfo=datetime.timezone.utc)
            if self.first is None or date < self.first:
                self.first = date
            if self.last is None or date > self.last:
                self.last = date

    def print_versions(
        target: dict[str, Daterange], counts: dict[str, int], quote: bool = False
    ):
        """Print one table row per version, ordered by last-seen date.

        Args:
            target: the date range recorded for each version string
            counts: the number of datasets recorded for each version string
            quote: wrap each version in single quotes (makes empty or odd
                strings visible)
        """
        cw = 10
        vw = 23
        dw = 11
        click.echo(
            f" {'Count':^{cw}s} {'Version':^{vw}s} {'First':^{dw}s} {'Last':^{dw}s}"
        )
        click.echo(f" {'':-<{cw}} {'':-<{vw}} {'':-<{dw}} {'':-<{dw}}")
        for version, dates in sorted(target.items(), key=lambda k: k[1].last):
            count = counts[version]
            first = humanize.naturaldate(dates.first)
            last = humanize.naturaldate(dates.last)
            v = "'" + version + "'" if quote else version
            click.echo(f" {count:>{cw},d} {v:^{vw}s} {first:>{dw}s} {last:>{dw}s}")

    watcher.update("analyzing version patterns")
    since = options.get("since")
    until = options.get("until")

    # Create a subquery from our basic select parameters so that we can use
    # the label (SQL "AS date") in our WHERE filter clauses. (In a direct query
    # PostgreSQL doesn't allow filtering on renamed columns.)
    subquery = (
        Database.db_session.query(
            cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label(
                "date"
            ),
            Metadata.value["pbench", "rpm-version"].as_string().label("version"),
        )
        .filter(Metadata.key == "metalog")
        .subquery()
    )

    dateless = 0
    versionless = 0

    # Dataset counts, keyed by full version, major version, and unrecognized
    # version string respectively.
    versions = defaultdict(int)
    majorversions = defaultdict(int)
    nonversions = defaultdict(int)

    # First/last dates for the same three groupings. (Named to avoid
    # shadowing the builtin range().)
    fullrange = defaultdict(Daterange)
    majorrange = defaultdict(Daterange)
    nonversionrange = defaultdict(Daterange)

    filters = []

    query = Database.db_session.query(subquery.c.date, subquery.c.version).order_by(
        subquery.c.date
    )

    if since:
        verifier.status(f"Filter since {since}")
        filters.append(subquery.c.date >= since)
    if until:
        verifier.status(f"Filter until {until}")
        filters.append(subquery.c.date <= until)
    if filters:
        query = query.filter(*filters)
    rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

    for row in rows:
        date, version = row[0], row[1]
        if not isinstance(version, str):
            versionless += 1
            continue
        if not isinstance(date, datetime.datetime):
            dateless += 1
            # Stand in the UNIX epoch for a missing date. It must be
            # timezone-aware: a naive sentinel can't be compared with aware
            # dates when updating a range or sorting the report.
            date = datetime.datetime.fromtimestamp(0.0, tz=datetime.timezone.utc)
        m = v_pattern.search(version)
        if m:
            maj = m.group("major")
            versions[version] += 1
            majorversions[maj] += 1
            fullrange[version].add(date)
            majorrange[maj].add(date)
        else:
            nonversions[version] += 1
            nonversionrange[version].add(date)

    click.echo("Dataset statistics by Pbench Agent version:")
    print_versions(majorrange, majorversions)
    if options.get("detail"):
        click.echo("Dataset statistics by full Pbench Agent version:")
        print_versions(fullrange, versions)
        click.echo("Datasets with nonsensical version metadata:")
        print_versions(nonversionrange, nonversions, quote=True)
    if dateless:
        click.echo(f"{dateless:,d} datasets lack a date")
    if versionless:
        click.echo(f"{versionless:,d} datasets lack a Pbench Agent version")
625+
626+
517627
def report_audit():
518628
"""Report audit log statistics."""
519629

@@ -693,6 +803,12 @@ def report_states():
693803

694804
@click.command(name="pbench-report-generator")
695805
@pass_cli_context
806+
@click.option(
807+
"--agent",
808+
default=False,
809+
is_flag=True,
810+
help="Display Pbench Agent version statistics",
811+
)
696812
@click.option("--all", "-a", default=False, is_flag=True, help="Display full report")
697813
@click.option(
698814
"--archive", "-A", default=False, is_flag=True, help="Display archive statistics"
@@ -766,6 +882,12 @@ def report(context: object, **kwargs):
766882
rv = 0
767883

768884
try:
885+
886+
since = kwargs.get("since")
887+
until = kwargs.get("until")
888+
if since and until and since > until:
889+
raise Exception("The --until value must be later than the --since value")
890+
769891
config = config_setup(context)
770892
logger = get_pbench_logger("pbench-report-generator", config)
771893
cache_m = CacheManager(config, logger)
@@ -790,6 +912,8 @@ def report(context: object, **kwargs):
790912
else:
791913
click.echo(f"Unexpected statistics option {stats}", err=True)
792914
rv = 1
915+
if kwargs.get("all") or kwargs.get("agent"):
916+
report_agent(kwargs)
793917
if kwargs.get("all") or kwargs.get("audit"):
794918
report_audit()
795919
if kwargs.get("all") or kwargs.get("sql"):

server/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ flask-sqlalchemy
1313
gunicorn
1414
humanfriendly
1515
humanize
16+
numpy<2.0 # Indirect: elasticsearch
1617
pquisby
1718
psycopg2
1819
pyesbulk>=2.0.1

0 commit comments

Comments
 (0)