From 6b5633cc6877aaf35aa53bb2d6600879a15d1cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johnny=20Marie=CC=81thoz?= Date: Tue, 14 Nov 2023 10:54:29 +0100 Subject: [PATCH] stats: add periodic task to generate reports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Renames private method. * Adds periodic task for monthly and yearly. * Adds cli to generate synchronous and asynchronous reports. * Enables all tasks but ebook during the setup. Co-Authored-by: Johnny Mariéthoz --- rero_ils/config.py | 16 +++++++++ .../stats/api/indicators/circulation.py | 2 +- .../modules/stats/api/indicators/others.py | 4 +-- .../modules/stats/api/indicators/patron.py | 2 +- rero_ils/modules/stats/api/report.py | 25 ++++++++++++- rero_ils/modules/stats/cli.py | 35 +++++++++++-------- rero_ils/modules/stats/tasks.py | 31 +++++++++++++++- rero_ils/modules/stats_cfg/api.py | 4 +-- scripts/setup | 13 +++---- tests/ui/stats/test_stats_report.py | 8 ++--- 10 files changed, 108 insertions(+), 32 deletions(-) diff --git a/rero_ils/config.py b/rero_ils/config.py index 80e344b4ca..b5ff221dec 100644 --- a/rero_ils/config.py +++ b/rero_ils/config.py @@ -420,6 +420,22 @@ def _(x): 'schedule': crontab(minute=30, hour=1, day_of_month='1'), # First day of the month at 01:30 UTC, 'enabled': False }, + 'collect-stats-report-month': { + 'task': ('rero_ils.modules.stats.tasks.collect_stats_reports'), + 'schedule': crontab(minute=0, hour=1, day_of_month='1'), # First day of the month at 01:00 UTC, + 'kwargs': { + 'frequency': 'month' + }, + 'enabled': False + }, + 'collect-stats-report-year': { + 'task': ('rero_ils.modules.stats.tasks.collect_stats_reports'), + 'schedule': crontab(minute=0, hour=1, day_of_month='1', month_of_year='1'), # First day of the year at 01:00 UTC, + 'kwargs': { + 'frequency': 'year' + }, + 'enabled': False + }, 'delete-provisional-items': { 'task': 'rero_ils.modules.items.tasks.delete_provisional_items', 'schedule': crontab(minute=0, hour=3), # 
Every day at 03:00 UTC, diff --git a/rero_ils/modules/stats/api/indicators/circulation.py b/rero_ils/modules/stats/api/indicators/circulation.py index 203d938329..4fc2ed2fd7 100644 --- a/rero_ils/modules/stats/api/indicators/circulation.py +++ b/rero_ils/modules/stats/api/indicators/circulation.py @@ -51,7 +51,7 @@ def query(self): .filter('term', loan__trigger=self.trigger) if period := self.cfg.period: es_query = es_query.filter( - 'range', date=self.cfg._get_range_period(period)) + 'range', date=self.cfg.get_range_period(period)) if lib_pids := self.cfg.filter_by_libraries: loc_pids = [ hit.pid for hit in LocationsSearch().filter( diff --git a/rero_ils/modules/stats/api/indicators/others.py b/rero_ils/modules/stats/api/indicators/others.py index 58cac30f44..a3bf26d677 100644 --- a/rero_ils/modules/stats/api/indicators/others.py +++ b/rero_ils/modules/stats/api/indicators/others.py @@ -252,7 +252,7 @@ def query(self): .filter('term', operation='delete') if period := self.cfg.period: es_query = es_query.filter( - 'range', date=self.cfg._get_range_period(period)) + 'range', date=self.cfg.get_range_period(period)) if pids := self.cfg.filter_by_libraries: es_query = es_query.filter( 'terms', record__library_pid=pids) @@ -315,7 +315,7 @@ def query(self): 'term', organisation__pid=self.cfg.org_pid) if period := self.cfg.period: es_query = es_query.filter( - 'range', _created=self.cfg._get_range_period(period)) + 'range', _created=self.cfg.get_range_period(period)) if pids := self.cfg.filter_by_libraries: es_query = es_query.filter( 'terms', library__pid=pids) diff --git a/rero_ils/modules/stats/api/indicators/patron.py b/rero_ils/modules/stats/api/indicators/patron.py index 13e430508c..ad56fc8206 100644 --- a/rero_ils/modules/stats/api/indicators/patron.py +++ b/rero_ils/modules/stats/api/indicators/patron.py @@ -121,7 +121,7 @@ def query(self): :returns: an elasticsearch query object """ es_query = super().query - range_period = 
self.cfg._get_range_period(self.cfg.period) + range_period = self.cfg.get_range_period(self.cfg.period) op_query = LoanOperationLogsSearch()\ .source('loan.patron.pid')\ .get_logs_by_trigger( diff --git a/rero_ils/modules/stats/api/report.py b/rero_ils/modules/stats/api/report.py index 31dc44684e..3d89f9cb3f 100644 --- a/rero_ils/modules/stats/api/report.py +++ b/rero_ils/modules/stats/api/report.py @@ -30,6 +30,8 @@ from .indicators import NumberOfActivePatronsCfg, NumberOfCirculationCfg, \ NumberOfDeletedItemsCfg, NumberOfDocumentsCfg, NumberOfILLRequests, \ NumberOfItemsCfg, NumberOfPatronsCfg, NumberOfSerialHoldingsCfg +from ..api.api import Stat +from ..models import StatType class StatsReport: @@ -174,7 +176,7 @@ def collect(self, force=False): return self._process_aggregations(results) - def _get_range_period(self, period): + def get_range_period(self, period): """Get the range period for elasticsearch date range aggs.""" if period == 'month': # now - 1 month @@ -193,3 +195,24 @@ def _get_range_period(self, period): _from = f'{previous_year}-01-01T00:00:00' _to = f'{previous_year}-12-31T23:59:59' return dict(gte=_from, lte=_to) + + def create_stat(self, values, dbcommit=True, reindex=True): + """Create a stat report. + + :param values: array - values computed by the StatsReport class. + :param dbcommit: bool - if True commit the database transaction. + :param reindex: bool - if True index the document. + :returns: the created report. 
+ """ + data = dict( + type=StatType.REPORT, + config=self.config.dumps(), + values=[dict(results=values)] + ) + if self.period: + period_range = self.get_range_period(self.period) + data['date_range'] = { + 'from': period_range['gte'], + 'to': period_range['lte'] + } + return Stat.create(data, dbcommit=dbcommit, reindex=reindex) diff --git a/rero_ils/modules/stats/cli.py b/rero_ils/modules/stats/cli.py index 4b37682956..60d3550d4f 100644 --- a/rero_ils/modules/stats/cli.py +++ b/rero_ils/modules/stats/cli.py @@ -227,17 +227,24 @@ def collect(pid): click.secho(f'Configuration does not exists.', fg='red') else: stat_report = StatsReport(cfg) - res = stat_report.collect() - data = dict( - type=StatType.REPORT, - config=cfg.dumps(), - values=[dict(results=res)] - ) - if stat_report.period: - range = stat_report._get_range_period(stat_report.period) - data['date_range'] = { - 'from': range['gte'], - 'to': range['lte'] - } - - Stat.create(data, dbcommit=True, reindex=True) + values = stat_report.collect() + stat_report.create_stat(values) + + +@report.command() +@click.argument('frequency', type=click.Choice(['month', 'year'])) +@click.option('--delayed', '-d', is_flag=True, + help='Run report generation in background.') +@with_appcontext +def collect_all(frequency, delayed): + """Extract the stats report values and store it. + + :param frequency: frequency ('month' or 'year') of the configurations to use. 
+ """ + from .tasks import collect_stats_reports + if delayed: + res = collect_stats_reports.delay(frequency) + click.secho(f'Generated reports delayed, task id: {res}', fg='green') + else: + res = collect_stats_reports(frequency) + click.secho(f'Generated {len(res)} reports.', fg='green') diff --git a/rero_ils/modules/stats/tasks.py b/rero_ils/modules/stats/tasks.py index ed5fa379dd..e2931a4c17 100644 --- a/rero_ils/modules/stats/tasks.py +++ b/rero_ils/modules/stats/tasks.py @@ -20,9 +20,13 @@ from celery import shared_task from flask import current_app +from rero_ils.modules.stats_cfg.api import StatConfiguration, \ + StatsConfigurationSearch + from .api.api import Stat from .api.librarian import StatsForLibrarian from .api.pricing import StatsForPricing +from .api.report import StatsReport from .models import StatType @@ -40,7 +44,7 @@ def collect_stats_billing(): @shared_task() def collect_stats_librarian(): - """Collect and store the montly statistics for librarian.""" + """Collect and store the monthly statistics for librarian.""" stats_librarian = StatsForLibrarian() date_range = {'from': stats_librarian.date_range['gte'], 'to': stats_librarian.date_range['lte']} @@ -52,3 +56,28 @@ def collect_stats_librarian(): dbcommit=True, reindex=True) return f'New statistics of type {stat["type"]} has\ been created with a pid of: {stat.pid}' + + +@shared_task() +def collect_stats_reports(frequency='month'): + """Collect and store the statistics reports of the given frequency.""" + pids = [ + hit.pid for hit in + StatsConfigurationSearch().filter( + 'term', frequency=frequency).source('pid').scan() + ] + to_return = [] + logger = current_app.logger + for pid in pids: + try: + cfg = StatConfiguration.get_record_by_pid(pid) + stat_report = StatsReport(cfg) + values = stat_report.collect() + report = stat_report.create_stat(values) + to_return.append(report.pid) + except Exception as error: + logger.error( + f'Unable to generate report from config({pid}) :: {error}', + 
exc_info=True, stack_info=True + ) + return to_return diff --git a/rero_ils/modules/stats_cfg/api.py b/rero_ils/modules/stats_cfg/api.py index 9062d3e288..843d75d8e1 100644 --- a/rero_ils/modules/stats_cfg/api.py +++ b/rero_ils/modules/stats_cfg/api.py @@ -22,8 +22,8 @@ from rero_ils.modules.fetchers import id_fetcher from rero_ils.modules.minters import id_minter from rero_ils.modules.providers import Provider -from rero_ils.modules.stats_cfg.models import StatCfgIdentifier, \ - StatCfgMetadata + +from .models import StatCfgIdentifier, StatCfgMetadata # provider StatCfgProvider = type( diff --git a/scripts/setup b/scripts/setup index 9bf5b97c66..ac9ad8d0dc 100755 --- a/scripts/setup +++ b/scripts/setup @@ -608,18 +608,17 @@ fi # # OAI configuration info_msg "OAI configuration: ${DATA_PATH}/oaisources.yml" eval ${PREFIX} invenio reroils oaiharvester initconfig ${DATA_PATH}/oaisources.yml + +eval ${PREFIX} invenio reroils scheduler enable_tasks -a -v +# disable ebook harvesting +eval ${PREFIX} invenio reroils scheduler enable_tasks -n ebooks-harvester -d + if ${DEPLOYMENT} then - eval ${PREFIX} invenio reroils scheduler enable_tasks -a -v - # disable ebook harvesting - eval ${PREFIX} invenio reroils scheduler enable_tasks -n ebooks-harvester -d # start oai harvesting asynchrone: beats must be running info_msg "Start OAI harvesting asynchrone" eval ${PREFIX} invenio reroils oaiharvester harvest -n ebooks -a max=150 -q -k else - eval ${PREFIX} invenio reroils scheduler enable_tasks -n scheduler-timestamp -n bulk-indexer -n anonymize-loans -n claims-creation -n accounts -n clear_and_renew_subscriptions -n collect-stats-billing -n collect-stats-librarian -v - eval ${PREFIX} invenio reroils scheduler enable_tasks -n notification-creation -n notification-dispatch-availability -n notification-dispatch-recall -n sync-agents -v - eval ${PREFIX} invenio reroils scheduler enable_tasks -n cancel-expired-request -n delete-provisional-items -n delete-loans-created -v info_msg 
"For ebooks harvesting run:" msg "\tinvenio reroils oaiharvester harvest -n ebooks -a max=100 -q" fi @@ -650,6 +649,8 @@ fi info_msg "Collect statistics" eval ${PREFIX} invenio reroils stats collect billing +eval ${PREFIX} invenio reroils stats report collect-all month +eval ${PREFIX} invenio reroils stats report collect-all year date success_msg "Perfect ${PROGRAM}! See you soon…" diff --git a/tests/ui/stats/test_stats_report.py b/tests/ui/stats/test_stats_report.py index cb0a626f29..e01fe26022 100644 --- a/tests/ui/stats/test_stats_report.py +++ b/tests/ui/stats/test_stats_report.py @@ -75,11 +75,11 @@ def test_stats_report_range(app): 'rero_ils.modules.stats.api.report.datetime' ) as mock_datetime: mock_datetime.now.return_value = datetime(year=2023, month=2, day=1) - assert StatsReport(cfg)._get_range_period('month') == \ + assert StatsReport(cfg).get_range_period('month') == \ dict(gte='2023-01-01T00:00:00', lte='2023-01-31T23:59:59') - assert StatsReport(cfg)._get_range_period('year') == \ + assert StatsReport(cfg).get_range_period('year') == \ dict(gte='2022-01-01T00:00:00', lte='2022-12-31T23:59:59') mock_datetime.now.return_value = datetime(year=2023, month=1, day=5) - assert StatsReport(cfg)._get_range_period('month') == \ + assert StatsReport(cfg).get_range_period('month') == \ dict(gte='2022-12-01T00:00:00', lte='2022-12-31T23:59:59') - assert not StatsReport(cfg)._get_range_period('foo') + assert not StatsReport(cfg).get_range_period('foo')