Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: configure stats for documents and files #501

Merged
merged 1 commit into from
Feb 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 87 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ lxml = ">=4.6.2"
webdavclient3 = "^3.14.5"
fuzzywuzzy = "^0.18.0"
python-Levenshtein = "^0.12.0"
invenio-stats = "^1.0.0-alpha.18"

[tool.poetry.dev-dependencies]
Flask-Debugtoolbar = ">=0.10.1"
Expand Down
67 changes: 65 additions & 2 deletions sonar/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

from invenio_oauthclient.contrib import orcid
from invenio_records_rest.facets import range_filter
from invenio_stats.processors import EventsIndexer

from sonar.modules.deposits.api import DepositRecord, DepositSearch
from sonar.modules.deposits.permissions import DepositPermission
Expand Down Expand Up @@ -154,6 +155,12 @@ def _(x):
'task': 'invenio_accounts.tasks.clean_session_table',
'schedule': timedelta(minutes=60),
},
# Stats
'stats-process-events': {
'task': 'invenio_stats.tasks.process_events',
'schedule': timedelta(minutes=30),
'args': [('record-view', 'file-download')],
}
}
CELERY_BROKER_HEARTBEAT = 0
#: Disable sending heartbeat events
Expand Down Expand Up @@ -538,8 +545,7 @@ def _(x):
'deposits':
dict(aggs=dict(
status=dict(terms=dict(field='status', size=DEFAULT_AGGREGATION_SIZE)),
user=dict(terms=dict(field='user.pid',
size=DEFAULT_AGGREGATION_SIZE)),
user=dict(terms=dict(field='user.pid', size=DEFAULT_AGGREGATION_SIZE)),
contributor=dict(terms=dict(field='facet_contributors',
size=DEFAULT_AGGREGATION_SIZE))),
filters={
Expand Down Expand Up @@ -740,3 +746,60 @@ def _(x):
}
#: Number of records to return per page in OAI-PMH results.
OAISERVER_PAGE_SIZE = 100

# Stats
# =====
STATS_EVENTS = {
'file-download': {
'signal':
'invenio_files_rest.signals.file_downloaded',
'templates':
'invenio_stats.contrib.file_download',
'event_builders':
['invenio_stats.contrib.event_builders.file_download_event_builder'],
'cls':
EventsIndexer,
'params': {
'preprocessors': [
'invenio_stats.processors:flag_robots',
# Don't index robot events
lambda doc: doc if not doc['is_robot'] else None,
'invenio_stats.processors:flag_machines',
'invenio_stats.processors:anonymize_user',
'invenio_stats.contrib.event_builders:build_file_unique_id',
],
# Keep only 1 file download for each file and user every 30 sec
'double_click_window':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great feature

30,
# Create one index per year which will store file download events
'suffix':
'%Y',
}
},
'record-view': {
'signal':
'invenio_records_ui.signals.record_viewed',
'templates':
'invenio_stats.contrib.record_view',
'event_builders':
['invenio_stats.contrib.event_builders.record_view_event_builder'],
'cls':
EventsIndexer,
'params': {
'preprocessors': [
'invenio_stats.processors:flag_robots',
# Don't index robot events
lambda doc: doc if not doc['is_robot'] else None,
'invenio_stats.processors:flag_machines',
'invenio_stats.processors:anonymize_user',
'invenio_stats.contrib.event_builders:build_record_unique_id',
],
# Keep only 1 record view for each record and user every 30 sec
'double_click_window':
30,
# Create one index per year which will store record view events
'suffix':
'%Y',
},
},
}
10 changes: 10 additions & 0 deletions sonar/modules/documents/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from flask import Blueprint, abort, current_app, g, render_template, request
from flask_babelex import gettext as _
from invenio_i18n.ext import current_i18n
from invenio_pidstore.models import PersistentIdentifier
from invenio_records_ui.signals import record_viewed

from sonar.modules.documents.api import DocumentRecord
from sonar.modules.documents.utils import has_external_urls_for_files, \
Expand Down Expand Up @@ -90,6 +92,14 @@ def detail(pid_value, view='global'):
if not record or record.get('hiddenFromPublic'):
abort(404)

# Send signal when record is viewed
pid = PersistentIdentifier.get('doc', pid_value)
record_viewed.send(
current_app._get_current_object(),
pid=pid,
record=record,
)

# Add restriction, link and thumbnail to files
if record.get('_files'):
# Check if organisation's record forces to point file to an external
Expand Down