Skip to content

Commit

Permalink
add sitemap functionality
Browse files Browse the repository at this point in the history
* Adds a new field serverName on organisation resource.
* Implements sitemap generation for global and dedicated organisations.
* Adds a cli to generate sitemap files.
* Adds a task to generate sitemap files.
* Implements dynamic generation of the robots.txt file.
* Closes #798.

⚠️  ES Update mapping

Co-Authored-by: Bertrand Zuchuat <bertrand.zuchuat@rero.ch>
  • Loading branch information
Garfield-fr committed Aug 18, 2022
1 parent 51ad7d5 commit dae2214
Show file tree
Hide file tree
Showing 22 changed files with 655 additions and 3 deletions.
3 changes: 3 additions & 0 deletions data/organisations/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
],
"isShared": true,
"isDedicated": true,
"serverName": "fredi.hepvs.ch",
"platformName": "FREDI",
"files": [
{
Expand All @@ -64,6 +65,7 @@
"name": "Haute École Pédagogique BEJUNE",
"isShared": true,
"isDedicated": true,
"serverName": "roar.hep-bejune.ch",
"platformName": "# ROAR\n#### Répertoire ouvert et archives BEJUNE",
"files": [
{
Expand Down Expand Up @@ -113,6 +115,7 @@
],
"isShared": true,
"isDedicated": true,
"serverName": "folia.unifr.ch",
"platformName": "# FOLIA\n#### Fribourg Open Library and Archive",
"files": [
{
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ resources = "sonar.resources.cli:resources"
imports = "sonar.modules.cli.imports:imports"
fixtures = "sonar.modules.cli.fixtures:fixtures"
utils = "sonar.modules.cli.utils:utils"
sitemap = "sonar.modules.sitemap.cli.sitemap:sitemap"

[tool.poetry.plugins."invenio_base.apps"]
sonar = "sonar.ext:Sonar"
Expand All @@ -111,6 +112,7 @@ shibboleth_authenticator = "sonar.modules.shibboleth_authenticator.views.client:
theme = "sonar.theme.views:blueprint"
validation = "sonar.modules.validation.views:blueprint"
users = "sonar.modules.users.views:blueprint"
sitemap = "sonar.modules.sitemap.views:blueprint"

[tool.poetry.plugins."invenio_base.api_blueprints"]
deposits = "sonar.modules.deposits.rest:api_blueprint"
Expand Down Expand Up @@ -189,6 +191,7 @@ subdivisions = "sonar.modules.subdivisions.jsonresolvers"
[tool.poetry.plugins."invenio_celery.tasks"]
documents = "sonar.modules.documents.tasks"
stats = "sonar.modules.stats.tasks"
sitemap = "sonar.modules.sitemap.tasks"

[tool.poetry.plugins."invenio_admin.views"]
stats = "sonar.modules.stats.admin:stats_adminview"
Expand Down
5 changes: 5 additions & 0 deletions sonar/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ def _(x):
'documents-stats': {
'task': ('sonar.modules.stats.tasks.collect_stats'),
'schedule': crontab(minute=0, hour=1), # Every day at 01:00 UTC,
},
# Sitemap
'sitemap': {
'task': 'sonar.modules.sitemap.tasks.sitemap_generate_task',
'schedule': crontab(minute=0, hour=2), # Every day at 02:00 UTC,
}
}
CELERY_BROKER_HEARTBEAT = 0
Expand Down
6 changes: 6 additions & 0 deletions sonar/config_sonar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@

"""Specific configuration SONAR."""

SONAR_APP_SERVER_NAME = 'sonar.rero.ch'

SONAR_APP_API_URL = 'https://localhost:5000/api/'

SONAR_APP_ANGULAR_URL = 'https://localhost:5000/manage/'
"""Link to angular integrated app root."""

SONAR_APP_PRODUCTION_STATE = False

SONAR_APP_SITEMAP_ENTRY_SIZE = 10000

SONAR_APP_LANGUAGES_MAP = {
'aar': 'aa',
'abk': 'ab',
Expand Down
8 changes: 8 additions & 0 deletions sonar/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

from __future__ import absolute_import, print_function

import os

import jinja2
import markdown
from flask import current_app, render_template, request, url_for
Expand Down Expand Up @@ -117,6 +119,12 @@ def init_config(self, app):
if k.startswith('SONAR_APP_'):
app.config.setdefault(k, getattr(config_sonar, k))

# Set default if not exists.
if not app.config.get('SONAR_APP_SITEMAP_FOLDER_PATH'):
app.config.setdefault(
'SONAR_APP_SITEMAP_FOLDER_PATH',
os.path.join(app.instance_path, 'sitemap'))

def init_views(self, app):
"""Initialize the main flask views."""
app.url_map.converters['org_code'] = OrganisationCodeConverter
Expand Down
17 changes: 17 additions & 0 deletions sonar/modules/organisations/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,23 @@ def get_shared_or_dedicated_list(self):
['pid', 'name', 'isShared',
'isDedicated']).execute().hits

def get_organisation_pid_by_server_name(self, server_name):
    """Get the pid of the organisation matching a server name.

    :param server_name: server name for the dedicated organisation.
    :returns: pid of the first matching organisation, or None when
        ``server_name`` is empty or no organisation matches.
    """
    # Without a server name there is nothing to look up; skip the search.
    if not server_name:
        return None
    # Only the pid is read from the hit, so restrict the source fields.
    hits = self.filter('term', serverName=server_name) \
        .source(['pid']).execute().hits
    return hits[0].pid if hits else None

def get_dedicated_list(self):
    """List the organisations flagged as dedicated.

    :returns: hits of the executed search filtered on ``isDedicated``.
    """
    # NOTE(review): a single execute() presumably returns only the first
    # page of results; confirm this covers every dedicated organisation.
    dedicated_search = self.filter('term', isDedicated=True)
    response = dedicated_search.execute()
    return response.hits


class OrganisationRecord(SonarRecord):
"""Organisation record class."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,29 @@
"hideExpression": "!field.model.isShared"
}
},
"serverName": {
"title": "Server name (without http)",
"description": "Organisation server name for dedicated.",
"type": "string",
"form": {
"hideExpression": "!field.model.isDedicated",
"expressionProperties": {
"templateOptions.required": "field.model.isDedicated"
},
"validation": {
"validators": {
"uniqueValueKeysInObject": {
"keys": [
"serverName"
]
}
},
"messages": {
"uniqueValueKeysInObjectMessage": "This domain name must be unique."
}
}
}
},
"allowedIps": {
"title": "Allowed IP addresses",
"description": "List of IP addresses or ranges that allow access to private files (access: embargoed or restricted), which are accessible only within the organisation. Note: the bibliographic record (metadata) is always public. Enter one rule per line.",
Expand Down Expand Up @@ -441,6 +464,7 @@
"footer",
"isShared",
"isDedicated",
"serverName",
"allowedIps",
"platformName",
"documentsCustomField1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
"isDedicated": {
"type": "boolean"
},
"serverName": {
"type": "keyword"
},
"allowedIps": {
"type": "text"
},
Expand Down
1 change: 1 addition & 0 deletions sonar/modules/organisations/marshmallow/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class OrganisationMetadataSchemaV1(StrictKeysMixin):
footer = fields.List(fields.Dict())
isShared = fields.Boolean()
isDedicated = fields.Boolean()
serverName = fields.Str(dump_only=True)
allowedIps = SanitizedUnicode()
platformName = SanitizedUnicode()
documentsCustomField1 = fields.Dict()
Expand Down
18 changes: 18 additions & 0 deletions sonar/modules/sitemap/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2022 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Sitemap Modules."""
45 changes: 45 additions & 0 deletions sonar/modules/sitemap/cli/sitemap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2022 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Sitemap cli."""


import click
from flask import current_app
from flask.cli import with_appcontext

from sonar.modules.sitemap.sitemap import sitemap_generate


@click.group()
def sitemap():
    """Group of sitemap-related commands."""

@sitemap.command()
@click.option(
    '-s', '--server-name', 'server_name', required=True,
    help='Server name to generate the sitemap for.')
@with_appcontext
def generate(server_name):
    """Generate a sitemap.

    :param server_name: organisation server name.
    """
    # Entry size handed to the generator; falls back to 10000 when the
    # configuration key is not set.
    entry_size = current_app.config.get('SONAR_APP_SITEMAP_ENTRY_SIZE', 10000)
    sitemap_generate(server_name, entry_size)
    click.secho(f'Generate sitemap for {server_name}', fg='green')
Loading

0 comments on commit dae2214

Please sign in to comment.