Skip to content

Commit

Permalink
documents: update files permissions
Browse files Browse the repository at this point in the history
* Updates file permissions using a CLI command and an input CSV file.
* Moves existing CLI commands for documents.
* Closes #147.

Co-Authored-by: Sébastien Délèze <sebastien.deleze@rero.ch>
  • Loading branch information
Sébastien Délèze committed Mar 27, 2020
1 parent ecb82bf commit ac7a43e
Show file tree
Hide file tree
Showing 12 changed files with 281 additions and 5 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,4 @@ recursive-include data *.crt *.json *.csv
recursive-include docker *.cfg *.conf *.crt *.ini *.key *.pem *.sh
recursive-include docs *.bat *.py *.rst *.txt Makefile
recursive-include sonar *.babelrc *.eslintignore *.gitkeep *.json *.html *.js *.scss *.css *.png *.jpg *.svg *.po *.pot *.mo *.py *.txt *.woff *.woff2
recursive-include tests *.doc *.json *.pdf *.py *.xml *.jpg
recursive-include tests *.doc *.json *.pdf *.py *.xml *.jpg *.csv
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@
],
'flask.commands': [
'fixtures = sonar.modules.cli:fixtures',
'documents = sonar.modules.documents.cli:oaiharvester',
'documents = sonar.modules.documents.cli.documents:documents',
'oaiharvester = \
sonar.modules.documents.cli.oaiharvester:oaiharvester',
'utils = sonar.modules.cli:utils'
],
'invenio_base.apps': [
Expand Down
18 changes: 18 additions & 0 deletions sonar/modules/documents/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""CLI commands for documents."""
29 changes: 29 additions & 0 deletions sonar/modules/documents/cli/documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Documents CLI commands."""

import click

from sonar.modules.documents.cli.rerodoc import rerodoc


# Root click group for document-related commands. setup.py registers it under
# the ``documents`` key of the ``flask.commands`` entry points.
# NOTE: click uses the function docstring as the group's help text, so the
# docstring wording is user-facing.
@click.group()
def documents():
    """Commands for documents."""


# Attach the RERODOC group imported above as a sub-command of ``documents``.
documents.add_command(rerodoc)
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Documents CLI commands."""
"""OAI harvester CLI commands."""

import json

Expand Down
149 changes: 149 additions & 0 deletions sonar/modules/documents/cli/rerodoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""RERODOC specific CLI commands."""

import csv
import re

import click
from flask import current_app
from flask.cli import with_appcontext
from invenio_db import db
from invenio_indexer.api import RecordIndexer

from sonar.modules.documents.api import DocumentRecord


# Click group that collects the RERODOC-specific commands declared in this
# module with ``@rerodoc.command(...)``.
# NOTE: click uses the function docstring as the group's help text, so the
# docstring wording is user-facing.
@click.group()
def rerodoc():
    """RERODOC specific commands."""


# Reads a ';'-separated CSV file whose rows are
# (record identifier, permissions expression, file key) and stores the
# resulting ``restricted`` / ``embargo_date`` values on the matching file of
# the matching document record.
#
# * ``permissions_file`` is the already-open text stream provided by
#   ``click.File('r')``; it is consumed directly (so ``-`` for stdin works).
# * ``chunk_size`` is the number of updated records accumulated before the
#   database session is committed and the records are bulk re-indexed. Values
#   lower than 1 degrade to "save after every record".
#
# Row-level problems are printed in yellow and the row is skipped; file-level
# problems abort the run and are printed in red. The docstring below is kept
# as-is because click renders it as the command's help text.
@rerodoc.command('update-file-permissions')
@click.argument('permissions_file', type=click.File('r'))
@click.option('-c',
              '--chunk-size',
              type=int,
              default=500,
              help='Chunk size for bulk indexing.')
@with_appcontext
def update_file_permissions(permissions_file, chunk_size):
    """Update file permission with information given by input file.
    :param permissions_file: CSV file containing files permissions.
    """
    indexer = RecordIndexer()

    # Compiled once instead of on every row.
    # "status:<NAME>" at the very end of the permissions column.
    status_pattern = re.compile(r'status:(\w+)$')
    # Role-based rule followed, on the next line, by an ISO date.
    embargo_pattern = re.compile(
        r'allow roles \/\.\*,(\w+),\.\*\/\n\s+.+(\d{4}-'
        r'\d{2}-\d{2})')

    def save_records(ids):
        """Save current records set into database and re-index.
        :param ids: List of records to save
        """
        db.session.commit()
        indexer.bulk_index(ids)
        indexer.process_bulk_queue()

    click.secho(permissions_file.name)
    try:
        # Use the stream click already opened rather than re-opening the file
        # by name (which fails for stdin and wastes a second handle).
        reader = csv.reader(permissions_file, delimiter=';')

        # header; ``None`` when the file is completely empty
        header = next(reader, None)

        # check number of columns
        if header is None or len(header) != 3:
            raise Exception('CSV file seems to be not well formatted.')

        # To store ids for bulk indexing
        ids = []

        for row in reader:
            # csv.reader yields an empty list for blank lines: nothing to do.
            if not row:
                continue

            try:
                if len(row) != 3:
                    raise Exception(
                        'Line {line}: expected 3 columns, found '
                        '{count}'.format(line=reader.line_num,
                                         count=len(row)))

                identifier, permissions, key = row

                # try to load corresponding record
                record = DocumentRecord.get_record_by_identifier([{
                    'type': 'bf:Local',
                    'value': identifier
                }])

                # No record found, skipping..
                if not record:
                    raise Exception(
                        'Record {record} not found'.format(
                            record=identifier))

                file_name = '{key}.pdf'.format(key=key)

                # File not found in record, skipping
                if file_name not in record.files:
                    raise Exception(
                        'File {file} not found in record {record}'.format(
                            file=file_name, record=identifier))

                record_file = record.files[file_name]

                # permissions contains a status
                matches = status_pattern.search(permissions)
                if matches:
                    status = matches.group(1)
                    # If status is RERO or INTERNAL, file must not be
                    # displayed, otherwise file is restricted within
                    # institution
                    if status in ('RERO', 'INTERNAL'):
                        current_app.logger.warning(
                            'Access restricted to {status} for file '
                            '{record}'.format(status=status, record=row))
                        record_file['restricted'] = status.lower()
                    else:
                        record_file['restricted'] = 'institution'
                else:
                    # permissions contains a date
                    matches = embargo_pattern.search(permissions)

                    if matches:
                        # file is restricted to institution
                        if matches.group(1) != 'INTERNAL':
                            record_file['restricted'] = 'institution'

                        record_file['embargo_date'] = matches.group(2)

                record.commit()
                db.session.flush()
                ids.append(str(record.id))

                # Bulk save and index. ``>=`` (rather than a modulo test)
                # avoids a ZeroDivisionError when chunk size is 0.
                if len(ids) >= chunk_size:
                    save_records(ids)
                    ids = []

            except Exception as exception:
                # Best effort: report the faulty row and go on with the next.
                click.secho(str(exception), fg='yellow')

        # save remaining records, if any
        if ids:
            save_records(ids)

        click.secho('Process finished', fg='green')

    except Exception as exception:
        # The message text (spelling included) is matched by the test suite.
        click.secho('An error occured during file process: {error}'.format(
            error=str(exception)),
                    fg='red')
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,17 @@
"type": "string",
"minLength": 1,
"pattern": "^https?://"
},
"restricted": {
"title": "Restricted",
"type": "string",
"minLength": 1
},
"embargo_date": {
"title": "Embargo date",
"type": "string",
"minLength": 1,
"format": "date"
}
},
"required": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,10 @@

"""Test documents CLI commands."""

import pytest
from click.testing import CliRunner
from invenio_oaiharvester.models import OAIHarvestConfig

import sonar.modules.documents.cli as Cli
import sonar.modules.documents.cli.oaiharvester as Cli


def test_oai_config_create(app, script_info):
Expand Down
56 changes: 56 additions & 0 deletions tests/ui/documents/cli/test_documents_cli_rerodoc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
#
# Swiss Open Access Repository
# Copyright (C) 2019 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Test documents RERODOC cli commands."""

from click.testing import CliRunner
from mock import patch

import sonar.modules.documents.cli.rerodoc as cli


def test_update_file_permissions(app, script_info, document_with_file):
    """Run the command on each sample input and check the reported outcome."""
    # (input file, text that must appear in the command output), in the order
    # the scenarios are executed.
    scenarios = (
        # Not existing input file
        ('./tests/ui/documents/data/not_existing.csv',
         'Error: Invalid value for \'PERMISSIONS_FILE\''),
        # Invalid input file
        ('./tests/ui/documents/data/invalid.csv',
         'CSV file seems to be not well formatted.'),
        # File cannot be parsed
        ('./tests/ui/documents/data/permissions_file.pdf',
         'An error occured during file process'),
        # OK
        ('./tests/ui/documents/data/permissions_file.csv',
         'Process finished'),
    )

    cli_runner = CliRunner()

    for input_path, expected_text in scenarios:
        outcome = cli_runner.invoke(cli.update_file_permissions,
                                    [input_path, '-c', '1'],
                                    obj=script_info)
        assert expected_text in outcome.output
2 changes: 2 additions & 0 deletions tests/ui/documents/data/invalid.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"id_bibrec";"status"
14278;"status:PA16JU"
10 changes: 10 additions & 0 deletions tests/ui/documents/data/permissions_file.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"id_bibrec";"status";"docname"
111111;"status:PA16JU";"test1"
111112;"status:PA16JU";"test1"
111111;"status:PA16JU";"test2"
111111;"status:RERO";"test1"
111111;"firerole:
allow roles /.*,UNIFR,.*/
allow from ""2014-07-14""
allow any
";"test1"
Binary file added tests/ui/documents/data/permissions_file.pdf
Binary file not shown.

0 comments on commit ac7a43e

Please sign in to comment.