def create_archivesspace_client():
    """
    Create an ArchivesSpace client instance.

    Connection settings are read from the MicroServiceChoiceReplacementDic row
    whose description is 'ArchivesSpace Config'. Its ``replacementdic`` field
    is parsed as a Python dict literal and must contain the keys ``%host%``,
    ``%port%``, ``%user%`` and ``%passwd%``.

    :return: archivesspace.ArchivesSpaceClient, or None if the server could
        not be reached or rejected the configured credentials
    """
    # TODO use same code as views_as.py?
    repl_dict = models.MicroServiceChoiceReplacementDic.objects.get(description='ArchivesSpace Config')
    # literal_eval only accepts literals, so the stored text is never executed
    config = ast.literal_eval(repl_dict.replacementdic)

    try:
        client = archivesspace.ArchivesSpaceClient(
            host=config['%host%'],
            port=config['%port%'],
            user=config['%user%'],
            passwd=config['%passwd%']
        )
    except archivesspace.AuthenticationError:
        print("Unable to authenticate to ArchivesSpace server using the default user! Check administrative settings.")
        return None
    except archivesspace.ConnectionError:
        print("Unable to connect to ArchivesSpace server at the default location! Check administrative settings.")
        return None
    return client


def _open_csv_for_reading(csv_path):
    """Open csv_path in the mode the running interpreter's csv module expects."""
    if sys.version_info[0] < 3:
        # Python 2: csv reads byte strings; 'U' normalises line endings
        return open(csv_path, 'rbU')
    # Python 3: csv reads text and wants newline=''; the 'U' flag no longer exists
    return open(csv_path, 'r', newline='', encoding='utf-8')


def parse_archivesspaceids_csv(files):
    """
    Parse filename and reference ID from archivesspaceids.csv files

    The first column of each row is the filename and the second is the
    reference ID; any further columns are ignored. Blank lines and rows with
    fewer than two columns are skipped instead of raising IndexError.

    :param files: List of paths to archivesspaceids.csv files
    :return: Dict with {filename: reference ID}
    """
    file_info = {}
    # SIP is last, so takes priority: later files overwrite earlier entries
    for csv_path in files:
        with _open_csv_for_reading(csv_path) as f:
            for row in csv.reader(f):
                if len(row) < 2:
                    # csv.reader yields [] for a blank line; only report real rows
                    if row:
                        print('Skipping malformed row in', csv_path, row)
                    continue
                filename, ref_id = row[0], row[1]
                file_info[filename] = ref_id
    return file_info


def parse_archivesspace_ids(sip_path, sip_uuid):
    """
    Parse an archivesspaceids.csv to pre-populate the matching GUI.

    For every (filename, reference ID) pair found in the CSV files, the File
    row belonging to the SIP is looked up, the reference ID is resolved to an
    ArchivesSpace resource ID, and an ArchivesSpaceDIPObjectResourcePairing
    row is created. Files that cannot be matched in the database, or whose
    reference ID ArchivesSpace cannot resolve, are reported and skipped
    without failing the run.

    :param sip_path: Path to the SIP to check for an archivesspaceids.csv
    :param sip_uuid: UUID of the SIP to auto-populate ArchivesSpace IDs for
    :return: 0 on success (including when no CSV exists), 1 if the CSV files
        hold no usable rows or the ArchivesSpace client cannot be created
    """
    # Check for archivesspaceids.csv
    csv_paths = archivematicaFunctions.find_metadata_files(sip_path, 'archivesspaceids.csv')
    if not csv_paths:
        print('No archivesspaceids.csv files found, exiting')
        return 0

    file_info = parse_archivesspaceids_csv(csv_paths)
    if not file_info:
        print('No information found in archivesspaceids.csv files')
        return 1
    print(file_info)

    # Create client
    client = create_archivesspace_client()
    if not client:
        return 1

    for filename, ref_id in file_info.items():
        # Get file object (for fileUUID, to see if in DIP)
        print(filename, ref_id, '%SIPLocation%' + filename)
        try:
            # The CSV may give the path with or without the objects/ prefix,
            # and the file may have arrived via a transfer or directly in the SIP
            f = models.File.objects.get(
                Q(originallocation='%transferDirectory%' + filename) |
                Q(originallocation='%transferDirectory%objects/' + filename) |
                Q(originallocation='%SIPDirectory%' + filename) |
                Q(originallocation='%SIPDirectory%objects/' + filename),
                sip_id=sip_uuid
            )
        except models.File.DoesNotExist:
            print(filename, 'not found in database, skipping')
            continue
        except models.File.MultipleObjectsReturned:
            print('Multiple entries for', filename, 'found in database, skipping')
            continue
        print('File:', f)

        # Query ref_id to client for resource_id
        resource = client.find_by_field('identifier', ref_id)
        try:
            resource_id = resource[0]['id']
        except (IndexError, KeyError):
            # No hit, or a hit without an 'id' key: skip this file, keep going
            print('ArchivesSpace did not return an ID for', ref_id)
            print('Returned', resource)
            continue
        print('Resource ID:', resource_id)

        # Add to ArchivesSpaceDIPObjectResourcePairing
        models.ArchivesSpaceDIPObjectResourcePairing.objects.create(
            dipuuid=sip_uuid,
            fileuuid=f.uuid,
            resourceid=resource_id,
        )

    # Check if any files were processed?
    return 0
b/src/MCPClient/tests/fixtures/archivesspaceid_sip/objects/metadata/archivesspaceids.csv @@ -0,0 +1 @@ +objects/evelyn's photo.jpg,LI00022 diff --git a/src/MCPClient/tests/fixtures/empty_metadata_files/objects/metadata/archivesspaceids.csv b/src/MCPClient/tests/fixtures/empty_metadata_files/objects/metadata/archivesspaceids.csv new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/MCPClient/tests/fixtures/test_no_files_in_db.yaml b/src/MCPClient/tests/fixtures/test_no_files_in_db.yaml new file mode 100644 index 0000000000..1aa296a81b --- /dev/null +++ b/src/MCPClient/tests/fixtures/test_no_files_in_db.yaml @@ -0,0 +1,25 @@ +interactions: +- request: + body: password=admin + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + Content-Length: ['14'] + Content-Type: [application/x-www-form-urlencoded] + User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] + method: POST + uri: http://localhost:8089/users/admin/login + response: + body: {string: !!python/unicode '{"session":"88373637ab6bd52646d959ad310c1f281fb4ba02073e64c3f4da50b43d67b24a","user":{"lock_version":1159,"username":"admin","name":"Administrator","is_system_user":true,"create_time":"2014-12-05T20:32:17Z","system_mtime":"2015-07-09T23:18:47Z","user_mtime":"2015-07-09T23:18:47Z","jsonmodel_type":"user","groups":[],"is_admin":false,"uri":"/users/1","agent_record":{"ref":"/agents/people/1"},"permissions":{"/repositories/2":["view_repository","update_accession_record","update_resource_record","update_digital_object_record"],"_archivesspace":[]}}} + + '} + headers: + cache-control: ['private, must-revalidate, max-age=0'] + content-length: ['551'] + content-type: [application/json] + date: ['Thu, 09 Jul 2015 23:18:47 GMT'] + server: [Jetty(8.1.5.v20120716)] + x-content-type-options: [nosniff] + status: {code: 200, message: OK} +version: 1 diff --git a/src/MCPClient/tests/fixtures/test_parse_archivesspace_ids.yaml 
b/src/MCPClient/tests/fixtures/test_parse_archivesspace_ids.yaml new file mode 100644 index 0000000000..7bd2696222 --- /dev/null +++ b/src/MCPClient/tests/fixtures/test_parse_archivesspace_ids.yaml @@ -0,0 +1,52 @@ +interactions: +- request: + body: password=admin + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + Content-Length: ['14'] + Content-Type: [application/x-www-form-urlencoded] + User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] + method: POST + uri: http://localhost:8089/users/admin/login + response: + body: {string: !!python/unicode '{"session":"4a108561f24f7850cb136cd765405fd563853b39b626e7cf3bfc4a99ef2bab0c","user":{"lock_version":898,"username":"admin","name":"Administrator","is_system_user":true,"create_time":"2014-12-05T20:32:17Z","system_mtime":"2015-07-08T21:38:45Z","user_mtime":"2015-07-08T21:38:45Z","jsonmodel_type":"user","groups":[],"is_admin":false,"uri":"/users/1","agent_record":{"ref":"/agents/people/1"},"permissions":{"/repositories/2":["view_repository","update_accession_record","update_resource_record","update_digital_object_record"],"_archivesspace":[]}}} + + '} + headers: + cache-control: ['private, must-revalidate, max-age=0'] + content-length: ['550'] + content-type: [application/json] + date: ['Wed, 08 Jul 2015 21:38:45 GMT'] + server: [Jetty(8.1.5.v20120716)] + x-content-type-options: [nosniff] + status: {code: 200, message: OK} +- request: + body: null + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] + X-ArchivesSpace-Session: [!!python/unicode '4a108561f24f7850cb136cd765405fd563853b39b626e7cf3bfc4a99ef2bab0c'] + method: GET + uri: http://localhost:8089/repositories/2/search?q=identifier%3ALI00022&page=1&page_size=30 + response: + body: {string: !!python/unicode 
'{"first_page":1,"last_page":1,"this_page":1,"offset_first":1,"offset_last":1,"total_hits":1,"results":[{"id":"/repositories/2/resources/1151","title":"Digital + futures : \nstrategies for the information age","primary_type":"resource","types":["resource"],"json":"{\"truncated\": + true}","suppressed":false,"publish":true,"system_generated":false,"repository":"/repositories/2","subjects":["Digital + preservation","Digital libraries"],"agents":["Deegan, Marilyn","Neal-Schuman + Publishers","Library Association Publishing Limited","Tanner, Simon"],"agent_uris":["/agents/people/265","/agents/corporate_entities/90","/agents/corporate_entities/98","/agents/people/266"],"creators":["Deegan, + Marilyn","Neal-Schuman Publishers","Library Association Publishing Limited","Tanner, + Simon"],"created_by":"admin","last_modified_by":"admin","user_mtime":"2014-12-06T15:31:03Z","system_mtime":"2014-12-06T15:31:03Z","create_time":"2014-12-06T15:31:03Z","level":"item","finding_aid_title":"","identifier":"LI00022","language":"eng","restrictions":"false","external_id":["367"],"location_uris":["/locations/7799"],"four_part_id":"LI00022","uri":"/repositories/2/resources/1151","jsonmodel_type":"resource"}],"facets":{"facet_queries":{},"facet_fields":{},"facet_dates":{},"facet_ranges":{}}} + '} + headers: + cache-control: ['private, must-revalidate, max-age=0'] + content-length: ['1330'] + content-type: [application/json] + date: ['Wed, 08 Jul 2015 21:38:45 GMT'] + server: [Jetty(8.1.5.v20120716)] + x-content-type-options: [nosniff] + status: {code: 200, message: OK} +version: 1 diff --git a/src/MCPClient/tests/test_dip_generation_helper.py b/src/MCPClient/tests/test_dip_generation_helper.py new file mode 100644 index 0000000000..0557cb7816 --- /dev/null +++ b/src/MCPClient/tests/test_dip_generation_helper.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python2 +import os +import sys +import vcr + +from django.test import TestCase + +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) 
sys.path.append(os.path.abspath(os.path.join(THIS_DIR, '../lib/clientScripts')))
import dip_generation_helper

from main.models import ArchivesSpaceDIPObjectResourcePairing


class TestParseArchivesSpaceIDs(TestCase):
    """Run dip_generation_helper.parse_archivesspace_ids against fixture SIPs."""

    fixture_files = ['sip.json', 'files.json', 'archivesspace.json']
    sip_uuid = '4060ee97-9c3f-4822-afaf-ebdf838284c3'
    fixtures = [os.path.join(THIS_DIR, 'fixtures', p) for p in fixture_files]

    @staticmethod
    def _sip_dir(name):
        # The trailing '' leaves a path separator on the end of the SIP path
        return os.path.join(THIS_DIR, 'fixtures', name, '')

    @staticmethod
    def _has_pairings():
        # True once any ArchivesSpaceDIPObjectResourcePairing row exists
        return ArchivesSpaceDIPObjectResourcePairing.objects.all().exists()

    def test_no_archivesspace_csv(self):
        """ Without an archivesspaceids.csv it should succeed and create nothing. """
        location = self._sip_dir('emptysip')
        assert self._has_pairings() is False
        status = dip_generation_helper.parse_archivesspace_ids(location, self.sip_uuid)
        assert status == 0
        assert self._has_pairings() is False

    def test_empty_csv(self):
        """ An empty CSV should be reported as a failure and create nothing. """
        location = self._sip_dir('empty_metadata_files')
        assert self._has_pairings() is False
        status = dip_generation_helper.parse_archivesspace_ids(location, self.sip_uuid)
        assert status == 1
        assert self._has_pairings() is False

    @vcr.use_cassette(os.path.join(THIS_DIR, 'fixtures', 'test_no_files_in_db.yaml'))
    def test_no_files_in_db(self):
        """ It should create nothing when no listed file is found in the DB. """
        location = self._sip_dir('metadata_csv_sip')
        unknown_uuid = 'dne'
        assert self._has_pairings() is False
        status = dip_generation_helper.parse_archivesspace_ids(location, unknown_uuid)
        assert status == 0
        assert self._has_pairings() is False

    @vcr.use_cassette(os.path.join(THIS_DIR, 'fixtures', 'test_parse_archivesspace_ids.yaml'))
    def test_parse_to_db(self):
        """
        It should create an entry in ArchivesSpaceDIPObjectResourcePairing for each file in archivesspaceids.csv
        It should match the reference ID to a resource ID.
        """
        location = self._sip_dir('archivesspaceid_sip')
        assert self._has_pairings() is False
        status = dip_generation_helper.parse_archivesspace_ids(location, self.sip_uuid)
        assert status == 0
        pairings = ArchivesSpaceDIPObjectResourcePairing.objects.all()
        assert len(pairings) == 1
        pairing = pairings[0]
        assert pairing.dipuuid == self.sip_uuid
        assert pairing.fileuuid == 'ae8d4290-fe52-4954-b72a-0f591bee2e2f'
        assert pairing.resourceid == '/repositories/2/resources/1151'