-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ArchivesSpace: Parse archivesspaceids.csv for filename->resource ID
- Loading branch information
Showing
7 changed files
with
282 additions
and
0 deletions.
There are no files selected for viewing
118 changes: 118 additions & 0 deletions
118
src/MCPClient/lib/clientScripts/dip_generation_helper.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,133 @@ | ||
#!/usr/bin/env python2 | ||
from __future__ import print_function | ||
import argparse | ||
import ast | ||
import csv | ||
import sys | ||
|
||
# dashboard | ||
from django.db.models import Q | ||
from main import models | ||
|
||
# archivematicaCommon | ||
import archivesspace | ||
import archivematicaFunctions | ||
|
||
def create_archivesspace_client():
    """
    Build an ArchivesSpace client from the stored dashboard configuration.

    The connection settings are read from the MicroServiceChoiceReplacementDic
    row whose description is 'ArchivesSpace Config'; its ``replacementdic``
    field holds a Python dict literal keyed by '%host%', '%port%', '%user%'
    and '%passwd%'.

    :return: An archivesspace.ArchivesSpaceClient, or None if authentication
        or the connection fails (a message is printed in either case)
    """
    # TODO use same code as views_as.py?
    stored = models.MicroServiceChoiceReplacementDic.objects.get(description='ArchivesSpace Config')
    settings = ast.literal_eval(stored.replacementdic)

    try:
        return archivesspace.ArchivesSpaceClient(
            host=settings['%host%'],
            port=settings['%port%'],
            user=settings['%user%'],
            passwd=settings['%passwd%'],
        )
    except archivesspace.AuthenticationError:
        print("Unable to authenticate to ArchivesSpace server using the default user! Check administrative settings.")
    except archivesspace.ConnectionError:
        print("Unable to connect to ArchivesSpace server at the default location! Check administrative settings.")
    # Only reached when one of the handlers above ran.
    return None
|
||
def parse_archivesspaceids_csv(files):
    """
    Parse filename and reference ID from archivesspaceids.csv files.

    Each row is expected to hold the filename in its first column and the
    reference ID in its second; any further columns are ignored. Rows with
    fewer than two columns (including blank lines) are skipped rather than
    aborting the whole parse with an IndexError.

    :param files: List of paths to archivesspaceids.csv files. When the same
        filename appears more than once, the last occurrence wins.
    :return: Dict with {filename: reference ID}
    """
    # The csv module wants a binary file on Python 2, but a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        mode, extra = 'rbU', {}
    else:
        mode, extra = 'r', {'newline': ''}

    file_info = {}
    # SIP is last, so takes priority
    for csv_path in files:
        with open(csv_path, mode, **extra) as f:
            for row in csv.reader(f):
                if len(row) < 2:
                    # Blank or incomplete row: nothing to map.
                    continue
                file_info[row[0]] = row[1]
    return file_info
|
||
def parse_archivesspace_ids(sip_path, sip_uuid):
    """
    Pre-populate the ArchivesSpace matching GUI from archivesspaceids.csv.

    For every (filename, reference ID) pair found in the SIP's
    archivesspaceids.csv files, the matching File row and the ArchivesSpace
    resource are looked up and an ArchivesSpaceDIPObjectResourcePairing is
    created. Entries whose file or resource cannot be resolved are reported
    and skipped; they do not affect the return code.

    :param sip_path: Path to the SIP to check for an archivesspaceids.csv
    :param sip_uuid: UUID of the SIP to auto-populate ArchivesSpace IDs for
    :return: 0 if no CSV was found or the entries were processed; 1 if the
        CSVs held no usable rows or no ArchivesSpace client could be created
    """
    # Nothing to do when the SIP carries no archivesspaceids.csv
    found_csvs = archivematicaFunctions.find_metadata_files(sip_path, 'archivesspaceids.csv')
    if not found_csvs:
        print('No archivesspaceids.csv files found, exiting')
        return 0

    file_info = parse_archivesspaceids_csv(found_csvs)
    if not file_info:
        print('No information found in archivesspaceids.csv files')
        return 1
    print(file_info)

    client = create_archivesspace_client()
    if not client:
        return 1

    for filename, ref_id in file_info.items():
        print(filename, ref_id, '%SIPLocation%' + filename)

        # The CSV path may be relative to the transfer or the SIP, with or
        # without the objects/ prefix; accept any of those locations.
        candidate_locations = (
            '%transferDirectory%' + filename,
            '%transferDirectory%objects/' + filename,
            '%SIPDirectory%' + filename,
            '%SIPDirectory%objects/' + filename,
        )
        location_filter = Q(originallocation=candidate_locations[0])
        for location in candidate_locations[1:]:
            location_filter = location_filter | Q(originallocation=location)

        # Get file object (for fileUUID, to see if in DIP)
        try:
            file_obj = models.File.objects.get(location_filter, sip_id=sip_uuid)
        except models.File.DoesNotExist:
            print(filename, 'not found in database, skipping')
            continue
        except models.File.MultipleObjectsReturned:
            print('Multiple entries for', filename, 'found in database, skipping')
            continue
        print('File:', file_obj)

        # Ask ArchivesSpace which resource carries this reference ID
        search_results = client.find_by_field('identifier', ref_id)
        try:
            resource_id = search_results[0]['id']
        except IndexError:
            print('ArchivesSpace did not return an ID for', ref_id)
            print('Returned', search_results)
            continue
        print('Resource ID:', resource_id)

        # Record the file <-> resource match
        models.ArchivesSpaceDIPObjectResourcePairing.objects.create(
            dipuuid=sip_uuid,
            fileuuid=file_obj.uuid,
            resourceid=resource_id,
        )

    # Check if any files were processed?
    return 0
|
||
if __name__ == '__main__':
    # Script entry point: parse --sipUUID / --sipPath and run each DIP helper.
    parser = argparse.ArgumentParser(description='Parse metadata for DIP helpers')
    # argparse %-formats help strings, so the literal percent signs in the
    # placeholder names must be escaped as %%; unescaped, --help fails with a
    # string-formatting error.
    parser.add_argument('--sipUUID', required=True, help='%%SIPUUID%%')
    parser.add_argument('--sipPath', required=True, help='%%SIPDirectory%%')
    args = parser.parse_args()

    # Return non-zero if any of the helpers fail.
    # `rc or helper(...)` short-circuits: once a helper has returned non-zero,
    # later helpers are not run and that code becomes the exit status.
    rc = 0
    rc = rc or parse_archivesspace_ids(args.sipPath, args.sipUUID)
    # rc = rc or another_dip_helper(args.sipPath, args.sipUUID)

    sys.exit(rc)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
[ | ||
{ | ||
"pk": "f8749dd2-0923-4b57-a074-45cd92ace56f", | ||
"model": "main.microservicechoicereplacementdic", | ||
"fields": { | ||
"lastmodified": "2015-07-08T17:53:08", | ||
"replaces": null, | ||
"choiceavailableatlink": "a0db8294-f02a-4f49-a557-b1310a715ffc", | ||
"description": "ArchivesSpace Config", | ||
"replacementdic": "{'%port%': '8089', '%object_type%': u'', '%host%': u'localhost', '%xlink_show%': u'none', '%use_statement%': u'none', '%uri_prefix%': u'none', '%xlink_actuate%': u'none', '%access_conditions%': u'', '%use_conditions%': u'', '%restrictions%': u'no', '%passwd%': u'admin', '%user%': u'admin'}" | ||
} | ||
}, | ||
{ | ||
"pk": "a0db8294-f02a-4f49-a557-b1310a715ffc", | ||
"model": "main.microservicechainlink", | ||
"fields": { | ||
"microservicegroup": "Upload DIP", | ||
"defaultexitmessage": "Failed", | ||
"reloadfilelist": true, | ||
"lastmodified": "2015-07-08T17:53:08", | ||
"defaultnextchainlink": "ff89a530-0540-4625-8884-5a2198dea05a", | ||
"currenttask": "5ded9d05-dd24-484a-a8b2-73ec5d35aa63", | ||
"replaces": null | ||
} | ||
} | ||
] |
1 change: 1 addition & 0 deletions
1
src/MCPClient/tests/fixtures/archivesspaceid_sip/objects/metadata/archivesspaceids.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
objects/evelyn's photo.jpg,LI00022 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
interactions: | ||
- request: | ||
body: password=admin | ||
headers: | ||
Accept: ['*/*'] | ||
Accept-Encoding: ['gzip, deflate'] | ||
Connection: [keep-alive] | ||
Content-Length: ['14'] | ||
Content-Type: [application/x-www-form-urlencoded] | ||
User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] | ||
method: POST | ||
uri: http://localhost:8089/users/admin/login | ||
response: | ||
body: {string: !!python/unicode '{"session":"88373637ab6bd52646d959ad310c1f281fb4ba02073e64c3f4da50b43d67b24a","user":{"lock_version":1159,"username":"admin","name":"Administrator","is_system_user":true,"create_time":"2014-12-05T20:32:17Z","system_mtime":"2015-07-09T23:18:47Z","user_mtime":"2015-07-09T23:18:47Z","jsonmodel_type":"user","groups":[],"is_admin":false,"uri":"/users/1","agent_record":{"ref":"/agents/people/1"},"permissions":{"/repositories/2":["view_repository","update_accession_record","update_resource_record","update_digital_object_record"],"_archivesspace":[]}}} | ||
|
||
'} | ||
headers: | ||
cache-control: ['private, must-revalidate, max-age=0'] | ||
content-length: ['551'] | ||
content-type: [application/json] | ||
date: ['Thu, 09 Jul 2015 23:18:47 GMT'] | ||
server: [Jetty(8.1.5.v20120716)] | ||
x-content-type-options: [nosniff] | ||
status: {code: 200, message: OK} | ||
version: 1 |
52 changes: 52 additions & 0 deletions
52
src/MCPClient/tests/fixtures/test_parse_archivesspace_ids.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
interactions: | ||
- request: | ||
body: password=admin | ||
headers: | ||
Accept: ['*/*'] | ||
Accept-Encoding: ['gzip, deflate'] | ||
Connection: [keep-alive] | ||
Content-Length: ['14'] | ||
Content-Type: [application/x-www-form-urlencoded] | ||
User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] | ||
method: POST | ||
uri: http://localhost:8089/users/admin/login | ||
response: | ||
body: {string: !!python/unicode '{"session":"4a108561f24f7850cb136cd765405fd563853b39b626e7cf3bfc4a99ef2bab0c","user":{"lock_version":898,"username":"admin","name":"Administrator","is_system_user":true,"create_time":"2014-12-05T20:32:17Z","system_mtime":"2015-07-08T21:38:45Z","user_mtime":"2015-07-08T21:38:45Z","jsonmodel_type":"user","groups":[],"is_admin":false,"uri":"/users/1","agent_record":{"ref":"/agents/people/1"},"permissions":{"/repositories/2":["view_repository","update_accession_record","update_resource_record","update_digital_object_record"],"_archivesspace":[]}}} | ||
|
||
'} | ||
headers: | ||
cache-control: ['private, must-revalidate, max-age=0'] | ||
content-length: ['550'] | ||
content-type: [application/json] | ||
date: ['Wed, 08 Jul 2015 21:38:45 GMT'] | ||
server: [Jetty(8.1.5.v20120716)] | ||
x-content-type-options: [nosniff] | ||
status: {code: 200, message: OK} | ||
- request: | ||
body: null | ||
headers: | ||
Accept: ['*/*'] | ||
Accept-Encoding: ['gzip, deflate'] | ||
Connection: [keep-alive] | ||
User-Agent: [python-requests/2.7.0 CPython/2.7.6 Linux/3.13.0-43-generic] | ||
X-ArchivesSpace-Session: [!!python/unicode '4a108561f24f7850cb136cd765405fd563853b39b626e7cf3bfc4a99ef2bab0c'] | ||
method: GET | ||
uri: http://localhost:8089/repositories/2/search?q=identifier%3ALI00022&page=1&page_size=30 | ||
response: | ||
body: {string: !!python/unicode '{"first_page":1,"last_page":1,"this_page":1,"offset_first":1,"offset_last":1,"total_hits":1,"results":[{"id":"/repositories/2/resources/1151","title":"Digital | ||
futures : \nstrategies for the information age","primary_type":"resource","types":["resource"],"json":"{\"truncated\": | ||
true}","suppressed":false,"publish":true,"system_generated":false,"repository":"/repositories/2","subjects":["Digital | ||
preservation","Digital libraries"],"agents":["Deegan, Marilyn","Neal-Schuman | ||
Publishers","Library Association Publishing Limited","Tanner, Simon"],"agent_uris":["/agents/people/265","/agents/corporate_entities/90","/agents/corporate_entities/98","/agents/people/266"],"creators":["Deegan, | ||
Marilyn","Neal-Schuman Publishers","Library Association Publishing Limited","Tanner, | ||
Simon"],"created_by":"admin","last_modified_by":"admin","user_mtime":"2014-12-06T15:31:03Z","system_mtime":"2014-12-06T15:31:03Z","create_time":"2014-12-06T15:31:03Z","level":"item","finding_aid_title":"","identifier":"LI00022","language":"eng","restrictions":"false","external_id":["367"],"location_uris":["/locations/7799"],"four_part_id":"LI00022","uri":"/repositories/2/resources/1151","jsonmodel_type":"resource"}],"facets":{"facet_queries":{},"facet_fields":{},"facet_dates":{},"facet_ranges":{}}} | ||
'} | ||
headers: | ||
cache-control: ['private, must-revalidate, max-age=0'] | ||
content-length: ['1330'] | ||
content-type: [application/json] | ||
date: ['Wed, 08 Jul 2015 21:38:45 GMT'] | ||
server: [Jetty(8.1.5.v20120716)] | ||
x-content-type-options: [nosniff] | ||
status: {code: 200, message: OK} | ||
version: 1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#!/usr/bin/env python2 | ||
import os | ||
import sys | ||
import vcr | ||
|
||
from django.test import TestCase | ||
|
||
# Directory containing this test module; fixture paths are built relative to it.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Put the client scripts directory on sys.path so dip_generation_helper can be imported.
sys.path.append(os.path.abspath(os.path.join(THIS_DIR, '../lib/clientScripts')))
import dip_generation_helper | ||
|
||
from main.models import ArchivesSpaceDIPObjectResourcePairing | ||
|
||
class TestParseArchivesSpaceIDs(TestCase):
    """Tests for dip_generation_helper.parse_archivesspace_ids."""

    fixture_files = ['sip.json', 'files.json', 'archivesspace.json']
    sip_uuid = '4060ee97-9c3f-4822-afaf-ebdf838284c3'
    fixtures = [os.path.join(THIS_DIR, 'fixtures', p) for p in fixture_files]

    @staticmethod
    def _sip_dir(name):
        """Return the path of fixture SIP ``name``, with a trailing separator."""
        return os.path.join(THIS_DIR, 'fixtures', name, '')

    @staticmethod
    def _no_pairings():
        """Return True when no ArchivesSpaceDIPObjectResourcePairing rows exist."""
        return ArchivesSpaceDIPObjectResourcePairing.objects.all().exists() is False

    def test_no_archivesspace_csv(self):
        """ It should do nothing. """
        assert self._no_pairings()
        result = dip_generation_helper.parse_archivesspace_ids(self._sip_dir('emptysip'), self.sip_uuid)
        assert result == 0
        assert self._no_pairings()

    def test_empty_csv(self):
        """ It should do nothing if the CSV is empty. """
        assert self._no_pairings()
        result = dip_generation_helper.parse_archivesspace_ids(self._sip_dir('empty_metadata_files'), self.sip_uuid)
        assert result == 1
        assert self._no_pairings()

    @vcr.use_cassette(os.path.join(THIS_DIR, 'fixtures', 'test_no_files_in_db.yaml'))
    def test_no_files_in_db(self):
        """ It should do nothing if no files are found in the DB. """
        unknown_uuid = 'dne'
        assert self._no_pairings()
        result = dip_generation_helper.parse_archivesspace_ids(self._sip_dir('metadata_csv_sip'), unknown_uuid)
        assert result == 0
        assert self._no_pairings()

    @vcr.use_cassette(os.path.join(THIS_DIR, 'fixtures', 'test_parse_archivesspace_ids.yaml'))
    def test_parse_to_db(self):
        """
        It should create an entry in ArchivesSpaceDIPObjectResourcePairing for each file in archivesspaceids.csv
        It should match the reference ID to a resource ID.
        """
        assert self._no_pairings()
        result = dip_generation_helper.parse_archivesspace_ids(self._sip_dir('archivesspaceid_sip'), self.sip_uuid)
        assert result == 0
        pairings = list(ArchivesSpaceDIPObjectResourcePairing.objects.all())
        assert len(pairings) == 1
        pairing = pairings[0]
        assert pairing.dipuuid == self.sip_uuid
        assert pairing.fileuuid == 'ae8d4290-fe52-4954-b72a-0f591bee2e2f'
        assert pairing.resourceid == '/repositories/2/resources/1151'