From 63a5503d4612d76d54121fbee51eeaa8ad594da7 Mon Sep 17 00:00:00 2001 From: Holly Becker Date: Fri, 29 Jan 2016 17:00:45 -0800 Subject: [PATCH] METS: Handle empty rows in metadata.csv Skip empty rows when parsing a metadata.csv --- .../archivematicaCreateMETSMetadataCSV.py | 3 +++ src/MCPClient/tests/test_create_aip_mets.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/MCPClient/lib/clientScripts/archivematicaCreateMETSMetadataCSV.py b/src/MCPClient/lib/clientScripts/archivematicaCreateMETSMetadataCSV.py index 105fa2c6b2..73aeaa9ee8 100755 --- a/src/MCPClient/lib/clientScripts/archivematicaCreateMETSMetadataCSV.py +++ b/src/MCPClient/lib/clientScripts/archivematicaCreateMETSMetadataCSV.py @@ -57,6 +57,7 @@ def parseMetadata(SIPPath): print >>sys.stderr, "error parsing: ", metadataCSVFilePath traceback.print_exc(file=sys.stderr) sharedVariablesAcrossModules.globalErrorCount += 1 + continue # Provide warning if this file already has differing metadata # Not using all_metadata.update(csv_metadata) because of that for entry, values in csv_metadata.iteritems(): @@ -99,6 +100,8 @@ def parseMetadataCSV(metadataCSVFilePath): header = [h.strip() for h in header[1:]] # Parse data for row in reader: + if not row: + continue entry_name = row[0] if entry_name.endswith("/"): entry_name = entry_name[:-1] diff --git a/src/MCPClient/tests/test_create_aip_mets.py b/src/MCPClient/tests/test_create_aip_mets.py index cfa1ad7605..89bf6def94 100644 --- a/src/MCPClient/tests/test_create_aip_mets.py +++ b/src/MCPClient/tests/test_create_aip_mets.py @@ -348,3 +348,22 @@ def test_parse_metadata_csv_non_ascii(self): assert 'objects/foo.jpg' in dc assert 'dc.title' in dc['objects/foo.jpg'] assert dc['objects/foo.jpg']['dc.title'] == [u'元気です'.encode('utf8')] + + def test_parse_metadata_csv_blank_rows(self): + # Create metadata.csv + data = [ + ['Filename', 'dc.title', 'dc.type', 'dc.type', 'dc.type'], + ['objects/foo.jpg', 'Foo', 'Photograph', 'Still image', 'Picture'], + [], + ] + with open('metadata.csv', 'wb') as f: + writer = csv.writer(f) + for row in data: + writer.writerow(row) + + # Run test + dc = archivematicaCreateMETSMetadataCSV.parseMetadataCSV('metadata.csv') + # Verify + assert dc + assert len(dc) == 1 + assert 'objects/foo.jpg' in dc