Skip to content

Commit

Permalink
METS: Handle empty rows in metadata.csv
Browse files Browse the repository at this point in the history
Skip empty rows when parsing a metadata.csv
  • Loading branch information
Hwesta committed Feb 1, 2016
1 parent ab99b3f commit 6b6ea0c
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def parseMetadata(SIPPath):
print >>sys.stderr, "error parsing: ", metadataCSVFilePath
traceback.print_exc(file=sys.stderr)
sharedVariablesAcrossModules.globalErrorCount += 1
continue
# Provide warning if this file already has differing metadata
# Not using all_metadata.update(csv_metadata) because of that
for entry, values in csv_metadata.iteritems():
Expand Down Expand Up @@ -99,6 +100,8 @@ def parseMetadataCSV(metadataCSVFilePath):
header = [h.strip() for h in header[1:]]
# Parse data
for row in reader:
if not row:
continue
entry_name = row[0]
if entry_name.endswith("/"):
entry_name = entry_name[:-1]
Expand Down
19 changes: 19 additions & 0 deletions src/MCPClient/tests/test_create_aip_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,3 +348,22 @@ def test_parse_metadata_csv_non_ascii(self):
assert 'objects/foo.jpg' in dc
assert 'dc.title' in dc['objects/foo.jpg']
assert dc['objects/foo.jpg']['dc.title'] == [u'元気です'.encode('utf8')]

def test_parse_metadata_csv_blank_rows(self):
    """Parsing a metadata.csv that ends with an empty row yields one entry."""
    # Create metadata.csv: header row, one populated row, one empty row
    csv_rows = [
        ['Filename', 'dc.title', 'dc.type', 'dc.type', 'dc.type'],
        ['objects/foo.jpg', 'Foo', 'Photograph', 'Still image', 'Picture'],
        [],
    ]
    with open('metadata.csv', 'wb') as csv_file:
        csv.writer(csv_file).writerows(csv_rows)

    # Run test
    parsed = archivematicaCreateMETSMetadataCSV.parseMetadataCSV('metadata.csv')

    # Verify: the result is non-empty and holds only the populated row's entry
    assert parsed
    assert len(parsed) == 1
    assert 'objects/foo.jpg' in parsed

0 comments on commit 6b6ea0c

Please sign in to comment.