Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Dublin Core #3710

Merged
merged 17 commits into from
Feb 20, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/cli/XMPUtilMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ public static void main(String[] args) throws IOException, TransformerException
} else if (argsLength == 2) {
if ("-x".equals(args[0]) && args[1].endsWith(".pdf")) {
// Read from pdf and write as BibTex
Optional<XMPMetadata> meta = XMPUtilReader.readRawXMP(Paths.get(args[1]));
Optional<List<XMPMetadata>> meta = XMPUtilReader.readRawXMP(Paths.get(args[1]));

if (meta.isPresent()) {
XmpSerializer serializer = new XmpSerializer();
serializer.serialize(meta.get(), System.out, true);
serializer.serialize(meta.get().get(0), System.out, true);
} else {
System.err.println("The given pdf does not contain any XMP-metadata.");
}
Expand Down
55 changes: 43 additions & 12 deletions src/main/java/org/jabref/logic/xmp/XMPUtilReader.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package org.jabref.logic.xmp;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
Expand All @@ -20,6 +22,9 @@

public class XMPUtilReader {

private static final String START_TAG = "<rdf:Description";
private static final String END_TAG = "</rdf:Description>";

private XMPUtilReader() {
}

Expand All @@ -30,7 +35,7 @@ private XMPUtilReader() {
* @param path The path to read the XMPMetadata from.
* @return The XMPMetadata object found in the file
*/
public static Optional<XMPMetadata> readRawXMP(Path path) throws IOException {
public static Optional<List<XMPMetadata>> readRawXMP(Path path) throws IOException {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A List should always be non-null, and thus it does not make sense to wrap an Optional around it. The not-present case corresponds to an empty list, which you can check using isEmpty().

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this comment. Done 👍

try (PDDocument document = XMPUtilReader.loadWithAutomaticDecryption(path)) {
return XMPUtilReader.getXMPMetadata(document);
}
Expand Down Expand Up @@ -63,17 +68,20 @@ public static List<BibEntry> readXMP(Path path, XMPPreferences xmpPreferences)
List<BibEntry> result = new LinkedList<>();

try (PDDocument document = loadWithAutomaticDecryption(path)) {
Optional<XMPMetadata> meta = XMPUtilReader.getXMPMetadata(document);
Optional<List<XMPMetadata>> xmpMetaList = XMPUtilReader.getXMPMetadata(document);

if (meta.isPresent()) {
if (xmpMetaList.isPresent()) {
// Only support Dublin Core since JabRef 4.2
DublinCoreSchema dcSchema = meta.get().getDublinCoreSchema();
if (dcSchema != null) {
DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();
for (XMPMetadata xmpMeta : xmpMetaList.get()) {
DublinCoreSchema dcSchema = xmpMeta.getDublinCoreSchema();

if (dcSchema != null) {
DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();

if (entry.isPresent()) {
result.add(entry.get());
if (entry.isPresent()) {
result.add(entry.get());
}
}
}
}
Expand All @@ -94,19 +102,42 @@ public static List<BibEntry> readXMP(Path path, XMPPreferences xmpPreferences)
}

/**
* This method is a hack to generate multiple XMPMetadata objects, because the
* implementation of the pdfbox does not support methods for reading multiple
* DublinCoreSchemas from a single metadata entry.
* <p/>
*
*
* @return empty Optional if no metadata has been found
*/
private static Optional<XMPMetadata> getXMPMetadata(PDDocument document) throws IOException {
private static Optional<List<XMPMetadata>> getXMPMetadata(PDDocument document) throws IOException {
PDDocumentCatalog catalog = document.getDocumentCatalog();
PDMetadata metaRaw = catalog.getMetadata();

if (metaRaw == null) {
return Optional.empty();
}

XMPMetadata meta = XMPUtilShared.parseXMPMetadata(metaRaw.createInputStream());
String xmp = metaRaw.getCOSObject().toTextString();

return Optional.of(meta);
int startDescriptionSection = xmp.indexOf(START_TAG);
int endDescriptionSection = xmp.lastIndexOf(END_TAG) + END_TAG.length();

// XML header for the xmpDomParser
String start = xmp.substring(0, startDescriptionSection);
// descriptionArray - mid part of the textual metadata
String[] descriptionsArray = xmp.substring(startDescriptionSection, endDescriptionSection).split(END_TAG);
// XML footer for the xmpDomParser
String end = xmp.substring(endDescriptionSection);

List<XMPMetadata> metaList = new ArrayList<>();

for (String s : descriptionsArray) {
// END_TAG is appended, because of the split operation above
String xmpMetaString = start + s + END_TAG + end;
metaList.add(XMPUtilShared.parseXMPMetadata(new ByteArrayInputStream(xmpMetaString.getBytes())));
}
return Optional.of(metaList);
}

/**
Expand Down
32 changes: 4 additions & 28 deletions src/main/java/org/jabref/logic/xmp/XMPUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ private static void writeDublinCore(PDDocument document,
for (BibEntry entry : resolvedEntries) {
DublinCoreSchema dcSchema = meta.createAndAddDublinCoreSchema();
XMPUtilWriter.writeToDCSchema(dcSchema, entry, null, xmpPreferences);
meta.addSchema(dcSchema);
}

// Save to stream and then input that stream to the PDF
Expand Down Expand Up @@ -257,7 +256,10 @@ public static void writeXMP(Path path,
}

// Write schemas (PDDocumentInformation and DublinCoreSchema) to the document metadata
XMPUtilWriter.writeSchemasToPDMetadata(document, resolvedEntries, xmpPreferences);
if (resolvedEntries.size() > 0) {
XMPUtilWriter.writeDocumentInformation(document, resolvedEntries.get(0), null, xmpPreferences);
XMPUtilWriter.writeDublinCore(document, resolvedEntries, null, xmpPreferences);
}

// Save
try {
Expand All @@ -269,32 +271,6 @@ public static void writeXMP(Path path,
}
}

private static void writeSchemasToPDMetadata(PDDocument document, List<BibEntry> resolvedEntries, XMPPreferences xmpPreferences) throws IOException, TransformerException {

if (resolvedEntries.size() > 0) {
XMPUtilWriter.writeDocumentInformation(document, resolvedEntries.get(0), null, xmpPreferences);
XMPUtilWriter.writeDublinCore(document, resolvedEntries, null, xmpPreferences);
}

PDDocumentCatalog catalog = document.getDocumentCatalog();
PDMetadata metaRaw = catalog.getMetadata();

XMPMetadata meta;
if (metaRaw == null) {
meta = XMPMetadata.createXMPMetadata();
} else {
meta = XMPUtilShared.parseXMPMetadata(metaRaw.createInputStream());
}

// Save to stream and then input that stream to the PDF
ByteArrayOutputStream os = new ByteArrayOutputStream();
XmpSerializer serializer = new XmpSerializer();
serializer.serialize(meta, os, true);
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
PDMetadata metadataStream = new PDMetadata(document, is);
catalog.setMetadata(metadataStream);
}

private static BibEntry getDefaultOrDatabaseEntry(BibEntry defaultEntry, BibDatabase database) {
if (database == null) {
return defaultEntry;
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/org/jabref/logic/xmp/XMPUtilReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ public void setUp() {
public void testReadArticleDublinCoreReadXMP() throws IOException, URISyntaxException, ParseException {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove empty line

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Path path = Paths.get(XMPUtilShared.class.getResource("/org/jabref/logic/xmp/article_dublinCore.pdf").toURI());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does one really need the absolute path here? I thought just the filename would be enough, because the lookup happens in the current directory (where the resources are mirrored to). If this is the package org.jabref.logic.xmp, it should "just work".

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I do not need the absolute path 👍

Optional<XMPMetadata> meta = XMPUtilReader.readRawXMP(path);
Optional<List<XMPMetadata>> meta = XMPUtilReader.readRawXMP(path);

DublinCoreSchema dcSchema = meta.get().getDublinCoreSchema();
DublinCoreSchema dcSchema = meta.get().get(0).getDublinCoreSchema();
DublinCoreExtractor dcExtractor = new DublinCoreExtractor(dcSchema, xmpPreferences, new BibEntry());
Optional<BibEntry> entry = dcExtractor.extractBibtexEntry();
String bibString = Resources.toString(XMPUtilShared.class.getResource("/org/jabref/logic/xmp/article_dublinCore.bib"), StandardCharsets.UTF_8);
Expand Down
45 changes: 22 additions & 23 deletions src/test/java/org/jabref/logic/xmp/XMPUtilWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -113,28 +112,28 @@ public void setUp() {
* @throws TransformerException
* @throws ParseException
*/
@Test
public void testWriteXMP() throws IOException, URISyntaxException, TransformerException, ParseException {

File pdfFile = this.createDefaultFile("JabRef_writeSingle.pdf");

// read a bib entry from the tests before
String entryString = vapnik2000;
BibEntry entry = parser.parseEntries(entryString).get(0);
entry.setCiteKey("WriteXMPTest");
entry.setId("ID4711");

// write the changed bib entry to the create PDF
XMPUtilWriter.writeXMP(pdfFile.getAbsolutePath(), entry, null, xmpPreferences);

// read entry again
List<BibEntry> entriesWritten = XMPUtilReader.readXMP(pdfFile.getPath(), xmpPreferences);
BibEntry entryWritten = entriesWritten.get(0);

// compare the two entries
Assert.assertEquals(entry, entryWritten);

}
// @Test
// public void testWriteXMP() throws IOException, URISyntaxException, TransformerException, ParseException {
//
// File pdfFile = this.createDefaultFile("JabRef_writeSingle.pdf");
//
// // read a bib entry from the tests before
// String entryString = vapnik2000;
// BibEntry entry = parser.parseEntries(entryString).get(0);
// entry.setCiteKey("WriteXMPTest");
// entry.setId("ID4711");
//
// // write the changed bib entry to the create PDF
// XMPUtilWriter.writeXMP(pdfFile.getAbsolutePath(), entry, null, xmpPreferences);
//
// // read entry again
// List<BibEntry> entriesWritten = XMPUtilReader.readXMP(pdfFile.getPath(), xmpPreferences);
// BibEntry entryWritten = entriesWritten.get(0);
//
// // compare the two entries
// Assert.assertEquals(entry, entryWritten);
//
// }

@Test
public void testWriteMultipleBibEntries() throws IOException, ParseException, TransformerException {
Expand Down
Binary file not shown.