Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Dublin Core #3710

Merged
merged 17 commits into from
Feb 20, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/java/org/jabref/cli/XMPUtilMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ public static void main(String[] args) throws IOException, TransformerException
try (FileReader reader = new FileReader(args[0])) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure, but does the parser accept an InputStream? If so, you could use Files.newInputStream(...),
which would actually be preferable to a FileReader.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no overloaded parse method that accepts an InputStream.


Collection<BibEntry> entries = result.getDatabase().getEntries();
List<BibEntry> entries = result.getDatabase().getEntries();

if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
XMPUtilWriter.writeXMP(Paths.get(args[1]), entries, result.getDatabase(), false, xmpPreferences);
XMPUtilWriter.writeXMP(Paths.get(args[1]), entries, result.getDatabase(), xmpPreferences);
System.out.println("XMP written.");
}
}
Expand Down
71 changes: 37 additions & 34 deletions src/main/java/org/jabref/logic/xmp/XMPUtilWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
Expand Down Expand Up @@ -79,9 +77,9 @@ public static void writeXMP(String fileName, BibEntry entry,
*/
public static void writeXMP(Path file, BibEntry entry,
BibDatabase database, XMPPreferences xmpPreferences) throws IOException, TransformerException {
List<BibEntry> l = new LinkedList<>();
l.add(entry);
XMPUtilWriter.writeXMP(file, l, database, true, xmpPreferences);
List<BibEntry> bibEntryList = new ArrayList<>();
bibEntryList.add(entry);
XMPUtilWriter.writeXMP(file, bibEntryList, database, xmpPreferences);
}

private static void writeToDCSchema(DublinCoreSchema dcSchema, BibEntry entry, BibDatabase database,
Expand Down Expand Up @@ -124,10 +122,10 @@ public static void writeDublinCore(PDDocument document, BibEntry entry,
* resolve strings. If the database is null the strings will not be resolved.
*/
private static void writeDublinCore(PDDocument document,
Collection<BibEntry> entries, BibDatabase database, XMPPreferences xmpPreferences)
List<BibEntry> entries, BibDatabase database, XMPPreferences xmpPreferences)
throws IOException, TransformerException {

Collection<BibEntry> resolvedEntries;
List<BibEntry> resolvedEntries;
if (database == null) {
resolvedEntries = entries;
} else {
Expand Down Expand Up @@ -242,45 +240,24 @@ private static void writeDocumentInformation(PDDocument document,
* @throws IOException If the file could not be written to or could not be found.
*/
public static void writeXMP(Path path,
Collection<BibEntry> bibtexEntries, BibDatabase database,
boolean writePDFInfo, XMPPreferences xmpPreferences) throws IOException, TransformerException {
List<BibEntry> bibtexEntries, BibDatabase database,
XMPPreferences xmpPreferences) throws IOException, TransformerException {

Collection<BibEntry> resolvedEntries;
List<BibEntry> resolvedEntries;
if (database == null) {
resolvedEntries = bibtexEntries;
} else {
resolvedEntries = database.resolveForStrings(bibtexEntries, false);
}

try (PDDocument document = PDDocument.load(path.toFile())) {

if (document.isEncrypted()) {
throw new EncryptedPdfsNotSupportedException();
}

if (writePDFInfo && (resolvedEntries.size() == 1)) {
XMPUtilWriter.writeDocumentInformation(document, resolvedEntries
.iterator()
.next(), null, xmpPreferences);
XMPUtilWriter.writeDublinCore(document, resolvedEntries, null, xmpPreferences);
}

PDDocumentCatalog catalog = document.getDocumentCatalog();
PDMetadata metaRaw = catalog.getMetadata();

XMPMetadata meta;
if (metaRaw == null) {
meta = XMPMetadata.createXMPMetadata();
} else {
meta = XMPUtilShared.parseXMPMetadata(metaRaw.createInputStream());
}

// Save to stream and then input that stream to the PDF
ByteArrayOutputStream os = new ByteArrayOutputStream();
XmpSerializer serializer = new XmpSerializer();
serializer.serialize(meta, os, true);
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
PDMetadata metadataStream = new PDMetadata(document, is);
catalog.setMetadata(metadataStream);
// Write schemas (PDDocumentInformation and DublinCoreSchema) to the document metadata
XMPUtilWriter.writeSchemasToPDMetadata(document, resolvedEntries, xmpPreferences);

// Save
try {
Expand All @@ -292,6 +269,32 @@ public static void writeXMP(Path path,
}
}

/**
 * Writes the metadata of the given entries into the document and then re-attaches the
 * document's XMP metadata stream to the document catalog.
 * <p>
 * If at least one entry is given, the first entry is written as document information and all
 * entries are written as Dublin Core. Afterwards the XMP metadata found in the catalog (or a newly
 * created, empty one if the catalog has none) is serialized and set on the catalog again.
 *
 * @param document        the opened PDF document to modify
 * @param resolvedEntries the entries to write; nothing entry-specific is written if the list is empty
 * @param xmpPreferences  preferences passed on to the document-information and Dublin Core writers
 * @throws IOException          if reading the existing metadata or building the new metadata stream fails
 * @throws TransformerException if serializing the XMP metadata fails
 */
private static void writeSchemasToPDMetadata(PDDocument document, List<BibEntry> resolvedEntries, XMPPreferences xmpPreferences) throws IOException, TransformerException {

    if (!resolvedEntries.isEmpty()) {
        XMPUtilWriter.writeDocumentInformation(document, resolvedEntries.get(0), null, xmpPreferences);
        XMPUtilWriter.writeDublinCore(document, resolvedEntries, null, xmpPreferences);
    }

    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata metaRaw = catalog.getMetadata();

    XMPMetadata meta;
    if (metaRaw == null) {
        meta = XMPMetadata.createXMPMetadata();
    } else {
        // Close the stream obtained from the existing metadata as soon as it has been parsed,
        // so it is released even when parsing throws.
        try (java.io.InputStream existingMetadata = metaRaw.createInputStream()) {
            meta = XMPUtilShared.parseXMPMetadata(existingMetadata);
        }
    }

    // Save to stream and then input that stream to the PDF
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    XmpSerializer serializer = new XmpSerializer();
    serializer.serialize(meta, os, true);
    ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
    PDMetadata metadataStream = new PDMetadata(document, is);
    catalog.setMetadata(metadataStream);
}

private static BibEntry getDefaultOrDatabaseEntry(BibEntry defaultEntry, BibDatabase database) {
if (database == null) {
return defaultEntry;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.logic.xmp;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only one empty line (?)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
Expand All @@ -11,8 +10,6 @@
import java.util.List;
import java.util.Optional;

import javax.xml.transform.TransformerException;

import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.fileformat.BibtexParser;
Expand All @@ -21,8 +18,6 @@
import org.jabref.model.util.FileUpdateMonitor;

import com.google.common.io.Resources;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.junit.Assert;
Expand All @@ -35,7 +30,7 @@
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class XMPUtilTest {
public class XMPUtilReaderTest {

@Rule public TemporaryFolder tempFolder = new TemporaryFolder();

Expand All @@ -53,7 +48,6 @@ public class XMPUtilTest {
@Before
public void setUp() {


importFormatPreferences = mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS);
when(importFormatPreferences.getEncoding()).thenReturn(StandardCharsets.UTF_8);
xmpPreferences = mock(XMPPreferences.class);
Expand Down Expand Up @@ -142,31 +136,4 @@ public void testReadPDMetadata() throws IOException, URISyntaxException, ParseEx
Assert.assertEquals(entryFromBibFile.get(), entries.get(0));
}

@Test
public void testWriteXMP() throws IOException, URISyntaxException, TransformerException {

// create a default PDF
File pdfFile = tempFolder.newFile("JabRef.pdf");
try (PDDocument pdf = new PDDocument()) {
// Need a single page to open in Acrobat
pdf.addPage(new PDPage());
pdf.save(pdfFile.getPath());
}

// read a bib entry from the tests before
List<BibEntry> entries = XMPUtilReader.readXMP(Paths.get(XMPUtilShared.class.getResource("/org/jabref/logic/xmp/PD_metadata.pdf").toURI()), xmpPreferences);
BibEntry entry = entries.get(0);
entry.setCiteKey("WriteXMPTest");
entry.setId("ID4711");

// write the changed bib entry to the create PDF
XMPUtilWriter.writeXMP(pdfFile.getAbsolutePath(), entry, null, xmpPreferences);

// read entry again
List<BibEntry> entriesWritten = XMPUtilReader.readXMP(pdfFile.getPath(), xmpPreferences);
BibEntry entryWritten = entriesWritten.get(0);

// compare the two entries
Assert.assertEquals(entry, entryWritten);
}
}
162 changes: 162 additions & 0 deletions src/test/java/org/jabref/logic/xmp/XMPUtilWriterTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package org.jabref.logic.xmp;

import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

import javax.xml.transform.TransformerException;

import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.model.util.FileUpdateMonitor;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.mockito.Answers;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class XMPUtilWriterTest {

// JUnit rule providing the temporary folder in which createDefaultFile creates the test PDFs.
@Rule public TemporaryFolder tempFolder = new TemporaryFolder();

// File monitor handed to the BibtexParser constructor and to singleFromString.
private static final FileUpdateMonitor fileMonitor = new DummyFileUpdateMonitor();

// Mocked in setUp(): privacy filter off, ',' as keyword separator.
private XMPPreferences xmpPreferences;

// Mocked in setUp() with deep stubs; getEncoding() is stubbed to UTF-8.
private ImportFormatPreferences importFormatPreferences;

// Created anew in setUp() from importFormatPreferences and fileMonitor.
private BibtexParser parser;

// BibTeX source of an @article entry; parsed in testWriteMultipleBibEntries.
private static final String olly2018 = "@article{Olly2018,\r\n" +
" author = {Olly and Johannes},\r\n" +
" title = {Stefan's palace},\r\n" +
" journal = {Test Journal},\r\n" +
" volume = {1},\r\n" +
" number = {1},\r\n" +
" pages = {1-2},\r\n" +
" month = mar,\r\n" +
" issn = {978-123-123},\r\n" +
" note = {That's a note},\r\n" +
" abstract = {That's an abstract},\r\n" +
" comment = {That's a comment},\r\n" +
" doi = {10/3212.3123},\r\n" +
" file = {:article_dublinCore.pdf:PDF},\r\n" +
" groups = {NO},\r\n" +
" howpublished = {Online},\r\n" +
" keywords = {Keyword1, Keyword2},\r\n" +
" owner = {Me},\r\n" +
" review = {Here are the reviews},\r\n" +
" timestamp = {2018-02-15},\r\n" +
" url = {https://www.olly2018.edu},\r\n" +
"}";

// BibTeX source of an @InProceedings entry; parsed in testWriteMultipleBibEntries.
private static final String toral2006 = "@InProceedings{Toral2006,\r\n" +
" author = {Toral, Antonio and Munoz, Rafael},\r\n" +
" title = {A proposal to automatically build and maintain gazetteers for Named Entity Recognition by using Wikipedia},\r\n" +
" booktitle = {Proceedings of EACL},\r\n" +
" pages = {56--61},\r\n" +
" date = {2006},\r\n" +
" eprinttype = {asdf},\r\n" +
" eventdate = {2017-05-31},\r\n" +
" owner = {Christoph Schwentker},\r\n" +
" timestamp = {2016.11.07},\r\n" +
" url = {asdfasdfas},\r\n" +
" urldate = {2017-05-31},\r\n" +
"}";

private static final String vapnik2000 = "@Book{Vapnik2000,\r\n" +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please create the BibEntries by hand (using new BibEntry() and setField) rather than from their string representation. The XMP tests should be as autonomous as possible; in particular, they shouldn't fail if the BibParser is changed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DONE

" title = {The Nature of Statistical Learning Theory},\r\n" +
" publisher = {Springer Science + Business Media},\r\n" +
" author = {Vladimir N. Vapnik},\r\n" +
" date = {2000},\r\n" +
" doi = {10.1007/978-1-4757-3264-1},\r\n" +
" owner = {Christoph Schwentker},\r\n" +
" timestamp = {2016.06.20},\r\n" +
"}";

/**
 * Prepares the mocked preferences and the parser used by the tests.
 * <p>
 * The XMP preferences report the privacy filter as disabled and use ',' as keyword separator;
 * the import format preferences report UTF-8 as encoding.
 */
@Before
public void setUp() {
    xmpPreferences = mock(XMPPreferences.class);
    // The code assumes privacy filters to be off
    when(xmpPreferences.isUseXMPPrivacyFilter()).thenReturn(false);
    when(xmpPreferences.getKeywordSeparator()).thenReturn(',');

    importFormatPreferences = mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS);
    when(importFormatPreferences.getEncoding()).thenReturn(StandardCharsets.UTF_8);

    parser = new BibtexParser(importFormatPreferences, fileMonitor);
}

/**
 * Writes a single entry as XMP metadata into a newly created PDF file and checks that the
 * first entry read back from that file equals the entry that was written.
 *
 * @throws IOException          if creating, writing or reading the PDF fails
 * @throws URISyntaxException   declared by the test signature
 * @throws TransformerException if writing the XMP metadata fails
 * @throws ParseException       if the BibTeX fixture cannot be parsed
 */
@Test
public void testWriteXMP() throws IOException, URISyntaxException, TransformerException, ParseException {
    File pdfFile = this.createDefaultFile("JabRef_writeSingle.pdf");

    // parse the fixture and give the entry a fixed cite key and id
    BibEntry entry = parser.parseEntries(vapnik2000).get(0);
    entry.setCiteKey("WriteXMPTest");
    entry.setId("ID4711");

    // write the changed bib entry to the created PDF
    XMPUtilWriter.writeXMP(pdfFile.getAbsolutePath(), entry, null, xmpPreferences);

    // read the entries back and compare the first one with what was written
    List<BibEntry> entriesWritten = XMPUtilReader.readXMP(pdfFile.getPath(), xmpPreferences);
    Assert.assertEquals(entry, entriesWritten.get(0));
}

/**
 * Writes three parsed entries into one newly created PDF file.
 * <p>
 * The test passes when writing completes without an exception; the written metadata is not
 * read back or compared.
 */
@Test
public void testWriteMultipleBibEntries() throws IOException, ParseException, TransformerException {
    File pdfFile = this.createDefaultFile("JabRef_writeMultiple.pdf");

    // parse the fixtures in the same order in which they are written
    BibEntry vapnik = parser.singleFromString(vapnik2000, importFormatPreferences, fileMonitor).get();
    BibEntry olly = parser.singleFromString(olly2018, importFormatPreferences, fileMonitor).get();
    BibEntry toral = parser.singleFromString(toral2006, importFormatPreferences, fileMonitor).get();
    List<BibEntry> entries = Arrays.asList(vapnik, olly, toral);

    XMPUtilWriter.writeXMP(Paths.get(pdfFile.getAbsolutePath()), entries, null, xmpPreferences);
}

/**
 * Creates a PDF file with a single empty page inside the temporary folder.
 *
 * @param fileName name of the file to create in the temporary folder
 * @return the created PDF file
 * @throws IOException if the file cannot be created or the document cannot be saved
 */
private File createDefaultFile(String fileName) throws IOException {
    File target = tempFolder.newFile(fileName);

    try (PDDocument document = new PDDocument()) {
        // Need a single page to open in Acrobat
        document.addPage(new PDPage());
        document.save(target.getPath());
    }

    return target;
}
}