Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Dublin Core #3710

Merged
merged 17 commits into from
Feb 20, 2018
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,9 @@ dependencies {
compile 'com.jgoodies:jgoodies-common:1.8.1'
compile 'com.jgoodies:jgoodies-forms:1.9.0'

// update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517
compile 'org.apache.pdfbox:pdfbox:1.8.13'
compile 'org.apache.pdfbox:fontbox:1.8.13'
compile 'org.apache.pdfbox:jempbox:1.8.13'
compile 'org.apache.pdfbox:pdfbox:2.0.8'
compile 'org.apache.pdfbox:fontbox:2.0.8'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Further below (starting at line 218), we added exceptions for the update dependencies task. These are now invalid and should be removed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DONE

compile 'org.apache.pdfbox:xmpbox:2.0.8'

// required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635
compile 'org.bouncycastle:bcprov-jdk15on:1.59'
Expand Down
71 changes: 34 additions & 37 deletions src/main/java/org/jabref/cli/XMPUtilMain.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import java.io.FileReader;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
Expand All @@ -23,8 +23,8 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.preferences.JabRefPreferences;

import org.apache.jempbox.impl.XMLUtil;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.xml.XmpSerializer;

public class XMPUtilMain {

Expand Down Expand Up @@ -65,12 +65,10 @@ public static void main(String[] args) throws IOException, TransformerException
XMPPreferences xmpPreferences = Globals.prefs.getXMPPreferences();
ImportFormatPreferences importFormatPreferences = Globals.prefs.getImportFormatPreferences();

switch (args.length) {
case 0:
int argsLength = args.length;
if (argsLength == 0) {
usage();
break;
case 1:

} else if (argsLength == 1) {
if (args[0].endsWith(".pdf")) {
// Read from pdf and write as BibTex
List<BibEntry> l = XMPUtil.readXMP(new File(args[0]), xmpPreferences);
Expand All @@ -92,63 +90,62 @@ public static void main(String[] args) throws IOException, TransformerException

if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
System.out.println(XMPUtil.toXMP(entries, result.getDatabase(), xmpPreferences));
}
}
} else {
usage();
}
break;
case 2:
} else if (argsLength == 2) {
if ("-x".equals(args[0]) && args[1].endsWith(".pdf")) {
// Read from pdf and write as BibTex
Optional<XMPMetadata> meta = XMPUtil.readRawXMP(new File(args[1]));

if (meta.isPresent()) {
XMLUtil.save(meta.get().getXMPDocument(), System.out, StandardCharsets.UTF_8.name());
XmpSerializer serializer = new XmpSerializer();
serializer.serialize(meta.get(), System.out, true);
} else {
System.err.println("The given pdf does not contain any XMP-metadata.");
}
break;
return;
}

if (args[0].endsWith(".bib") && args[1].endsWith(".pdf")) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(new FileReader(args[0]));
try (FileReader reader = new FileReader(args[0])) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure, but does the parser accept an InputStream? Then you could use Files.newInputStream...
That would actually be preferable to a FileReader.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, there is no overloaded parse method that accepts an InputStream.


Collection<BibEntry> entries = result.getDatabase().getEntries();
Collection<BibEntry> entries = result.getDatabase().getEntries();

if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
XMPUtil.writeXMP(new File(args[1]), entries, result.getDatabase(), false, xmpPreferences);
System.out.println("XMP written.");
if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
XMPUtil.writeXMP(new File(args[1]), entries, result.getDatabase(), false, xmpPreferences);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this also be a Path?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. The parameter is a File; sometimes passing the file path as a String is also possible.

System.out.println("XMP written.");
}
}
break;
return;
}

usage();
break;
case 3:
} else if (argsLength == 3) {
if (!args[1].endsWith(".bib") && !args[2].endsWith(".pdf")) {
usage();
break;
return;
}

ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(new FileReader(args[1]));
try (FileReader reader = new FileReader(args[1])) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);

Optional<BibEntry> bibEntry = result.getDatabase().getEntryByKey(args[0]);
Optional<BibEntry> bibEntry = result.getDatabase().getEntryByKey(args[0]);

if (bibEntry.isPresent()) {
XMPUtil.writeXMP(new File(args[2]), bibEntry.get(), result.getDatabase(), xmpPreferences);
if (bibEntry.isPresent()) {
XMPUtil.writeXMP(Paths.get(args[2]), bibEntry.get(), result.getDatabase(), xmpPreferences);

System.out.println("XMP written.");
} else {
System.err.println("Could not find BibEntry " + args[0] + " in " + args[0]);
System.out.println("XMP written.");
} else {
System.err.println("Could not find BibEntry " + args[0] + " in " + args[0]);
}
}
break;

default:
} else {
usage();
}
}
Expand All @@ -167,13 +164,13 @@ private static void usage() {
System.out.println("Read from PDF and print raw XMP:");
System.out.println(" xmpUtil -x <pdf>");
System.out
.println("Write the entry in <bib> given by <key> to the PDF:");
.println("Write the entry in <bib> given by <key> to the PDF:");
System.out.println(" xmpUtil <key> <bib> <pdf>");
System.out.println("Write all entries in <bib> to the PDF:");
System.out.println(" xmpUtil <bib> <pdf>");
System.out.println("");
System.out
.println("To report bugs visit https://issues.jabref.org");
.println("To report bugs visit https://issues.jabref.org");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
import javafx.embed.swing.SwingFXUtils;
import javafx.scene.image.Image;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;

/**
* Represents the view model of a pdf page backed by a {@link PDPage}.
Expand All @@ -18,10 +21,12 @@ public class PdfDocumentPageViewModel extends DocumentPageViewModel {

private final PDPage page;
private final int pageNumber;
private final PDDocument document;

public PdfDocumentPageViewModel(PDPage page, int pageNumber) {
public PdfDocumentPageViewModel(PDPage page, int pageNumber, PDDocument document) {
this.page = Objects.requireNonNull(page);
this.pageNumber = pageNumber;
this.document = document;
}

// Taken from http://stackoverflow.com/a/9417836/873661
Expand All @@ -37,10 +42,12 @@ private static BufferedImage resize(BufferedImage img, int newWidth, int newHeig
}

@Override
// Taken from https://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images
public Image render(int width, int height) {
PDFRenderer renderer = new PDFRenderer(document);
try {
int resolution = 96;
BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 2 * resolution);
BufferedImage image = renderer.renderImageWithDPI(pageNumber, 2 * resolution, ImageType.RGB);
return SwingFXUtils.toFXImage(resize(image, width, height), null);
} catch (IOException e) {
// TODO: LOG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import javafx.collections.ObservableList;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;

public class PdfDocumentViewModel extends DocumentViewModel {

Expand All @@ -21,13 +21,12 @@ public PdfDocumentViewModel(PDDocument document) {

@Override
public ObservableList<DocumentPageViewModel> getPages() {
@SuppressWarnings("unchecked")
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
PDPageTree pages = document.getDocumentCatalog().getPages();

// There is apparently no neat way to get the page number from a PDPage...thus this old-style for loop
List<PdfDocumentPageViewModel> pdfPages = new ArrayList<>();
for (int i = 0; i < pages.size(); i++) {
pdfPages.add(new PdfDocumentPageViewModel(pages.get(i), i + 1));
// There is apparently no neat way to get the page number from a PDPage...thus this old-style for loop
for (int i = 0; i < pages.getCount(); i++) {
pdfPages.add(new PdfDocumentPageViewModel(pages.get(i), i + 1, document));
}
return FXCollections.observableArrayList(pdfPages);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public void run() {
for (Path file : files) {
if (Files.exists(file)) {
try {
XMPUtil.writeXMP(file.toFile(), entry, database, Globals.prefs.getXMPPreferences());
XMPUtil.writeXMP(file, entry, database, Globals.prefs.getXMPPreferences());
SwingUtilities.invokeLater(
() -> optDiag.getProgressArea().append(" " + Localization.lang("OK") + ".\n"));
entriesChanged++;
Expand Down
69 changes: 0 additions & 69 deletions src/main/java/org/jabref/gui/importer/EntryFromPDFCreator.java
Original file line number Diff line number Diff line change
@@ -1,28 +1,18 @@
package org.jabref.gui.importer;

import java.io.File;
import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Optional;

import org.jabref.Globals;
import org.jabref.JabRefGUI;
import org.jabref.gui.IconTheme;
import org.jabref.gui.externalfiletype.ExternalFileType;
import org.jabref.gui.externalfiletype.ExternalFileTypes;
import org.jabref.logic.xmp.XMPUtil;
import org.jabref.model.entry.BibEntry;
import org.jabref.pdfimport.PdfImporter;
import org.jabref.pdfimport.PdfImporter.ImportPdfFilesResult;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;

/**
* Uses XMPUtils to get one BibEntry for a PDF-File.
* Also imports the non-XMP Data (PDDocument-Information) using XMPUtil.getBibtexEntryFromDocumentInformation.
Expand Down Expand Up @@ -68,65 +58,6 @@ protected Optional<BibEntry> createBibtexEntry(File pdfFile) {
return Optional.empty();
}

/*addEntryDataFromPDDocumentInformation(pdfFile, entry);
addEntryDataFromXMP(pdfFile, entry);

if (entry.getField(FieldName.TITLE) == null) {
entry.setField(FieldName.TITLE, pdfFile.getName());
}

return entry;*/
}

/**
 * Adds entry data read from the PDDocument information of the file.
 * <p>
 * The document information is converted to a {@link BibEntry} via
 * {@code XMPUtil.getBibtexEntryFromDocumentInformation} and merged into {@code entry}
 * with {@code addEntryDataToEntry}. If the PDF carries a creation date, that date is
 * appended (ISO-8601, {@code yyyy-MM-dd}) to the configured timestamp field. If the custom
 * metadata value {@code bibtex/bibtexkey} is present, it is used as the id of the entry.
 * <p>
 * I/O problems while reading the PDF are deliberately ignored: in that case simply no data
 * is added.
 *
 * @param pdfFile the PDF file whose document information is read
 * @param entry   the entry that receives the data
 */
private void addEntryDataFromPDDocumentInformation(File pdfFile, BibEntry entry) {
    try (PDDocument document = PDDocument.load(pdfFile.getAbsoluteFile())) {
        PDDocumentInformation pdfDocInfo = document.getDocumentInformation();
        if (pdfDocInfo == null) {
            return;
        }

        Optional<BibEntry> entryDI = XMPUtil.getBibtexEntryFromDocumentInformation(pdfDocInfo);
        if (!entryDI.isPresent()) {
            return;
        }
        addEntryDataToEntry(entry, entryDI.get());

        Calendar creationDate = pdfDocInfo.getCreationDate();
        if (creationDate != null) {
            // default time stamp follows ISO-8601. Reason: https://xkcd.com/1179/
            // Read the actual field values from the calendar; Calendar.YEAR etc. are only
            // field indices. Calendar months are zero-based, LocalDate months are one-based.
            String date = LocalDate.of(
                    creationDate.get(Calendar.YEAR),
                    creationDate.get(Calendar.MONTH) + 1,
                    creationDate.get(Calendar.DAY_OF_MONTH))
                    .format(DateTimeFormatter.ISO_LOCAL_DATE);
            appendToField(entry, Globals.prefs.getTimestampPreferences().getTimestampField(), date);
        }

        String bibtexKey = pdfDocInfo.getCustomMetadataValue("bibtex/bibtexkey");
        if (bibtexKey != null) {
            entry.setId(bibtexKey);
        }
    } catch (IOException ignored) {
        // no canceling here, just no data added.
    }
}

/**
 * Merges the data of every entry found in the XMP metadata of the given file into the
 * given entry. This was implemented without having much knowledge of the XMP format.
 * <p>
 * Reading is best effort: if the file cannot be read, the entry is left unchanged.
 *
 * @param aFile the file whose XMP metadata is read
 * @param entry the entry that receives the data
 */
private void addEntryDataFromXMP(File aFile, BibEntry entry) {
    File absoluteFile = aFile.getAbsoluteFile();
    try {
        addEntrysToEntry(entry, XMPUtil.readXMP(absoluteFile, Globals.prefs.getXMPPreferences()));
    } catch (IOException e) {
        // no canceling here, just no data added.
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.jabref.logic.importer.fileformat;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.Charset;
Expand Down Expand Up @@ -30,7 +29,7 @@

import com.google.common.base.Strings;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripper;

/**
* PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry.
Expand Down Expand Up @@ -205,8 +204,7 @@ public ParserResult importDatabase(String data) throws IOException {
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
final ArrayList<BibEntry> result = new ArrayList<>(1);
try (FileInputStream fileStream = new FileInputStream(filePath.toFile());
PDDocument document = XMPUtil.loadWithAutomaticDecryption(fileStream)) {
try (PDDocument document = XMPUtil.loadWithAutomaticDecryption(filePath.toFile())) {
String firstPageContents = getFirstPageContents(document);

Optional<DOI> doi = DOI.findInText(firstPageContents);
Expand Down
Loading