Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[WIP] Use CERMINE as PDF parser #2474

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ repositories {
maven {
url 'https://oss.sonatype.org/content/groups/public'
}
// NOTE(review): presumably added so that the pl.edu.icm.cermine dependency can be resolved — confirm.
// The URL uses plain HTTP; check whether the host also serves this repository over HTTPS.
maven {
url 'http://maven.icm.edu.pl/artifactory/repo'
}
}

configurations {
Expand All @@ -81,6 +84,8 @@ dependencies {
compile 'org.apache.pdfbox:fontbox:1.8.13'
compile 'org.apache.pdfbox:jempbox:1.8.13'

compile 'pl.edu.icm.cermine:cermine-impl:1.11'

// required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635
compile 'org.bouncycastle:bcprov-jdk15on:1.55'

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package net.sf.jabref.logic.bibtexkeypattern;

import java.util.Collections;

import net.sf.jabref.model.bibtexkeypattern.GlobalBibtexKeyPattern;

public class BibtexKeyPatternPreferences {
Expand All @@ -12,6 +14,16 @@ public class BibtexKeyPatternPreferences {
private final GlobalBibtexKeyPattern keyPattern;
private Character keywordDelimiter;

/**
 * Creates preferences populated with built-in defaults: an empty key-pattern regex and
 * replacement, all boolean options switched off, a global key pattern built from an empty
 * list, and ',' as the keyword delimiter.
 */
public BibtexKeyPatternPreferences() {
    // Boolean switches: everything disabled.
    this.enforceLegalKey = false;
    this.firstLetterA = false;
    this.alwaysAddLetter = false;

    // Textual settings: no regex rewriting, comma-delimited keywords.
    this.keyPatternReplacement = "";
    this.keyPatternRegex = "";
    this.keywordDelimiter = ',';

    // Global pattern constructed from an empty list.
    this.keyPattern = new GlobalBibtexKeyPattern(Collections.emptyList());
}

public BibtexKeyPatternPreferences(String keyPatternRegex, String keyPatternReplacement, boolean alwaysAddLetter,
boolean firstLetterA, boolean enforceLegalKey, GlobalBibtexKeyPattern keyPattern,
Character keywordDelimiter) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
package net.sf.jabref.logic.formatter.bibtexfields;

import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.temporal.TemporalAccessor;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.model.cleanup.Formatter;
import net.sf.jabref.model.entry.Date;

/**
* This class transforms date to the format yyyy-mm-dd or yyyy-mm..
Expand All @@ -34,13 +30,8 @@ public String getKey() {
*/
@Override
public String format(String value) {
// NOTE(review): this text is a rendered diff without +/- markers. Going by the hunk header
// (-34,13 +30,8), the next seven lines are the pre-change body: parse via tryParseDate, return
// the input unchanged when nothing matched, otherwise format the result as "uuuu-MM[-dd]".
Optional<TemporalAccessor> parsedDate = tryParseDate(value);
if (!parsedDate.isPresent()) {
return value;
}

DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("uuuu-MM[-dd]");
return dateFormatter.format(parsedDate.get());
// NOTE(review): the two lines below appear to be the post-change body: delegate parsing to
// Date.parse and return Date::getNormalized of the result, or the unmodified input when the
// returned Optional is empty.
Optional<Date> parsedDate = Date.parse(value);
return parsedDate.map(Date::getNormalized).orElse(value);
}

@Override
Expand All @@ -53,29 +44,6 @@ public String getExampleInput() {
return "29.11.2003";
}

/**
 * Attempts to parse the given text with a fixed list of date patterns, in this order:
 * <ul>
 * <li>"uuuu-M-d" (e.g. 2009-1-15) and "uuuu-M"</li>
 * <li>"M/uu" and "M/uuuu" (e.g. 9/15, 9/2015, 09/2015)</li>
 * <li>"MMMM d, uuuu" and "MMMM, uuuu" (e.g. September 1, 2015 and September, 2015)</li>
 * <li>"d.M.uuuu" (e.g. 15.1.2015)</li>
 * <li>"uuuu.M.d" (e.g. 2015.1.15)</li>
 * </ul>
 * The first pattern that parses the whole string wins. No locale is passed to the formatter,
 * so month names are matched using the JVM's default locale.
 * <p>
 * The code is essentially taken from
 * http://stackoverflow.com/questions/4024544/how-to-parse-dates-in-multiple-formats-using-simpledateformat.
 *
 * @param dateString the text to parse
 * @return the parsed temporal fields, or an empty Optional if no pattern matched
 */
private Optional<TemporalAccessor> tryParseDate(String dateString) {
    List<String> candidatePatterns = Arrays.asList(
            "uuuu-M-d", "uuuu-M",
            "M/uu", "M/uuuu",
            "MMMM d, uuuu", "MMMM, uuuu",
            "d.M.uuuu", "uuuu.M.d");

    Optional<TemporalAccessor> parsed = Optional.empty();
    for (int i = 0; (i < candidatePatterns.size()) && !parsed.isPresent(); i++) {
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(candidatePatterns.get(i));
        try {
            parsed = Optional.of(formatter.parse(dateString));
        } catch (DateTimeParseException ignored) {
            // This pattern does not match; move on to the next candidate.
        }
    }

    return parsed;
}

@Override
public int hashCode() {
return defaultHashCode();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package net.sf.jabref.logic.importer;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Set;

import net.sf.jabref.logic.bibtex.FieldContentParserPreferences;
Expand All @@ -16,6 +18,15 @@ public class ImportFormatPreferences {
private final FieldContentParserPreferences fieldContentParserPreferences;
private final boolean keywordSyncEnabled;

/**
 * Creates import preferences populated with built-in defaults: no custom importers, UTF-8 as
 * encoding, ',' as keyword separator, keyword synchronisation switched off, and nested
 * preference objects created through their own no-argument constructors.
 */
public ImportFormatPreferences() {
    // Simple scalar defaults.
    this.keywordSyncEnabled = false;
    this.keywordSeparator = ',';
    this.encoding = StandardCharsets.UTF_8;
    this.customImportList = Collections.emptySet();

    // Nested preference objects, each built with its own defaults.
    this.bibtexKeyPatternPreferences = new BibtexKeyPatternPreferences();
    this.fieldContentParserPreferences = new FieldContentParserPreferences();
}

public ImportFormatPreferences(Set<CustomImporter> customImportList, Charset encoding, Character keywordSeparator,
BibtexKeyPatternPreferences bibtexKeyPatternPreferences,
FieldContentParserPreferences fieldContentParserPreferences, boolean keywordSyncEnabled) {
Expand Down
24 changes: 15 additions & 9 deletions src/main/java/net/sf/jabref/logic/importer/ParserResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@ public class ParserResult {

private static final ParserResult NULL_RESULT = new ParserResult(null, null, null);
private final BibDatabase base;
private MetaData metaData;
private final Map<String, EntryType> entryTypes;
private BibDatabaseContext bibDatabaseContext;

private File file;
private final List<String> warnings = new ArrayList<>();
private final List<String> duplicateKeys = new ArrayList<>();

private MetaData metaData;
private BibDatabaseContext bibDatabaseContext;
private File file;
private String errorMessage;

private boolean invalid;
Expand Down Expand Up @@ -61,6 +59,18 @@ public static ParserResult fromErrorMessage(String message) {
return parserResult;
}

/**
 * Returns the shared NULL_RESULT instance. Every call yields the same object, which is what
 * isNullResult() checks for by identity.
 *
 * @return the NULL_RESULT constant
 */
public static ParserResult getNullResult() {
return NULL_RESULT;
}

/**
 * Builds a result describing a failure from the given exception by handing the exception's
 * localized message to fromErrorMessage(String).
 *
 * @param exception the failure to report; its localized message is read immediately
 * @return whatever fromErrorMessage(String) produces for that message
 */
public static ParserResult fromError(Exception exception) {
    String localizedMessage = exception.getLocalizedMessage();
    return fromErrorMessage(localizedMessage);
}

/**
 * Creates a new ParserResult for a single entry by passing a singleton collection that
 * contains only that entry to the ParserResult constructor.
 *
 * @param entry the entry to wrap
 * @return a new ParserResult built from the one-element collection
 */
public static ParserResult fromEntry(BibEntry entry) {
return new ParserResult(Collections.singleton(entry));
}

/**
* Check if this base is marked to be added to the currently open tab. Default is false.
*
Expand Down Expand Up @@ -181,8 +191,4 @@ public boolean hasDatabaseContext() {
/**
 * Tells whether this object is the shared NULL_RESULT instance. The check is an identity
 * comparison (==), not an equals() comparison.
 *
 * @return true if this is the NULL_RESULT constant, false otherwise
 */
public boolean isNullResult() {
return this == NULL_RESULT;
}

/**
 * Returns the shared NULL_RESULT instance; every call yields the same object.
 * NOTE(review): an identical method also appears earlier in this diff view — presumably this
 * is the old position of a moved method; confirm only one copy remains in the file.
 *
 * @return the NULL_RESULT constant
 */
public static ParserResult getNullResult() {
return NULL_RESULT;
}
}
Loading