diff --git a/CHANGELOG.md b/CHANGELOG.md
index 93532bbfa36..806c9ea2636 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -131,6 +131,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- We fixed an issue where the same menu for changing entry type had two different sizes and weights. [#4977](https://github.com/JabRef/jabref/issues/4977)
- We fixed an issue where the "Attach file" dialog, in the right-click menu for an entry, started on the working directory instead of the user's main directory. [#4995](https://github.com/JabRef/jabref/issues/4995)
- We fixed an issue where the JabRef Icon in the macOS launchpad was not displayed correctly [#5003](https://github.com/JabRef/jabref/issues/5003)
+- We fixed an issue where the "Search for unlinked local files" feature would throw an exception when parsing the content of a PDF file with missing "series" information [#5128](https://github.com/JabRef/jabref/issues/5128)
### Removed
diff --git a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
index da14ac8a3c3..0ff9114e7d3 100644
--- a/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
+++ b/src/main/java/org/jabref/logic/importer/fileformat/PdfContentImporter.java
@@ -18,6 +18,7 @@
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fetcher.DoiFetcher;
import org.jabref.logic.l10n.Localization;
+import org.jabref.logic.util.OS;
import org.jabref.logic.util.StandardFileType;
import org.jabref.logic.xmp.EncryptedPdfsNotSupportedException;
import org.jabref.logic.xmp.XmpUtilReader;
@@ -27,6 +28,7 @@
import org.jabref.model.entry.StandardEntryType;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
+import org.jabref.model.strings.StringUtil;
import com.google.common.base.Strings;
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -37,7 +39,6 @@
*
* Currently, Springer and IEEE formats are supported.
*
- * Integrating XMP support is future work
*/
public class PdfContentImporter extends Importer {
@@ -50,11 +51,11 @@ public class PdfContentImporter extends Importer {
private String curString;
private String year;
-
public PdfContentImporter(ImportFormatPreferences importFormatPreferences) {
this.importFormatPreferences = importFormatPreferences;
}
+
/**
* Removes all non-letter characters at the end
*
@@ -64,7 +65,7 @@ public PdfContentImporter(ImportFormatPreferences importFormatPreferences) {
* TODO: Additionally replace multiple subsequent spaces by one space, which will cause a rename of this method
*
*/
- private static String removeNonLettersAtEnd(String input) {
+ private String removeNonLettersAtEnd(String input) {
String result = input.trim();
if (result.isEmpty()) {
return result;
@@ -82,7 +83,7 @@ private static String removeNonLettersAtEnd(String input) {
return result;
}
- private static String streamlineNames(String names) {
+ private String streamlineNames(String names) {
// TODO: replace with NormalizeNamesFormatter?!
String res;
// supported formats:
@@ -163,7 +164,7 @@ private static String streamlineNames(String names) {
res = res.concat(" and ");
}
if ("et".equalsIgnoreCase(splitNames[i]) && (splitNames.length > (i + 1))
- && "al.".equalsIgnoreCase(splitNames[i + 1])) {
+ && "al.".equalsIgnoreCase(splitNames[i + 1])) {
res = res.concat("others");
break;
} else {
@@ -178,7 +179,7 @@ private static String streamlineNames(String names) {
return res;
}
- private static String streamlineTitle(String title) {
+ private String streamlineTitle(String title) {
return removeNonLettersAtEnd(title);
}
@@ -190,17 +191,15 @@ public boolean isRecognizedFormat(BufferedReader input) throws IOException {
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
Objects.requireNonNull(reader);
- throw new UnsupportedOperationException(
- "PdfContentImporter does not support importDatabase(BufferedReader reader)."
- + "Instead use importDatabase(Path filePath, Charset defaultEncoding).");
+ throw new UnsupportedOperationException("PdfContentImporter does not support importDatabase(BufferedReader reader)."
+ + "Instead use importDatabase(Path filePath, Charset defaultEncoding).");
}
@Override
public ParserResult importDatabase(String data) throws IOException {
Objects.requireNonNull(data);
- throw new UnsupportedOperationException(
- "PdfContentImporter does not support importDatabase(String data)."
- + "Instead use importDatabase(Path filePath, Charset defaultEncoding).");
+ throw new UnsupportedOperationException("PdfContentImporter does not support importDatabase(String data)."
+ + "Instead use importDatabase(Path filePath, Charset defaultEncoding).");
}
@Override
@@ -218,280 +217,297 @@ public ParserResult importDatabase(Path filePath, Charset defaultEncoding) {
return parserResult;
}
- // idea: split[] contains the different lines
- // blocks are separated by empty lines
- // treat each block
- // or do special treatment at authors (which are not broken)
- // therefore, we do a line-based and not a block-based splitting
- // i points to the current line
- // curString (mostly) contains the current block
- // the different lines are joined into one and thereby separated by " "
- lines = firstPageContents.split(System.lineSeparator());
-
- lineIndex = 0; //to prevent array index out of bounds exception on second run we need to reset i to zero
-
- proceedToNextNonEmptyLine();
- if (lineIndex >= lines.length) {
- // PDF could not be parsed or is empty
- // return empty list
- return new ParserResult();
- }
+ Optional entry = getEntryFromPDFContent(firstPageContents, OS.NEWLINE);
+ entry.ifPresent(result::add);
- // we start at the current line
- curString = lines[lineIndex];
- // i might get incremented later and curString modified, too
- lineIndex = lineIndex + 1;
-
- String author;
- String editor = null;
- String abstractT = null;
- String keywords = null;
- String title;
- String conference = null;
- String DOI = null;
- String series = null;
- String volume = null;
- String number = null;
- String pages = null;
- // year is a class variable as the method extractYear() uses it;
- String publisher = null;
-
- EntryType type = StandardEntryType.InProceedings;
- if (curString.length() > 4) {
- // special case: possibly conference as first line on the page
- extractYear();
- if (curString.contains("Conference")) {
+ } catch (EncryptedPdfsNotSupportedException e) {
+ return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
+ } catch (IOException exception) {
+ return ParserResult.fromError(exception);
+ } catch (FetcherException e) {
+ return ParserResult.fromErrorMessage(e.getMessage());
+ }
+
+ result.forEach(entry -> entry.addFile(new LinkedFile("", filePath.toAbsolutePath().toString(), "PDF")));
+ return new ParserResult(result);
+ }
+
+ //make this method package visible so we can test it
+ Optional getEntryFromPDFContent(String firstpageContents, String lineSeparator) {
+
+ // idea: split[] contains the different lines
+ // blocks are separated by empty lines
+ // treat each block
+ // or do special treatment at authors (which are not broken)
+ // therefore, we do a line-based and not a block-based splitting
+ // i points to the current line
+ // curString (mostly) contains the current block
+ // the different lines are joined into one and thereby separated by " "
+
+ String firstpageContentsUnifiedLineBreaks = StringUtil.unifyLineBreaks(firstpageContents, lineSeparator);
+
+ lines = firstpageContentsUnifiedLineBreaks.split(lineSeparator);
+
+ lineIndex = 0; //to prevent array index out of bounds exception on second run we need to reset lineIndex to zero
+
+ proceedToNextNonEmptyLine();
+ if (lineIndex >= lines.length) {
+ // PDF could not be parsed or is empty
+ // return empty list
+ return Optional.empty();
+ }
+
+ // we start at the current line
+ curString = lines[lineIndex];
+ // i might get incremented later and curString modified, too
+ lineIndex = lineIndex + 1;
+
+ String author;
+ String editor = null;
+ String abstractT = null;
+ String keywords = null;
+ String title;
+ String conference = null;
+ String DOI = null;
+ String series = null;
+ String volume = null;
+ String number = null;
+ String pages = null;
+ // year is a class variable as the method extractYear() uses it;
+ String publisher = null;
+
+ EntryType type = StandardEntryType.InProceedings;
+ if (curString.length() > 4) {
+ // special case: possibly conference as first line on the page
+ extractYear();
+ if (curString.contains("Conference")) {
+ fillCurStringWithNonEmptyLines();
+ conference = curString;
+ curString = "";
+ } else {
+ // e.g. Copyright (c) 1998 by the Genetics Society of America
+ // future work: get year using RegEx
+ String lower = curString.toLowerCase(Locale.ROOT);
+ if (lower.contains("copyright")) {
fillCurStringWithNonEmptyLines();
- conference = curString;
+ publisher = curString;
curString = "";
- } else {
- // e.g. Copyright (c) 1998 by the Genetics Society of America
- // future work: get year using RegEx
- String lower = curString.toLowerCase(Locale.ROOT);
- if (lower.contains("copyright")) {
- fillCurStringWithNonEmptyLines();
- publisher = curString;
- curString = "";
- }
}
}
+ }
+
+ // start: title
+ fillCurStringWithNonEmptyLines();
+ title = streamlineTitle(curString);
+ curString = "";
+ //i points to the next non-empty line
- // start: title
- fillCurStringWithNonEmptyLines();
- title = streamlineTitle(curString);
- curString = "";
- //i points to the next non-empty line
-
- // after title: authors
- author = null;
- while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
- // author names are unlikely to be lines among different lines
- // treat them line by line
- curString = streamlineNames(lines[lineIndex]);
- if (author == null) {
- author = curString;
+ // after title: authors
+ author = null;
+ while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
+ // author names are unlikely to be lines among different lines
+ // treat them line by line
+ curString = streamlineNames(lines[lineIndex]);
+ if (author == null) {
+ author = curString;
+ } else {
+ if ("".equals(curString)) {
+ // if lines[i] is "and" then "" is returned by streamlineNames -> do nothing
} else {
- if ("".equals(curString)) {
- // if lines[i] is "and" then "" is returned by streamlineNames -> do nothing
- } else {
- author = author.concat(" and ").concat(curString);
- }
+ author = author.concat(" and ").concat(curString);
}
- lineIndex++;
}
- curString = "";
lineIndex++;
+ }
+ curString = "";
+ lineIndex++;
- // then, abstract and keywords follow
- while (lineIndex < lines.length) {
- curString = lines[lineIndex];
- if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
- if (curString.length() == "Abstract".length()) {
- // only word "abstract" found -- skip line
- curString = "";
- } else {
- curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
- }
- lineIndex++;
- // fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
- // whereas we need linebreak as separator
- while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
- curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator());
- lineIndex++;
- }
- abstractT = curString.trim();
- lineIndex++;
- } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
- if (curString.length() == "Keywords".length()) {
- // only word "Keywords" found -- skip line
- curString = "";
- } else {
- curString = curString.substring("Keywords".length() + 1).trim();
- }
+ // then, abstract and keywords follow
+ while (lineIndex < lines.length) {
+ curString = lines[lineIndex];
+ if ((curString.length() >= "Abstract".length()) && "Abstract".equalsIgnoreCase(curString.substring(0, "Abstract".length()))) {
+ if (curString.length() == "Abstract".length()) {
+ // only word "abstract" found -- skip line
+ curString = "";
+ } else {
+ curString = curString.substring("Abstract".length() + 1).trim().concat(System.lineSeparator());
+ }
+ lineIndex++;
+ // fillCurStringWithNonEmptyLines() cannot be used as that uses " " as line separator
+ // whereas we need linebreak as separator
+ while ((lineIndex < lines.length) && !"".equals(lines[lineIndex])) {
+ curString = curString.concat(lines[lineIndex]).concat(System.lineSeparator());
lineIndex++;
- fillCurStringWithNonEmptyLines();
- keywords = removeNonLettersAtEnd(curString);
+ }
+ abstractT = curString.trim();
+ lineIndex++;
+ } else if ((curString.length() >= "Keywords".length()) && "Keywords".equalsIgnoreCase(curString.substring(0, "Keywords".length()))) {
+ if (curString.length() == "Keywords".length()) {
+ // only word "Keywords" found -- skip line
+ curString = "";
} else {
- String lower = curString.toLowerCase(Locale.ROOT);
+ curString = curString.substring("Keywords".length() + 1).trim();
+ }
+ lineIndex++;
+ fillCurStringWithNonEmptyLines();
+ keywords = removeNonLettersAtEnd(curString);
+ } else {
+ String lower = curString.toLowerCase(Locale.ROOT);
- int pos = lower.indexOf("technical");
+ int pos = lower.indexOf("technical");
+ if (pos >= 0) {
+ type = StandardEntryType.TechReport;
+ pos = curString.trim().lastIndexOf(' ');
if (pos >= 0) {
- type = StandardEntryType.TechReport;
- pos = curString.trim().lastIndexOf(' ');
- if (pos >= 0) {
- // assumption: last character of curString is NOT ' '
- // otherwise pos+1 leads to an out-of-bounds exception
- number = curString.substring(pos + 1);
- }
+ // assumption: last character of curString is NOT ' '
+ // otherwise pos+1 leads to an out-of-bounds exception
+ number = curString.substring(pos + 1);
}
-
- lineIndex++;
- proceedToNextNonEmptyLine();
}
+
+ lineIndex++;
+ proceedToNextNonEmptyLine();
}
+ }
- lineIndex = lines.length - 1;
+ lineIndex = lines.length - 1;
- // last block: DOI, detailed information
- // sometimes, this information is in the third last block etc...
- // therefore, read until the beginning of the file
+ // last block: DOI, detailed information
+ // sometimes, this information is in the third last block etc...
+ // therefore, read until the beginning of the file
- while (lineIndex >= 0) {
- readLastBlock();
- // i now points to the block before or is -1
- // curString contains the last block, separated by " "
+ while (lineIndex >= 0) {
+ readLastBlock();
+ // i now points to the block before or is -1
+ // curString contains the last block, separated by " "
- extractYear();
+ extractYear();
- int pos = curString.indexOf("(Eds.)");
- if ((pos >= 0) && (publisher == null)) {
- // looks like a Springer last line
- // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
- publisher = "Springer";
- editor = streamlineNames(curString.substring(0, pos - 1));
- curString = curString.substring(pos + "(Eds.)".length() + 2); //+2 because of ":" after (Eds.) and the subsequent space
- String[] springerSplit = curString.split(", ");
- if (springerSplit.length >= 4) {
- conference = springerSplit[0];
+ int pos = curString.indexOf("(Eds.)");
+ if ((pos >= 0) && (publisher == null)) {
+ // looks like a Springer last line
+ // e.g: A. Persson and J. Stirna (Eds.): PoEM 2009, LNBIP 39, pp. 161-175, 2009.
+ publisher = "Springer";
+ editor = streamlineNames(curString.substring(0, pos - 1));
- String seriesData = springerSplit[1];
- int lastSpace = seriesData.lastIndexOf(' ');
- series = seriesData.substring(0, lastSpace);
- volume = seriesData.substring(lastSpace + 1);
+ int edslength = "(Eds.)".length();
+ int posWithEditor = pos + edslength + 2;//+2 because of ":" after (Eds.) and the subsequent space
+ if (posWithEditor > curString.length()) {
+ curString = curString.substring(posWithEditor - 2); //we don't have any spaces after Eds so we subtract the 2
+ } else {
+ curString = curString.substring(posWithEditor);
+ }
+ String[] springerSplit = curString.split(", ");
+ if (springerSplit.length >= 4) {
+ conference = springerSplit[0];
- pages = springerSplit[2].substring(4);
+ String seriesData = springerSplit[1];
+ int lastSpace = seriesData.lastIndexOf(' ');
+ series = seriesData.substring(0, lastSpace);
+ volume = seriesData.substring(lastSpace + 1);
- if (springerSplit[3].length() >= 4) {
- year = springerSplit[3].substring(0, 4);
- }
+ pages = springerSplit[2].substring(4);
+
+ if (springerSplit[3].length() >= 4) {
+ year = springerSplit[3].substring(0, 4);
}
- } else {
- if (DOI == null) {
- pos = curString.indexOf("DOI");
- if (pos < 0) {
- pos = curString.indexOf(StandardField.DOI.getName());
+ }
+ } else {
+ if (DOI == null) {
+ pos = curString.indexOf("DOI");
+ if (pos < 0) {
+ pos = curString.indexOf(StandardField.DOI.getName());
+ }
+ if (pos >= 0) {
+ pos += 3;
+ char delimiter = curString.charAt(pos);
+ if ((delimiter == ':') || (delimiter == ' ')) {
+ pos++;
}
- if (pos >= 0) {
- pos += 3;
- char delimiter = curString.charAt(pos);
- if ((delimiter == ':') || (delimiter == ' ')) {
- pos++;
- }
- int nextSpace = curString.indexOf(' ', pos);
- if (nextSpace > 0) {
- DOI = curString.substring(pos, nextSpace);
- } else {
- DOI = curString.substring(pos);
- }
+ int nextSpace = curString.indexOf(' ', pos);
+ if (nextSpace > 0) {
+ DOI = curString.substring(pos, nextSpace);
+ } else {
+ DOI = curString.substring(pos);
}
}
+ }
- if ((publisher == null) && curString.contains("IEEE")) {
- // IEEE has the conference things at the end
- publisher = "IEEE";
-
- // year is extracted by extractYear
- // otherwise, we could it determine as follows:
- // String yearStr = curString.substring(curString.length()-4);
- // if (isYear(yearStr)) {
- // year = yearStr;
- // }
-
- if (conference == null) {
- pos = curString.indexOf('$');
+ if ((publisher == null) && curString.contains("IEEE")) {
+ // IEEE has the conference things at the end
+ publisher = "IEEE";
+
+ // year is extracted by extractYear
+ // otherwise, we could it determine as follows:
+ // String yearStr = curString.substring(curString.length()-4);
+ // if (isYear(yearStr)) {
+ // year = yearStr;
+ // }
+
+ if (conference == null) {
+ pos = curString.indexOf('$');
+ if (pos > 0) {
+ // we found the price
+ // before the price, the ISSN is stated
+ // skip that
+ pos -= 2;
+ while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
+ pos--;
+ }
if (pos > 0) {
- // we found the price
- // before the price, the ISSN is stated
- // skip that
- pos -= 2;
- while ((pos >= 0) && (curString.charAt(pos) != ' ')) {
- pos--;
- }
- if (pos > 0) {
- conference = curString.substring(0, pos);
- }
+ conference = curString.substring(0, pos);
}
}
}
}
}
+ }
- BibEntry entry = new BibEntry();
- entry.setType(type);
+ BibEntry entry = new BibEntry();
+ entry.setType(type);
- // TODO: institution parsing missing
+ // TODO: institution parsing missing
- if (author != null) {
- entry.setField(StandardField.AUTHOR, author);
- }
- if (editor != null) {
- entry.setField(StandardField.EDITOR, editor);
- }
- if (abstractT != null) {
- entry.setField(StandardField.ABSTRACT, abstractT);
- }
- if (!Strings.isNullOrEmpty(keywords)) {
- entry.setField(StandardField.KEYWORDS, keywords);
- }
- if (title != null) {
- entry.setField(StandardField.TITLE, title);
- }
- if (conference != null) {
- entry.setField(StandardField.BOOKTITLE, conference);
- }
- if (DOI != null) {
- entry.setField(StandardField.DOI, DOI);
- }
- if (series != null) {
- entry.setField(StandardField.SERIES, series);
- }
- if (volume != null) {
- entry.setField(StandardField.VOLUME, volume);
- }
- if (number != null) {
- entry.setField(StandardField.NUMBER, number);
- }
- if (pages != null) {
- entry.setField(StandardField.PAGES, pages);
- }
- if (year != null) {
- entry.setField(StandardField.YEAR, year);
- }
- if (publisher != null) {
- entry.setField(StandardField.PUBLISHER, publisher);
- }
-
- result.add(entry);
- } catch (EncryptedPdfsNotSupportedException e) {
- return ParserResult.fromErrorMessage(Localization.lang("Decryption not supported."));
- } catch (IOException exception) {
- return ParserResult.fromError(exception);
- } catch (FetcherException e) {
- return ParserResult.fromErrorMessage(e.getMessage());
+ if (author != null) {
+ entry.setField(StandardField.AUTHOR, author);
}
-
- result.forEach(entry -> entry.addFile(new LinkedFile("", filePath.toAbsolutePath().toString(), "PDF")));
- return new ParserResult(result);
+ if (editor != null) {
+ entry.setField(StandardField.EDITOR, editor);
+ }
+ if (abstractT != null) {
+ entry.setField(StandardField.ABSTRACT, abstractT);
+ }
+ if (!Strings.isNullOrEmpty(keywords)) {
+ entry.setField(StandardField.KEYWORDS, keywords);
+ }
+ if (title != null) {
+ entry.setField(StandardField.TITLE, title);
+ }
+ if (conference != null) {
+ entry.setField(StandardField.BOOKTITLE, conference);
+ }
+ if (DOI != null) {
+ entry.setField(StandardField.DOI, DOI);
+ }
+ if (series != null) {
+ entry.setField(StandardField.SERIES, series);
+ }
+ if (volume != null) {
+ entry.setField(StandardField.VOLUME, volume);
+ }
+ if (number != null) {
+ entry.setField(StandardField.NUMBER, number);
+ }
+ if (pages != null) {
+ entry.setField(StandardField.PAGES, pages);
+ }
+ if (year != null) {
+ entry.setField(StandardField.YEAR, year);
+ }
+ if (publisher != null) {
+ entry.setField(StandardField.PUBLISHER, publisher);
+ }
+ return Optional.of(entry);
}
private String getFirstPageContents(PDDocument document) throws IOException {
diff --git a/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java b/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java
index 502d3b3ee6b..e7df979a3a1 100644
--- a/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java
+++ b/src/test/java/org/jabref/logic/importer/fileformat/PdfContentImporterTest.java
@@ -5,6 +5,7 @@
import java.nio.file.Paths;
import java.util.Collections;
import java.util.List;
+import java.util.Optional;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.util.StandardFileType;
@@ -35,8 +36,7 @@ void testsGetExtensions() {
@Test
void testGetDescription() {
- assertEquals(
- "PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
+ assertEquals("PdfContentImporter parses data of the first page of the PDF and creates a BibTeX entry. Currently, Springer and IEEE formats are supported.",
importer.getDescription());
}
@@ -62,4 +62,31 @@ void importTwiceWorksAsExpected() throws Exception {
assertEquals(Collections.singletonList(expected), resultSecondImport);
}
+ @Test
+ void testParsingEditorWithoutPagesorSeriesInformation() {
+
+ BibEntry entry = new BibEntry(StandardEntryType.InProceedings);
+ entry.setField(StandardField.AUTHOR, "Anke Lüdeling and Merja Kytö (Eds.)");
+ entry.setField(StandardField.EDITOR, "Anke Lüdeling and Merja Kytö");
+ entry.setField(StandardField.PUBLISHER, "Springer");
+ entry.setField(StandardField.TITLE, "Corpus Linguistics – An International Handbook – Lüdeling, Anke, Kytö, Merja (Eds.)");
+
+ String firstPageContents = "Corpus Linguistics – An International Handbook – Lüdeling, Anke,\n" +
+ "Kytö, Merja (Eds.)\n" +
+ "\n" +
+ "Anke Lüdeling, Merja Kytö (Eds.)\n" +
+ "\n" +
+ "VOLUME 2\n" +
+ "\n" +
+ "This handbook provides an up-to-date survey of the field of corpus linguistics, a Handbücher zur Sprach- und\n" +
+ "field whose methodology has revolutionized much of the empirical work done in Kommunikationswissenschaft / Handbooks\n" +
+ "\n" +
+ "of Linguistics and Communication Science\n" +
+ "most fields of linguistic study over the past decade. (HSK) 29/2\n" +
+ "\n" +
+ "vii, 578 pages\n" +
+ "Corpus linguistics investigates human language by starting out from large\n";
+
+ assertEquals(Optional.of(entry), importer.getEntryFromPDFContent(firstPageContents, "\n"));
+ }
}