Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix expansion of bracketed expressions in RegExpBasedFileFinder #7338

Merged
merged 18 commits into from
Jan 24, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where the option "Move file to file directory" was disabled in the entry editor for all files [#7194](https://github.com/JabRef/jabref/issues/7194)
- We fixed an issue where application dialogs were opening in the wrong display when using multiple screens [#7273](https://github.com/JabRef/jabref/pull/7273)
- We fixed an issue where an exception would be displayed for previewing and preferences when a custom theme has been configured but is missing [#7177](https://github.com/JabRef/jabref/issues/7177)
- We fixed an issue where the regex-based file search misinterpreted specific symbols [#4342](https://github.com/JabRef/jabref/issues/4342)
- We fixed an issue where the Harvard RTF exporter used the wrong default file extension. [#4508](https://github.com/JabRef/jabref/issues/4508)
- We fixed an issue where the Harvard RTF exporter did not use the new authors formatter and therefore did not export "organization" authors correctly. [#4508](https://github.com/JabRef/jabref/issues/4508)
- We fixed an issue where the field `urldate` was not exported to the corresponding fields `YearAccessed`, `MonthAccessed`, `DayAccessed` in MS Office XML [#7354](https://github.com/JabRef/jabref/issues/7354)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public static String expandBrackets(String pattern, Character keywordDelimiter,
* @param database The {@link BibDatabase} for field resolving. May be null.
* @return a function accepting a bracketed expression and returning the result of expanding it
*/
private static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
public static Function<String, String> expandBracketContent(Character keywordDelimiter, BibEntry entry, BibDatabase database) {
return (String bracket) -> {
String expandedPattern;
List<String> fieldParts = parseFieldAndModifiers(bracket);
Expand Down
77 changes: 40 additions & 37 deletions src/main/java/org/jabref/logic/util/io/RegExpBasedFileFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.jabref.logic.citationkeypattern.BracketedPattern;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

Expand All @@ -28,7 +27,6 @@ class RegExpBasedFileFinder implements FileFinder {

private static final Pattern ESCAPE_PATTERN = Pattern.compile("([^\\\\])\\\\([^\\\\])");

private static final Pattern SQUARE_BRACKETS_PATTERN = Pattern.compile("\\[.*?\\]");
private final String regExp;
private final Character keywordDelimiter;

Expand All @@ -41,21 +39,41 @@ class RegExpBasedFileFinder implements FileFinder {
}

/**
* Takes a string that contains bracketed expression and expands each of these using getFieldAndFormat.
* <p>
* Unknown Bracket expressions are silently dropped.
* Creates a Pattern that matches the file name corresponding to the last element of {@code fileParts} with any bracketed patterns expanded.
*
* @throws IOException throws an IOException if a PatternSyntaxException occurs
*/
public static String expandBrackets(String bracketString, BibEntry entry, BibDatabase database,
Character keywordDelimiter) {
Matcher matcher = SQUARE_BRACKETS_PATTERN.matcher(bracketString);
StringBuilder expandedStringBuffer = new StringBuilder();
while (matcher.find()) {
String replacement = BracketedPattern.expandBrackets(matcher.group(), keywordDelimiter, entry, database);
matcher.appendReplacement(expandedStringBuffer, replacement);
private Pattern createFileNamePattern(String[] fileParts, String extensionRegExp, BibEntry entry) throws IOException {
// Protect the extension marker so that it isn't treated as a bracketed pattern
String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER);

// We need to supply a custom function to deal with the content of a bracketed expression and expandBracketContent is the default function
Function<String, String> expandBracket = BracketedPattern.expandBracketContent(keywordDelimiter, entry, null);
// but, we want to post-process the expanded content so that it can be used as a regex for finding a file name
Function<String, String> bracketToFileNameRegex = expandBracket.andThen(RegExpBasedFileFinder::toFileNameRegex);

String expandedBracketAsFileNameRegex = BracketedPattern.expandBrackets(filePart, bracketToFileNameRegex);

String fileNamePattern = expandedBracketAsFileNameRegex
.replaceAll(EXT_MARKER, extensionRegExp) // Replace the extension marker
.replaceAll("\\\\\\\\", "\\\\");
try {
return Pattern.compile('^' + fileNamePattern + '$', Pattern.CASE_INSENSITIVE);
} catch (PatternSyntaxException e) {
throw new IOException(String.format("There is a syntax error in the regular expression %s used to search for files", fileNamePattern), e);
}
matcher.appendTail(expandedStringBuffer);
}

return expandedStringBuffer.toString();
/**
 * Helper method for both exact matching (if the file name was not created by JabRef) and cleaned file name matching.
 * <p>
 * The returned fragment is meant to be embedded in a larger regular expression. When the cleaned content differs
 * from the expanded content, both alternatives are wrapped in a single non-capturing group: {@code |} has the
 * lowest precedence in a regex, so an ungrouped {@code (a)|(b)} would split the whole surrounding pattern
 * (anchors, literal prefix, extension) into two unrelated halves instead of offering two spellings of this part.
 *
 * @param expandedContent the expanded content of a bracketed expression
 * @return a String representation of a regex matching the expanded content or the expanded content cleaned for file name use
 */
private static String toFileNameRegex(String expandedContent) {
    String cleanedContent = FileNameCleaner.cleanFileName(expandedContent);
    // Quote so that regex metacharacters in the expanded content (e.g. "[A-Z]") are matched literally
    String exactMatch = Pattern.quote(expandedContent);
    if (expandedContent.equals(cleanedContent)) {
        return exactMatch;
    }
    return "(?:" + exactMatch + "|" + Pattern.quote(cleanedContent) + ")";
}

/**
Expand Down Expand Up @@ -142,9 +160,7 @@ private List<Path> findFile(final BibEntry entry, final Path directory, final St
}

for (int index = 0; index < (fileParts.length - 1); index++) {

String dirToProcess = fileParts[index];
dirToProcess = expandBrackets(dirToProcess, entry, null, keywordDelimiter);

if (dirToProcess.matches("^.:$")) { // Windows Drive Letter
actualDirectory = Path.of(dirToProcess + '/');
Expand Down Expand Up @@ -179,33 +195,20 @@ private List<Path> findFile(final BibEntry entry, final Path directory, final St
resultFiles.addAll(findFile(entry, path, restOfFileString, extensionRegExp));
}
} catch (UncheckedIOException ioe) {
throw new IOException(ioe);
throw ioe.getCause();
}
} // End process directory information
}

// Last step: check if the given file can be found in this directory
String filePart = fileParts[fileParts.length - 1].replace("[extension]", EXT_MARKER);
String filenameToLookFor = expandBrackets(filePart, entry, null, keywordDelimiter).replaceAll(EXT_MARKER, extensionRegExp);

try {
final Pattern toMatch = Pattern.compile('^' + filenameToLookFor.replaceAll("\\\\\\\\", "\\\\") + '$',
Pattern.CASE_INSENSITIVE);
BiPredicate<Path, BasicFileAttributes> matcher = (path, attributes) -> toMatch.matcher(path.getFileName().toString()).matches();
resultFiles.addAll(collectFilesWithMatcher(actualDirectory, matcher));
} catch (UncheckedIOException | PatternSyntaxException e) {
throw new IOException("Could not look for " + filenameToLookFor, e);
}

return resultFiles;
}

private List<Path> collectFilesWithMatcher(Path actualDirectory, BiPredicate<Path, BasicFileAttributes> matcher) {
Pattern toMatch = createFileNamePattern(fileParts, extensionRegExp, entry);
BiPredicate<Path, BasicFileAttributes> matcher = (path, attributes) -> toMatch.matcher(path.getFileName().toString()).matches();
try (Stream<Path> pathStream = Files.find(actualDirectory, 1, matcher, FileVisitOption.FOLLOW_LINKS)) {
return pathStream.collect(Collectors.toList());
} catch (UncheckedIOException | IOException ioe) {
return Collections.emptyList();
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
resultFiles.addAll(pathStream.collect(Collectors.toList()));
} catch (UncheckedIOException uncheckedIOException) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this even an UncheckedIOException?
If needed, the UncheckedIOException is just a wrapper for the IOException, so you could call `throw uncheckedIOException.getCause()`.

Copy link
Member Author

@k3KAW8Pnf7mkmdSMPHz27 k3KAW8Pnf7mkmdSMPHz27 Jan 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Frankly, I am not sure why an UncheckedIOException is caught in this part of the code. I don't have much experience with nio.* but my interpretation of the API is that the UncheckedIOException must be caught in the parts of the code that make use of the Path reference.
I don't know if there are any other potential issues with a lazily loaded file system walk. Based on DirectoryStream and Files.walk I'd guess it could be thrown if depth > 1 and there is a cycle, hence, not in this part of the code unless it is changed.

throw uncheckedIOException.getCause();
}
return resultFiles;
}

private boolean isSubDirectory(Path rootDirectory, Path path) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ void testYearAuthFirstPageFindFiles() throws Exception {
result);
}

@Test
void findAssociatedFilesFindFileContainingBracketsFromBracketedExpression() throws Exception {
var bibEntry = new BibEntry().withField(StandardField.TITLE, "Regexp from [A-Z]");

var extension = Collections.singletonList("pdf");
var directory = Collections.singletonList(Path.of(FILES_DIRECTORY));
RegExpBasedFileFinder fileFinder = new RegExpBasedFileFinder("[TITLE]\\\\.[extension]", ',');

List<Path> result = fileFinder.findAssociatedFiles(bibEntry, directory, extension);
assertEquals(Collections.singletonList(Path.of("src/test/resources/org/jabref/logic/importer/unlinkedFilesTestFolder/Regexp from [A-Z].pdf")),
result);
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
}

@Test
void testAuthorWithDiacritics() throws Exception {
// given
Expand Down Expand Up @@ -143,34 +156,4 @@ void testFindFileNonRecursive() throws Exception {
// then
assertTrue(result.isEmpty());
}

@Test
void testExpandBrackets() {
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved

assertEquals("", RegExpBasedFileFinder.expandBrackets("", entry, database, ','));

assertEquals("dropped", RegExpBasedFileFinder.expandBrackets("drop[unknownkey]ped", entry, database,
','));

assertEquals("Eric von Hippel and Georg von Krogh",
RegExpBasedFileFinder.expandBrackets("[author]", entry, database, ','));

assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.",
RegExpBasedFileFinder.expandBrackets("[author] are two famous authors.", entry, database,
','));

assertEquals("Eric von Hippel and Georg von Krogh are two famous authors.",
RegExpBasedFileFinder.expandBrackets("[author] are two famous authors.", entry, database,
','));

assertEquals(
"Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private-Collective\" Innovation Model: Issues for Organization Science in Organization Science.",
RegExpBasedFileFinder.expandBrackets("[author] have published [fulltitle] in [journal].", entry, database,
','));

assertEquals(
"Eric von Hippel and Georg von Krogh have published Open Source Software and the \"Private Collective\" Innovation Model: Issues for Organization Science in Organization Science.",
RegExpBasedFileFinder.expandBrackets("[author] have published [title] in [journal].", entry, database,
','));
}
}