Skip to content

Commit

Permalink
[core] Add a filter possibility to FileCollector
Browse files Browse the repository at this point in the history
Fixes pmd#4942
  • Loading branch information
adangel committed Apr 12, 2024
1 parent c472fdf commit 2bddfd2
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 14 deletions.
1 change: 1 addition & 0 deletions docs/pages/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ This is a {{ site.pmd.release_type }} release.
### 🐛 Fixed Issues
* core
* [#494](https://github.com/pmd/pmd/issues/494): \[core] Adopt JApiCmp to enforce control over API changes
* [#4942](https://github.com/pmd/pmd/issues/4942): \[core] CPD: `--skip-duplicate-files` has no effect (7.0.0 regression)
* cli
* [#4791](https://github.com/pmd/pmd/issues/4791): \[cli] Could not find or load main class
* [#4913](https://github.com/pmd/pmd/issues/4913): \[cli] cpd-gui closes immediately
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,20 @@

import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.sourceforge.pmd.AbstractConfiguration;
import net.sourceforge.pmd.cpd.CPDConfiguration;
import net.sourceforge.pmd.lang.document.FileCollector;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.InternalApiBridge;
Expand All @@ -36,6 +40,34 @@ private FileCollectionUtil() {

}

public static void collectFiles(CPDConfiguration cpdConfiguration, FileCollector collector) {
if (cpdConfiguration.isSkipDuplicates()) {
final Set<String> alreadyAddedFileNamesWithSize = new HashSet<>();
collector.setAddFileFilter(path -> {
if (!Files.isRegularFile(path)) {
// file is not a simple file, maybe inside a ZIP archive
// don't filter these out
LOG.debug("Path {} is not a regular file, skipping addFileFilter", path);
return true;
}

try {
String signature = path.getFileName() + "_" + Files.size(path);
if (!alreadyAddedFileNamesWithSize.add(signature)) {
LOG.info("Skipping {} since it appears to be a duplicate file and --skip-duplicate-files is set",
path);
return false;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
}
return true;
});
}

collectFiles((AbstractConfiguration) cpdConfiguration, collector);
}

public static void collectFiles(AbstractConfiguration configuration, FileCollector collector) {
if (configuration.getSourceEncoding() != null) {
collector.setCharset(configuration.getSourceEncoding());
Expand Down Expand Up @@ -77,23 +109,23 @@ public static void collectFiles(FileCollector collector, List<Path> filePaths) {
try {
addRoot(collector, rootLocation);
} catch (IOException e) {
collector.getReporter().errorEx("Error collecting " + rootLocation, e);
collector.getReporter().errorEx("Error collecting {0}", new Object[]{ rootLocation }, e);
}
}
}

public static void collectFileList(FileCollector collector, Path fileList) {
LOG.debug("Reading file list {}.", fileList);
if (!Files.exists(fileList)) {
collector.getReporter().error("No such file {}", fileList);
collector.getReporter().error("No such file {0}", fileList);
return;
}

List<Path> filePaths;
try {
filePaths = FileUtil.readFilelistEntries(fileList);
} catch (IOException e) {
collector.getReporter().errorEx("Error reading {}", new Object[] { fileList }, e);
collector.getReporter().errorEx("Error reading {0}", new Object[] { fileList }, e);
return;
}
collectFiles(collector, filePaths);
Expand Down Expand Up @@ -135,15 +167,15 @@ public static void collectDB(FileCollector collector, URI uri) {
String source = IOUtil.readToString(sourceCode);
collector.addSourceFile(FileId.fromPathLikeString(falseFilePath), source);
} catch (SQLException ex) {
collector.getReporter().warnEx("Cannot get SourceCode for {} - skipping ...",
collector.getReporter().warnEx("Cannot get SourceCode for {0} - skipping ...",
new Object[] { falseFilePath },
ex);
}
}
} catch (ClassNotFoundException e) {
collector.getReporter().errorEx("Cannot get files from DB - probably missing database JDBC driver", e);
} catch (Exception e) {
collector.getReporter().errorEx("Cannot get files from DB - ''{}''", new Object[] { uri }, e);
collector.getReporter().errorEx("Cannot get files from DB - ''{0}''", new Object[] { uri }, e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.ProviderNotFoundException;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
Expand All @@ -29,6 +30,7 @@
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -60,6 +62,7 @@ public final class FileCollector implements AutoCloseable {
private final FileId outerFsPath;
private boolean closed;
private boolean recursive = true;
private Predicate<Path> addFileFilter = file -> true;

// construction

Expand Down Expand Up @@ -206,6 +209,12 @@ && addFileImpl(TextFile.builderForCharSeq(sourceContents, fileId, version)

private boolean addFileImpl(TextFile textFile) {
LOG.trace("Adding file {} (lang: {}) ", textFile.getFileId().getAbsolutePath(), textFile.getLanguageVersion().getTerseName());

if (!addFileFilter.test(Paths.get(textFile.getFileId().getAbsolutePath()))) {
LOG.trace("File was skipped due to addFileFilter...");
return false;
}

if (allFilesToProcess.add(textFile)) {
return true;
}
Expand Down Expand Up @@ -377,6 +386,17 @@ public void setCharset(Charset charset) {
this.charset = Objects.requireNonNull(charset);
}

/**
* Sets an additional filter that is being called before adding the
* file to the list.
*
* @param addFileFilter the filter should return {@code true} if the file
* should be collected and analyzed.
* @throws NullPointerException if {@code addFileFilter} is {@code null}.
*/
public void setAddFileFilter(Predicate<Path> addFileFilter) {
this.addFileFilter = Objects.requireNonNull(addFileFilter);
}

// filtering

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,25 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang3.SystemUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledOnOs;
import org.junit.jupiter.api.condition.OS;
import org.junit.jupiter.api.io.TempDir;

import net.sourceforge.pmd.lang.DummyLanguageModule;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.TextFile;

/**
* Unit test for {@link CpdAnalysis}
Expand All @@ -29,14 +36,13 @@ class CpdAnalysisTest {
private static final String BASE_TEST_RESOURCE_PATH = "src/test/resources/net/sourceforge/pmd/cpd/files/";
private static final String TARGET_TEST_RESOURCE_PATH = "target/classes/net/sourceforge/pmd/cpd/files/";

@TempDir
private Path tempDir;

// Symlinks are not well supported under Windows - so the tests are
// simply executed only on linux.
private boolean canTestSymLinks = SystemUtils.IS_OS_UNIX;
CPDConfiguration config = new CPDConfiguration();
private CPDConfiguration config = new CPDConfiguration();

@BeforeEach
void setup() throws Exception {
void setup() {
config.setOnlyRecognizeLanguage(DummyLanguageModule.getInstance());
config.setMinimumTileSize(10);
}
Expand All @@ -49,8 +55,6 @@ void setup() throws Exception {
* any error
*/
private void prepareSymLinks() throws Exception {
assumeTrue(canTestSymLinks, "Skipping unit tests with symlinks.");

Runtime runtime = Runtime.getRuntime();
if (!new File(TARGET_TEST_RESOURCE_PATH, "symlink-for-real-file.txt").exists()) {
runtime.exec(new String[] { "ln", "-s", BASE_TEST_RESOURCE_PATH + "real-file.txt",
Expand All @@ -70,6 +74,7 @@ private void prepareSymLinks() throws Exception {
* any error
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testFileSectionWithBrokenSymlinks() throws Exception {
prepareSymLinks();

Expand All @@ -91,6 +96,7 @@ void testFileSectionWithBrokenSymlinks() throws Exception {
* any error
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testFileAddedAsSymlinkAndReal() throws Exception {
prepareSymLinks();

Expand All @@ -109,6 +115,7 @@ void testFileAddedAsSymlinkAndReal() throws Exception {
* A file should be not be added via a sym link.
*/
@Test
@EnabledOnOs(OS.LINUX) // Symlinks are not well supported under Windows
void testNoFileAddedAsSymlink() throws Exception {
prepareSymLinks();

Expand Down Expand Up @@ -180,6 +187,29 @@ void testFileOrderRelevance() throws Exception {
}
}

@Test
void duplicatedFilesShouldBeSkipped() throws IOException {
String filename = "file1.dummy";
Path aFile1 = Files.createDirectory(tempDir.resolve("a")).resolve(filename).toAbsolutePath();
Path bFile1 = Files.createDirectory(tempDir.resolve("b")).resolve(filename).toAbsolutePath();

Files.write(aFile1, "Same content".getBytes(StandardCharsets.UTF_8));
Files.write(bFile1, "Same content".getBytes(StandardCharsets.UTF_8));

config.setSkipDuplicates(true);
config.setInputPathList(Arrays.asList(tempDir));
try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
List<TextFile> collectedFiles = cpd.files().getCollectedFiles();
collectedFiles.stream().map(TextFile::getFileId).map(FileId::getAbsolutePath).forEach(System.out::println);
assertEquals(1, collectedFiles.size());

// the order of directory traversal is most likely not defined, so either one
// of the two files might be added, but not both
String collectedFile = collectedFiles.get(0).getFileId().getAbsolutePath();
assertTrue(collectedFile.equals(aFile1.toString()) || collectedFile.equals(bFile1.toString()));
}
}

/**
* Simple listener that fails, if too many files were added and not skipped.
*/
Expand Down

0 comments on commit 2bddfd2

Please sign in to comment.