diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/BoundaryChangesets.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/BoundaryChangesets.java new file mode 100644 index 00000000000..3fba74cce46 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/BoundaryChangesets.java @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import org.opengrok.indexer.logger.LoggerFactory; +import org.opengrok.indexer.util.Statistics; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Helper class to split sequence of VCS changesets into number of intervals. + * This is then used in {@link Repository#createCache(HistoryCache, String)} + * to store history in chunks, for VCS repositories that support this. + */ +public class BoundaryChangesets { + private int cnt = 0; + private final List result = new ArrayList<>(); + + private final int maxCount; + private final RepositoryWithPerPartesHistory repository; + + private static final Logger LOGGER = LoggerFactory.getLogger(BoundaryChangesets.class); + + public BoundaryChangesets(RepositoryWithPerPartesHistory repository) { + this.repository = repository; + this.maxCount = repository.getPerPartesCount(); + if (maxCount <= 1) { + throw new RuntimeException(String.format("per partes count for repository ''%s'' " + + "must be stricly greater than 1", repository.getDirectoryName())); + } + } + + private void reset() { + cnt = 0; + result.clear(); + } + + /** + * @param sinceRevision start revision ID + * @return immutable list of revision IDs denoting the intervals + * @throws HistoryException if there is problem traversing the changesets in the repository + */ + public synchronized List getBoundaryChangesetIDs(String sinceRevision) throws HistoryException { + reset(); + + LOGGER.log(Level.FINE, "getting boundary changesets for ''{0}''", repository.getDirectoryName()); + Statistics stat = new Statistics(); + + repository.accept(sinceRevision, this::visit); + + // The changesets need to go from oldest to newest. + Collections.reverse(result); + + stat.report(LOGGER, Level.FINE, + String.format("done getting boundary changesets for ''%s'' (%d entries)", + repository.getDirectoryName(), result.size())); + + return List.copyOf(result); + } + + private void visit(String id) { + if (cnt != 0 && cnt % maxCount == 0) { + result.add(id); + } + cnt++; + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java index 7c0b7e53879..ecea0aa756e 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java @@ -47,13 +47,16 @@ import java.util.Collections; import java.util.Date; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.ListIterator; import java.util.Map; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; @@ -65,7 +68,6 @@ import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.ForbiddenSymlinkException; import org.opengrok.indexer.util.IOUtils; -import org.opengrok.indexer.util.Statistics; import org.opengrok.indexer.util.TandemPath; /** @@ -99,60 +101,57 @@ public boolean isHistoryIndexDone() { } /** - * Generate history cache for single file or directory. + * Generate history cache for single renamed file. + * @param filename file path + * @param repository repository + * @param root root + * @param tillRevision end revision + */ + public void doRenamedFileHistory(String filename, File file, Repository repository, File root, String tillRevision) + throws HistoryException { + + History history; + + if (tillRevision != null) { + if (!(repository instanceof RepositoryWithPerPartesHistory)) { + throw new RuntimeException("cannot use non null tillRevision on repository"); + } + + RepositoryWithPerPartesHistory repo = (RepositoryWithPerPartesHistory) repository; + history = repo.getHistory(file, null, tillRevision); + } else { + history = repository.getHistory(file); + } + + doFileHistory(filename, history, repository, root, true); + } + + /** + * Generate history cache for single file. * @param filename name of the file - * @param historyEntries list of HistoryEntry objects forming the (incremental) history of the file + * @param history history * @param repository repository object in which the file belongs - * @param srcFile file object * @param root root of the source repository * @param renamed true if the file was renamed in the past */ - private void doFileHistory(String filename, List historyEntries, - Repository repository, File srcFile, File root, boolean renamed) + private void doFileHistory(String filename, History history, Repository repository, File root, boolean renamed) throws HistoryException { File file = new File(root, filename); - // Only store directory history for the top-level directory. - if (file.isDirectory() && !filename.equals(repository.getDirectoryName())) { - LOGGER.log(Level.FINE, "Not storing history cache for {0}: not top level directory", file); + if (file.isDirectory()) { return; } - Statistics statRepoHist = new Statistics(); - - /* - * If the file was renamed (in the changesets that are being indexed), - * its history is not stored in the historyEntries so it needs to be acquired - * directly from the repository. - * This ensures that complete history of the file (across renames) will be saved. - */ - History hist; - if (renamed) { - hist = repository.getHistory(srcFile); - } else { - hist = new History(historyEntries); - } - // File based history cache does not store files for individual // changesets so strip them unless it is history for the repository. - for (HistoryEntry ent : hist.getHistoryEntries()) { - if (file.isDirectory()) { - ent.stripTags(); - } else { - ent.strip(); - } - } + history.strip(); // Assign tags to changesets they represent. if (env.isTagsEnabled() && repository.hasFileBasedTags()) { - repository.assignTagsInHistory(hist); + repository.assignTagsInHistory(history); } - storeFile(hist, file, repository, !renamed); - - statRepoHist.report(LOGGER, Level.FINER, - String.format("Done storing history cache for '%s'", filename), - "filehistorycache.history.store"); + storeFile(history, file, repository, !renamed); } private boolean isRenamedFile(String filename, Repository repository, History history) @@ -160,8 +159,7 @@ private boolean isRenamedFile(String filename, Repository repository, History hi String repodir; try { - repodir = env.getPathRelativeToSourceRoot( - new File(repository.getDirectoryName())); + repodir = env.getPathRelativeToSourceRoot(new File(repository.getDirectoryName())); } catch (ForbiddenSymlinkException e) { LOGGER.log(Level.FINER, e.getMessage()); return false; @@ -405,8 +403,8 @@ private void finishStore(Repository repository, String latestRev) { // failure), do not create the CachedRevision file as this would // create confusion (once it starts working again). LOGGER.log(Level.WARNING, - "Could not store history for repository {0}", - repository.getDirectoryName()); + "Could not store history for repository {0}: {1} is not a directory", + new Object[]{repository.getDirectoryName(), histDir}); } else { storeLatestCachedRevision(repository, latestRev); LOGGER.log(Level.FINE, @@ -415,6 +413,11 @@ private void finishStore(Repository repository, String latestRev) { } } + @Override + public void store(History history, Repository repository) throws HistoryException { + store(history, repository, null); + } + /** * Store history for the whole repository in directory hierarchy resembling * the original repository structure. History of individual files will be @@ -425,8 +428,8 @@ private void finishStore(Repository repository, String latestRev) { * @param repository repository object */ @Override - public void store(History history, Repository repository) - throws HistoryException { + public void store(History history, Repository repository, String tillRevision) throws HistoryException { + final boolean handleRenamedFiles = repository.isHandleRenamedFiles(); String latestRev = null; @@ -437,16 +440,9 @@ public void store(History history, Repository repository) return; } - LOGGER.log(Level.FINE, - "Storing history for repository {0}", - new Object[] {repository.getDirectoryName()}); - - // Firstly store the history for the top-level directory. - doFileHistory(repository.getDirectoryName(), history.getHistoryEntries(), - repository, env.getSourceRootFile(), null, false); - HashMap> map = new HashMap<>(); HashMap acceptanceCache = new HashMap<>(); + Set renamedFiles = new HashSet<>(); /* * Go through all history entries for this repository (acquired through @@ -490,6 +486,11 @@ public void store(History history, Repository repository) } } + File histDataDir = new File(getRepositoryHistDataDirname(repository)); + if (!histDataDir.isDirectory() && !histDataDir.mkdirs()) { + LOGGER.log(Level.WARNING, "cannot create history cache directory for ''{0}''", histDataDir); + } + /* * Now traverse the list of files from the hash map built above * and for each file store its history (saved in the value of the @@ -500,45 +501,51 @@ public void store(History history, Repository repository) int fileHistoryCount = 0; for (Map.Entry> map_entry : map.entrySet()) { try { - if (handleRenamedFiles && - isRenamedFile(map_entry.getKey(), repository, history)) { + if (handleRenamedFiles && isRenamedFile(map_entry.getKey(), repository, history)) { + renamedFiles.add(map_entry.getKey()); continue; } } catch (IOException ex) { LOGGER.log(Level.WARNING, "isRenamedFile() got exception", ex); } - doFileHistory(map_entry.getKey(), map_entry.getValue(), - repository, null, root, false); + doFileHistory(map_entry.getKey(), new History(map_entry.getValue()), + repository, root, false); fileHistoryCount++; } - LOGGER.log(Level.FINE, "Stored history for {0} files", fileHistoryCount); + LOGGER.log(Level.FINE, "Stored history for {0} files in repository ''{1}''", + new Object[]{fileHistoryCount, repository.getDirectoryName()}); if (!handleRenamedFiles) { finishStore(repository, latestRev); return; } - /* - * Now handle renamed files (in parallel). - */ - HashMap> renamedMap = new HashMap<>(); - for (final Map.Entry> mapEntry : map.entrySet()) { - try { - if (isRenamedFile(mapEntry.getKey(), repository, history)) { - renamedMap.put(mapEntry.getKey(), mapEntry.getValue()); - } - } catch (IOException ex) { - LOGGER.log(Level.WARNING, - "isRenamedFile() got exception ", ex); - } + storeRenamed(renamedFiles, repository, tillRevision); + + finishStore(repository, latestRev); + } + + /** + * handle renamed files (in parallel). + * @param renamedFiles set of renamed file paths + * @param repository repository + */ + public void storeRenamed(Set renamedFiles, Repository repository, String tillRevision) throws HistoryException { + final File root = env.getSourceRootFile(); + if (renamedFiles.isEmpty()) { + return; } + + renamedFiles = renamedFiles.stream().filter(f -> new File(env.getSourceRootPath() + f).exists()). + collect(Collectors.toSet()); + // The directories for the renamed files have to be created before // the actual files otherwise storeFile() might be racing for // mkdirs() if there are multiple renamed files from single directory // handled in parallel. - for (final String file : renamedMap.keySet()) { + for (final String file : renamedFiles) { File cache; try { cache = getCachedFile(new File(env.getSourceRootPath() + file)); @@ -550,27 +557,26 @@ public void store(History history, Repository repository) if (!dir.isDirectory() && !dir.mkdirs()) { LOGGER.log(Level.WARNING, - "Unable to create cache directory ' {0} '.", dir); + "Unable to create cache directory ' {0} '.", dir); } } final Repository repositoryF = repository; - final CountDownLatch latch = new CountDownLatch(renamedMap.size()); + final CountDownLatch latch = new CountDownLatch(renamedFiles.size()); AtomicInteger renamedFileHistoryCount = new AtomicInteger(); - for (final Map.Entry> mapEntry : renamedMap.entrySet()) { + for (final String file : renamedFiles) { env.getIndexerParallelizer().getHistoryRenamedExecutor().submit(() -> { - try { - doFileHistory(mapEntry.getKey(), mapEntry.getValue(), - repositoryF, - new File(env.getSourceRootPath() + mapEntry.getKey()), - root, true); - renamedFileHistoryCount.getAndIncrement(); - } catch (Exception ex) { - // We want to catch any exception since we are in thread. - LOGGER.log(Level.WARNING, + try { + doRenamedFileHistory(file, + new File(env.getSourceRootPath() + file), + repositoryF, root, tillRevision); + renamedFileHistoryCount.getAndIncrement(); + } catch (Exception ex) { + // We want to catch any exception since we are in thread. + LOGGER.log(Level.WARNING, "doFileHistory() got exception ", ex); - } finally { - latch.countDown(); - } + } finally { + latch.countDown(); + } }); } @@ -581,9 +587,8 @@ public void store(History history, Repository repository) } catch (InterruptedException ex) { LOGGER.log(Level.SEVERE, "latch exception", ex); } - LOGGER.log(Level.FINE, "Stored history for {0} renamed files", - renamedFileHistoryCount.intValue()); - finishStore(repository, latestRev); + LOGGER.log(Level.FINE, "Stored history for {0} renamed files in repository ''{1}''", + new Object[]{renamedFileHistoryCount.intValue(), repository.getDirectoryName()}); } @Override @@ -664,34 +669,6 @@ private boolean isUpToDate(File file, File cachedFile) { file.lastModified() <= cachedFile.lastModified(); } - /** - * Check if the directory is in the cache. - * @param directory the directory to check - * @return {@code true} if the directory is in the cache - */ - @Override - public boolean hasCacheForDirectory(File directory, Repository repository) - throws HistoryException { - assert directory.isDirectory(); - Repository repos = HistoryGuru.getInstance().getRepository(directory); - if (repos == null) { - return true; - } - File dir = env.getDataRootFile(); - dir = new File(dir, FileHistoryCache.HISTORY_CACHE_DIR_NAME); - try { - dir = new File(dir, env.getPathRelativeToSourceRoot( - new File(repos.getDirectoryName()))); - } catch (ForbiddenSymlinkException e) { - LOGGER.log(Level.FINER, e.getMessage()); - return false; - } catch (IOException e) { - throw new HistoryException("Could not resolve " + - repos.getDirectoryName() + " relative to source root", e); - } - return dir.exists(); - } - @Override public boolean hasCacheForFile(File file) throws HistoryException { try { diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 71559d25933..bd9b8253833 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -34,6 +34,7 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.Date; +import java.util.HashSet; import java.util.List; import java.util.ArrayList; import java.util.Arrays; @@ -48,6 +49,7 @@ import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -68,6 +70,7 @@ import org.eclipse.jgit.lib.ObjectReader; import org.eclipse.jgit.lib.PersonIdent; import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.revwalk.FollowFilter; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevTree; @@ -86,7 +89,6 @@ import org.opengrok.indexer.util.Executor; import org.opengrok.indexer.util.ForbiddenSymlinkException; import org.opengrok.indexer.util.LazilyInstantiate; -import org.opengrok.indexer.util.Version; import static org.opengrok.indexer.history.HistoryEntry.TAGS_SEPARATOR; @@ -94,7 +96,7 @@ * Access to a Git repository. * */ -public class GitRepository extends Repository { +public class GitRepository extends RepositoryWithPerPartesHistory { private static final Logger LOGGER = LoggerFactory.getLogger(GitRepository.class); @@ -114,19 +116,6 @@ public class GitRepository extends Repository { private static final int CSET_LEN = 8; private static final String ABBREV_LOG = "--abbrev=" + CSET_LEN; - /** - * All git commands that emit date that needs to be parsed by - * {@code getDateFormat()} should use this option. - */ - private static final String GIT_DATE_OPT = "--date=iso8601-strict"; - - /** - * Minimum git version which supports the date format. - * - * @see #GIT_DATE_OPT - */ - private static final Version MINIMUM_VERSION = new Version(2, 1, 2); - /** * This is a static replacement for 'working' field. Effectively, check if git is working once in a JVM * instead of calling it for every GitRepository instance. @@ -134,16 +123,10 @@ public class GitRepository extends Repository { private static final LazilyInstantiate GIT_IS_WORKING = LazilyInstantiate.using(GitRepository::isGitWorking); public static final int GIT_ABBREV_LEN = 8; + public static final int MAX_CHANGESETS = 512; public GitRepository() { type = "git"; - /* - * This should match the 'iso-strict' format used by - * {@code getHistoryLogExecutor}. - */ - datePatterns = new String[] { - "yyyy-MM-dd'T'HH:mm:ssXXX" - }; ignoredDirs.add(".git"); ignoredFiles.add(".git"); @@ -152,16 +135,7 @@ public GitRepository() { private static boolean isGitWorking() { String repoCommand = getCommand(GitRepository.class, CMD_PROPERTY_KEY, CMD_FALLBACK); Executor exec = new Executor(new String[] {repoCommand, "--version"}); - if (exec.exec(false) == 0) { - final String outputVersion = exec.getOutputString(); - final String version = outputVersion.replaceAll(".*? version (\\d+(\\.\\d+)*).*", "$1"); - try { - return Version.from(version).compareTo(MINIMUM_VERSION) >= 0; - } catch (NumberFormatException ex) { - LOGGER.log(Level.WARNING, String.format("Unable to detect git version from %s", outputVersion), ex); - } - } - return false; + return (exec.exec(false) == 0); } /** @@ -524,9 +498,14 @@ History getHistory(File file) throws HistoryException { @Override History getHistory(File file, String sinceRevision) throws HistoryException { - final List entries = new ArrayList<>(); - final List renamedFiles = new ArrayList<>(); + return getHistory(file, sinceRevision, null); + } + + public int getPerPartesCount() { + return MAX_CHANGESETS; + } + public void accept(String sinceRevision, Consumer visitor) throws HistoryException { try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName()); RevWalk walk = new RevWalk(repository)) { @@ -535,6 +514,34 @@ History getHistory(File file, String sinceRevision) throws HistoryException { } walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD))); + for (RevCommit commit : walk) { + // Do not abbreviate the Id as this could cause AmbiguousObjectException in getHistory(). + visitor.accept(commit.getId().name()); + } + } catch (IOException e) { + throw new HistoryException(e); + } + } + + public History getHistory(File file, String sinceRevision, String tillRevision) throws HistoryException { + final List entries = new ArrayList<>(); + final Set renamedFiles = new HashSet<>(); + + boolean isDirectory = file.isDirectory(); + + try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName()); + RevWalk walk = new RevWalk(repository)) { + + if (sinceRevision != null) { + walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision))); + } + + if (tillRevision != null) { + walk.markStart(walk.lookupCommit(repository.resolve(tillRevision))); + } else { + walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD))); + } + String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file); if (!getDirectoryNameRelative().equals(relativePath)) { if (isHandleRenamedFiles()) { @@ -549,8 +556,7 @@ History getHistory(File file, String sinceRevision) throws HistoryException { } for (RevCommit commit : walk) { - int numParents = commit.getParentCount(); - if (numParents > 1 && !isMergeCommitsEnabled()) { + if (commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { continue; } @@ -560,24 +566,12 @@ History getHistory(File file, String sinceRevision) throws HistoryException { " <" + commit.getAuthorIdent().getEmailAddress() + ">", null, commit.getFullMessage(), true); - SortedSet files = new TreeSet<>(); - if (numParents == 1) { - getFiles(repository, commit.getParent(0), commit, files, renamedFiles); - } else if (numParents == 0) { // first commit - try (TreeWalk treeWalk = new TreeWalk(repository)) { - treeWalk.addTree(commit.getTree()); - treeWalk.setRecursive(true); - - while (treeWalk.next()) { - files.add(getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(treeWalk.getPathString())); - } - } - } else { - getFiles(repository, commit.getParent(0), commit, files, renamedFiles); + if (isDirectory) { + SortedSet files = new TreeSet<>(); + getFilesForCommit(renamedFiles, files, commit, repository); + historyEntry.setFiles(files); } - historyEntry.setFiles(files); entries.add(historyEntry); } } catch (IOException | ForbiddenSymlinkException e) { @@ -596,6 +590,36 @@ History getHistory(File file, String sinceRevision) throws HistoryException { return result; } + /** + * Accumulate list of changed files and renamed files (if enabled) for given commit. + * @param renamedFiles result containing the renamed files in this commit + * @param files result containing changed files in this commit + * @param commit RevCommit object + * @param repository repository object + * @throws IOException on error traversing the commit tree + */ + private void getFilesForCommit(Set renamedFiles, SortedSet files, RevCommit commit, + Repository repository) throws IOException { + + int numParents = commit.getParentCount(); + + if (numParents == 1) { + getFiles(repository, commit.getParent(0), commit, files, renamedFiles); + } else if (numParents == 0) { // first commit + try (TreeWalk treeWalk = new TreeWalk(repository)) { + treeWalk.addTree(commit.getTree()); + treeWalk.setRecursive(true); + + while (treeWalk.next()) { + files.add(getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(treeWalk.getPathString())); + } + } + } else { + getFiles(repository, commit.getParent(0), commit, files, renamedFiles); + } + } + private static String getNativePath(String path) { if (!File.separator.equals("/")) { return path.replace("/", File.separator); @@ -604,24 +628,38 @@ private static String getNativePath(String path) { return path; } + /** + * Assemble list of files that changed between 2 commits. + * @param repository repository object + * @param oldCommit parent commit + * @param newCommit new commit (the mehotd assumes oldCommit is its parent) + * @param files set of files that changed (excludes renamed files) + * @param renamedFiles set of renamed files (if renamed handling is enabled) + * @throws IOException on I/O problem + */ private void getFiles(org.eclipse.jgit.lib.Repository repository, RevCommit oldCommit, RevCommit newCommit, - Set files, List renamedFiles) + Set files, Set renamedFiles) throws IOException { OutputStream outputStream = NullOutputStream.INSTANCE; try (DiffFormatter formatter = new DiffFormatter(outputStream)) { formatter.setRepository(repository); - formatter.setDetectRenames(true); + if (isHandleRenamedFiles()) { + formatter.setDetectRenames(true); + } List diffs = formatter.scan(prepareTreeParser(repository, oldCommit), prepareTreeParser(repository, newCommit)); for (DiffEntry diff : diffs) { if (diff.getChangeType() != DiffEntry.ChangeType.DELETE) { - files.add(getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getNewPath())); + if (files != null) { + files.add(getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getNewPath())); + } } + if (diff.getChangeType() == DiffEntry.ChangeType.RENAME && isHandleRenamedFiles()) { renamedFiles.add(getNativePath(diff.getNewPath())); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java index b063fbc1bd2..83aedf8ec7a 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java @@ -55,7 +55,12 @@ public History() { this.entries = entries; this.renamedFiles = new HashSet<>(renamed); } - + + History(List entries, Set renamed) { + this.entries = entries; + this.renamedFiles = renamed; + } + /** * Set the list of log entries for the file. The first entry is the most * recent one. @@ -115,6 +120,8 @@ public boolean hasTags() { /** * Gets a value indicating if {@code file} is in the list of renamed files. + * @param file file path + * @return is file renamed */ public boolean isRenamed(String file) { return renamedFiles.contains(file); @@ -124,6 +131,16 @@ public Set getRenamedFiles() { return renamedFiles; } + /** + * Strip files and tags from history entries. + * @see HistoryEntry#strip() + */ + public void strip() { + for (HistoryEntry ent : this.getHistoryEntries()) { + ent.strip(); + } + } + @Override public boolean equals(Object o) { if (this == o) { diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java index d466a64c49f..0849beaadca 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java @@ -71,8 +71,9 @@ History get(File file, Repository repository, boolean withFiles) * @param repository The repository whose history to store * @throws HistoryException if the history cannot be stored */ - void store(History history, Repository repository) - throws HistoryException; + void store(History history, Repository repository) throws HistoryException; + + void store(History history, Repository repository, String tillRevision) throws HistoryException; /** * Optimize how the history is stored on disk. This method is typically @@ -86,16 +87,6 @@ void store(History history, Repository repository) */ void optimize() throws HistoryException; - /** - * Check if the specified directory is present in the cache. - * @param directory the directory to check - * @param repository the repository in which the directory is stored - * @return {@code true} if the directory is in the cache, {@code false} - * otherwise - */ - boolean hasCacheForDirectory(File directory, Repository repository) - throws HistoryException; - /** * Check if the specified file is present in the cache. * @param file the file to check diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index e8dfd9b63fe..2fcbd5cd2f7 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -102,7 +102,8 @@ public abstract class Repository extends RepositoryInfo { abstract boolean hasHistoryForDirectories(); /** - * Get the history log for the specified file or directory. + * Get the history for the specified file or directory. + * It is expected that {@link History#getRenamedFiles()} and {@link HistoryEntry#getFiles()} are empty for files. * * @param file the file to get the history for * @return history log for file @@ -141,8 +142,7 @@ public String getRepoCommand() { * @return partial history for file * @throws HistoryException on error accessing the history */ - History getHistory(File file, String sinceRevision) - throws HistoryException { + History getHistory(File file, String sinceRevision) throws HistoryException { // If we want an incremental history update and get here, warn that // it may be slow. @@ -348,6 +348,10 @@ protected String getRevisionForAnnotate(String history_revision) { return history_revision; } + protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { + finishCreateCache(cache, getHistory(directory, sinceRevision), null); + } + /** * Create a history log cache for all files in this repository. * {@code getHistory()} is used to fetch the history for the entire @@ -361,17 +365,16 @@ protected String getRevisionForAnnotate(String history_revision) { * * @throws HistoryException on error */ - final void createCache(HistoryCache cache, String sinceRevision) - throws HistoryException { + final void createCache(HistoryCache cache, String sinceRevision) throws HistoryException { + if (!isWorking()) { return; } - // If we don't have a directory parser, we can't create the cache + // If it is not possible to get history for a directory, we can't create the cache // this way. Just give up and return. if (!hasHistoryForDirectories()) { - LOGGER.log( - Level.INFO, + LOGGER.log(Level.INFO, "Skipping creation of history cache for {0}, since retrieval " + "of history for directories is not implemented for this " + "repository type.", getDirectoryName()); @@ -380,32 +383,10 @@ final void createCache(HistoryCache cache, String sinceRevision) File directory = new File(getDirectoryName()); - History history; - try { - history = getHistory(directory, sinceRevision); - } catch (HistoryException he) { - if (sinceRevision == null) { - // Failed to get full history, so fail. - throw he; - } - // Failed to get partial history. This may have been caused - // by changes in the revision numbers since the last update - // (bug #14724) so we'll try to regenerate the cache from - // scratch instead. - LOGGER.log(Level.WARNING, - "Failed to get partial history. Attempting to " - + "recreate the history cache from scratch.", he); - history = null; - } - - if (sinceRevision != null && history == null) { - // Failed to get partial history, now get full history instead. - history = getHistory(directory); - // Got full history successfully. Clear the history cache so that - // we can recreate it from scratch. - cache.clear(this); - } + doCreateCache(cache, sinceRevision, directory); + } + void finishCreateCache(HistoryCache cache, History history, String tillRevision) throws HistoryException { // We need to refresh list of tags for incremental reindex. RuntimeEnvironment env = RuntimeEnvironment.getInstance(); if (env.isTagsEnabled() && this.hasFileBasedTags()) { @@ -413,7 +394,7 @@ final void createCache(HistoryCache cache, String sinceRevision) } if (history != null) { - cache.store(history, this); + cache.store(history, this, tillRevision); } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java new file mode 100644 index 00000000000..5c0c07e94fd --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import org.opengrok.indexer.logger.LoggerFactory; +import org.opengrok.indexer.util.Statistics; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Repositories extending this class will benefit from per partes history + * indexing which saves memory. + */ +public abstract class RepositoryWithPerPartesHistory extends Repository { + private static final long serialVersionUID = -3433255821312805064L; + public static final int MAX_CHANGESETS = 128; + + private static final Logger LOGGER = LoggerFactory.getLogger(RepositoryWithPerPartesHistory.class); + + /** + * Just like for {@link Repository#getHistory(File)} it is expected that the lists of (renamed) files + * individual files (i.e. not directory) are empty. + * @param file file to retrieve history for + * @param sinceRevision start revision (non inclusive) + * @param tillRevision end revision (inclusive) + * @return history object + * @throws HistoryException if history retrieval fails + */ + abstract History getHistory(File file, String sinceRevision, String tillRevision) throws HistoryException; + + /** + * @return maximum number of entries to retrieve + */ + public int getPerPartesCount() { + return MAX_CHANGESETS; + } + + /** + * Traverse the changesets using the visitor pattern. + * @param sinceRevision start revision + * @param visitor consumer of revisions + * @throws HistoryException on error during history retrieval + */ + public abstract void accept(String sinceRevision, Consumer visitor) throws HistoryException; + + @Override + protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { + // For repositories that supports this, avoid storing complete History in memory + // (which can be sizeable, at least for the initial indexing, esp. if merge changeset support is enabled), + // by splitting the work into multiple chunks. + BoundaryChangesets boundaryChangesets = new BoundaryChangesets(this); + List boundaryChangesetList = new ArrayList<>(boundaryChangesets.getBoundaryChangesetIDs(sinceRevision)); + boundaryChangesetList.add(null); // to finish the last step in the cycle below + LOGGER.log(Level.FINE, "boundary changesets: {0}", boundaryChangesetList); + int cnt = 0; + for (String tillRevision: boundaryChangesetList) { + Statistics stat = new Statistics(); + LOGGER.log(Level.FINEST, "getting history for ({0}, {1})", new Object[]{sinceRevision, tillRevision}); + finishCreateCache(cache, getHistory(directory, sinceRevision, tillRevision), tillRevision); + sinceRevision = tillRevision; + + stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''", + ++cnt, boundaryChangesetList.size(), this.getDirectoryName())); + } + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 9e950ff7643..464c6d06b62 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -1665,9 +1665,10 @@ public static Definitions getDefinitions(File file) throws ParseException, IOExc /** * @param file File object of a file under source root * @return Document object for the file or {@code null} + * @throws IOException on I/O error + * @throws ParseException on problem with building Query */ - public static Document getDocument(File file) - throws IOException, ParseException { + public static Document getDocument(File file) throws IOException, ParseException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); String path; try { diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/BoundaryChangesetsTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/BoundaryChangesetsTest.java new file mode 100644 index 00000000000..46dd61989b7 --- /dev/null +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/BoundaryChangesetsTest.java @@ -0,0 +1,127 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import org.apache.commons.lang3.tuple.ImmutableTriple; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mockito; +import org.opengrok.indexer.util.TestRepository; + +import java.io.File; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class BoundaryChangesetsTest { + + private TestRepository repositories; + + private GitRepository gitRepository; + + @BeforeEach + public void setUp() throws Exception { + repositories = new TestRepository(); + repositories.create(getClass().getResourceAsStream("repositories.zip")); + + File reposRoot = new File(repositories.getSourceRoot(), "git"); + Repository repo = RepositoryFactory.getRepository(reposRoot); + assertNotNull(repo); + + assertTrue(repo instanceof RepositoryWithPerPartesHistory); + gitRepository = (GitRepository) repo; + } + + @AfterEach + public void tearDown() { + repositories.destroy(); + repositories = null; + } + + @ParameterizedTest + @ValueSource(ints = {0, 1}) + void testInvalidMaxCount(int maxCount) { + GitRepository gitSpyRepository = Mockito.spy(gitRepository); + Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(maxCount); + assertThrows(RuntimeException.class, () -> new BoundaryChangesets(gitSpyRepository)); + } + + /** + * Used to supply test data for testing {@link BoundaryChangesets#getBoundaryChangesetIDs(String)}. + * @return triplets of (maximum count, start revision, list of expected revisions) + * + * The test expects this sequence of changesets in the repository: + * + *
+     * {@code 84599b3cccb3eeb5aa9aec64771678d6526bcecb (HEAD -> master) renaming directories
+     * 67dfbe2648c94a8825671b0f2c132828d0d43079 renaming renamed -> renamed2
+     * 1086eaf5bca6d5a056097aa76017a8ab0eade20f adding some lines into renamed.c
+     * b6413947a59f481ddc0a05e0d181731233557f6e moved renamed.c to new location
+     * ce4c98ec1d22473d4aa799c046c2a90ae05832f1 adding simple file for renamed file testing
+     * aa35c25882b9a60a97758e0ceb276a3f8cb4ae3a Add lint make target and fix lint warnings
+     * 8482156421620efbb44a7b6f0eb19d1f191163c7 Add the result of a make on Solaris x86
+     * bb74b7e849170c31dc1b1b5801c83bf0094a3b10 Added a small test program}
+     * 
+ */ + private static Stream>> provideMapsForTestPerPartesHistory() { + // Cannot use List.of() because of the null element. + List expectedChangesets2 = Arrays.asList("8482156421620efbb44a7b6f0eb19d1f191163c7", + "ce4c98ec1d22473d4aa799c046c2a90ae05832f1", + "1086eaf5bca6d5a056097aa76017a8ab0eade20f"); + + List expectedChangesets4 = Arrays.asList("ce4c98ec1d22473d4aa799c046c2a90ae05832f1"); + + List expectedChangesets2Middle = Arrays.asList("ce4c98ec1d22473d4aa799c046c2a90ae05832f1", + "1086eaf5bca6d5a056097aa76017a8ab0eade20f"); + + return Stream.of(ImmutableTriple.of(2, null, expectedChangesets2), + ImmutableTriple.of(4, null, expectedChangesets4), + ImmutableTriple.of(2, "aa35c25882b9a60a97758e0ceb276a3f8cb4ae3a", + expectedChangesets2Middle)); + } + + /** + * Test of {@link BoundaryChangesets#getBoundaryChangesetIDs(String)}. + * @throws Exception on error + */ + @ParameterizedTest + @MethodSource("provideMapsForTestPerPartesHistory") + void testBasic(ImmutableTriple> integerListImmutableTriple) throws Exception { + GitRepository gitSpyRepository = Mockito.spy(gitRepository); + Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(integerListImmutableTriple.getLeft()); + + BoundaryChangesets boundaryChangesets = new BoundaryChangesets(gitSpyRepository); + List boundaryChangesetList = boundaryChangesets. + getBoundaryChangesetIDs(integerListImmutableTriple.getMiddle()); + assertEquals(integerListImmutableTriple.getRight().size(), boundaryChangesetList.size()); + assertEquals(integerListImmutableTriple.getRight(), boundaryChangesetList); + } +} diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java index d065b5ba7a5..7d4842ee058 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java @@ -36,7 +36,9 @@ import static org.opengrok.indexer.history.MercurialRepositoryTest.runHgCommand; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.Date; import java.util.Iterator; @@ -44,6 +46,7 @@ import java.util.List; import org.apache.commons.lang3.time.DateUtils; +import org.eclipse.jgit.api.Git; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -51,6 +54,7 @@ import org.junit.jupiter.api.condition.OS; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.Mockito; import org.opengrok.indexer.condition.EnabledForRepository; import org.opengrok.indexer.configuration.Filter; import org.opengrok.indexer.configuration.IgnoredNames; @@ -112,12 +116,12 @@ public void tearDown() { } /** - * Assert that two HistoryEntry objects are equal. + * Assert that two lists of HistoryEntry objects are equal. * - * @param expected the expected entry - * @param actual the actual entry + * @param expected the expected list of entries + * @param actual the actual list of entries * @param isdir was the history generated for a directory - * @throws AssertionError if the two entries don't match + * @throws AssertionError if the two lists don't match */ private void assertSameEntries(List expected, List actual, boolean isdir) { assertEquals(expected.size(), actual.size(), "Unexpected size"); @@ -129,12 +133,12 @@ private void assertSameEntries(List expected, List a } /** - * Assert that two lists of HistoryEntry objects are equal. + * Assert that two HistoryEntry objects are equal. * - * @param expected the expected list of entries - * @param actual the actual list of entries + * @param expected the expected instance + * @param actual the actual instance * @param isdir was the history generated for directory - * @throws AssertionError if the two lists don't match + * @throws AssertionError if the two instances don't match */ private void assertSameEntry(HistoryEntry expected, HistoryEntry actual, boolean isdir) { assertEquals(expected.getAuthor(), actual.getAuthor()); @@ -624,7 +628,6 @@ public void testRenamedFilePlusChangesBranched() throws Exception { updatedHistory.getHistoryEntries(), false); } - /** * Make sure produces correct history where several files are renamed in a single commit. */ @@ -682,6 +685,80 @@ public void testMultipleRenamedFiles() throws Exception { assertSameEntries(histConstruct.getHistoryEntries(), updatedHistory.getHistoryEntries(), false); } + private void changeFileAndCommit(Git git, File file, String comment) throws Exception { + String authorName = "Foo Bar"; + String authorEmail = "foo@bar.com"; + + try (FileOutputStream fos = new FileOutputStream(file, true)) { + fos.write(comment.getBytes(StandardCharsets.UTF_8)); + } + + git.commit().setMessage(comment).setAuthor(authorName, authorEmail).setAll(true).call(); + } + + /** + * Renamed files need special treatment when given repository supports per partes history retrieval. + * Specifically, when a file is detected as renamed, its history needs to be retrieved with upper bound, + * otherwise there would be duplicate history entries if there were subsequent changes to the file + * in the following history chunks. This test prevents that. + * @param maxCount maximum number of changesets to store in one go + * @throws Exception on error + */ + @ParameterizedTest + @ValueSource(ints = {2, 3, 4}) + void testRenamedFileHistoryWithPerPartes(int maxCount) throws Exception { + File repositoryRoot = new File(repositories.getSourceRoot(), "gitRenamedPerPartes"); + assertTrue(repositoryRoot.mkdir()); + File fooFile = new File(repositoryRoot, "foo.txt"); + if (!fooFile.createNewFile()) { + throw new IOException("Could not create file " + fooFile); + } + File barFile = new File(repositoryRoot, "bar.txt"); + + // Looks like the file needs to have content of some length for the add/rm below + // to be detected as file rename by the [J]Git heuristics. + try (FileOutputStream fos = new FileOutputStream(fooFile)) { + fos.write("foo bar foo bar foo bar".getBytes(StandardCharsets.UTF_8)); + } + + // Create a series of commits to one (renamed) file: + // - add the file + // - bunch of content commits to the file + // - rename the file + // - bunch of content commits to the renamed file + try (Git git = Git.init().setDirectory(repositoryRoot).call()) { + git.add().addFilepattern("foo.txt").call(); + changeFileAndCommit(git, fooFile, "initial"); + + changeFileAndCommit(git, fooFile, "1"); + changeFileAndCommit(git, fooFile, "2"); + changeFileAndCommit(git, fooFile, "3"); + + // git mv + assertTrue(fooFile.renameTo(barFile)); + git.add().addFilepattern("bar.txt").call(); + git.rm().addFilepattern("foo.txt").call(); + git.commit().setMessage("rename").setAuthor("foo", "foo@bar").setAll(true).call(); + + changeFileAndCommit(git, barFile, "4"); + changeFileAndCommit(git, barFile, "5"); + changeFileAndCommit(git, barFile, "6"); + } + + env.setHandleHistoryOfRenamedFiles(true); + Repository repository = RepositoryFactory.getRepository(repositoryRoot); + History history = repository.getHistory(repositoryRoot); + assertEquals(1, history.getRenamedFiles().size()); + GitRepository gitRepository = (GitRepository) repository; + GitRepository gitSpyRepository = Mockito.spy(gitRepository); + Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(maxCount); + gitSpyRepository.createCache(cache, null); + + assertTrue(barFile.isFile()); + History updatedHistory = cache.get(barFile, gitSpyRepository, false); + assertEquals(8, updatedHistory.getHistoryEntries().size()); + } + /** * Make sure produces correct history for a renamed and moved file in Subversion. */ @@ -749,7 +826,6 @@ public void testRenamedFile() throws Exception { updatedHistory.getHistoryEntries(), false); } - private void checkNoHistoryFetchRepo(String reponame, String filename, boolean hasHistory, boolean historyFileExists) throws Exception { diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/GitRepositoryTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/GitRepositoryTest.java index 033e4551325..08a11fca7f7 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/GitRepositoryTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/GitRepositoryTest.java @@ -28,7 +28,6 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Paths; -import java.text.ParseException; import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -36,6 +35,7 @@ import java.util.List; import java.util.Set; import java.util.TreeSet; +import java.util.stream.Collectors; import org.eclipse.jgit.api.Git; import org.eclipse.jgit.lib.ObjectId; @@ -56,7 +56,6 @@ import org.opengrok.indexer.util.TestRepository; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; @@ -294,27 +293,6 @@ public void fileHasHistory() { assertTrue(result); } - @Test - public void testDateFormats() { - String[][] tests = new String[][] { - {"abcd", "expected exception"}, - {"2016-01-01 10:00:00", "expected exception"}, - {"2016 Sat, 5 Apr 2008 15:12:51 +0000", "expected exception"}, - {"2017-07-25T13:17:44+02:00", null}, - }; - - final GitRepository repository = new GitRepository(); - - for (String[] test : tests) { - try { - repository.parse(test[0]); - assertNull(test[1], "Shouldn't be able to parse the date: " + test[0]); - } catch (ParseException ex) { - assertNotNull(test[1], "Shouldn't throw a parsing exception for date: " + test[0]); - } - } - } - /** * For the following renamed tests the structure in the git repo is as following: *
@@ -639,13 +617,7 @@ public void testRenamedSingleHistory() throws Exception {
         assertEquals(5, history.getHistoryEntries().size());
 
         assertNotNull(history.getRenamedFiles());
-        assertEquals(3, history.getRenamedFiles().size());
-
-        assertTrue(history.isRenamed(Paths.get("moved", "renamed2.c").toString()));
-        assertTrue(history.isRenamed(Paths.get("moved2", "renamed2.c").toString()));
-        assertTrue(history.isRenamed(Paths.get("moved", "renamed.c").toString()));
-        assertFalse(history.isRenamed("non-existent.c"));
-        assertFalse(history.isRenamed("renamed.c"));
+        assertEquals(0, history.getRenamedFiles().size());
 
         assertEquals("84599b3c", history.getHistoryEntries().get(0).getRevision());
         assertEquals("67dfbe26", history.getHistoryEntries().get(1).getRevision());
@@ -654,6 +626,63 @@ public void testRenamedSingleHistory() throws Exception {
         assertEquals("ce4c98ec", history.getHistoryEntries().get(4).getRevision());
     }
 
+    @Test
+    void testGetHistorySinceTillNullNull() throws Exception {
+        File root = new File(repository.getSourceRoot(), "git");
+        GitRepository gitrepo = (GitRepository) RepositoryFactory.getRepository(root);
+
+        History history = gitrepo.getHistory(root, null, null);
+        assertNotNull(history);
+        assertNotNull(history.getHistoryEntries());
+        assertEquals(8, history.getHistoryEntries().size());
+        List revisions = history.getHistoryEntries().stream().map(HistoryEntry::getRevision).
+                collect(Collectors.toList());
+        assertEquals(List.of("84599b3c", "67dfbe26", "1086eaf5", "b6413947", "ce4c98ec", "aa35c258", "84821564",
+                "bb74b7e8"), revisions);
+    }
+
+    @Test
+    void testGetHistorySinceTillNullRev() throws Exception {
+        File root = new File(repository.getSourceRoot(), "git");
+        GitRepository gitrepo = (GitRepository) RepositoryFactory.getRepository(root);
+
+        History history = gitrepo.getHistory(root, null, "ce4c98ec");
+        assertNotNull(history);
+        assertNotNull(history.getHistoryEntries());
+        assertEquals(4, history.getHistoryEntries().size());
+        List revisions = history.getHistoryEntries().stream().map(HistoryEntry::getRevision).
+                collect(Collectors.toList());
+        assertEquals(List.of("ce4c98ec", "aa35c258", "84821564", "bb74b7e8"), revisions);
+    }
+
+    @Test
+    void testGetHistorySinceTillRevNull() throws Exception {
+        File root = new File(repository.getSourceRoot(), "git");
+        GitRepository gitrepo = (GitRepository) RepositoryFactory.getRepository(root);
+
+        History history = gitrepo.getHistory(root, "aa35c258", null);
+        assertNotNull(history);
+        assertNotNull(history.getHistoryEntries());
+        assertEquals(5, history.getHistoryEntries().size());
+        List revisions = history.getHistoryEntries().stream().map(HistoryEntry::getRevision).
+                collect(Collectors.toList());
+        assertEquals(List.of("84599b3c", "67dfbe26", "1086eaf5", "b6413947", "ce4c98ec"), revisions);
+    }
+
+    @Test
+    void testGetHistorySinceTillRevRev() throws Exception {
+        File root = new File(repository.getSourceRoot(), "git");
+        GitRepository gitrepo = (GitRepository) RepositoryFactory.getRepository(root);
+
+        History history = gitrepo.getHistory(root, "ce4c98ec", "1086eaf5");
+        assertNotNull(history);
+        assertNotNull(history.getHistoryEntries());
+        assertEquals(2, history.getHistoryEntries().size());
+        List revisions = history.getHistoryEntries().stream().map(HistoryEntry::getRevision).
+                collect(Collectors.toList());
+        assertEquals(List.of("1086eaf5", "b6413947"), revisions);
+    }
+
     @Test
     public void testBuildTagListEmpty() throws Exception {
         File root = new File(repository.getSourceRoot(), "git");
diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java
new file mode 100644
index 00000000000..54d22b50700
--- /dev/null
+++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ */
+package org.opengrok.indexer.history;
+
+import org.apache.commons.lang3.tuple.ImmutableTriple;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+import org.opengrok.indexer.configuration.RuntimeEnvironment;
+import org.opengrok.indexer.util.TestRepository;
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.isNull;
+import static org.mockito.Mockito.times;
+
+public class RepositoryWithPerPartesHistoryTest {
+    private TestRepository repositories;
+
+    private GitRepository gitRepository;
+
+    @BeforeEach
+    public void setUp() throws Exception {
+        repositories = new TestRepository();
+        repositories.create(getClass().getResourceAsStream("repositories.zip"));
+
+        File reposRoot = new File(repositories.getSourceRoot(), "git");
+        Repository repo = RepositoryFactory.getRepository(reposRoot);
+        assertNotNull(repo);
+
+        assertTrue(repo instanceof RepositoryWithPerPartesHistory);
+        gitRepository = (GitRepository) repo;
+    }
+
+    @AfterEach
+    public void tearDown() {
+        repositories.destroy();
+        repositories = null;
+    }
+
+    /**
+     * The way how history is split into chunks is tested in {@link BoundaryChangesetsTest#testBasic(ImmutableTriple)},
+     * this tests how that translates into calls to
+     * {@link RepositoryWithPerPartesHistory#getHistory(File, String, String)}, esp. w.r.t. the first and last boundary.
+     * @throws HistoryException on error
+     */
+    @Test
+    void testChangesets() throws HistoryException {
+        // To avoid calling getHistory() for individual files via createCache() below.
+        RuntimeEnvironment.getInstance().setHandleHistoryOfRenamedFiles(false);
+
+        ArgumentCaptor stringArgumentCaptor1 = ArgumentCaptor.forClass(String.class);
+        ArgumentCaptor stringArgumentCaptor2 = ArgumentCaptor.forClass(String.class);
+
+        FileHistoryCache cache = new FileHistoryCache();
+        GitRepository gitSpyRepository = Mockito.spy(gitRepository);
+        Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(3);
+        gitSpyRepository.createCache(cache, null);
+        Mockito.verify(gitSpyRepository, times(3)).
+                getHistory(any(), stringArgumentCaptor1.capture(), stringArgumentCaptor2.capture());
+
+        List sinceRevisions = new ArrayList<>();
+        sinceRevisions.add(null);
+        sinceRevisions.add("8482156421620efbb44a7b6f0eb19d1f191163c7");
+        sinceRevisions.add("b6413947a59f481ddc0a05e0d181731233557f6e");
+        assertEquals(sinceRevisions, stringArgumentCaptor1.getAllValues());
+
+        List tillRevisions = new ArrayList<>();
+        tillRevisions.add("8482156421620efbb44a7b6f0eb19d1f191163c7");
+        tillRevisions.add("b6413947a59f481ddc0a05e0d181731233557f6e");
+        tillRevisions.add(null);
+        assertEquals(tillRevisions, stringArgumentCaptor2.getAllValues());
+    }
+
+    /**
+     * Test a (perceived) corner case to simulate there is exactly one "incoming" changeset
+     * for a repository with per partes history handling. This changeset has to be stored in history cache.
+     * @throws Exception on error
+     */
+    @Test
+    void testPseudoIncomingChangeset() throws Exception {
+        FileHistoryCache cache = new FileHistoryCache();
+        GitRepository gitSpyRepository = Mockito.spy(gitRepository);
+        Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(3);
+        List historyEntries = gitRepository.getHistory(new File(gitRepository.getDirectoryName())).
+                getHistoryEntries();
+        assertFalse(historyEntries.isEmpty());
+
+        gitSpyRepository.createCache(cache, historyEntries.get(1).getRevision());
+        Mockito.verify(gitSpyRepository, times(1)).
+                getHistory(any(), anyString(), isNull());
+        assertEquals(historyEntries.get(0).getRevision(), cache.getLatestCachedRevision(gitSpyRepository));
+        History cachedHistory = cache.get(Paths.get(gitRepository.getDirectoryName(), "moved2", "renamed2.c").toFile(),
+                gitSpyRepository, false);
+        assertNotNull(cachedHistory);
+        assertEquals(1, cachedHistory.getHistoryEntries().size());
+        assertEquals(historyEntries.get(0).getRevision(), cachedHistory.getHistoryEntries().get(0).getRevision());
+    }
+}