Skip to content

Commit

Permalink
Speed up class uniqueness analyses by caching results of reading jars
Browse files Browse the repository at this point in the history
This will likely come close to doubling the runtime performance of a single task
(by sharing results between the main and test classpaths) and should
significantly improve the performance of these tasks in monorepos.
  • Loading branch information
Alex Landau committed Jul 22, 2021
1 parent 04312b9 commit 8261c85
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@

package com.palantir.baseline.plugins;

import com.palantir.baseline.services.JarClassHasher;
import com.palantir.baseline.tasks.CheckClassUniquenessLockTask;
import org.gradle.api.Project;
import org.gradle.api.artifacts.Configuration;
import org.gradle.api.plugins.JavaPlugin;
import org.gradle.api.provider.Provider;
import org.gradle.api.tasks.TaskProvider;
import org.gradle.language.base.plugins.LifecycleBasePlugin;

Expand All @@ -33,8 +35,14 @@
public class BaselineClassUniquenessPlugin extends AbstractBaselinePlugin {
@Override
public final void apply(Project project) {
TaskProvider<CheckClassUniquenessLockTask> checkClassUniqueness =
project.getTasks().register("checkClassUniqueness", CheckClassUniquenessLockTask.class);
Provider<JarClassHasher> jarClassHasher = project.getGradle()
.getSharedServices()
.registerIfAbsent("jarClassHasher", JarClassHasher.class, _spec -> {});
TaskProvider<CheckClassUniquenessLockTask> checkClassUniqueness = project.getTasks()
.register("checkClassUniqueness", CheckClassUniquenessLockTask.class, task -> {
task.jarClassHasher.set(jarClassHasher);
task.usesService(jarClassHasher);
});
project.getPlugins().apply(LifecycleBasePlugin.class);
project.getTasks().getByName(LifecycleBasePlugin.CHECK_TASK_NAME).dependsOn(checkClassUniqueness);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* (c) Copyright 2021 Palantir Technologies Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.palantir.baseline.services;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashingInputStream;
import com.google.common.io.ByteStreams;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import org.gradle.api.artifacts.ModuleVersionIdentifier;
import org.gradle.api.artifacts.ResolvedArtifact;
import org.gradle.api.services.BuildService;
import org.gradle.api.services.BuildServiceParameters;

/**
 * Gradle build service that computes the SHA-256 hash of every class file inside a resolved jar artifact and
 * remembers the result until {@link #close()} is called, so that a jar is only read once no matter how many
 * callers ask for it.
 *
 * <p>Results are cached per jar file rather than per {@link ModuleVersionIdentifier}: one module version can
 * expose several artifacts (for example a main jar and a classifier jar), and keying on the module version alone
 * would hand the classes of the first artifact seen to every other artifact of that module.
 */
public abstract class JarClassHasher implements BuildService<BuildServiceParameters.None>, AutoCloseable {
    private static final String CLASS_SUFFIX = ".class";

    private final Cache<File, Result> cache = Caffeine.newBuilder().build();

    /** Immutable outcome of hashing one jar: class name mapped to the hash(es) of the entries declaring it. */
    public static class Result {
        private final ImmutableSetMultimap<String, HashCode> hashesByClassName;

        private Result(ImmutableSetMultimap<String, HashCode> hashesByClassName) {
            this.hashesByClassName = hashesByClassName;
        }

        /** Returns the hashes of the class entries found in the jar, keyed by dotted class name. */
        public ImmutableSetMultimap<String, HashCode> getHashesByClassName() {
            return hashesByClassName;
        }

        /** Returns a result that contains no classes. */
        public static Result empty() {
            return new Result(ImmutableSetMultimap.of());
        }
    }

    /**
     * Returns the class hashes for the jar backing {@code resolvedArtifact}, reading the jar only if no result
     * for that file has been cached yet.
     *
     * @param resolvedArtifact the artifact whose file should be hashed
     * @return the hashes by class name, or an empty result if the artifact's file does not exist
     * @throws RuntimeException wrapping the {@link IOException} if the jar cannot be read
     */
    public final Result hashClasses(ResolvedArtifact resolvedArtifact) {
        File file = resolvedArtifact.getFile();
        if (!file.exists()) {
            // Deliberately not cached: the file may be produced later in the same build, and a cached empty
            // result would then be stale.
            return Result.empty();
        }
        return cache.get(file, JarClassHasher::hashJar);
    }

    /** Reads every class entry of {@code jarFile} and hashes its bytes with SHA-256. */
    private static Result hashJar(File jarFile) {
        ImmutableSetMultimap.Builder<String, HashCode> hashesByClassName = ImmutableSetMultimap.builder();
        try (FileInputStream fileInputStream = new FileInputStream(jarFile);
                JarInputStream jarInputStream = new JarInputStream(fileInputStream)) {
            JarEntry entry;
            while ((entry = jarInputStream.getNextJarEntry()) != null) {
                String entryName = entry.getName();
                if (entry.isDirectory() || !entryName.endsWith(CLASS_SUFFIX)) {
                    continue;
                }

                if (entryName.contains("module-info.class")) {
                    // Java 9 allows jars to have a module-info.class file in the root,
                    // we shouldn't complain about these.
                    continue;
                }

                // The entry name is known to end with ".class" here, so strip exactly that suffix and turn the
                // path separators into package separators.
                String className = entryName
                        .substring(0, entryName.length() - CLASS_SUFFIX.length())
                        .replace('/', '.');

                // The hashing stream wraps the jar stream positioned at the current entry; it is intentionally
                // not closed, since closing it would close the underlying jar stream as well.
                HashingInputStream inputStream = new HashingInputStream(Hashing.sha256(), jarInputStream);
                ByteStreams.exhaust(inputStream);

                hashesByClassName.put(className, inputStream.hash());
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to hash classes in jar " + jarFile, e);
        }
        return new Result(hashesByClassName.build());
    }

    /** Drops every cached result. */
    @Override
    public final void close() {
        // Try to free up memory when this is no longer needed
        cache.invalidateAll();
        cache.cleanUp();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSortedMap;
import com.palantir.baseline.services.JarClassHasher;
import difflib.DiffUtils;
import difflib.Patch;
import java.io.File;
Expand All @@ -36,6 +37,7 @@
import org.gradle.api.Task;
import org.gradle.api.artifacts.Configuration;
import org.gradle.api.artifacts.ModuleVersionIdentifier;
import org.gradle.api.provider.Property;
import org.gradle.api.provider.SetProperty;
import org.gradle.api.specs.Spec;
import org.gradle.api.tasks.CacheableTask;
Expand All @@ -55,10 +57,14 @@ public class CheckClassUniquenessLockTask extends DefaultTask {
@SuppressWarnings("VisibilityModifier")
public final SetProperty<Configuration> configurations;

@SuppressWarnings("VisibilityModifier")
public final Property<JarClassHasher> jarClassHasher;

private final File lockFile;

public CheckClassUniquenessLockTask() {
this.configurations = getProject().getObjects().setProperty(Configuration.class);
this.jarClassHasher = getProject().getObjects().property(JarClassHasher.class);
this.lockFile = getProject().file("baseline-class-uniqueness.lock");
onlyIf(new Spec<Task>() {
@Override
Expand Down Expand Up @@ -91,8 +97,8 @@ public final void doIt() {
ImmutableSortedMap<String, Optional<String>> resultsByConfiguration = configurations.get().stream()
.collect(ImmutableSortedMap.toImmutableSortedMap(
Comparator.naturalOrder(), Configuration::getName, configuration -> {
ClassUniquenessAnalyzer analyzer =
new ClassUniquenessAnalyzer(getProject().getLogger());
ClassUniquenessAnalyzer analyzer = new ClassUniquenessAnalyzer(
jarClassHasher.get(), getProject().getLogger());
analyzer.analyzeConfiguration(configuration);
Collection<Set<ModuleVersionIdentifier>> problemJars = analyzer.getDifferingProblemJars();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,35 +18,31 @@

import static java.util.stream.Collectors.toSet;

import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashingInputStream;
import com.google.common.io.ByteStreams;
import com.palantir.baseline.services.JarClassHasher;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.time.Duration;
import java.time.Instant;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.jar.JarEntry;
import java.util.jar.JarInputStream;
import java.util.stream.Collectors;
import org.gradle.api.artifacts.Configuration;
import org.gradle.api.artifacts.ModuleVersionIdentifier;
import org.gradle.api.artifacts.ResolvedArtifact;
import org.slf4j.Logger;

public final class ClassUniquenessAnalyzer {

private final JarClassHasher jarHasher;
private final Map<Set<ModuleVersionIdentifier>, Set<String>> jarsToClasses = new HashMap<>();
private final Map<String, Set<HashCode>> classToHashCodes = new HashMap<>();
private final Logger log;

public ClassUniquenessAnalyzer(Logger log) {
public ClassUniquenessAnalyzer(JarClassHasher jarHasher, Logger log) {
this.jarHasher = jarHasher;
this.log = log;
}

Expand All @@ -60,43 +56,26 @@ public void analyzeConfiguration(Configuration configuration) {
Map<String, Set<ModuleVersionIdentifier>> classToJars = new HashMap<>();
Map<String, Set<HashCode>> tempClassToHashCodes = new HashMap<>();

dependencies.stream().forEach(resolvedArtifact -> {
for (ResolvedArtifact resolvedArtifact : dependencies) {
File file = resolvedArtifact.getFile();
if (!file.exists()) {
log.info("Skipping non-existent jar {}: {}", resolvedArtifact, file);
return;
}

try (FileInputStream fileInputStream = new FileInputStream(file);
JarInputStream jarInputStream = new JarInputStream(fileInputStream)) {
JarEntry entry;
while ((entry = jarInputStream.getNextJarEntry()) != null) {
if (entry.isDirectory() || !entry.getName().endsWith(".class")) {
continue;
}

if (entry.getName().contains("module-info.class")) {
// Java 9 allows jars to have a module-info.class file in the root,
// we shouldn't complain about these.
continue;
}

String className = entry.getName().replaceAll("/", ".").replaceAll(".class", "");
HashingInputStream inputStream = new HashingInputStream(Hashing.sha256(), jarInputStream);
ByteStreams.exhaust(inputStream);

multiMapPut(
classToJars,
className,
resolvedArtifact.getModuleVersion().getId());

multiMapPut(tempClassToHashCodes, className, inputStream.hash());
}
} catch (IOException e) {
log.error("Failed to read JarFile {}", resolvedArtifact, e);
throw new RuntimeException(e);
ImmutableSetMultimap<String, HashCode> hashes =
jarHasher.hashClasses(resolvedArtifact).getHashesByClassName();

for (Map.Entry<String, HashCode> entry : hashes.entries()) {
String className = entry.getKey();
HashCode hashValue = entry.getValue();
multiMapPut(
classToJars,
className,
resolvedArtifact.getModuleVersion().getId());
multiMapPut(tempClassToHashCodes, className, hashValue);
}
});
}

// discard all the classes that only come from one jar - these are completely safe!
classToJars.entrySet().stream()
Expand Down

0 comments on commit 8261c85

Please sign in to comment.