Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[JENKINS-39547] - Corrupt jar cache #130

Merged
merged 4 commits into from
Dec 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 1 addition & 39 deletions src/main/java/hudson/remoting/Checksum.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

/**
* Represents 128bit checksum of a jar file.
Expand Down Expand Up @@ -74,42 +72,10 @@ static Checksum forFile(File file) throws IOException {
* Returns the checksum for the given URL.
*/
static Checksum forURL(URL url) throws IOException {
if (CHECKSUMS_BY_URL.containsKey(url)) {
return CHECKSUMS_BY_URL.get(url);
}

return calculateFor(url);
}

/**
* Allow calculating checksums only one at a time.
*
* <p>This method caches calculated checksums so future calls to
* {@link #forURL)} should not need to re-calculate the value.
*
* <p>Even if many slaves connect at around the same time, the checksums
* should only be calculated once. Making this method synchronized ensures
* this behavior.
*
* <p>Previously when a large number of slaves connected at the same time
* the master would experience a spike in CPU and probably I/O. By caching
* the results and synchronizing the calculation of the results this issue
* is addressed.
*/
private synchronized static Checksum calculateFor(URL url) throws IOException {
// When callers all request the checksum of a large jar the calls to
// forURL will all fall through to this method since the first caller's
// calculation may take a while. Hence re-check the cache at the start.
if (CHECKSUMS_BY_URL.containsKey(url)) {
return CHECKSUMS_BY_URL.get(url);
}

try {
MessageDigest md = MessageDigest.getInstance(JarLoaderImpl.DIGEST_ALGORITHM);
Util.copy(url.openStream(), new DigestOutputStream(new NullOutputStream(), md));
Checksum checksum = new Checksum(md.digest(), md.getDigestLength() / 8);
CHECKSUMS_BY_URL.putIfAbsent(url, checksum);
return checksum;
return new Checksum(md.digest(), md.getDigestLength() / 8);
} catch (NoSuchAlgorithmException e) {
throw new AssertionError(e);
}
Expand All @@ -128,8 +94,4 @@ public void write(byte[] b) {
public void write(byte[] b, int off, int len) {
}
}

@edu.umd.cs.findbugs.annotations.SuppressWarnings("DMI_COLLECTION_OF_URLS")
private static final ConcurrentMap<URL,Checksum> CHECKSUMS_BY_URL =
new ConcurrentHashMap<URL,Checksum>();
}
52 changes: 44 additions & 8 deletions src/main/java/hudson/remoting/FileSystemJarCache.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package hudson.remoting;

import javax.annotation.Nonnull;
import javax.annotation.concurrent.GuardedBy;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand All @@ -27,6 +30,12 @@ public class FileSystemJarCache extends JarCacheSupport {
*/
private final Set<Checksum> notified = Collections.synchronizedSet(new HashSet<Checksum>());

/**
* Cache of computer checksums for cached jars.
*/
@GuardedBy("itself")
private final Map<String, Checksum> checksumsByPath = new HashMap<>();

/**
* @param touch
* True to touch the cached jar file that's used. This enables external LRU based cache
Expand Down Expand Up @@ -60,13 +69,24 @@ protected URL lookInCache(Channel channel, long sum1, long sum2) throws IOExcept

@Override
protected URL retrieve(Channel channel, long sum1, long sum2) throws IOException, InterruptedException {
Checksum expected = new Checksum(sum1, sum2);
File target = map(sum1, sum2);

if (target.exists()) {
// Assume its already been fetched correctly before. ie. We are not going to validate
// the checksum.
LOGGER.fine(String.format("Jar file already exists: %16X%16X", sum1, sum2));
return target.toURI().toURL();
Checksum actual = fileChecksum(target);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Improvement follow-up proposal: Perform checksum calculation for JAR cache during the remoting startup (with option to disable it) before establishing the connection. Should help with performance issues during first job execution due to classloading.

if (expected.equals(actual)) {
LOGGER.fine(String.format("Jar file already exists: %s", expected));
return target.toURI().toURL();
}

LOGGER.warning(String.format(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it really a warning? I suppose it's a valid behavior when somebody updates jars on the remote side (e.g. plugin update)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is, when you update a jar, it should have different checksum and therefore be stored on different location so this code path will not be executed. This warning is a signal that what we found cached as ABC (by location) is in fact DEF (by checksum). This can be caused by manipulating the cache dir (what this PR addresses), bug in remoting that cached jar on wrong location or change of checksum algorithm. In all these cases it is undesirable to load the jar blindly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed

"Cached file checksum mismatch: %s%nExpected: %s%n Actual: %s",
target.getAbsolutePath(), expected, actual
));
target.delete();
synchronized (checksumsByPath) {
checksumsByPath.remove(target.getCanonicalPath());
}
}

try {
Expand All @@ -81,7 +101,6 @@ protected URL retrieve(Channel channel, long sum1, long sum2) throws IOException
}

// Verify the checksum of the download.
Checksum expected = new Checksum(sum1, sum2);
Checksum actual = Checksum.forFile(tmp);
if (!expected.equals(actual)) {
throw new IOException(String.format(
Expand All @@ -93,21 +112,19 @@ protected URL retrieve(Channel channel, long sum1, long sum2) throws IOException
if (!target.exists()) {
throw new IOException("Unable to create " + target + " from " + tmp);
}

// Even if we fail to rename, we are OK as long as the target actually exists at
// this point. This can happen if two FileSystemJarCache instances share the
// same cache dir.
//
// Verify the checksum to be sure the target is correct.
actual = Checksum.forFile(target);
actual = fileChecksum(target);
if (!expected.equals(actual)) {
throw new IOException(String.format(
"Incorrect checksum of previous jar: %s%nExpected: %s%nActual: %s",
target.getAbsolutePath(), expected, actual));
}
}


return target.toURI().toURL();
} finally {
tmp.delete();
Expand All @@ -117,6 +134,25 @@ protected URL retrieve(Channel channel, long sum1, long sum2) throws IOException
}
}

/**
* Get file checksum calculating it or retrieving from cache.
*/
private Checksum fileChecksum(File file) throws IOException {
String location = file.getCanonicalPath();

// When callers all request the checksum of a large jar, the first thread
// will calculate the checksum and the other treads will be blocked here
// until calculated to be picked up from cache right away.
synchronized (checksumsByPath) {
Checksum checksum = checksumsByPath.get(location);
if (checksum != null) return checksum;

checksum = Checksum.forFile(file);
checksumsByPath.put(location, checksum);
return checksum;
}
}

/*package for testing*/ File createTempJar(@Nonnull File target) throws IOException {
File parent = target.getParentFile();
Util.mkdirs(parent);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/hudson/remoting/JarLoaderImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings("SE_BAD_FIELD")
class JarLoaderImpl implements JarLoader, Serializable {
private final ConcurrentMap<Checksum,URL> knownJars = new ConcurrentHashMap<Checksum,URL>();
private final ConcurrentMap<Checksum,URL> knownJars = new ConcurrentHashMap<>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modification of the serializable class. Would it cause compatibility issues if we send it over the channel with different remoting versions on sides. I'd guess so. 🐜

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just ignore. 🤦


@edu.umd.cs.findbugs.annotations.SuppressWarnings("DMI_COLLECTION_OF_URLS") // TODO: fix this
private final ConcurrentMap<URL,Checksum> checksums = new ConcurrentHashMap<URL,Checksum>();
private final ConcurrentMap<URL,Checksum> checksums = new ConcurrentHashMap<>();

private final Set<Checksum> presentOnRemote = Collections.synchronizedSet(new HashSet<Checksum>());

Expand Down
11 changes: 0 additions & 11 deletions src/test/java/hudson/remoting/ChecksumTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,6 @@ public void testForFileAndURL() throws Exception {
assertNotEquals(Checksum.forFile(tmpFile1), Checksum.forFile(tmpFile2));
}

@Test
public void testCaching() throws Exception {
File tmpFile = createTmpFile("file.txt", FILE_CONTENTS1);
HashCode hash = Files.hash(tmpFile, Hashing.sha256());
assertEquals(createdExpectedChecksum(hash), Checksum.forFile(tmpFile));

tmpFile.delete();
assertFalse(tmpFile.exists());
assertEquals(createdExpectedChecksum(hash), Checksum.forFile(tmpFile));
}

private File createTmpFile(String name, String contents) throws Exception {
File tmpFile = tmp.newFile(name);
Files.append(contents, tmpFile, Charsets.UTF_8);
Expand Down
89 changes: 52 additions & 37 deletions src/test/java/hudson/remoting/FileSystemJarCacheTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
import org.jvnet.hudson.test.Bug;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.nio.charset.Charset;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
Expand Down Expand Up @@ -61,26 +62,23 @@ public void testRetrieveAlreadyExists() throws Exception {
File expectedFile = fileSystemJarCache.map(expectedChecksum.sum1, expectedChecksum.sum2);
expectedFile.getParentFile().mkdirs();
assertTrue(expectedFile.createNewFile());
writeToFile(expectedFile, CONTENTS);

URL url = fileSystemJarCache.retrieve(
mockChannel, expectedChecksum.sum1, expectedChecksum.sum2);
assertEquals(expectedFile.toURI().toURL(), url);

// Changing the content after successfully cached is not an expected use-case.
// Here used to verity checksums are cached.
writeToFile(expectedFile, "Something else");
url = fileSystemJarCache.retrieve(
mockChannel, expectedChecksum.sum1, expectedChecksum.sum2);
assertEquals(expectedFile.toURI().toURL(), url);
}

@Test
public void testSuccessfulRetrieve() throws Exception {
when(mockChannel.getProperty(JarLoader.THEIRS)).thenReturn(mockJarLoader);
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
RemoteOutputStream o = (RemoteOutputStream) invocationOnMock.getArguments()[2];
o.write(CONTENTS.getBytes(Charsets.UTF_8));
return null;
}
}).when(mockJarLoader).writeJarTo(
eq(expectedChecksum.sum1),
eq(expectedChecksum.sum2),
any(RemoteOutputStream.class));
mockCorrectLoad();

URL url = fileSystemJarCache.retrieve(
mockChannel, expectedChecksum.sum1, expectedChecksum.sum2);
Expand Down Expand Up @@ -109,25 +107,38 @@ public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
mockChannel, expectedChecksum.sum1, expectedChecksum.sum2);
}

@Test
@Bug(39547)
public void retrieveInvalidChecksum() throws Exception {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference issue?

when(mockChannel.getProperty(JarLoader.THEIRS)).thenReturn(mockJarLoader);

File expected = fileSystemJarCache.map(expectedChecksum.sum1, expectedChecksum.sum2);
writeToFile(expected, "This is no going to match the checksum");

mockCorrectLoad();

URL url = fileSystemJarCache.retrieve(mockChannel, expectedChecksum.sum1, expectedChecksum.sum2);
assertEquals(expectedChecksum, Checksum.forURL(url));
}

private void writeToFile(File expected, String content) throws IOException {
expected.getParentFile().mkdirs();
FileWriter fileWriter = new FileWriter(expected);
try {
fileWriter.write(content);
} finally {
fileWriter.close();
}
}

@Test
public void testRenameFailsAndNoTarget() throws Exception {
File expectedFile = fileSystemJarCache.map(expectedChecksum.sum1, expectedChecksum.sum2);
File spy = spy(tmp.newFile());
FileSystemJarCache jarCache = spy(fileSystemJarCache);
doReturn(spy).when(jarCache).createTempJar(any(File.class));

when(mockChannel.getProperty(JarLoader.THEIRS)).thenReturn(mockJarLoader);
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
RemoteOutputStream o = (RemoteOutputStream) invocationOnMock.getArguments()[2];
o.write(CONTENTS.getBytes(Charsets.UTF_8));
return null;
}
}).when(mockJarLoader).writeJarTo(
eq(expectedChecksum.sum1),
eq(expectedChecksum.sum2),
any(RemoteOutputStream.class));
mockCorrectLoad();

when(spy.renameTo(expectedFile)).thenReturn(false);
assertFalse(expectedFile.exists());
Expand All @@ -145,18 +156,7 @@ public void testRenameFailsAndBadPreviousTarget() throws Exception {
FileSystemJarCache jarCache = spy(fileSystemJarCache);
doReturn(fileSpy).when(jarCache).createTempJar(any(File.class));

when(mockChannel.getProperty(JarLoader.THEIRS)).thenReturn(mockJarLoader);
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
RemoteOutputStream o = (RemoteOutputStream) invocationOnMock.getArguments()[2];
o.write(CONTENTS.getBytes(Charsets.UTF_8));
return null;
}
}).when(mockJarLoader).writeJarTo(
eq(expectedChecksum.sum1),
eq(expectedChecksum.sum2),
any(RemoteOutputStream.class));
mockCorrectLoad();
doAnswer(new Answer<Boolean>() {
@Override
public Boolean answer(InvocationOnMock invocationOnMock) throws Throwable {
Expand All @@ -173,4 +173,19 @@ public Boolean answer(InvocationOnMock invocationOnMock) throws Throwable {

jarCache.retrieve(mockChannel, expectedChecksum.sum1, expectedChecksum.sum2);
}

private void mockCorrectLoad() throws IOException, InterruptedException {
when(mockChannel.getProperty(JarLoader.THEIRS)).thenReturn(mockJarLoader);
doAnswer(new Answer<Void>() {
@Override
public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
RemoteOutputStream o = (RemoteOutputStream) invocationOnMock.getArguments()[2];
o.write(CONTENTS.getBytes(Charsets.UTF_8));
return null;
}
}).when(mockJarLoader).writeJarTo(
eq(expectedChecksum.sum1),
eq(expectedChecksum.sum2),
any(RemoteOutputStream.class));
}
}