Skip to content
This repository has been archived by the owner on Oct 24, 2022. It is now read-only.

Add a file hashing utility to the Utilities class. #234

Merged
merged 3 commits into from
Oct 27, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// Also resolve dependencies from the JCenter repository.
resolvers += Resolver.jcenterRepo

libraryDependencies ++= Seq(
  "com.typesafe" % "config" % "1.2.0",
  // ScalaTest is scoped to "test", so it is only on the classpath for this project's own tests.
  "org.scalatest" %% "scalatest" % "2.2.1" % "test"
)

organization := "org.allenai.plugins"
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/org/allenai/plugins/DeployPlugin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -251,12 +251,12 @@ object DeployPlugin extends AutoPlugin {
val fileName = f.getName
filteredFilenames.exists(fileName.startsWith)
}
val hashes = filesToHash.map(Hash.apply).map(Hash.toHex)
val fileHash = Utilities.hashFiles(filesToHash, stageDir)

// We sort so that we're not dependent on filesystem or git sorting remaining stable in order
// for the cacheKey to not change.
val cacheKey = Hash.toHex(
Hash((hashes ++ dependentGitCommits.value :+ gitLocalSha1.value).sorted.mkString)
Hash((dependentGitCommits.value :+ fileHash :+ gitLocalSha1.value).sorted.mkString)
)
val cacheKeyConfFile = new java.io.File(s"${stageDir.getCanonicalPath}/conf/cacheKey.Sha1")

Expand Down
24 changes: 23 additions & 1 deletion src/main/scala/org/allenai/plugins/Utilities.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package org.allenai.plugins

import sbt.{ Artifact, IO, ModuleID }
import sbt.{ Artifact, Hash, IO, ModuleID }

import java.io.File

Expand Down Expand Up @@ -49,4 +49,26 @@ object Utilities {
}
IO.write(destination, contents)
}

/** Given a set of files on disk, produce a consistent SHA1 hash of their contents. This hash will
  * change when the contents of the files change, or when a file's path changes relative to the
  * given root directory. The order in which the files are passed does not affect the result.
  *
  * Both the files and the root directory are made absolute before they are compared, so relative
  * and absolute paths can be mixed freely.
  *
  * NOTE(review): the relative names are joined without a separator, and the name hash is sorted
  * together with the content hashes rather than being paired with them. Distinct inputs such as
  * names ("ab", "c") vs. ("a", "bc"), or two files swapping contents, therefore hash identically.
  * This is left as-is so that existing hashes stay stable; confirm whether that is acceptable.
  * @param filesToHash the files whose contents and names should be hashed
  * @param rootDir the root directory the file names should be resolved against before hashing.
  * Only path changes relative to this directory will cause the hash to change.
  * @return the hex-encoded hash of the file names and contents
  */
def hashFiles(filesToHash: Seq[File], rootDir: File): String = {
  // Resolve the filenames relative to the root directory. `Path.relativize` throws an
  // IllegalArgumentException when only one of the two paths is absolute, so both sides are made
  // absolute before being normalized.
  val rootDirPath = rootDir.toPath.toAbsolutePath.normalize
  val relativizedNames = filesToHash.map { file =>
    rootDirPath.relativize(file.toPath.toAbsolutePath.normalize).toString
  }
  // Create a hash of the sorted names, joined by an empty string. Sorting makes the result
  // independent of the order of `filesToHash`.
  val nameHash = Hash.toHex(Hash(relativizedNames.sorted.mkString))

  // Hash the contents of each file.
  val fileHashes = filesToHash.map(Hash.apply).map(Hash.toHex)

  // Finally, join the name hash with the content hashes, and hash the resulting string. The
  // hashes are sorted first, again to stay independent of the input order.
  Hash.toHex(Hash((nameHash +: fileHashes).sorted.mkString))
}
}
111 changes: 111 additions & 0 deletions src/test/scala/org/allenai/plugins/UtilitiesSpec.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package org.allenai.plugins

import org.scalatest.{ BeforeAndAfter, FlatSpecLike, Matchers, OneInstancePerTest }
import sbt.{ Hash, IO }

import java.io.File
import java.nio.file.Files

/** Tests for the plugin utilities.
  *
  * `OneInstancePerTest` is mixed in, so every test runs against its own instance of this class
  * and therefore its own temp directory and fixture files.
  */
class UtilitiesSpec extends FlatSpecLike with Matchers with OneInstancePerTest with BeforeAndAfter {
  /** Scratch directory holding the fixture files. */
  val tempDirectory = {
    val directory = Files.createTempDirectory("utilities-test").toFile
    // Safety net for instances whose `after` block never runs. Exit-time deletion happens in
    // reverse registration order, so the fixture files registered below are removed first.
    directory.deleteOnExit()
    directory
  }

  /** Fixture file containing the text "foo". */
  val fooFile = {
    val foo = new File(tempDirectory, "foo.txt")
    IO.write(foo, "foo")
    foo.deleteOnExit()
    foo
  }

  /** Fixture file containing the text "bar". */
  val barFile = {
    val bar = new File(tempDirectory, "bar.txt")
    IO.write(bar, "bar")
    bar.deleteOnExit()
    bar
  }

  after {
    // `File.delete` refuses to remove a non-empty directory and only reports that through its
    // ignored return value; `IO.delete` removes the directory together with its contents.
    IO.delete(tempDirectory)
  }

  "hashFiles" should "return the same hash when called on the same files" in {
    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
    val secondHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
    firstHash shouldBe secondHash
  }

  it should "return the same hash when called with files in a different order" in {
    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
    // Swap the file order.
    val secondHash = Utilities.hashFiles(Seq(barFile, fooFile), tempDirectory)
    firstHash shouldBe secondHash
  }

  it should "return the same hash when called with the same files in a different directory" in {
    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)

    // Create the same directory structure in a new temp directory.
    val newTempDirectory = Files.createTempDirectory("utilities-test").toFile
    try {
      val newFoo = new File(newTempDirectory, "foo.txt")
      val newBar = new File(newTempDirectory, "bar.txt")
      IO.copyFile(fooFile, newFoo)
      IO.copyFile(barFile, newBar)
      val secondHash = Utilities.hashFiles(Seq(newFoo, newBar), newTempDirectory)

      firstHash shouldBe secondHash
    } finally {
      // The directory still contains the copied files, so it has to be deleted recursively.
      IO.delete(newTempDirectory)
    }
  }

  it should "return the same hash when files have the same relative paths w.r.t. the root" in {
    // Use two subdirectories of `tempDirectory`, and hash from both of them. Note that these
    // directories don't currently have to exist, since we're operating only on the paths, but they
    // are created in case this changes.
    val subdir1 = new File(tempDirectory, "sub1")
    subdir1.mkdir()
    val subdir2 = new File(tempDirectory, "sub2")
    subdir2.mkdir()

    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), subdir1)
    val secondHash = Utilities.hashFiles(Seq(fooFile, barFile), subdir2)

    firstHash shouldBe secondHash
  }

  it should "return the same hash when files have the same normalized path" in {
    // Create versions of fooFile and barFile which point at the same files on disk, but through
    // denormalized paths ("./foo.txt" and "../<temp dir name>/bar.txt" under the temp directory).
    // `File.separator` is the path-segment separator; `File.pathSeparatorChar` is the separator
    // used between entries of a path list, and would not produce a valid path here.
    val fooDenormalized = new File(tempDirectory, "." + File.separator + "foo.txt")
    val barDenormalized = new File(
      tempDirectory,
      ".." + File.separator + tempDirectory.getName + File.separator + "bar.txt"
    )

    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
    val secondHash = Utilities.hashFiles(Seq(fooDenormalized, barDenormalized), tempDirectory)

    firstHash shouldBe secondHash
  }

  it should "return a different hash when a file is renamed" in {
    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)

    val newFoo = new File(tempDirectory, "new-foo.txt")
    IO.move(fooFile, newFoo)

    val secondHash = Utilities.hashFiles(Seq(newFoo, barFile), tempDirectory)

    firstHash shouldNot be(secondHash)
  }

  it should "return a different hash when a file's contents change" in {
    val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
    IO.write(fooFile, "foo2")

    val secondHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)

    firstHash shouldNot be(secondHash)
  }
}