Skip to content
This repository has been archived by the owner on Oct 24, 2022. It is now read-only.

Add a file hashing utility to the Utilities class. #234

Merged
merged 3 commits into from
Oct 27, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/main/scala/org/allenai/plugins/Utilities.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ object Utilities {
*/
def hashFiles(filesToHash: Seq[File], rootDir: File): String = {
// Resolve the filenames relative to the root directory.
val rootDirPath = rootDir.toPath
val relativizedNames = filesToHash.map(_.toPath).map(rootDirPath.relativize).map(_.toString)
val rootDirPath = rootDir.toPath.normalize
val relativizedNames =
filesToHash.map(_.toPath.normalize).map(rootDirPath.relativize).map(_.toString)
// Create a hash of the sorted names, joined by an empty string.
val nameHash = Hash.toHex(Hash(relativizedNames.sorted.mkString))

Expand Down
15 changes: 14 additions & 1 deletion src/test/scala/org/allenai/plugins/UtilitiesSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class UtilitiesSpec extends FlatSpecLike with Matchers with OneInstancePerTest w
}
}

it should "return the same hash when files have the same relative paths" in {
it should "return the same hash when files have the same relative paths w.r.t. the root" in {
// Use two subdirectories of `tempDirectory`, and hash from both of them. Note that these
// directories don't currently have to exist, since we're operating only on the paths, but they
// are created in case this changes.
Expand All @@ -76,6 +76,19 @@ class UtilitiesSpec extends FlatSpecLike with Matchers with OneInstancePerTest w
firstHash shouldBe secondHash
}

it should "return the same hash when files have the same normalized path" in {
// Create versions of fooFile and barFile which are semantically the same, but have different
// paths.
val fooRelative = new File("foo.txt")
val barRelative = new File(tempDirectory, ".." + File.pathSeparatorChar +
tempDirectory.getName + File.pathSeparatorChar + "bar.txt")

val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
val secondHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

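Shouldn't the second hash attempt use a different (denormalized) path to barFile? As written, both calls pass the same `Seq(fooFile, barFile)` with the same root, so `firstHash shouldBe secondHash` passes trivially, and `fooRelative` / `barRelative` are never used. Also note that the denormalized path is built with `File.pathSeparatorChar` (the path-list separator, `:` on Unix) rather than `File.separatorChar` (the name separator, `/` on Unix), so it would not resolve to bar.txt even if it were passed in.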

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hah! Indeed. I'll send a follow-up.


firstHash shouldBe secondHash
}

it should "return a different hash when a file is renamed" in {
val firstHash = Utilities.hashFiles(Seq(fooFile, barFile), tempDirectory)

Expand Down