From 2242b749e248840fa2f8d94535f3cea6171a0f58 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 16 Dec 2022 10:08:45 -0500 Subject: [PATCH] WX-867 Translate crc32c hashes to b64 for getm (#6970) * Translate crc32c hashes to b64 for getm * Update tests * Remove obsolete b64 handling for md5, centralize hex validation * Restore old test, fix other test --- .../localizer/downloaders/GetmChecksum.scala | 34 ++++++++++--------- .../downloaders/GetmChecksumSpec.scala | 22 ++++++++++-- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala index 551099b8e21..2a39a6543a3 100644 --- a/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala +++ b/cromwell-drs-localizer/src/main/scala/drs/localizer/downloaders/GetmChecksum.scala @@ -4,9 +4,8 @@ import cats.syntax.validated._ import cloud.nio.impl.drs.AccessUrl import common.validation.ErrorOr.ErrorOr import drs.localizer.downloaders.AccessUrlDownloader.Hashes -import mouse.all.anySyntaxMouse -import org.apache.commons.codec.binary.Base64.{decodeBase64, isBase64} -import org.apache.commons.codec.binary.Hex.encodeHexString +import org.apache.commons.codec.binary.Base64.encodeBase64String +import org.apache.commons.codec.binary.Hex.decodeHex import org.apache.commons.text.StringEscapeUtils @@ -26,23 +25,18 @@ sealed trait GetmChecksum { } case class Md5(override val rawValue: String) extends GetmChecksum { - override def value: ErrorOr[String] = { - val trimmed = rawValue.trim - if (trimmed.matches("[A-Fa-f0-9]+")) - trimmed.validNel - // TDR currently returns a base64-encoded MD5 because that's what Azure seems to do. However, - // the DRS spec does not specify that any checksums should be base64-encoded, and `getm` also - // does not expect base64. This case handles the current behavior in the short term until - // https://broadworkbench.atlassian.net/browse/DR-2259 is done. - else if (isBase64(trimmed)) - (trimmed |> decodeBase64 |> encodeHexString).validNel - else - s"Invalid md5 checksum value is neither hex nor base64: $rawValue".invalidNel - } + override def value: ErrorOr[String] = GetmChecksum.validateHex(rawValue) override def getmAlgorithm: String = "md5" } case class Crc32c(override val rawValue: String) extends GetmChecksum { + // The DRS spec says that all hash values should be hex strings, + // but getm expects crc32c values to be base64. + override def value: ErrorOr[String] = + GetmChecksum.validateHex(rawValue) + .map(decodeHex) + .map(encodeBase64String) + override def getmAlgorithm: String = "gs_crc32c" } case class AwsEtag(override val rawValue: String) extends GetmChecksum { @@ -88,4 +82,12 @@ object GetmChecksum { case _ => Null // None or an empty hashes map. } } + + def validateHex(s: String): ErrorOr[String] = { + val trimmed = s.trim + if (trimmed.matches("[A-Fa-f0-9]+")) + trimmed.validNel + else + s"Invalid checksum value, expected hex but got: $trimmed".invalidNel + } } diff --git a/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/GetmChecksumSpec.scala b/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/GetmChecksumSpec.scala index 24ea087cb8f..69c063ff616 100644 --- a/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/GetmChecksumSpec.scala +++ b/cromwell-drs-localizer/src/test/scala/drs/localizer/downloaders/GetmChecksumSpec.scala @@ -31,9 +31,10 @@ class GetmChecksumSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matcher val results = Table( ("description", "algorithm", "expected"), ("md5 hex", Md5("abcdef"), "--checksum-algorithm 'md5' --checksum abcdef".validNel), - ("md5 base64", Md5("cR84lXY1y17c3q7/7riLEA=="), "--checksum-algorithm 'md5' --checksum 711f38957635cb5edcdeaeffeeb88b10".validNel), - ("md5 gibberish", Md5("what is this???"), "Invalid md5 checksum value is neither hex nor base64: what is this???".invalidNel), - ("crc32c", Crc32c("012345"), "--checksum-algorithm 'gs_crc32c' --checksum 012345".validNel), + ("md5 base64", Md5("cR84lXY1y17c3q7/7riLEA=="), "Invalid checksum value, expected hex but got: cR84lXY1y17c3q7/7riLEA==".invalidNel), + ("md5 gibberish", Md5("what is this???"), "Invalid checksum value, expected hex but got: what is this???".invalidNel), + ("crc32c", Crc32c("012345"), "--checksum-algorithm 'gs_crc32c' --checksum ASNF".validNel), + ("crc32c gibberish", Crc32c("????"), "Invalid checksum value, expected hex but got: ????".invalidNel), ("AWS ETag", AwsEtag("012345"), "--checksum-algorithm 's3_etag' --checksum 012345".validNel), // Escape checksum values constructed from unvalidated data returned by DRS servers. ("Unsupported", Unsupported("Robert'); DROP TABLE Students;\n --\\"), raw"--checksum-algorithm 'null' --checksum Robert\'\)\;\ DROP\ TABLE\ Students\;\ --\\".validNel), @@ -46,4 +47,19 @@ class GetmChecksumSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matcher } } } + + it should "correctly validate hex strings" in { + val results = Table( + ("test string", "expected"), + ("", "Invalid checksum value, expected hex but got: ".invalidNel), + (" ", "Invalid checksum value, expected hex but got: ".invalidNel), + ("myfavoritestring", "Invalid checksum value, expected hex but got: myfavoritestring".invalidNel), + (" AbC123 ", "AbC123".validNel), + ("456", "456".validNel), + ) + + forAll(results) { (testString, expected) => + GetmChecksum.validateHex(testString) shouldBe expected + } + } }