From 1ace5cc8232536bcc336042aec686fed1204f799 Mon Sep 17 00:00:00 2001 From: Min Shen Date: Mon, 31 Jul 2017 16:57:12 -0700 Subject: [PATCH] SPARK-10878 Fix race condition when multiple clients resolve artifacts at the same time. --- .../org/apache/spark/deploy/SparkSubmit.scala | 44 ++++++++++++++----- .../spark/deploy/SparkSubmitUtilsSuite.scala | 17 +++++++ 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0ea14361b2f7..8e3631615d8c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1216,7 +1216,36 @@ private[spark] object SparkSubmitUtils { /** A nice function to use in tests as well. Values are dummy strings. */ def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( - ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0")) + // Include timestamp in module name, so multiple clients resolving maven coordinates at the + // same time do not modify the same resolution file concurrently. + ModuleRevisionId.newInstance("org.apache.spark", + "spark-submit-parent-" + System.currentTimeMillis().toString, + "1.0")) + + /** + * Clear ivy resolution from current launch. The resolution file is usually at + * ~/.ivy2/org.apache.spark-spark-submit-parent-$timestamp-default.xml, + * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$timestamp-1.0.xml, and + * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$timestamp-1.0.properties. + * Since each launch will have its own resolution files created, delete them after + * each resolution to prevent accumulation of these files in the ivy cache dir. 
+ */ + private def clearIvyResolutionFiles( + mdId: ModuleRevisionId, + ivySettings: IvySettings, + ivyConfName: String): Unit = { + val currentResolutionFiles = Seq[File]( + new File(ivySettings.getDefaultCache, + s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml"), + new File(ivySettings.getDefaultCache, + s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.xml"), + new File(ivySettings.getDefaultCache, + s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.properties") + ) + currentResolutionFiles.foreach{ file => + if (file.exists) file.delete + } + } /** * Resolves any dependencies that were supplied through maven coordinates @@ -1267,14 +1296,6 @@ private[spark] object SparkSubmitUtils { // A Module descriptor must be specified. Entries are dummy strings val md = getModuleDescriptor - // clear ivy resolution from previous launches. The resolution file is usually at - // ~/.ivy2/org.apache.spark-spark-submit-parent-default.xml. In between runs, this file - // leads to confusion with Ivy when the files can no longer be found at the repository - // declared in that file/ - val mdId = md.getModuleRevisionId - val previousResolution = new File(ivySettings.getDefaultCache, - s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml") - if (previousResolution.exists) previousResolution.delete md.setDefaultConf(ivyConfName) @@ -1295,7 +1316,10 @@ private[spark] object SparkSubmitUtils { packagesDirectory.getAbsolutePath + File.separator + "[organization]_[artifact]-[revision].[ext]", retrieveOptions.setConfs(Array(ivyConfName))) - resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) + val paths = resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) + val mdId = md.getModuleRevisionId + clearIvyResolutionFiles(mdId, ivySettings, ivyConfName) + paths } finally { System.setOut(sysOut) } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 88b77e514342..4adc237fd141 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -21,6 +21,7 @@ import java.io.{File, OutputStream, PrintStream} import java.nio.charset.StandardCharsets import scala.collection.mutable.ArrayBuffer +import scala.reflect.io.Path._ import com.google.common.io.Files import org.apache.ivy.core.module.descriptor.MDArtifact @@ -255,4 +256,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(jarPath.indexOf("mydep") >= 0, "should find dependency") } } + + test("test resolution files cleaned after resolving artifact") { + val main = new MavenCoordinate("my.great.lib", "mylib", "0.1") + + IvyTestUtils.withRepository(main, None, None) { repo => + val ivySettings = SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath)) + val jarPath = SparkSubmitUtils.resolveMavenCoordinates( + main.toString, + ivySettings, + isTest = true) + val r = """.*org.apache.spark-spark-submit-parent-.*""".r + assert(ivySettings.getDefaultCache.toDirectory.files.map(_.name) + .forall{ case n @ r() => false case _ => true }, + "resolution files should be cleaned") + } + } }