-
Notifications
You must be signed in to change notification settings - Fork 29k
SPARK-10878 Fix race condition when multiple clients resolves artifacts at the same time #18801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1216,7 +1216,36 @@ private[spark] object SparkSubmitUtils { | |
|
|
||
| /** A nice function to use in tests as well. Values are dummy strings. */ | ||
| def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance( | ||
| ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0")) | ||
| // Include timestamp in module name, so multiple clients resolving maven coordinates at the | ||
| // same time do not modify the same resolution file concurrently. | ||
| ModuleRevisionId.newInstance("org.apache.spark", | ||
| "spark-submit-parent-" + System.currentTimeMillis().toString, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
EDIT: this is not a file, so go with UUID. |
||
| "1.0")) | ||
|
|
||
| /** | ||
| * clear ivy resolution from current launch. The resolution file is usually at | ||
| * ~/.ivy2/org.apache.spark-spark-submit-parent-$timestamp-default.xml, | ||
| * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$timestamp-1.0.xml, and | ||
| * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$timestamp-1.0.properties. | ||
| * Since each launch will have its own resolution files created, delete them after | ||
| * each resolution to prevent accumulation of these files in the ivy cache dir. | ||
| */ | ||
| private def clearIvyResolutionFiles( | ||
| mdId: ModuleRevisionId, | ||
| ivySettings: IvySettings, | ||
| ivyConfName: String): Unit = { | ||
| val currentResolutionFiles = Seq[File]( | ||
| new File(ivySettings.getDefaultCache, | ||
| s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml"), | ||
| new File(ivySettings.getDefaultCache, | ||
| s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.xml"), | ||
| new File(ivySettings.getDefaultCache, | ||
| s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.properties") | ||
| ) | ||
| currentResolutionFiles.foreach{ file => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. space before |
||
| if (file.exists) file.delete | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Resolves any dependencies that were supplied through maven coordinates | ||
|
|
@@ -1267,14 +1296,6 @@ private[spark] object SparkSubmitUtils { | |
|
|
||
| // A Module descriptor must be specified. Entries are dummy strings | ||
| val md = getModuleDescriptor | ||
| // clear ivy resolution from previous launches. The resolution file is usually at | ||
| // ~/.ivy2/org.apache.spark-spark-submit-parent-default.xml. In between runs, this file | ||
| // leads to confusion with Ivy when the files can no longer be found at the repository | ||
| // declared in that file/ | ||
| val mdId = md.getModuleRevisionId | ||
| val previousResolution = new File(ivySettings.getDefaultCache, | ||
| s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml") | ||
| if (previousResolution.exists) previousResolution.delete | ||
|
|
||
| md.setDefaultConf(ivyConfName) | ||
|
|
||
|
|
@@ -1295,7 +1316,10 @@ private[spark] object SparkSubmitUtils { | |
| packagesDirectory.getAbsolutePath + File.separator + | ||
| "[organization]_[artifact]-[revision].[ext]", | ||
| retrieveOptions.setConfs(Array(ivyConfName))) | ||
| resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) | ||
| val paths = resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) | ||
| val mdId = md.getModuleRevisionId | ||
| clearIvyResolutionFiles(mdId, ivySettings, ivyConfName) | ||
| paths | ||
| } finally { | ||
| System.setOut(sysOut) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ import java.io.{File, OutputStream, PrintStream} | |
| import java.nio.charset.StandardCharsets | ||
|
|
||
| import scala.collection.mutable.ArrayBuffer | ||
| import scala.reflect.io.Path._ | ||
|
|
||
| import com.google.common.io.Files | ||
| import org.apache.ivy.core.module.descriptor.MDArtifact | ||
|
|
@@ -255,4 +256,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { | |
| assert(jarPath.indexOf("mydep") >= 0, "should find dependency") | ||
| } | ||
| } | ||
|
|
||
| test("test resolution files cleaned after resolving artifact") { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test case doesn't test the scenario we describe in the PR, that is, having multiple clients trying to resolve artifacts at the same time.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually triggering the original race in a test could be pretty hard, so I'm not sure that we necessarily should block this fix on having an end-to-end integration test which could reproduce the original race. I think that the use of the UUID should be sufficient and therefore the only important thing to test is that we're still cleaning up the files properly (as is being done here). Therefore I would welcome a re-submitted and cleaned-up version of this PR which addresses the other review comments (which I hope to merge soon if it's in good shape). |
||
| val main = new MavenCoordinate("my.great.lib", "mylib", "0.1") | ||
|
|
||
| IvyTestUtils.withRepository(main, None, None) { repo => | ||
| val ivySettings = SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath)) | ||
| val jarPath = SparkSubmitUtils.resolveMavenCoordinates( | ||
| main.toString, | ||
| ivySettings, | ||
| isTest = true) | ||
| val r = """.*org.apache.spark-spark-submit-parent-.*""".r | ||
| assert(ivySettings.getDefaultCache.toDirectory.files.map(_.name) | ||
| .forall{ case n @ r() => false case _ => true }, | ||
| "resolution files should be cleaned") | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
coordinates->coordinate?