44 changes: 34 additions & 10 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -1216,7 +1216,36 @@ private[spark] object SparkSubmitUtils {

   /** A nice function to use in tests as well. Values are dummy strings. */
   def getModuleDescriptor: DefaultModuleDescriptor = DefaultModuleDescriptor.newDefaultInstance(
-    ModuleRevisionId.newInstance("org.apache.spark", "spark-submit-parent", "1.0"))
+    // Include timestamp in module name, so multiple clients resolving maven coordinates at the

Member: nit: coordinates -> coordinate?

+    // same time do not modify the same resolution file concurrently.
+    ModuleRevisionId.newInstance("org.apache.spark",
+      "spark-submit-parent-" + System.currentTimeMillis().toString,

Contributor: nit: "spark-submit-parent-" + System.currentTimeMillis().toString -> s"spark-submit-parent-${System.currentTimeMillis}"

vanzin (Contributor, Dec 12, 2017): System.currentTimeMillis() is not very unique. If you want uniqueness, use a UUID, or better yet, java.nio.file.Files.createTempFile.

EDIT: this is not a file, so go with UUID.

"1.0"))

+  /**
+   * Clear ivy resolution files from the current launch. The resolution files are usually at
+   * ~/.ivy2/org.apache.spark-spark-submit-parent-$timestamp-default.xml,
+   * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$timestamp-1.0.xml, and
+   * ~/.ivy2/resolved-org.apache.spark-spark-submit-parent-$timestamp-1.0.properties.
+   * Since each launch creates its own resolution files, delete them after
+   * each resolution to prevent them from accumulating in the ivy cache dir.
+   */
+  private def clearIvyResolutionFiles(
+      mdId: ModuleRevisionId,
+      ivySettings: IvySettings,
+      ivyConfName: String): Unit = {
+    val currentResolutionFiles = Seq[File](
+      new File(ivySettings.getDefaultCache,
+        s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml"),
+      new File(ivySettings.getDefaultCache,
+        s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.xml"),
+      new File(ivySettings.getDefaultCache,
+        s"resolved-${mdId.getOrganisation}-${mdId.getName}-${mdId.getRevision}.properties")
+    )
+    currentResolutionFiles.foreach{ file =>

Contributor: space before {

+      if (file.exists) file.delete

Contributor: .exists() is not needed before .delete(). Also, add parentheses.

+    }
+  }
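
Applying both nits above, the loop would read as follows (a sketch, not part of this diff; File.delete() already returns false when the file does not exist, so the exists() guard is redundant):

    currentResolutionFiles.foreach { file =>
      file.delete()
    }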

/**
* Resolves any dependencies that were supplied through maven coordinates
@@ -1267,14 +1296,6 @@ private[spark] object SparkSubmitUtils {

// A Module descriptor must be specified. Entries are dummy strings
val md = getModuleDescriptor
-    // clear ivy resolution from previous launches. The resolution file is usually at
-    // ~/.ivy2/org.apache.spark-spark-submit-parent-default.xml. In between runs, this file
-    // leads to confusion with Ivy when the files can no longer be found at the repository
-    // declared in that file/
-    val mdId = md.getModuleRevisionId
-    val previousResolution = new File(ivySettings.getDefaultCache,
-      s"${mdId.getOrganisation}-${mdId.getName}-$ivyConfName.xml")
-    if (previousResolution.exists) previousResolution.delete

md.setDefaultConf(ivyConfName)

@@ -1295,7 +1316,10 @@
packagesDirectory.getAbsolutePath + File.separator +
"[organization]_[artifact]-[revision].[ext]",
retrieveOptions.setConfs(Array(ivyConfName)))
-      resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
+      val paths = resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory)
+      val mdId = md.getModuleRevisionId
+      clearIvyResolutionFiles(mdId, ivySettings, ivyConfName)
+      paths
} finally {
System.setOut(sysOut)
}
core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala
@@ -21,6 +21,7 @@ import java.io.{File, OutputStream, PrintStream}
import java.nio.charset.StandardCharsets

import scala.collection.mutable.ArrayBuffer
+import scala.reflect.io.Path._

import com.google.common.io.Files
import org.apache.ivy.core.module.descriptor.MDArtifact
@@ -255,4 +256,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll {
assert(jarPath.indexOf("mydep") >= 0, "should find dependency")
}
}

test("test resolution files cleaned after resolving artifact") {

Contributor: This test case doesn't test the scenario we describe in the PR, that is, having multiple clients trying to resolve artifacts at the same time.

Contributor: Actually, triggering the original race in a test could be pretty hard, so I'm not sure that we should block this fix on having an end-to-end integration test which could reproduce the original race. I think that the use of the UUID should be sufficient, and therefore the only important thing to test is that we're still cleaning up the files properly (as is being done here).

Therefore I would welcome a re-submitted and cleaned-up version of this PR which addresses the other review comments (which I hope to merge soon if it's in good shape).

+    val main = new MavenCoordinate("my.great.lib", "mylib", "0.1")
+
+    IvyTestUtils.withRepository(main, None, None) { repo =>
+      val ivySettings = SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath))
+      val jarPath = SparkSubmitUtils.resolveMavenCoordinates(
+        main.toString,
+        ivySettings,
+        isTest = true)
+      val r = """.*org.apache.spark-spark-submit-parent-.*""".r
+      assert(ivySettings.getDefaultCache.toDirectory.files.map(_.name)
+        .forall{ case n @ r() => false case _ => true },
+        "resolution files should be cleaned")
+    }
+  }
}
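
As a rough illustration of the property the reviewers settle on, the sketch below (hypothetical, not part of this diff) shows why unique module names sidestep the race: concurrent launches each derive a distinct name, so no two of them ever touch the same resolution file. It uses only plain JDK concurrency utilities and java.util.UUID.

    import java.util.UUID
    import java.util.concurrent.{ConcurrentHashMap, Executors, TimeUnit}

    object UniqueModuleNameSketch {
      def main(args: Array[String]): Unit = {
        val pool = Executors.newFixedThreadPool(4)
        val names = ConcurrentHashMap.newKeySet[String]()
        // Four "clients" generate module names at roughly the same instant.
        // Timestamp-based names can collide here; UUID-based names cannot.
        (1 to 4).foreach { _ =>
          pool.execute(() => names.add(s"spark-submit-parent-${UUID.randomUUID()}"))
        }
        pool.shutdown()
        pool.awaitTermination(10, TimeUnit.SECONDS)
        assert(names.size == 4, "concurrent launches should get distinct module names")
      }
    }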