Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 34 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1848,11 +1848,43 @@ class Dataset[T] private[sql](
Except(logicalPlan, other.logicalPlan)
}

/**
* Returns a new [[Dataset]] by sampling a fraction of rows (without replacement),
* using a user-supplied seed.
*
* @param fraction Fraction of rows to generate, range [0.0, 1.0].
* @param seed Seed for sampling.
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
* of the given [[Dataset]].
*
* @group typedrel
* @since 2.3.0
*/
def sample(fraction: Double, seed: Long): Dataset[T] =
  // Convenience overload: sampling without replacement is the common case, so forward
  // to the three-argument variant with `withReplacement` pinned to false.
  sample(withReplacement = false, fraction = fraction, seed = seed)

/**
* Returns a new [[Dataset]] by sampling a fraction of rows (without replacement),
* using a random seed.
*
* @param fraction Fraction of rows to generate, range [0.0, 1.0].
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
* of the given [[Dataset]].
*
* @group typedrel
* @since 2.3.0
*/
def sample(fraction: Double): Dataset[T] =
  // Convenience overload: forward to the (withReplacement, fraction) variant with
  // `withReplacement` pinned to false; no seed is passed from here.
  sample(withReplacement = false, fraction = fraction)

/**
* Returns a new [[Dataset]] by sampling a fraction of rows, using a user-supplied seed.
*
* @param withReplacement Sample with replacement or not.
* @param fraction Fraction of rows to generate.
* @param fraction Fraction of rows to generate, range [0.0, 1.0].
* @param seed Seed for sampling.
*
* @note This is NOT guaranteed to provide exactly the fraction of the count
Expand All @@ -1871,7 +1903,7 @@ class Dataset[T] private[sql](
* Returns a new [[Dataset]] by sampling a fraction of rows, using a random seed.
*
* @param withReplacement Sample with replacement or not.
* @param fraction Fraction of rows to generate.
* @param fraction Fraction of rows to generate, range [0.0, 1.0].
*
* @note This is NOT guaranteed to provide exactly the fraction of the total count
* of the given [[Dataset]].
Expand Down