Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,8 @@ jobs:
run: ./dev/lint-r
- name: Run documentation build
run: |
# We need this link because the jekyll build calls `python`.
ln -s "$(which python3.9)" "/usr/local/bin/python"
# Build docs first with SKIP_API to ensure they are buildable without requiring any
# language docs to be built beforehand.
cd docs; SKIP_API=1 bundle exec jekyll build; cd ..
Expand Down
222 changes: 0 additions & 222 deletions core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ package org.apache.spark
import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.Locale

import scala.jdk.CollectionConverters._
import scala.util.Properties.lineSeparator
import scala.util.matching.Regex

import com.fasterxml.jackson.annotation.JsonInclude.Include
import com.fasterxml.jackson.core.JsonParser.Feature.STRICT_DUPLICATE_DETECTION
Expand All @@ -48,12 +45,6 @@ class SparkThrowableSuite extends SparkFunSuite {
SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \
"core/testOnly *SparkThrowableSuite -- -t \"Error classes are correctly formatted\""
}}}

To regenerate the error class document. Run:
{{{
SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \
"core/testOnly *SparkThrowableSuite -- -t \"Error classes match with document\""
}}}
*/
private val regenerateCommand = "SPARK_GENERATE_GOLDEN_FILES=1 build/sbt " +
"\"core/testOnly *SparkThrowableSuite -- -t \\\"Error classes match with document\\\"\""
Expand Down Expand Up @@ -173,219 +164,6 @@ class SparkThrowableSuite extends SparkFunSuite {
checkIfUnique(messageFormats)
}

// Verifies that the generated Markdown error-condition documents under docs/ are in
// sync with the error-class definitions in `errorReader.errorInfoMap`. When
// SPARK_GENERATE_GOLDEN_FILES=1 is set, the documents are (re)generated instead of
// checked, and orphaned per-class documents are cleaned up.
test("Error classes match with document") {
  val errors = errorReader.errorInfoMap

  // Error class names whose message content must NOT have parameters wrapped in
  // backquotes (their templates are rendered verbatim).
  val contentQuoteBlackList = Seq(
    "INCOMPLETE_TYPE_DEFINITION.MAP",
    "INCOMPLETE_TYPE_DEFINITION.STRUCT")

  // Wraps `<param>` placeholders and `%...$` format specifiers in Markdown
  // backquotes so they render as inline code, unless `errorName` is blacklisted.
  def quoteParameter(content: String, errorName: String): String = {
    if (contentQuoteBlackList.contains(errorName)) {
      content
    } else {
      "<(.*?)>".r.replaceAllIn(content, (m: Regex.Match) => {
        val matchStr = m.group(1)
        if (matchStr.nonEmpty) {
          s"`<$matchStr>`"
        } else {
          // An empty "<>" is left untouched.
          m.matched
        }
      }).replaceAll("%(.*?)\\$", "`\\%$1\\$`")
    }
  }

  // Map from the SQLSTATE shown in each "##" header of
  // docs/sql-error-conditions-sqlstates.md to that section's Markdown anchor.
  // Lookups below use the first two characters of a full SQLSTATE, so the header
  // values are presumably two-character class codes — TODO confirm against the doc.
  val sqlStates = IOUtils.toString(getWorkspaceFilePath("docs",
    "sql-error-conditions-sqlstates.md").toUri, StandardCharsets.UTF_8).split("\n")
    .filter(_.startsWith("##")).map(s => {

    // Header tokens: split on backquotes, colons, '#' and whitespace.
    val errorHeader = s.split("[`|:|#|\\s]+").filter(_.nonEmpty)
    val sqlState = errorHeader(1)
    // Anchor format: "<first-word>-<sqlstate>-<remaining-words>" in lower case.
    (sqlState, errorHeader.head.toLowerCase(Locale.ROOT) + "-" + sqlState + "-" +
      errorHeader.takeRight(errorHeader.length - 2).mkString("-").toLowerCase(Locale.ROOT))
  }).toMap

  // Renders the "SQLSTATE: ..." line for an error class, linking to the SQLSTATE
  // codes page when the two-character class prefix has a known anchor.
  def getSqlState(sqlState: Option[String]): String = {
    if (sqlState.isDefined) {
      val prefix = sqlState.get.substring(0, 2)
      if (sqlStates.contains(prefix)) {
        s"[SQLSTATE: ${sqlState.get}](sql-error-conditions-sqlstates.html#${sqlStates(prefix)})"
      } else {
        "SQLSTATE: " + sqlState.get
      }
    } else {
      "SQLSTATE: none assigned"
    }
  }

  // File-name stem (no ".md") for an error class's per-class document page.
  def getErrorPath(error: String): String = {
    s"sql-error-conditions-${error.toLowerCase(Locale.ROOT).replaceAll("_", "-")}-error-class"
  }

  // Jekyll front matter (layout/title/license) plus a do-not-edit banner that is
  // prepended to every generated document.
  def getHeader(title: String): String = {
    s"""---
       |layout: global
       |title: $title
       |displayTitle: $title
       |license: |
       | Licensed to the Apache Software Foundation (ASF) under one or more
       | contributor license agreements. See the NOTICE file distributed with
       | this work for additional information regarding copyright ownership.
       | The ASF licenses this file to You under the Apache License, Version 2.0
       | (the "License"); you may not use this file except in compliance with
       | the License. You may obtain a copy of the License at
       |
       | http://www.apache.org/licenses/LICENSE-2.0
       |
       | Unless required by applicable law or agreed to in writing, software
       | distributed under the License is distributed on an "AS IS" BASIS,
       | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       | See the License for the specific language governing permissions and
       | limitations under the License.
       |---
       |
       |<!--
       | DO NOT EDIT THIS FILE.
       | It was generated automatically by `${getClass().getName()}`.
       |-->""".stripMargin
  }

  // Per-class ".md" files in docs/ that no longer correspond to an error class
  // with sub-classes; candidates for cleanup (or an assertion failure).
  def orphanedGoldenFiles(): Iterable[File] = {
    val subErrorFileNames = errors.filter(_._2.subClass.isDefined).map(error => {
      getErrorPath(error._1) + ".md"
    }).toSet

    val docsDir = getWorkspaceFilePath("docs")
    // Non-recursive listing: only top-level docs/*.md files are considered.
    val orphans = FileUtils.listFiles(docsDir.toFile, Array("md"), false).asScala.filter { f =>
      (f.getName.startsWith("sql-error-conditions-") && f.getName.endsWith("-error-class.md")) &&
        !subErrorFileNames.contains(f.getName)
    }
    orphans
  }

  // Body of the parent document: one "###" section per error class (excluding
  // internal "_LEGACY_ERROR*" classes), sorted by name. Classes with sub-classes
  // link to their dedicated page.
  val sqlErrorParentDocContent = errors.toSeq.filter(!_._1.startsWith("_LEGACY_ERROR"))
    .sortBy(_._1).map(error => {
    val name = error._1
    val info = error._2
    if (info.subClass.isDefined) {
      val title = s"[$name](${getErrorPath(name)}.html)"
      s"""|### $title
          |
          |${getSqlState(info.sqlState)}
          |
          |${quoteParameter(info.messageTemplate, name)}
          |
          |For more details see $title
          |""".stripMargin
    } else {
      s"""|### $name
          |
          |${getSqlState(info.sqlState)}
          |
          |${quoteParameter(info.messageTemplate, name)}
          |""".stripMargin
    }
  }).mkString("\n")

  val sqlErrorParentDoc =
    s"""${getHeader("Error Conditions")}
       |
       |This is a list of common, named error conditions returned by Spark SQL.
       |
       |Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html).
       |
       |$sqlErrorParentDocContent""".stripMargin

  // For every error class that has sub-classes, build its dedicated document and
  // either regenerate the golden file or assert it is up to date.
  errors.filter(_._2.subClass.isDefined).foreach(error => {
    val name = error._1
    val info = error._2

    // One "##" section per sub-class, sorted by sub-class name.
    val subErrorContent = info.subClass.get.toSeq.sortBy(_._1).map(subError => {
      s"""|## ${subError._1}
          |
          |${quoteParameter(subError._2.messageTemplate, s"$name.${subError._1}")}
          |""".stripMargin
    }).mkString("\n")

    val subErrorDoc =
      s"""${getHeader(name + " error class")}
         |
         |${getSqlState(info.sqlState)}
         |
         |${quoteParameter(info.messageTemplate, name)}
         |
         |This error class has the following derived error classes:
         |
         |$subErrorContent
         |""".stripMargin

    val errorDocPath = getWorkspaceFilePath("docs", getErrorPath(name) + ".md")
    // Missing file is treated as empty content so the comparison below still works.
    val errorsInDoc = if (errorDocPath.toFile.exists()) {
      IOUtils.toString(errorDocPath.toUri, StandardCharsets.UTF_8)
    } else {
      ""
    }
    if (regenerateGoldenFiles) {
      // Only rewrite when the content actually changed, to keep timestamps stable.
      if (subErrorDoc.trim != errorsInDoc.trim) {
        logInfo(s"Regenerating sub error class document $errorDocPath")
        if (errorDocPath.toFile.exists()) {
          Files.delete(errorDocPath)
        }
        FileUtils.writeStringToFile(
          errorDocPath.toFile,
          subErrorDoc + lineSeparator,
          StandardCharsets.UTF_8)
      }
    } else {
      assert(subErrorDoc.trim == errorsInDoc.trim,
        "The error class document is not up to date. " +
          s"Please regenerate it by running `$regenerateCommand`")
    }
  })

  // Same check/regenerate cycle for the parent document listing all error classes.
  val parentDocPath = getWorkspaceFilePath("docs", "sql-error-conditions.md")
  val commonErrorsInDoc = if (parentDocPath.toFile.exists()) {
    IOUtils.toString(parentDocPath.toUri, StandardCharsets.UTF_8)
  } else {
    ""
  }
  if (regenerateGoldenFiles) {
    if (sqlErrorParentDoc.trim != commonErrorsInDoc.trim) {
      logInfo(s"Regenerating error class document $parentDocPath")
      if (parentDocPath.toFile.exists()) {
        Files.delete(parentDocPath)
      }
      FileUtils.writeStringToFile(
        parentDocPath.toFile,
        sqlErrorParentDoc,
        StandardCharsets.UTF_8)
    }
  } else {
    assert(sqlErrorParentDoc.trim == commonErrorsInDoc.trim,
      "The error class document is not up to date. " +
        s"Please regenerate it by running `$regenerateCommand`")
  }

  // Finally, clean up (or fail on) per-class documents whose error class no
  // longer exists or no longer has sub-classes.
  val orphans = orphanedGoldenFiles()
  if (regenerateGoldenFiles) {
    if (orphans.nonEmpty) {
      logInfo(s"Orphaned error class documents (${orphans.size}) is not empty, " +
        "executing cleanup operation.")
      orphans.foreach { f =>
        FileUtils.deleteQuietly(f)
        logInfo(s"Cleanup orphaned error document: ${f.getName}.")
      }
    } else {
      logInfo("Orphaned error class documents is empty")
    }
  } else {
    assert(orphans.isEmpty,
      "Exist orphaned error class documents. " +
        s"Please regenerate it by running `$regenerateCommand`")
  }
}

test("Round trip") {
val tmpFile = File.createTempFile("rewritten", ".json")
val mapper = JsonMapper.builder()
Expand Down
24 changes: 13 additions & 11 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,22 @@ You need to have [Ruby 3][ruby] and [Python 3][python] installed. Make sure the
$ gem install bundler
```

After this all the required ruby dependencies can be installed from the `docs/` directory via the Bundler:
After this, all the required Ruby dependencies can be installed from the `docs/` directory via Bundler:

```sh
$ cd docs
$ cd "$SPARK_HOME"/docs
$ bundle install
```

To generate the Python or R docs, you'll need to [install Pandoc](https://pandoc.org/installing.html).

### SQL and Python API Documentation (Optional)

To generate SQL and Python API docs, you'll need to install these libraries:
The required Python dependencies can be installed using pip. Run the following
command from `$SPARK_HOME`:
```sh
$ cd "$SPARK_HOME"
$ pip install --upgrade -r dev/requirements.txt
```

To generate the Python or R API docs, you'll also need to [install Pandoc](https://pandoc.org/installing.html).

### R API Documentation (Optional)

If you'd like to generate R API documentation, install these libraries:
Expand Down Expand Up @@ -121,6 +119,10 @@ The jekyll plugin also generates the PySpark docs using [Sphinx](http://sphinx-d
using [roxygen2](https://cran.r-project.org/web/packages/roxygen2/index.html) and SQL docs
using [MkDocs](https://www.mkdocs.org/).

NOTE: To skip the step of building and copying over the Scala, Java, Python, R and SQL API docs, run `SKIP_API=1
bundle exec jekyll build`. In addition, `SKIP_SCALADOC=1`, `SKIP_PYTHONDOC=1`, `SKIP_RDOC=1` and `SKIP_SQLDOC=1` can be used
to skip a single step of the corresponding language. `SKIP_SCALADOC` indicates skipping both the Scala and Java docs.
To control which API docs get built, you can set any combination of the following environment variables before you run `bundle exec jekyll build`:
* `SKIP_API=1`: Skip building all the API docs.
* `SKIP_SCALADOC=1`: Skip the Scala and Java API docs.
* `SKIP_PYTHONDOC=1`: Skip the Python API docs.
* `SKIP_RDOC=1`: Skip the R API docs.
* `SKIP_SQLDOC=1`: Skip the SQL API docs.

45 changes: 0 additions & 45 deletions docs/_data/menu-sql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,48 +106,3 @@
url: sql-ref-syntax.html#auxiliary-statements
- text: Error Conditions
url: sql-error-conditions.html
subitems:
- text: SQLSTATE Codes
url: sql-error-conditions-sqlstates.html
- text: COLLECTION_SIZE_LIMIT_EXCEEDED error class
url: sql-error-conditions-collection-size-limit-exceeded-error-class.html
- text: CONNECT error class
url: sql-error-conditions-connect-error-class.html
- text: DATATYPE_MISMATCH error class
url: sql-error-conditions-datatype-mismatch-error-class.html
- text: INCOMPATIBLE_DATA_FOR_TABLE error class
url: sql-error-conditions-incompatible-data-for-table-error-class.html
- text: INCOMPLETE_TYPE_DEFINITION error class
url: sql-error-conditions-incomplete-type-definition-error-class.html
- text: INCONSISTENT_BEHAVIOR_CROSS_VERSION error class
url: sql-error-conditions-inconsistent-behavior-cross-version-error-class.html
- text: INVALID_FORMAT error class
url: sql-error-conditions-invalid-format-error-class.html
- text: INVALID_OPTIONS error class
url: sql-error-conditions-invalid-options-error-class.html
- text: INVALID_PARAMETER_VALUE error class
url: sql-error-conditions-invalid-parameter-value-error-class.html
- text: INVALID_SCHEMA error class
url: sql-error-conditions-invalid-schema-error-class.html
- text: INVALID_SUBQUERY_EXPRESSION error class
url: sql-error-conditions-invalid-subquery-expression-error-class.html
- text: NOT_NULL_CONSTRAINT_VIOLATION error class
url: sql-error-conditions-not-null-constraint-violation-error-class.html
- text: UNRESOLVED_COLUMN error class
url: sql-error-conditions-unresolved-column-error-class.html
- text: UNRESOLVED_FIELD error class
url: sql-error-conditions-unresolved-field-error-class.html
- text: UNRESOLVED_MAP_KEY error class
url: sql-error-conditions-unresolved-map-key-error-class.html
- text: UNSUPPORTED_DESERIALIZER error class
url: sql-error-conditions-unsupported-deserializer-error-class.html
- text: UNSUPPORTED_FEATURE error class
url: sql-error-conditions-unsupported-feature-error-class.html
- text: UNSUPPORTED_GENERATOR error class
url: sql-error-conditions-unsupported-generator-error-class.html
- text: UNSUPPORTED_SAVE_MODE error class
url: sql-error-conditions-unsupported-save-mode-error-class.html
- text: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY error class
url: sql-error-conditions-unsupported-subquery-expression-category-error-class.html
- text: WRONG_NUM_ARGS error class
url: sql-error-conditions-wrong-num-args-error-class.html
8 changes: 8 additions & 0 deletions docs/_plugins/build_api_docs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ def build_sql_docs
cp_r("../sql/site/.", "api/sql")
end

# Generate the SQL error-condition Markdown pages by running the
# docs/util/build-error-docs.py helper script.
#
# Uses the multi-argument form of Kernel#system so the script path is passed
# to the interpreter directly instead of through a shell, avoiding quoting
# problems when SPARK_PROJECT_ROOT contains spaces or quote characters.
# Raises if the script exits non-zero or cannot be started (system returns
# false or nil, respectively).
def build_error_docs
  print_header "Building error docs."
  system("python", "#{SPARK_PROJECT_ROOT}/docs/util/build-error-docs.py") \
    || raise("Error doc generation failed")
end

build_error_docs

if not (ENV['SKIP_API'] == '1')
if not (ENV['SKIP_SCALADOC'] == '1')
build_scala_and_java_docs
Expand Down
23 changes: 23 additions & 0 deletions docs/css/custom.css
Original file line number Diff line number Diff line change
Expand Up @@ -988,3 +988,26 @@ table.spark-config th:nth-child(4),
table.spark-config td:nth-child(4) {
width: 90px;
}

/* Styling for the generated table of SQL error conditions.
   Uses nested rules: everything inside applies to descendants of
   table#error-conditions. */
table#error-conditions {
  /* Fixed layout so the explicit column width below is honored. */
  table-layout: fixed;

  span.error-condition-name {
    /* Any error names that wrap will have the wrapped lines indented
    relative to the first line thanks to these three rules.
    */
    padding-left: 2em;
    text-indent: -2em;
    display: block;
  }

  th:nth-child(1),
  td:nth-child(1) {
    /* Width of the first table column; 85px was tried previously and kept
       here for reference. */
    /* width: 85px; */
    width: 105px;
  }

  /* Indent sub-condition rows relative to their parent condition. */
  td.error-sub-condition {
    padding-left: 2.5em;
  }
}
Loading