From 8c511d94474cd5d2accf5b07acb907fabb80bc1b Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:04:32 -0500 Subject: [PATCH 01/10] assign config tables to spark-config CSS class --- connector/profiler/README.md | 2 +- docs/configuration.md | 38 +++++++++---------- docs/monitoring.md | 2 +- docs/running-on-kubernetes.md | 2 +- docs/running-on-yarn.md | 6 +-- docs/security.md | 18 ++++----- docs/spark-standalone.md | 16 ++++---- docs/sql-data-sources-avro.md | 8 ++-- docs/sql-data-sources-hive-tables.md | 2 +- docs/sql-data-sources-orc.md | 4 +- docs/sql-data-sources-parquet.md | 2 +- docs/sql-performance-tuning.md | 16 ++++---- .../structured-streaming-kafka-integration.md | 8 ++-- 13 files changed, 62 insertions(+), 62 deletions(-) diff --git a/connector/profiler/README.md b/connector/profiler/README.md index 3512dadb0791..527f8b487d4d 100644 --- a/connector/profiler/README.md +++ b/connector/profiler/README.md @@ -40,7 +40,7 @@ Then enable the profiling in the configuration. ### Code profiling configuration - +
diff --git a/docs/configuration.md b/docs/configuration.md index 7fef09781a15..0f80a892c067 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -139,7 +139,7 @@ of the most common options to set are: ### Application Properties -
Property NameDefaultMeaningSince Version
spark.executor.profiling.enabled
+
@@ -553,7 +553,7 @@ Apart from these, the following properties are also available, and may be useful ### Runtime Environment -
Property NameDefaultMeaningSince Version
spark.app.name
+
@@ -940,7 +940,7 @@ Apart from these, the following properties are also available, and may be useful ### Shuffle Behavior -
Property NameDefaultMeaningSince Version
spark.driver.extraClassPath
+
@@ -1315,7 +1315,7 @@ Apart from these, the following properties are also available, and may be useful ### Spark UI -
Property NameDefaultMeaningSince Version
spark.reducer.maxSizeInFlight
+
@@ -1755,7 +1755,7 @@ Apart from these, the following properties are also available, and may be useful ### Compression and Serialization -
Property NameDefaultMeaningSince Version
spark.eventLog.logBlockUpdates.enabled
+
@@ -1972,7 +1972,7 @@ Apart from these, the following properties are also available, and may be useful ### Memory Management -
Property NameDefaultMeaningSince Version
spark.broadcast.compress
+
@@ -2097,7 +2097,7 @@ Apart from these, the following properties are also available, and may be useful ### Execution Behavior -
Property NameDefaultMeaningSince Version
spark.memory.fraction
+
@@ -2342,7 +2342,7 @@ Apart from these, the following properties are also available, and may be useful ### Executor Metrics -
Property NameDefaultMeaningSince Version
spark.broadcast.blockSize
+
@@ -2410,7 +2410,7 @@ Apart from these, the following properties are also available, and may be useful ### Networking -
Property NameDefaultMeaningSince Version
spark.eventLog.logStageExecutorMetrics
+
@@ -2573,7 +2573,7 @@ Apart from these, the following properties are also available, and may be useful ### Scheduling -
Property NameDefaultMeaningSince Version
spark.rpc.message.maxSize
+
@@ -3054,7 +3054,7 @@ Apart from these, the following properties are also available, and may be useful ### Barrier Execution Mode -
Property NameDefaultMeaningSince Version
spark.cores.max
+
@@ -3101,7 +3101,7 @@ Apart from these, the following properties are also available, and may be useful ### Dynamic Allocation -
Property NameDefaultMeaningSince Version
spark.barrier.sync.timeout
+
@@ -3243,7 +3243,7 @@ finer granularity starting from driver and executor. Take RPC module as example like shuffle, just replace "rpc" with "shuffle" in the property names except spark.{driver|executor}.rpc.netty.dispatcher.numThreads, which is only for RPC module. -
Property NameDefaultMeaningSince Version
spark.dynamicAllocation.enabled
+
@@ -3281,7 +3281,7 @@ the driver or executor, or, in the absence of that value, the number of cores av Server configurations are set in Spark Connect server, for example, when you start the Spark Connect server with `./sbin/start-connect-server.sh`. They are typically set via the config file and command-line options with `--conf/-c`. -
Property NameDefaultMeaningSince Version
spark.{driver|executor}.rpc.io.serverThreads
+
@@ -3373,7 +3373,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### Spark Streaming -
Property NameDefaultMeaningSince Version
spark.connect.grpc.binding.port
+
@@ -3505,7 +3505,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### SparkR -
Property NameDefaultMeaningSince Version
spark.streaming.backpressure.enabled
+
@@ -3561,7 +3561,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### GraphX -
Property NameDefaultMeaningSince Version
spark.r.numRBackendThreads
+
@@ -3735,7 +3735,7 @@ Push-based shuffle helps improve the reliability and performance of spark shuffl ### External Shuffle service(server) side configuration options -
Property NameDefaultMeaningSince Version
spark.graphx.pregel.checkpointInterval
+
@@ -3769,7 +3769,7 @@ Push-based shuffle helps improve the reliability and performance of spark shuffl ### Client side configuration options -
Property NameDefaultMeaningSince Version
spark.shuffle.push.server.mergedShuffleFileManagerImpl
+
diff --git a/docs/monitoring.md b/docs/monitoring.md index 8d3dbe375b82..79bbb93e50d1 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -145,7 +145,7 @@ Use it with caution. Security options for the Spark History Server are covered more detail in the [Security](security.html#web-ui) page. -
Property NameDefaultMeaningSince Version
spark.shuffle.push.enabled
+
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 4b4dc9d304fb..4cdb450ffd74 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -592,7 +592,7 @@ See the [configuration page](configuration.html) for information on Spark config #### Spark Properties -
Property Name
+
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 02547b30d2e5..aab8ee60a256 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -143,7 +143,7 @@ To use a custom metrics.properties for the application master and executors, upd #### Spark Properties -
Property NameDefaultMeaningSince Version
spark.kubernetes.context
+
@@ -766,7 +766,7 @@ staging directory of the Spark application. ## YARN-specific Kerberos Configuration -
Property NameDefaultMeaningSince Version
spark.yarn.am.memory
+
@@ -865,7 +865,7 @@ to avoid garbage collection issues during shuffle. The following extra configuration options are available when the shuffle service is running on YARN: -
Property NameDefaultMeaningSince Version
spark.kerberos.keytab
+
diff --git a/docs/security.md b/docs/security.md index 00e35ce2f499..61d5bf8e9d3a 100644 --- a/docs/security.md +++ b/docs/security.md @@ -60,7 +60,7 @@ distributing the shared secret. Each application will use a unique shared secret the case of YARN, this feature relies on YARN RPC encryption being enabled for the distribution of secrets to be secure. -
Property NameDefaultMeaningSince Version
spark.yarn.shuffle.stopOnFailure
+
@@ -82,7 +82,7 @@ that any user that can list pods in the namespace where the Spark application is also see their authentication secret. Access control rules should be properly set up by the Kubernetes admin to ensure that Spark authentication is secure. -
Property NameDefaultMeaningSince Version
spark.yarn.shuffle.server.recovery.disabled
+
@@ -103,7 +103,7 @@ Kubernetes admin to ensure that Spark authentication is secure. Alternatively, one can mount authentication secrets using files and Kubernetes secrets that the user mounts into their pods. -
Property NameDefaultMeaningSince Version
spark.authenticate
+
@@ -178,7 +178,7 @@ is still required when talking to shuffle services from Spark versions older tha The following table describes the different options available for configuring this feature. -
Property NameDefaultMeaningSince Version
spark.authenticate.secret.file
+
@@ -249,7 +249,7 @@ encrypting output data generated by applications with APIs such as `saveAsHadoop The following settings cover enabling encryption for data written to disk: -
Property NameDefaultMeaningSince Version
spark.network.crypto.enabled
+
@@ -317,7 +317,7 @@ below. The following options control the authentication of Web UIs: -
Property NameDefaultMeaningSince Version
spark.io.encryption.enabled
+
@@ -421,7 +421,7 @@ servlet filters. To enable authorization in the SHS, a few extra options are used: -
Property NameDefaultMeaningSince Version
spark.ui.allowFramingFrom
+
@@ -734,7 +734,7 @@ Apache Spark can be configured to include HTTP headers to aid in preventing Cros (XSS), Cross-Frame Scripting (XFS), MIME-Sniffing, and also to enforce HTTP Strict Transport Security. -
Property NameDefaultMeaningSince Version
spark.history.ui.acls.enable
+
@@ -917,7 +917,7 @@ deployment-specific page for more information. The following options provides finer-grained control for this feature: -
Property NameDefaultMeaningSince Version
spark.ui.xXssProtection
+
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index fdc28aac934d..6e46ec23d27f 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -200,7 +200,7 @@ You can optionally configure the cluster further by setting environment variable SPARK_MASTER_OPTS supports the following system properties: -
Property NameDefaultMeaningSince Version
spark.security.credentials.${service}.enabled
+
@@ -416,7 +416,7 @@ SPARK_MASTER_OPTS supports the following system properties: SPARK_WORKER_OPTS supports the following system properties: -
Property NameDefaultMeaningSince Version
spark.master.ui.port
+
@@ -549,8 +549,8 @@ You can also pass an option `--total-executor-cores ` to control the n Spark applications supports the following configuration properties specific to standalone mode: -
Property NameDefaultMeaningSince Version
spark.worker.initialRegistrationRetries
- +
Property NameDefault ValueMeaningSince Version
+ @@ -599,8 +599,8 @@ via http://[host:port]/[version]/submissions/[action] where version is a protocol version, v1 as of today, and action is one of the following supported actions. -
Property NameDefault ValueMeaningSince Version
spark.standalone.submit.waitAppCompletion false
- +
CommandDescriptionHTTP METHODSince Version
+ @@ -778,8 +778,8 @@ ZooKeeper is the best way to go for production-level high availability, but if y In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env using this configuration: -
CommandDescriptionHTTP METHODSince Version
create Create a Spark driver via cluster mode.
- +
System propertyDefault ValueMeaningSince Version
+ diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index ddfdc89370b1..cbc3367e5f85 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -233,8 +233,8 @@ Data source options of Avro can be set via: * the `.option` method on `DataFrameReader` or `DataFrameWriter`. * the `options` parameter in function `from_avro`. -
System propertyDefault ValueMeaningSince Version
spark.deploy.recoveryMode NONE
- +
Property NameDefaultMeaningScopeSince Version
+ @@ -331,8 +331,8 @@ Data source options of Avro can be set via: ## Configuration Configuration of Avro can be done via `spark.conf.set` or by running `SET key=value` commands using SQL. -
Property NameDefaultMeaningScopeSince Version
avroSchema None
- +
Property NameDefaultMeaningSince Version
+ diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index 0d16272ed6f8..b51cde53bd8f 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -123,7 +123,7 @@ will compile against built-in Hive and use those classes for internal execution The following options can be used to configure the version of Hive that is used to retrieve metadata: -
Property NameDefaultMeaningSince Version
spark.sql.legacy.replaceDatabricksSparkAvro.enabled true
+
diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index abd1901d24e4..8267d39e949e 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ -129,8 +129,8 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC ### Configuration -
Property NameDefaultMeaningSince Version
spark.sql.hive.metastore.version
- +
Property NameDefaultMeaningSince Version
+ diff --git a/docs/sql-data-sources-parquet.md b/docs/sql-data-sources-parquet.md index 7d8034321481..e944db24d76b 100644 --- a/docs/sql-data-sources-parquet.md +++ b/docs/sql-data-sources-parquet.md @@ -434,7 +434,7 @@ Other generic options can be found in diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index 4ede18d1938b..1dbe1bb7e1a2 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -34,7 +34,7 @@ memory usage and GC pressure. You can call `spark.catalog.uncacheTable("tableNam Configuration of in-memory caching can be done via `spark.conf.set` or by running `SET key=value` commands using SQL. -
Property NameDefaultMeaningSince Version
spark.sql.orc.impl native
Property NameDefaultMeaningSince Version
spark.sql.parquet.binaryAsString
+
@@ -62,7 +62,7 @@ Configuration of in-memory caching can be done via `spark.conf.set` or by runnin The following options can also be used to tune the performance of query execution. It is possible that these options will be deprecated in future release as more optimizations are performed automatically. -
Property NameDefaultMeaningSince Version
spark.sql.inMemoryColumnarStorage.compressed
+
@@ -253,7 +253,7 @@ Adaptive Query Execution (AQE) is an optimization technique in Spark SQL that ma ### Coalescing Post Shuffle Partitions This feature coalesces the post shuffle partitions based on the map output statistics when both `spark.sql.adaptive.enabled` and `spark.sql.adaptive.coalescePartitions.enabled` configurations are true. This feature simplifies the tuning of shuffle partition number when running queries. You do not need to set a proper shuffle partition number to fit your dataset. Spark can pick the proper shuffle partition number at runtime once you set a large enough initial number of shuffle partitions via `spark.sql.adaptive.coalescePartitions.initialPartitionNum` configuration. -
Property NameDefaultMeaningSince Version
spark.sql.files.maxPartitionBytes
+
@@ -298,7 +298,7 @@ This feature coalesces the post shuffle partitions based on the map output stati
Property NameDefaultMeaningSince Version
spark.sql.adaptive.coalescePartitions.enabled
### Splitting skewed shuffle partitions - +
@@ -320,7 +320,7 @@ This feature coalesces the post shuffle partitions based on the map output stati ### Converting sort-merge join to broadcast join AQE converts sort-merge join to broadcast hash join when the runtime statistics of any join side is smaller than the adaptive broadcast hash join threshold. This is not as efficient as planning a broadcast hash join in the first place, but it's better than keep doing the sort-merge join, as we can save the sorting of both the join sides, and read shuffle files locally to save network traffic(if `spark.sql.adaptive.localShuffleReader.enabled` is true) -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.optimizeSkewsInRebalancePartitions.enabled
+
@@ -342,7 +342,7 @@ AQE converts sort-merge join to broadcast hash join when the runtime statistics ### Converting sort-merge join to shuffled hash join AQE converts sort-merge join to shuffled hash join when all post shuffle partitions are smaller than a threshold, the max threshold can see the config `spark.sql.adaptive.maxShuffledHashJoinLocalMapThreshold`. -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.autoBroadcastJoinThreshold
+
@@ -356,7 +356,7 @@ AQE converts sort-merge join to shuffled hash join when all post shuffle partiti ### Optimizing Skew Join Data skew can severely downgrade the performance of join queries. This feature dynamically handles skew in sort-merge join by splitting (and replicating if needed) skewed tasks into roughly evenly sized tasks. It takes effect when both `spark.sql.adaptive.enabled` and `spark.sql.adaptive.skewJoin.enabled` configurations are enabled. -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.maxShuffledHashJoinLocalMapThreshold
+
@@ -393,7 +393,7 @@ Data skew can severely downgrade the performance of join queries. This feature d
Property NameDefaultMeaningSince Version
spark.sql.adaptive.skewJoin.enabled
### Misc - +
diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index c5ffdf025b17..37846216fc75 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -607,7 +607,7 @@ The caching key is built up from the following information: The following properties are available to configure the consumer pool: -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.optimizer.excludedRules
+
@@ -657,7 +657,7 @@ Note that it doesn't leverage Apache Commons Pool due to the difference of chara The following properties are available to configure the fetched data pool: -
Property NameDefaultMeaningSince Version
spark.kafka.consumer.cache.capacity
+
@@ -912,7 +912,7 @@ It will use different Kafka producer when delegation token is renewed; Kafka pro The following properties are available to configure the producer pool: -
Property NameDefaultMeaningSince Version
spark.kafka.consumer.fetchedData.cache.timeout
+
@@ -1039,7 +1039,7 @@ When none of the above applies then unsecure connection assumed. Delegation tokens can be obtained from multiple clusters and ${cluster} is an arbitrary unique identifier which helps to group different configurations. -
Property NameDefaultMeaningSince Version
spark.kafka.producer.cache.timeout
+
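Every hunk in the patch above makes the same one-line change: the opening tag of a documentation config table picks up the `spark-config` CSS class that later patches in this series target via `table.spark-config`. The snippet below is an illustrative sketch only — the exact `class` attribute values and the sample row are assumed, not copied from the hunks (whose original markup is not reproduced here):

```html
<!-- Illustrative sketch; class values and the sample row are assumed, not taken from the hunks. -->

<!-- Before: a plain documentation table -->
<table class="table">
  <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
  <tr><td><code>spark.example.setting</code></td><td>(none)</td><td>A hypothetical config row.</td><td>4.0.0</td></tr>
</table>

<!-- After: the same table also carries the spark-config class -->
<table class="table spark-config">
  <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
  <tr><td><code>spark.example.setting</code></td><td>(none)</td><td>A hypothetical config row.</td><td>4.0.0</td></tr>
</table>
```

Keying the new rules off a dedicated class keeps the fixed column widths and word-breaking added in the later CSS patches scoped to the wide config tables rather than to every table in the docs.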
From 5c77d63a0a026dfa7613adb212ab0741d8d23cfc Mon Sep 17 00:00:00 2001
From: Nicholas Chammas
Date: Tue, 30 Jan 2024 12:05:17 -0500
Subject: [PATCH 02/10] this styling workaround is not needed anymore

---
 docs/spark-standalone.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md
index 6e46ec23d27f..a21d16419fd1 100644
--- a/docs/spark-standalone.md
+++ b/docs/spark-standalone.md
@@ -403,9 +403,7 @@ SPARK_MASTER_OPTS supports the following system properties:
Property NameDefaultMeaningSince Version
spark.kafka.clusters.${cluster}.auth.bootstrap.servers Path to resources file which is used to find various resources while worker starting up. The content of resources file should be formatted like - [{"id":{"componentName": - "spark.worker", "resourceName":"gpu"}, - "addresses":["0","1","2"]}]. + [{"id":{"componentName": "spark.worker", "resourceName":"gpu"}, "addresses":["0","1","2"]}]. If a particular resource is not found in the resources file, the discovery script would be used to find that resource. If the discovery script also does not find the resources, the worker will fail to start up. From d0584d686a220987a6f05cff1b6f318c4d26000c Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:05:49 -0500 Subject: [PATCH 03/10] port config table from Markdown to HTML --- docs/sql-ref-ansi-compliance.md | 38 +++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 93af3e669847..9b933ec1f65c 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -28,10 +28,40 @@ The casting behaviours are defined as store assignment rules in the standard. When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules. This is a separate configuration because its default value is `ANSI`, while the configuration `spark.sql.ansi.enabled` is disabled by default. -|Property Name|Default| Meaning |Since Version| -|-------------|-------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| -|`spark.sql.ansi.enabled`|false| When true, Spark tries to conform to the ANSI SQL specification:
1. Spark SQL will throw runtime exceptions on invalid operations, including integer overflow errors, string parsing errors, etc.
2. Spark will use different type coercion rules for resolving conflicts among data types. The rules are consistently based on data type precedence. |3.0.0| -|`spark.sql.storeAssignmentPolicy`|ANSI| When inserting a value into a column with different data type, Spark will perform type conversion. Currently, we support 3 policies for the type coercion rules: ANSI, legacy and strict.
1. With ANSI policy, Spark performs the type coercion as per ANSI SQL. In practice, the behavior is mostly the same as PostgreSQL. It disallows certain unreasonable type conversions such as converting string to int or double to boolean. On inserting a numeric type column, an overflow error will be thrown if the value is out of the target data type's range.
2. With legacy policy, Spark allows the type coercion as long as it is a valid Cast, which is very loose. e.g. converting string to int or double to boolean is allowed. It is also the only behavior in Spark 2.x and it is compatible with Hive.
3. With strict policy, Spark doesn't allow any possible precision loss or data truncation in type coercion, e.g. converting double to int or decimal to double is not allowed. |3.0.0| + + + + + + + + + + + + + + +
Property NameDefaultMeaningSince Version
spark.sql.ansi.enabledfalse + When true, Spark tries to conform to the ANSI SQL specification:
+ 1. Spark SQL will throw runtime exceptions on invalid operations, including integer overflow + errors, string parsing errors, etc.
+ 2. Spark will use different type coercion rules for resolving conflicts among data types. + The rules are consistently based on data type precedence. +
3.0.0
spark.sql.storeAssignmentPolicyANSI + When inserting a value into a column with different data type, Spark will perform type + conversion. Currently, we support 3 policies for the type coercion rules: ANSI, legacy and + strict.
+ 1. With ANSI policy, Spark performs the type coercion as per ANSI SQL. In practice, the behavior + is mostly the same as PostgreSQL. It disallows certain unreasonable type conversions such as + converting string to int or double to boolean. On inserting a numeric type column, an overflow + error will be thrown if the value is out of the target data type's range.
+ 2. With legacy policy, Spark allows the type coercion as long as it is a valid Cast, which is + very loose. e.g. converting string to int or double to boolean is allowed. It is also the only + behavior in Spark 2.x and it is compatible with Hive.
+ 3. With strict policy, Spark doesn't allow any possible precision loss or data truncation in + type coercion, e.g. converting double to int or decimal to double is not allowed. +
3.0.0
The following subsections present behaviour changes in arithmetic operations, type conversions, and SQL parsing when the ANSI mode enabled. For type conversions in Spark SQL, there are three kinds of them and this article will introduce them one by one: cast, store assignment and type coercion. From 4b38047452314757211d047fe4c837ad6ac686d5 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:06:27 -0500 Subject: [PATCH 04/10] generated SQL config tables also get assigned to spark-config class --- sql/gen-sql-config-docs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index 83334b6a1f53..b69a903b44f9 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -56,7 +56,7 @@ def generate_sql_configs_table_html(sql_configs, path): The table will look something like this: ```html - +
@@ -76,7 +76,7 @@ def generate_sql_configs_table_html(sql_configs, path): with open(path, 'w') as f: f.write(dedent( """ -
Property NameDefaultMeaningSince Version
+
""" )) From 714cde7373403f6e7be7c53d08c1443ed5deba43 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:12:20 -0500 Subject: [PATCH 05/10] remove `.global code` styling it overrides the plain `code` styling because of its specificity and doesn't really help --- docs/css/custom.css | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/css/custom.css b/docs/css/custom.css index 1fabf7be3ac8..5fe48bee57a1 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -557,7 +557,6 @@ pre { border-radius: 4px; } -code, pre { font: 1em Menlo, Monaco, Consolas, "Courier New", monospace; } @@ -741,7 +740,6 @@ h3 { margin: 0; } -.global code, .global pre { font: 1em Menlo, Monaco, Consolas, "Courier New", monospace; } @@ -761,15 +759,6 @@ h3 { border-radius: 4px; } -.global code { - font: 90% "Menlo", "Lucida Console", Consolas, monospace; - white-space: nowrap; - background: transparent; - border-radius: 4px; - padding: 0; - color: inherit; -} - .global pre code { padding: 0; font-size: inherit; From 9ee72bed33437417a34ba518e7e00956b272b465 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:13:07 -0500 Subject: [PATCH 06/10] remove max-width on content --- docs/css/custom.css | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/css/custom.css b/docs/css/custom.css index 5fe48bee57a1..d7e60260a173 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -807,7 +807,6 @@ blockquote { z-index: 1; position: relative; background-color: #FFF; - max-width: 914px; line-height: 1.6; } @@ -815,7 +814,7 @@ blockquote { z-index: 1; position: relative; background-color: #FFF; - max-width: 914px; + max-width: 80%; line-height: 1.6; padding-left: 30px; min-height: 100vh; From 02fcf19cb150d4ac0b2413d319ca65ee2be6a1d8 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:13:58 -0500 Subject: [PATCH 07/10] allow tables and table code to break to prevent overflow --- docs/css/custom.css | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/css/custom.css b/docs/css/custom.css index d7e60260a173..39181e271a26 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -924,8 +924,14 @@ img { table { width: 100%; - overflow-wrap: normal; + overflow-wrap: break-word; border-collapse: collapse; + white-space: normal; +} + +table code { + overflow-wrap: break-word; + white-space: normal; } table th, From 53aead936893ea24cda1490926b816d97f63cd0e Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 12:14:12 -0500 Subject: [PATCH 08/10] give spark config tables their own styling --- docs/css/custom.css | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/css/custom.css b/docs/css/custom.css index 39181e271a26..a5a45f0c636f 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -950,3 +950,38 @@ table tr { table tr:nth-child(2n) { background-color: #F1F4F5; } + +table.spark-config { + width: 100%; + white-space: normal; + overflow-wrap: anywhere; +} + +/* We have long config names and formulas that often show up in tables. To prevent + * any table column from become super wide, we allow the browser to break words at + * any point. + */ +table.spark-config code, +table.spark-config th, +table.spark-config td { + overflow-wrap: anywhere; + white-space: normal; +} + +/* CSS does not respect max-width on tables or table parts (like cells, columns, etc.). 
+ See: https://stackoverflow.com/a/8465980 + */ +table.spark-config th:nth-child(1), +table.spark-config td:nth-child(1) { + width: 25%; +} + +table.spark-config th:nth-child(2), +table.spark-config td:nth-child(2) { + width: 20%; +} + +table.spark-config th:nth-child(4), +table.spark-config td:nth-child(4) { + width: 90px; +} From f206238b54246c0f36fedf2a2f606d5815d0c246 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 13:27:53 -0500 Subject: [PATCH 09/10] Revert "remove max-width on content" This reverts commit 9ee72bed33437417a34ba518e7e00956b272b465. --- docs/css/custom.css | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/css/custom.css b/docs/css/custom.css index a5a45f0c636f..2b83929667a5 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -807,6 +807,7 @@ blockquote { z-index: 1; position: relative; background-color: #FFF; + max-width: 914px; line-height: 1.6; } @@ -814,7 +815,7 @@ blockquote { z-index: 1; position: relative; background-color: #FFF; - max-width: 80%; + max-width: 914px; line-height: 1.6; padding-left: 30px; min-height: 100vh; From 9380ad920827cbac943f73c85fa65a63bc3c8679 Mon Sep 17 00:00:00 2001 From: Nicholas Chammas Date: Tue, 30 Jan 2024 15:15:52 -0500 Subject: [PATCH 10/10] css tweaks --- docs/css/custom.css | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/css/custom.css b/docs/css/custom.css index 2b83929667a5..22175068023b 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -954,8 +954,9 @@ table tr:nth-child(2n) { table.spark-config { width: 100%; + table-layout: fixed; white-space: normal; - overflow-wrap: anywhere; + overflow-wrap: break-word; } /* We have long config names and formulas that often show up in tables. To prevent @@ -965,16 +966,17 @@ table.spark-config { table.spark-config code, table.spark-config th, table.spark-config td { - overflow-wrap: anywhere; white-space: normal; + overflow-wrap: break-word; } -/* CSS does not respect max-width on tables or table parts (like cells, columns, etc.). +/* CSS does not respect max-width on tables or table parts (like cells, columns, etc.), + so we have to pick a fixed width for each column. See: https://stackoverflow.com/a/8465980 */ table.spark-config th:nth-child(1), table.spark-config td:nth-child(1) { - width: 25%; + width: 30%; } table.spark-config th:nth-child(2),
Property NameDefaultMeaningSince Version