Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,14 +567,32 @@ def schema(self) -> StructType:

Examples
--------
Example 1: Retrieve the inferred schema of the current DataFrame.

>>> df = spark.createDataFrame(
... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])
>>> df.schema
StructType([StructField('age', LongType(), True),
StructField('name', StringType(), True)])

Retrieve the schema of the current DataFrame.
Example 2: Retrieve the schema of the current DataFrame (DDL-formatted schema).

>>> df = spark.createDataFrame(
... [(14, "Tom"), (23, "Alice"), (16, "Bob")],
... "age INT, name STRING")
>>> df.schema
StructType([StructField('age', LongType(), True),
StructType([StructField('age', IntegerType(), True),
StructField('name', StringType(), True)])

Example 3: Retrieve the user-specified schema of the current DataFrame.

>>> from pyspark.sql.types import StructType, StructField, StringType
>>> df = spark.createDataFrame(
... [("a",), ("b",), ("c",)],
... StructType([StructField("value", StringType(), False)]))
>>> df.schema
StructType([StructField('value', StringType(), False)])

"""
if self._schema is None:
try:
Expand Down Expand Up @@ -606,25 +624,39 @@ def printSchema(self, level: Optional[int] = None) -> None:

Examples
--------
Example 1: Printing the schema of a DataFrame with basic columns.

>>> df = spark.createDataFrame(
... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])
>>> df.printSchema()
root
|-- age: long (nullable = true)
|-- name: string (nullable = true)

>>> df = spark.createDataFrame([(1, (2,2))], ["a", "b"])
Example 2: Printing the schema with a specified level for nested columns.

>>> df = spark.createDataFrame([(1, (2, 2))], ["a", "b"])
>>> df.printSchema(1)
root
|-- a: long (nullable = true)
|-- b: struct (nullable = true)

Example 3: Printing the schema with a deeper nesting level.

>>> df.printSchema(2)
root
|-- a: long (nullable = true)
|-- b: struct (nullable = true)
| |-- _1: long (nullable = true)
| |-- _2: long (nullable = true)

Example 4: Printing the schema of a DataFrame with nullable and non-nullable columns.

>>> df = spark.range(1).selectExpr("id AS nonnullable", "NULL AS nullable")
>>> df.printSchema()
root
|-- nonnullable: long (nullable = false)
|-- nullable: void (nullable = true)
"""
if level:
print(self._jdf.schema().treeString(level))
Expand Down Expand Up @@ -662,18 +694,17 @@ def explain(

Examples
--------
Example 1: Print out the physical plan only (default).

>>> df = spark.createDataFrame(
... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"])

Print out the physical plan only (default).

>>> df.explain() # doctest: +SKIP
== Physical Plan ==
*(1) Scan ExistingRDD[age...,name...]

Print out all of the parsed, analyzed, optimized and physical plans.
Example 2: Print out all parsed, analyzed, optimized, and physical plans.

>>> df.explain(True)
>>> df.explain(extended=True)
== Parsed Logical Plan ==
...
== Analyzed Logical Plan ==
Expand All @@ -683,7 +714,7 @@ def explain(
== Physical Plan ==
...

Print out the plans with two sections: a physical plan outline and node details
Example 3: Print out the plans with two sections: a physical plan outline and node details.

>>> df.explain(mode="formatted") # doctest: +SKIP
== Physical Plan ==
Expand All @@ -692,9 +723,9 @@ def explain(
Output [2]: [age..., name...]
...

Print a logical plan and statistics if they are available.
Example 4: Print a logical plan and statistics if they are available.

>>> df.explain("cost")
>>> df.explain(mode="cost")
== Optimized Logical Plan ==
...Statistics...
...
Expand Down