diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 1419d1f3cb635..48ec45e6b6ee1 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -567,14 +567,32 @@ def schema(self) -> StructType: Examples -------- + Example 1: Retrieve the inferred schema of the current DataFrame. + >>> df = spark.createDataFrame( ... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"]) + >>> df.schema + StructType([StructField('age', LongType(), True), + StructField('name', StringType(), True)]) - Retrieve the schema of the current DataFrame. + Example 2: Retrieve the schema of the current DataFrame (DDL-formatted schema). + >>> df = spark.createDataFrame( + ... [(14, "Tom"), (23, "Alice"), (16, "Bob")], + ... "age INT, name STRING") >>> df.schema - StructType([StructField('age', LongType(), True), + StructType([StructField('age', IntegerType(), True), StructField('name', StringType(), True)]) + + Example 3: Retrieve the specified schema of the current DataFrame. + + >>> from pyspark.sql.types import StructType, StructField, StringType + >>> df = spark.createDataFrame( + ... [("a",), ("b",), ("c",)], + ... StructType([StructField("value", StringType(), False)])) + >>> df.schema + StructType([StructField('value', StringType(), False)]) + """ if self._schema is None: try: @@ -606,6 +624,8 @@ def printSchema(self, level: Optional[int] = None) -> None: Examples -------- + Example 1: Printing the schema of a DataFrame with basic columns + >>> df = spark.createDataFrame( ... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"]) >>> df.printSchema() @@ -613,18 +633,30 @@ def printSchema(self, level: Optional[int] = None) -> None: |-- age: long (nullable = true) |-- name: string (nullable = true) - >>> df = spark.createDataFrame([(1, (2,2))], ["a", "b"]) + Example 2: Printing the schema with a specified level for nested columns + + >>> df = spark.createDataFrame([(1, (2, 2))], ["a", "b"]) >>> df.printSchema(1) root |-- a: long (nullable = true) |-- b: struct (nullable = true) + Example 3: Printing the schema with deeper nesting level + >>> df.printSchema(2) root |-- a: long (nullable = true) |-- b: struct (nullable = true) | |-- _1: long (nullable = true) | |-- _2: long (nullable = true) + + Example 4: Printing the schema of a DataFrame with nullable and non-nullable columns + + >>> df = spark.range(1).selectExpr("id AS nonnullable", "NULL AS nullable") + >>> df.printSchema() + root + |-- nonnullable: long (nullable = false) + |-- nullable: void (nullable = true) """ if level: print(self._jdf.schema().treeString(level)) @@ -662,18 +694,17 @@ def explain( Examples -------- + Example 1: Print out the physical plan only (default). + >>> df = spark.createDataFrame( ... [(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"]) - - Print out the physical plan only (default). - >>> df.explain() # doctest: +SKIP == Physical Plan == *(1) Scan ExistingRDD[age...,name...] - Print out all of the parsed, analyzed, optimized and physical plans. + Example 2: Print out all parsed, analyzed, optimized, and physical plans. - >>> df.explain(True) + >>> df.explain(extended=True) == Parsed Logical Plan == ... == Analyzed Logical Plan == @@ -683,7 +714,7 @@ def explain( == Physical Plan == ... - Print out the plans with two sections: a physical plan outline and node details + Example 3: Print out the plans with two sections: a physical plan outline and node details. >>> df.explain(mode="formatted") # doctest: +SKIP == Physical Plan == @@ -692,9 +723,9 @@ def explain( Output [2]: [age..., name...] ... - Print a logical plan and statistics if they are available. + Example 4: Print a logical plan and statistics if they are available. - >>> df.explain("cost") + >>> df.explain(mode="cost") == Optimized Logical Plan == ...Statistics... ...