diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index dbc66cab3f9b3..65d8bfde1411f 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -7157,27 +7157,46 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = None
     Examples
     --------
+    Example 1: Specify both the base and the input value
+
     >>> from pyspark.sql import functions as sf
     >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
-    >>> df.select(sf.log(2.0, df.value).alias('log2_value')).show()
-    +----------+
-    |log2_value|
-    +----------+
-    |       0.0|
-    |       1.0|
-    |       2.0|
-    +----------+
+    >>> df.select("*", sf.log(2.0, df.value)).show()
+    +-----+---------------+
+    |value|LOG(2.0, value)|
+    +-----+---------------+
+    |    1|            0.0|
+    |    2|            1.0|
+    |    4|            2.0|
+    +-----+---------------+

-    And Natural logarithm
+    Example 2: Return NULL for invalid input values

-    >>> df.select(sf.log(df.value).alias('ln_value')).show()
-    +------------------+
-    |          ln_value|
-    +------------------+
-    |               0.0|
-    |0.6931471805599453|
-    |1.3862943611198906|
-    +------------------+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (0), (-1), (NULL) AS t(value)")
+    >>> df.select("*", sf.log(3.0, df.value)).show()
+    +-----+------------------+
+    |value|   LOG(3.0, value)|
+    +-----+------------------+
+    |    1|               0.0|
+    |    2|0.6309297535714...|
+    |    0|              NULL|
+    |   -1|              NULL|
+    | NULL|              NULL|
+    +-----+------------------+
+
+    Example 3: Specify only the input value (natural logarithm)
+
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.sql("SELECT * FROM VALUES (1), (2), (4) AS t(value)")
+    >>> df.select("*", sf.log(df.value)).show()
+    +-----+------------------+
+    |value|         ln(value)|
+    +-----+------------------+
+    |    1|               0.0|
+    |    2|0.6931471805599...|
+    |    4|1.3862943611198...|
+    +-----+------------------+
     """
     from pyspark.sql.classic.column import _to_java_column
@@ -7205,13 +7224,22 @@ def ln(col: "ColumnOrName") -> Column:
     Examples
     --------
-    >>> df = spark.createDataFrame([(4,)], ['a'])
-    >>> df.select(ln('a')).show()
-    +------------------+
-    |             ln(a)|
-    +------------------+
-    |1.3862943611198906|
-    +------------------+
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(10).select("*", sf.ln('id')).show()
+    +---+------------------+
+    | id|            ln(id)|
+    +---+------------------+
+    |  0|              NULL|
+    |  1|               0.0|
+    |  2|0.6931471805599...|
+    |  3|1.0986122886681...|
+    |  4|1.3862943611198...|
+    |  5|1.6094379124341...|
+    |  6| 1.791759469228...|
+    |  7|1.9459101490553...|
+    |  8|2.0794415416798...|
+    |  9|2.1972245773362...|
+    +---+------------------+
     """
     return _invoke_function_over_columns("ln", col)
@@ -7237,13 +7265,22 @@ def log2(col: "ColumnOrName") -> Column:
     Examples
     --------
-    >>> df = spark.createDataFrame([(4,)], ['a'])
-    >>> df.select(log2('a').alias('log2')).show()
-    +----+
-    |log2|
-    +----+
-    | 2.0|
-    +----+
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(10).select("*", sf.log2('id')).show()
+    +---+------------------+
+    | id|          LOG2(id)|
+    +---+------------------+
+    |  0|              NULL|
+    |  1|               0.0|
+    |  2|               1.0|
+    |  3| 1.584962500721...|
+    |  4|               2.0|
+    |  5| 2.321928094887...|
+    |  6| 2.584962500721...|
+    |  7| 2.807354922057...|
+    |  8|               3.0|
+    |  9|3.1699250014423...|
+    +---+------------------+
     """
     return _invoke_function_over_columns("log2", col)
@@ -7274,9 +7311,16 @@ def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column:
     Examples
     --------
-    >>> df = spark.createDataFrame([("010101",)], ['n'])
-    >>> df.select(conv(df.n, 2, 16).alias('hex')).collect()
-    [Row(hex='15')]
+    >>> from pyspark.sql import functions as sf
+    >>> df = spark.createDataFrame([("010101",), ("101",), ("001",)], ['n'])
+    >>> df.select("*", sf.conv(df.n, 2, 16)).show()
+    +------+--------------+
+    |     n|conv(n, 2, 16)|
+    +------+--------------+
+    |010101|            15|
+    |   101|             5|
+    |   001|             1|
+    +------+--------------+
     """
     from pyspark.sql.classic.column import _to_java_column
@@ -7307,9 +7351,22 @@ def factorial(col: "ColumnOrName") -> Column:
     Examples
     --------
-    >>> df = spark.createDataFrame([(5,)], ['n'])
-    >>> df.select(factorial(df.n).alias('f')).collect()
-    [Row(f=120)]
+    >>> from pyspark.sql import functions as sf
+    >>> spark.range(10).select("*", sf.factorial('id')).show()
+    +---+-------------+
+    | id|factorial(id)|
+    +---+-------------+
+    |  0|            1|
+    |  1|            1|
+    |  2|            2|
+    |  3|            6|
+    |  4|           24|
+    |  5|          120|
+    |  6|          720|
+    |  7|         5040|
+    |  8|        40320|
+    |  9|       362880|
+    +---+-------------+
     """
     return _invoke_function_over_columns("factorial", col)
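For spot-checking the refreshed outputs outside the doctest harness, a minimal standalone sketch follows. It is not part of the diff; it assumes a plain local PySpark install, and the session name and master setting are illustrative only.

# Quick local check of the refreshed examples (not part of the diff).
# Assumes a local PySpark installation; session settings are illustrative.
from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.master("local[1]").appName("log-doctest-check").getOrCreate()

# log(base, col): non-positive and NULL inputs produce NULL rather than an error.
df = spark.sql("SELECT * FROM VALUES (1), (2), (0), (-1), (NULL) AS t(value)")
df.select("*", sf.log(3.0, df.value)).show()

# One-argument log is the natural logarithm, same as ln(); log2 and factorial
# are checked over the same 0..9 range the new doctests use.
spark.range(10).select("*", sf.ln("id"), sf.log2("id"), sf.factorial("id")).show()

# conv() reads the string column as base 2 and renders it in base 16.
spark.createDataFrame([("010101",), ("101",), ("001",)], ["n"]) \
    .select("*", sf.conv("n", 2, 16)).show()

spark.stop()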