256 changes: 168 additions & 88 deletions python/pyspark/sql/functions/builtin.py
@@ -17014,37 +17014,47 @@ def convert_timezone(
Parameters
----------
sourceTz : :class:`~pyspark.sql.Column`, optional
The time zone for the input timestamp. If it is missing,
the current session time zone is used as the source time zone.
targetTz : :class:`~pyspark.sql.Column`
The time zone to which the input timestamp should be converted.
sourceTs : :class:`~pyspark.sql.Column`
A timestamp without time zone.

Returns
-------
:class:`~pyspark.sql.Column`
A new column that contains a timestamp for the converted time zone.

Examples
--------

Example 1: Convert the timestamp without time zone `sourceTs`,
when the source time zone `sourceTz` is None.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
>>> df.select(sf.convert_timezone(  # doctest: +SKIP
... None, sf.lit('Asia/Hong_Kong'), 'dt')
... ).show()
+--------------------------------------------------------+
|convert_timezone(current_timezone(), Asia/Hong_Kong, dt)|
+--------------------------------------------------------+
|                                     2015-04-08 00:00:00|
+--------------------------------------------------------+

Example 2: Convert the timestamp without time zone `sourceTs`, with the source time zone `sourceTz` specified.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
>>> df.select(sf.convert_timezone(
... sf.lit('America/Los_Angeles'), sf.lit('Asia/Hong_Kong'), 'dt')
... ).show()
+---------------------------------------------------------+
|convert_timezone(America/Los_Angeles, Asia/Hong_Kong, dt)|
+---------------------------------------------------------+
|                                      2015-04-08 15:00:00|
+---------------------------------------------------------+
"""
if sourceTz is None:
return _invoke_function_over_columns("convert_timezone", targetTz, sourceTs)
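
Note (not part of this diff): because a `None` `sourceTz` falls back to the current session time zone, the two call styles above agree once `spark.sql.session.timeZone` is pinned. A minimal sketch, assuming a running SparkSession bound to `spark`:

>>> import pyspark.sql.functions as sf
>>> # Pin the session time zone so the None fallback is deterministic.
>>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
>>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
>>> df.select(  # doctest: +SKIP
...     sf.convert_timezone(None, sf.lit('Asia/Hong_Kong'), 'dt'),
...     sf.convert_timezone(sf.lit('America/Los_Angeles'), sf.lit('Asia/Hong_Kong'), 'dt')
... ).show(truncate=False)

Both result columns should then show 2015-04-08 15:00:00.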
@@ -17067,55 +17077,78 @@ def make_dt_interval(
Parameters
----------
days : :class:`~pyspark.sql.Column` or str, optional
The number of days, positive or negative.
hours : :class:`~pyspark.sql.Column` or str, optional
The number of hours, positive or negative.
mins : :class:`~pyspark.sql.Column` or str, optional
The number of minutes, positive or negative.
secs : :class:`~pyspark.sql.Column` or str, optional
The number of seconds with the fractional part in microsecond precision.

Returns
-------
:class:`~pyspark.sql.Column`
A new column that contains a DayTimeIntervalType duration.

Examples
--------

Example 1: Make DayTimeIntervalType duration from days, hours, mins and secs.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[1, 12, 30, 01.001001]],
... ["day", "hour", "min", "sec"])
>>> df.select(sf.make_dt_interval(df.day, df.hour, df.min, df.sec)).show(truncate=False)
+------------------------------------------+
|make_dt_interval(day, hour, min, sec)     |
+------------------------------------------+
|INTERVAL '1 12:30:01.001001' DAY TO SECOND|
+------------------------------------------+

Example 2: Make DayTimeIntervalType duration from days, hours and mins.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[1, 12, 30, 01.001001]],
... ["day", "hour", "min", "sec"])
>>> df.select(sf.make_dt_interval(df.day, df.hour, df.min)).show(truncate=False)
+-----------------------------------+
|make_dt_interval(day, hour, min, 0)|
+-----------------------------------+
|INTERVAL '1 12:30:00' DAY TO SECOND|
+-----------------------------------+

Example 3: Make DayTimeIntervalType duration from days and hours.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[1, 12, 30, 01.001001]],
... ["day", "hour", "min", "sec"])
>>> df.select(sf.make_dt_interval(df.day, df.hour)).show(truncate=False)
+-----------------------------------+
|make_dt_interval(day, hour, 0, 0)  |
+-----------------------------------+
|INTERVAL '1 12:00:00' DAY TO SECOND|
+-----------------------------------+

Example 4: Make DayTimeIntervalType duration from days.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[1, 12, 30, 01.001001]],
... ["day", "hour", "min", "sec"])
>>> df.select(sf.make_dt_interval(df.day)).show(truncate=False)
+-----------------------------------+
|make_dt_interval(day, 0, 0, 0)     |
+-----------------------------------+
|INTERVAL '1 00:00:00' DAY TO SECOND|
+-----------------------------------+

Example 5: Make DayTimeIntervalType duration with no arguments (all fields default to zero).

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[1, 12, 30, 01.001001]],
... ["day", "hour", "min", "sec"])
>>> df.select(sf.make_dt_interval()).show(truncate=False)
+-----------------------------------+
|make_dt_interval(0, 0, 0, 0)       |
+-----------------------------------+
|INTERVAL '0 00:00:00' DAY TO SECOND|
+-----------------------------------+
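
Note (not part of this diff): per the Parameters section, each field may be negative, and `secs` carries microsecond precision. A quick sketch, assuming a SparkSession bound to `spark`:

>>> import pyspark.sql.functions as sf
>>> # Negative fields yield a negative day-time interval; secs keeps microseconds.
>>> df = spark.createDataFrame([[-1, -12, 0, 0.000001]], ['day', 'hour', 'min', 'sec'])
>>> df.select(sf.make_dt_interval(df.day, df.hour, df.min, df.sec)).show(truncate=False)  # doctest: +SKIP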
@@ -17145,82 +17178,129 @@ def make_interval(
Parameters
----------
years : :class:`~pyspark.sql.Column` or str, optional
The number of years, positive or negative.
months : :class:`~pyspark.sql.Column` or str, optional
The number of months, positive or negative.
weeks : :class:`~pyspark.sql.Column` or str, optional
The number of weeks, positive or negative.
days : :class:`~pyspark.sql.Column` or str, optional
The number of days, positive or negative.
hours : :class:`~pyspark.sql.Column` or str, optional
The number of hours, positive or negative.
mins : :class:`~pyspark.sql.Column` or str, optional
The number of minutes, positive or negative.
secs : :class:`~pyspark.sql.Column` or str, optional
The number of seconds with the fractional part in microsecond precision.

Returns
-------
:class:`~pyspark.sql.Column`
A new column that contains an interval.

Examples
--------

Example 1: Make interval from years, months, weeks, days, hours, mins and secs.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(
... df.year, df.month, df.week, df.day, df.hour, df.min, df.sec)
... ).show(truncate=False)
+---------------------------------------------------------------+
|make_interval(year, month, week, day, hour, min, sec)          |
+---------------------------------------------------------------+
|100 years 11 months 8 days 12 hours 30 minutes 1.001001 seconds|
+---------------------------------------------------------------+

Example 2: Make interval from years, months, weeks, days, hours and mins.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(
... df.year, df.month, df.week, df.day, df.hour, df.min)
... ).show(truncate=False)
+---------------------------------------------------+
|make_interval(year, month, week, day, hour, min, 0)|
+---------------------------------------------------+
|100 years 11 months 8 days 12 hours 30 minutes    |
+---------------------------------------------------+

Example 3: Make interval from years, months, weeks, days and hours.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(
... df.year, df.month, df.week, df.day, df.hour)
... ).show(truncate=False)
+-------------------------------------------------+
|make_interval(year, month, week, day, hour, 0, 0)|
+-------------------------------------------------+
|100 years 11 months 8 days 12 hours              |
+-------------------------------------------------+

Example 4: Make interval from years, months, weeks and days.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(df.year, df.month, df.week, df.day)).show(truncate=False)
+----------------------------------------------+
|make_interval(year, month, week, day, 0, 0, 0)|
+----------------------------------------------+
|100 years 11 months 8 days                    |
+----------------------------------------------+

Example 5: Make interval from years, months and weeks.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(df.year, df.month, df.week)).show(truncate=False)
+--------------------------------------------+
|make_interval(year, month, week, 0, 0, 0, 0)|
+--------------------------------------------+
|100 years 11 months 7 days                 |
+--------------------------------------------+

Example 6: Make interval from years and months.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(df.year, df.month)).show(truncate=False)
+-----------------------------------------+
|make_interval(year, month, 0, 0, 0, 0, 0)|
+-----------------------------------------+
|100 years 11 months                      |
+-----------------------------------------+

Example 7: Make interval from years.

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval(df.year)).show(truncate=False)
+-------------------------------------+
|make_interval(year, 0, 0, 0, 0, 0, 0)|
+-------------------------------------+
|100 years                            |
+-------------------------------------+

Example 8: Make interval with no arguments (all fields default to zero).

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]],
... ["year", "month", "week", "day", "hour", "min", "sec"])
>>> df.select(sf.make_interval()).show(truncate=False)
+----------------------------------+
|make_interval(0, 0, 0, 0, 0, 0, 0)|
+----------------------------------+
|0 seconds                         |
+----------------------------------+
"""
_years = lit(0) if years is None else years
_months = lit(0) if months is None else months
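
Note (not part of this diff): a column produced by `make_interval` supports timestamp arithmetic through the Column `+` operator, which is a common reason to build one. A usage sketch, assuming a SparkSession bound to `spark`:

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('2015-04-08 12:00:00',)], ['ts'])
>>> # Shift a timestamp forward by one month using an interval column.
>>> df.select(  # doctest: +SKIP
...     (sf.to_timestamp(df.ts) + sf.make_interval(sf.lit(0), sf.lit(1))).alias('plus_1_month')
... ).show()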