1 change: 1 addition & 0 deletions LICENSE
@@ -221,6 +221,7 @@ connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaR
Python Software Foundation License
----------------------------------

python/pyspark/loose_version.py
python/docs/source/_static/copybutton.js
Comment on lines +224 to 225
Member

I believe it's compatible, but we need to take one more look before doing that, because our Apache Spark binary distribution (up to 3.5.0) doesn't include a Python Software Foundation License entry yet.

So do we actually already have PSF-licensed files?

Member Author

Yes and no~

Yes, we already had the copybutton.js file:

spark-3.5.0-bin-hadoop3:$ find . -name copybutton.js
./python/docs/source/_static/copybutton.js

But also no: we didn't have a PSF entry in LICENSE-binary, which is part of the Apache Spark binary distribution. So I added one in this PR.

Member Author

I'm not sure about copybutton.js, but we need to add loose_version.py to the binary license because we already list python/pyspark/cloudpickle.py and python/pyspark/join.py in the BSD 3-Clause section. So I added it to LICENSE-binary too.


BSD 3-Clause
6 changes: 6 additions & 0 deletions LICENSE-binary
@@ -421,6 +421,12 @@ This section summarizes those components and their licenses. See licenses-binary
for text of these licenses.


Python Software Foundation License
----------------------------------

python/pyspark/loose_version.py


BSD 2-Clause
------------

1 change: 1 addition & 0 deletions dev/.rat-excludes
@@ -142,3 +142,4 @@ empty.proto
LimitedInputStream.java
TimSort.java
xml-resources/*
loose_version.py
5 changes: 0 additions & 5 deletions python/pyspark/__init__.py
@@ -49,11 +49,6 @@
from functools import wraps
import types
from typing import cast, Any, Callable, Optional, TypeVar, Union
from warnings import filterwarnings

filterwarnings(
    "ignore", message="distutils Version classes are deprecated. Use packaging.version instead."
)

from pyspark.conf import SparkConf
from pyspark.rdd import RDD, RDDBarrier
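
Not part of the diff, just context on the deleted filter above: on Python 3.10 and 3.11, simply constructing a distutils LooseVersion emits exactly the DeprecationWarning that this filterwarnings call was suppressing, and the stdlib distutils package is removed in Python 3.12 (PEP 632), so the vendored replacement below avoids both issues. A minimal illustrative sketch (Python 3.10/3.11):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # On Python 3.12+ the stdlib distutils is gone; this import only works if a setuptools shim provides it.
    from distutils.version import LooseVersion

    LooseVersion("1.0")  # instantiating a distutils Version class triggers the deprecation warning

print([str(w.message) for w in caught if "Version classes" in str(w.message)])
# ['distutils Version classes are deprecated. Use packaging.version instead.']
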
75 changes: 75 additions & 0 deletions python/pyspark/loose_version.py
@@ -0,0 +1,75 @@
# Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
# https://github.com/python/cpython/blob/3.11/LICENSE
# File originates from the cpython source
# https://github.com/python/cpython/blob/3.11/Lib/distutils/version.py

import re
from typing import Optional


class LooseVersion:
Member

Is this copied from distutils? If so, maybe we need to add a few comments here to explain it?

Member

For example, python/docs/source/_static/copybutton.js has a few lines of comments.

Member Author

Actually, it's reimplemented by squashing the Version class into the existing LooseVersion class. Let me add a comment about that.

    component_re = re.compile(r"(\d+ | [a-z]+ | \.)", re.VERBOSE)

    def __init__(self, vstring: Optional[str]) -> None:
        if vstring:
            self.parse(vstring)

    def parse(self, vstring: str) -> None:
        self.vstring = vstring
        components = [x for x in self.component_re.split(vstring) if x and x != "."]
        for i, obj in enumerate(components):
            try:
                components[i] = int(obj)
            except ValueError:
                pass

        self.version = components

    def __str__(self) -> str:
        return self.vstring

    def __repr__(self) -> str:
        return "LooseVersion ('%s')" % str(self)

    def __eq__(self, other):  # type: ignore[no-untyped-def]
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c == 0

    def __lt__(self, other):  # type: ignore[no-untyped-def]
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c < 0

    def __le__(self, other):  # type: ignore[no-untyped-def]
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c <= 0

    def __gt__(self, other):  # type: ignore[no-untyped-def]
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c > 0

    def __ge__(self, other):  # type: ignore[no-untyped-def]
        c = self._cmp(other)
        if c is NotImplemented:
            return c
        return c >= 0

    def _cmp(self, other):  # type: ignore[no-untyped-def]
        if isinstance(other, str):
            other = LooseVersion(other)
        elif not isinstance(other, LooseVersion):
            return NotImplemented

        if self.version == other.version:
            return 0
        if self.version < other.version:
            return -1
        if self.version > other.version:
            return 1
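
Not part of the diff, a minimal usage sketch of the vendored class above: version strings are split into mixed int/str components and compared positionally, so it behaves like the old distutils LooseVersion for typical version checks.

from pyspark.loose_version import LooseVersion

# Numeric components compare numerically, not lexicographically:
# "1.10" parses to [1, 10] and "1.9" to [1, 9].
assert LooseVersion("1.10") > LooseVersion("1.9")

# Comparing against a plain string coerces it to LooseVersion first (see _cmp above).
assert LooseVersion("3.5.0") >= "3.4"

# Pre-release tags survive as string components.
print(LooseVersion("2.0rc1").version)  # [2, 0, 'rc', 1]
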
2 changes: 1 addition & 1 deletion python/pyspark/pandas/plot/matplotlib.py
@@ -15,7 +15,7 @@
# limitations under the License.
#

from distutils.version import LooseVersion
from pyspark.loose_version import LooseVersion

import matplotlib as mat
import numpy as np
2 changes: 1 addition & 1 deletion python/pyspark/pandas/supported_api_gen.py
@@ -19,7 +19,7 @@
Generate 'Supported pandas APIs' documentation file
"""
import warnings
from distutils.version import LooseVersion
from pyspark.loose_version import LooseVersion
from enum import Enum, unique
from inspect import getmembers, isclass, isfunction, signature
from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/computation/test_any_all.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/computation/test_corrwith.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest


1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/computation/test_cov.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest
import decimal

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/data_type_ops/test_date_ops.py
@@ -17,7 +17,6 @@

import datetime
import unittest
from distutils.version import LooseVersion

import pandas as pd
from pandas.api.types import CategoricalDtype
@@ -14,12 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion

import pandas as pd
import numpy as np

from pyspark import pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.pandas.config import set_option, reset_option
from pyspark.testing.pandasutils import PandasOnSparkTestCase
from pyspark.testing.sqlutils import SQLTestUtils
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_aggregate.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_apply_func.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_head_tail.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_index.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_split_apply.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/groupby/test_stat.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from distutils.version import LooseVersion
import unittest

import numpy as np
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/indexes/test_base.py
@@ -17,13 +17,13 @@

import inspect
import unittest
from distutils.version import LooseVersion
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

import pyspark.pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.pandas.exceptions import PandasNotImplementedError
from pyspark.pandas.missing.indexes import (
    MissingPandasLikeDatetimeIndex,
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/indexes/test_category.py
@@ -16,12 +16,12 @@
#

import unittest
from distutils.version import LooseVersion

import pandas as pd
from pandas.api.types import CategoricalDtype

import pyspark.pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


2 changes: 0 additions & 2 deletions python/pyspark/pandas/tests/indexes/test_datetime.py
@@ -17,8 +17,6 @@

import datetime

from distutils.version import LooseVersion

import numpy as np
import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_indexing.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_reindex.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/indexes/test_timedelta.py
@@ -17,7 +17,6 @@

import unittest
from datetime import timedelta
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/series/test_compute.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion
from itertools import product

import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_csv.py
@@ -20,7 +20,6 @@
import tempfile
import unittest
from contextlib import contextmanager
from distutils.version import LooseVersion

import pandas as pd
import numpy as np
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_dataframe_conversion.py
@@ -21,7 +21,6 @@
import tempfile
import unittest
import sys
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
2 changes: 1 addition & 1 deletion python/pyspark/pandas/tests/test_dataframe_spark_io.py
@@ -18,12 +18,12 @@
import unittest
import glob
import os
from distutils.version import LooseVersion

import numpy as np
import pandas as pd

from pyspark import pandas as ps
from pyspark.loose_version import LooseVersion
from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_ops_on_diff_frames.py
@@ -15,7 +15,6 @@
# limitations under the License.
#

from distutils.version import LooseVersion
from itertools import product
import unittest

@@ -16,7 +16,6 @@
#

import unittest
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_rolling.py
@@ -15,7 +15,6 @@
# limitations under the License.
#
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_series_conversion.py
@@ -17,7 +17,6 @@

import unittest
import sys
from distutils.version import LooseVersion

import pandas as pd

1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_series_datetime.py
@@ -17,7 +17,6 @@

import datetime
import unittest
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_series_string.py
@@ -19,7 +19,6 @@
import numpy as np
import re
import unittest
from distutils.version import LooseVersion

from pyspark import pandas as ps
from pyspark.testing.pandasutils import PandasOnSparkTestCase
1 change: 0 additions & 1 deletion python/pyspark/pandas/tests/test_stats.py
@@ -16,7 +16,6 @@
#

import unittest
from distutils.version import LooseVersion
import numpy as np
import pandas as pd

2 changes: 1 addition & 1 deletion python/pyspark/sql/connect/client/core.py
@@ -19,6 +19,7 @@
"SparkConnectClient",
]

from pyspark.loose_version import LooseVersion
from pyspark.sql.connect.utils import check_dependencies

check_dependencies(__name__)
@@ -31,7 +32,6 @@
import urllib.parse
import uuid
import sys
from distutils.version import LooseVersion
from types import TracebackType
from typing import (
    Iterable,