From 70815eb088d26bbea458ed1513ee815fc649664b Mon Sep 17 00:00:00 2001 From: Travis Addair Date: Mon, 1 Jun 2020 17:09:41 -0700 Subject: [PATCH] Remove driver param for hdfs.connect when using pyarrow 0.17 and above (#556) --- petastorm/hdfs/namenode.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/petastorm/hdfs/namenode.py b/petastorm/hdfs/namenode.py index fec59e7f8..4d86e50a3 100644 --- a/petastorm/hdfs/namenode.py +++ b/petastorm/hdfs/namenode.py @@ -263,7 +263,13 @@ def hdfs_connect_namenode(cls, url, driver='libhdfs3', user=None): else: hostname = six.text_type(url.hostname or 'default') driver = six.text_type(driver) - return pyarrow.hdfs.connect(hostname, url.port or 8020, driver=driver, user=user) + + kwargs = dict(user=user) + if LooseVersion(pyarrow.__version__) < LooseVersion('0.17.0'): + # Support for libhdfs3 was removed in v0.17.0, we include it here for backwards + # compatibility + kwargs['driver'] = driver + return pyarrow.hdfs.connect(hostname, url.port or 8020, **kwargs) @classmethod def connect_to_either_namenode(cls, list_of_namenodes, user=None):