Skip to content

Commit

Permalink
Add FTPS support (#33) (#739)
Browse files Browse the repository at this point in the history
* Add FTPS support (#33)

* Simplify helper script, add docstring

It's simpler if we just run the whole script under sudo.
The docstring helps our users.
Also, we don't need read-text and write-text in FTP, that gets handled in a
different part of smart_open.

* patch CI scripts

since helpers.sh runs under sudo, it doesn't have access to the
environment set in the workflow

* more fixes to helpers.sh

* bring back ab mode

looks like it _does_ get used after all

Co-authored-by: Michael Penkov <m@penkov.dev>
  • Loading branch information
RachitSharma2001 and mpenkov authored Nov 25, 2022
1 parent ebf2158 commit a0bb975
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 39 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ jobs:
- run: bash ci_helpers/helpers.sh enable_moto_server
if: ${{ matrix.moto_server }}

- run: bash ci_helpers/helpers.sh create_ftp_server
- run: |
sudo apt-get install vsftpd
sudo bash ci_helpers/helpers.sh create_ftp_ftps_servers
- name: Run integration tests
run: python ci_helpers/run_integration_tests.py
Expand All @@ -149,7 +151,7 @@ jobs:
- run: bash ci_helpers/helpers.sh disable_moto_server
if: ${{ matrix.moto_server }}

- run: bash ci_helpers/helpers.sh delete_ftp_server
- run: sudo bash ci_helpers/helpers.sh delete_ftp_ftps_servers

benchmarks:
needs: [linters,unit_tests]
Expand Down
45 changes: 40 additions & 5 deletions ci_helpers/helpers.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,52 @@ enable_moto_server(){
moto_server -p5000 2>/dev/null&
}

create_ftp_server(){
docker run -d -p 21:21 -p 21000-21010:21000-21010 -e USERS="user|123|/home/user/dir" -e ADDRESS=localhost --name my-ftp-server delfer/alpine-ftp-server
create_ftp_ftps_servers(){
#
# Must be run as root
#
home_dir=/home/user
user=user
pass=123
ftp_port=21
ftps_port=90

mkdir $home_dir
useradd -p $(echo $pass | openssl passwd -1 -stdin) -d $home_dir $user
chown $user:$user $home_dir

server_setup='''
listen=YES
listen_ipv6=NO
write_enable=YES
pasv_enable=YES
pasv_min_port=40000
pasv_max_port=40009
chroot_local_user=YES
allow_writeable_chroot=YES'''

additional_ssl_setup='''
ssl_enable=YES
allow_anon_ssl=NO
force_local_data_ssl=NO
force_local_logins_ssl=NO
require_ssl_reuse=NO
'''

cp /etc/vsftpd.conf /etc/vsftpd-ssl.conf
echo -e "$server_setup\nlisten_port=${ftp_port}" >> /etc/vsftpd.conf
echo -e "$server_setup\nlisten_port=${ftps_port}\n$additional_ssl_setup" >> /etc/vsftpd-ssl.conf

service vsftpd restart
vsftpd /etc/vsftpd-ssl.conf &
}

disable_moto_server(){
lsof -i tcp:5000 | tail -n1 | cut -f2 -d" " | xargs kill -9
}

delete_ftp_server(){
docker kill my-ftp-server
docker rm my-ftp-server
delete_ftp_ftps_servers(){
service vsftpd stop
}

"$@"
51 changes: 32 additions & 19 deletions integration-tests/test_ftp.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,84 @@
from __future__ import unicode_literals

import pytest
from smart_open import open

def test_nonbinary():

@pytest.fixture(params=[("ftp", 21), ("ftps", 90)])
def server_info(request):
return request.param

def test_nonbinary(server_info):
server_type = server_info[0]
port_num = server_info[1]
file_contents = "Test Test \n new test \n another tests"
appended_content1 = "Added \n to end"

with open("ftp://user:123@localhost:21/home/user/dir/file", "w") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file", "w") as f:
f.write(file_contents)

with open("ftp://user:123@localhost:21/home/user/dir/file", "r") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file", "r") as f:
read_contents = f.read()
assert read_contents == file_contents

with open("ftp://user:123@localhost:21/home/user/dir/file", "a") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file", "a") as f:
f.write(appended_content1)

with open("ftp://user:123@localhost:21/home/user/dir/file", "r") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file", "r") as f:
read_contents = f.read()
assert read_contents == file_contents + appended_content1

def test_binary():
def test_binary(server_info):
server_type = server_info[0]
port_num = server_info[1]
file_contents = b"Test Test \n new test \n another tests"
appended_content1 = b"Added \n to end"

with open("ftp://user:123@localhost:21/home/user/dir/file2", "wb") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file2", "wb") as f:
f.write(file_contents)

with open("ftp://user:123@localhost:21/home/user/dir/file2", "rb") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file2", "rb") as f:
read_contents = f.read()
assert read_contents == file_contents

with open("ftp://user:123@localhost:21/home/user/dir/file2", "ab") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file2", "ab") as f:
f.write(appended_content1)

with open("ftp://user:123@localhost:21/home/user/dir/file2", "rb") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file2", "rb") as f:
read_contents = f.read()
assert read_contents == file_contents + appended_content1

def test_line_endings_non_binary():
def test_line_endings_non_binary(server_info):
server_type = server_info[0]
port_num = server_info[1]
B_CLRF = b'\r\n'
CLRF = '\r\n'
file_contents = f"Test Test {CLRF} new test {CLRF} another tests{CLRF}"

with open("ftp://user:123@localhost:21/home/user/dir/file3", "w") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file3", "w") as f:
f.write(file_contents)

with open("ftp://user:123@localhost:21/home/user/dir/file3", "r") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file3", "r") as f:
for line in f:
assert not CLRF in line

with open("ftp://user:123@localhost:21/home/user/dir/file3", "rb") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file3", "rb") as f:
for line in f:
assert B_CLRF in line

def test_line_endings_binary():
def test_line_endings_binary(server_info):
server_type = server_info[0]
port_num = server_info[1]
B_CLRF = b'\r\n'
CLRF = '\r\n'
file_contents = f"Test Test {CLRF} new test {CLRF} another tests{CLRF}".encode('utf-8')

with open("ftp://user:123@localhost:21/home/user/dir/file4", "wb") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file4", "wb") as f:
f.write(file_contents)

with open("ftp://user:123@localhost:21/home/user/dir/file4", "r") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file4", "r") as f:
for line in f:
assert not CLRF in line

with open("ftp://user:123@localhost:21/home/user/dir/file4", "rb") as f:
with open(f"{server_type}://user:123@localhost:{port_num}/file4", "rb") as f:
for line in f:
assert B_CLRF in line
55 changes: 42 additions & 13 deletions smart_open/ftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
import logging
import urllib.parse
import smart_open.utils
from ftplib import FTP, error_reply
from ftplib import FTP, FTP_TLS, error_reply
import types
logger = logging.getLogger(__name__)

SCHEME = "ftp"
SCHEMES = ("ftp", "ftps")

"""Supported URL schemes."""

Expand All @@ -26,6 +26,9 @@
"ftp://username@host/path/file",
"ftp://username:password@host/path/file",
"ftp://username:password@host:port/path/file",
"ftps://username@host/path/file",
"ftps://username:password@host/path/file",
"ftps://username:password@host:port/path/file",
)


Expand All @@ -35,7 +38,7 @@ def _unquote(text):

def parse_uri(uri_as_string):
split_uri = urllib.parse.urlsplit(uri_as_string)
assert split_uri.scheme == SCHEME, 'unexpected scheme: %r' % split_uri.scheme
assert split_uri.scheme in SCHEMES
return dict(
scheme=split_uri.scheme,
uri_path=_unquote(split_uri.path),
Expand All @@ -50,8 +53,10 @@ def open_uri(uri, mode, transport_params):
smart_open.utils.check_kwargs(open, transport_params)
parsed_uri = parse_uri(uri)
uri_path = parsed_uri.pop("uri_path")
parsed_uri.pop("scheme")
return open(uri_path, mode, transport_params=transport_params, **parsed_uri)
scheme = parsed_uri.pop("scheme")
secure_conn = True if scheme == "ftps" else False
return open(uri_path, mode, secure_connection=secure_conn,
transport_params=transport_params, **parsed_uri)


def convert_transport_params_to_args(transport_params):
Expand All @@ -71,9 +76,12 @@ def convert_transport_params_to_args(transport_params):
return kwargs


def _connect(hostname, username, port, password, transport_params):
def _connect(hostname, username, port, password, secure_connection, transport_params):
kwargs = convert_transport_params_to_args(transport_params)
ftp = FTP(**kwargs)
if secure_connection:
ftp = FTP_TLS(**kwargs)
else:
ftp = FTP(**kwargs)
try:
ftp.connect(hostname, port)
except Exception as e:
Expand All @@ -84,33 +92,54 @@ def _connect(hostname, username, port, password, transport_params):
except error_reply as e:
logger.error("Unable to login to FTP server: try checking the username and password!")
raise e
if secure_connection:
ftp.prot_p()
return ftp


# transport paramaters can include any extra parameters that you want to be passed into FTP_TLS
def open(
path,
mode="r",
host=None,
user=None,
password=None,
port=DEFAULT_PORT,
secure_connection=False,
transport_params=None,
):
"""Open a file for reading or writing via FTP/FTPS.
Parameters
----------
path: str
The path on the remote server
mode: str
Must be "rb" or "wb"
host: str
The host to connect to
user: str
The username to use for the connection
password: str
The password for the specified username
port: int
The port to connect to
secure_connection: bool
True for FTPS, False for FTP
transport_params: dict
Additional parameters for the FTP connection.
Currently supported parameters: timeout, source_address, encoding.
"""
if not host:
raise ValueError("you must specify the host to connect to")
if not user:
raise ValueError("you must specify the user")
if not transport_params:
transport_params = {}
conn = _connect(host, user, port, password, transport_params)
conn = _connect(host, user, port, password, secure_connection, transport_params)
mode_to_ftp_cmds = {
"r": ("RETR", "r"),
"rb": ("RETR", "rb"),
"w": ("STOR", "w"),
"wb": ("STOR", "wb"),
"a": ("APPE", "w"),
"ab": ("APPE", "wb")
"ab": ("APPE", "wb"),
}
try:
ftp_mode, file_obj_mode = mode_to_ftp_cmds[mode]
Expand Down

0 comments on commit a0bb975

Please sign in to comment.