Skip to content

Commit 68e543f

Browse files
pmrowla, skshetry, efiop
authored
remotes.http: Support dvc push for http remotes (#3343)
* remote.http: support _upload() via HTTP POST
  - uploaded files are sent as chunked encoding POST data
  Fixes #3247
* remote.http: add support for HTTP basic and digest auth
  - username, password, ask_password options work the same way as for ssh remotes
  - basic_auth (bool) and digest_auth (bool) options added for http(s) remotes
  - digest_auth takes precedence over basic_auth if both are enabled
* remote.http: auth_method() unit tests
* tests: Add functional tests for HTTP remotes
  - HTTP remotes now tested locally using a SimpleHTTPServer instance that allows reading/writing to a temp directory
* tests: fix http tests on py3.5, 3.6
* remote.http: add custom auth method
* s/custom_header/custom_auth_header/
* Apply suggestions from code review

Co-Authored-By: Saugat Pachhai <suagatchhetri@outlook.com>
Co-Authored-By: Ruslan Kuprieiev <kupruser@gmail.com>
Co-authored-by: Saugat Pachhai <suagatchhetri@outlook.com>
Co-authored-by: Ruslan Kuprieiev <kupruser@gmail.com>
1 parent 411057d commit 68e543f

File tree

7 files changed

+213
-6
lines changed

7 files changed

+213
-6
lines changed

dvc/config.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ class RelPath(str):
101101
"shared": All(Lower, Choices("group")),
102102
Optional("slow_link_warning", default=True): Bool,
103103
}
104+
# Options accepted only by http/https remotes; merged with REMOTE_COMMON in
# the "http" and "https" schema entries.
#   auth               -- lower-cased; one of "basic", "digest" or "custom"
#   custom_auth_header -- header name that RemoteHTTP.auth_method fills with
#                         the password when auth is "custom"
#   user / password    -- credentials handed to the selected auth method
#   ask_password       -- when true and no password is configured,
#                         RemoteHTTP.auth_method prompts for one
HTTP_COMMON = {
105+
"auth": All(Lower, Choices("basic", "digest", "custom")),
106+
"custom_auth_header": str,
107+
"user": str,
108+
"password": str,
109+
"ask_password": Bool,
110+
}
104111
SCHEMA = {
105112
"core": {
106113
"remote": Lower,
@@ -169,8 +176,8 @@ class RelPath(str):
169176
"gdrive_user_credentials_file": str,
170177
**REMOTE_COMMON,
171178
},
172-
"http": REMOTE_COMMON,
173-
"https": REMOTE_COMMON,
179+
"http": {**HTTP_COMMON, **REMOTE_COMMON},
180+
"https": {**HTTP_COMMON, **REMOTE_COMMON},
174181
"remote": {str: object}, # Any of the above options are valid
175182
}
176183
)

dvc/remote/http.py

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import logging
2+
import os.path
23
import threading
34

4-
from funcy import cached_property, wrap_prop
5+
from funcy import cached_property, memoize, wrap_prop, wrap_with
56

7+
import dvc.prompt as prompt
68
from dvc.config import ConfigError
79
from dvc.exceptions import DvcException, HTTPError
810
from dvc.progress import Tqdm
@@ -12,6 +14,15 @@
1214
logger = logging.getLogger(__name__)
1315

1416

17+
@wrap_with(threading.Lock())
@memoize
def ask_password(host, user):
    """Interactively ask for the password of ``user`` on ``host``.

    The function is decorated with ``memoize`` and wrapped with a single
    module-level lock, so concurrent callers are serialized around the
    prompt (presumably so one host/user pair is only prompted for once --
    relies on funcy's ``memoize`` caching by arguments).

    Returns whatever ``prompt.password`` returns for the rendered message.
    """
    message = "Enter a password for host '{host}' user '{user}'".format(
        host=host, user=user
    )
    return prompt.password(message)
24+
25+
1526
class RemoteHTTP(RemoteBASE):
1627
scheme = Schemes.HTTP
1728
SESSION_RETRIES = 5
@@ -24,14 +35,26 @@ def __init__(self, repo, config):
2435
super().__init__(repo, config)
2536

2637
url = config.get("url")
27-
self.path_info = self.path_cls(url) if url else None
38+
if url:
39+
self.path_info = self.path_cls(url)
40+
user = config.get("user", None)
41+
if user:
42+
self.path_info.user = user
43+
else:
44+
self.path_info = None
2845

2946
if not self.no_traverse:
3047
raise ConfigError(
3148
"HTTP doesn't support traversing the remote to list existing "
3249
"files. Use: `dvc remote modify <name> no_traverse true`"
3350
)
3451

52+
self.auth = config.get("auth", None)
53+
self.custom_auth_header = config.get("custom_auth_header", None)
54+
self.password = config.get("password", None)
55+
self.ask_password = config.get("ask_password", False)
56+
self.headers = {}
57+
3558
def _download(self, from_info, to_file, name=None, no_progress_bar=False):
3659
response = self._request("GET", from_info.url, stream=True)
3760
if response.status_code != 200:
@@ -48,6 +71,28 @@ def _download(self, from_info, to_file, name=None, no_progress_bar=False):
4871
fd.write(chunk)
4972
pbar.update(len(chunk))
5073

74+
def _upload(self, from_file, to_info, name=None, no_progress_bar=False):
    """Send the local file ``from_file`` to ``to_info.url`` with a POST.

    The file is read in ``self.CHUNK_SIZE`` pieces by a generator that is
    handed to ``self._request`` as the request body; each piece read
    advances the progress bar.

    Args:
        from_file: path of the local file to upload.
        to_info: destination path info; only its ``url`` is used.
        name: progress bar description; defaults to ``to_info.url``.
        no_progress_bar: disable the bar (and skip sizing the file).

    Raises:
        HTTPError: if the response status is neither 200 nor 201.
    """
    total_bytes = None if no_progress_bar else os.path.getsize(from_file)
    description = to_info.url if name is None else name

    with Tqdm(
        total=total_bytes,
        leave=False,
        bytes=True,
        desc=description,
        disable=no_progress_bar,
    ) as pbar:

        def chunks():
            # iter() with a sentinel stops at the first empty read (EOF).
            with open(from_file, "rb") as fd:
                for piece in iter(lambda: fd.read(self.CHUNK_SIZE), b""):
                    pbar.update(len(piece))
                    yield piece

        response = self._request("POST", to_info.url, data=chunks())
        upload_ok = response.status_code in (200, 201)
        if not upload_ok:
            raise HTTPError(response.status_code, response.reason)
95+
5196
def exists(self, path_info):
5297
return bool(self._request("HEAD", path_info.url))
5398

@@ -74,6 +119,24 @@ def get_file_checksum(self, path_info):
74119

75120
return etag
76121

122+
def auth_method(self, path_info=None):
    """Build the ``auth`` object to pass to requests for this remote.

    Args:
        path_info: path info supplying ``host`` and ``user``; defaults to
            ``self.path_info``.

    Returns:
        ``HTTPBasicAuth`` for ``auth == "basic"``, ``HTTPDigestAuth`` for
        ``auth == "digest"``, otherwise ``None``.

    Side effects:
        * When ``ask_password`` is set and no password is known yet, the
          user is prompted and the answer is stored on ``self.password``.
        * For ``auth == "custom"`` with a ``custom_auth_header`` set, the
          password is stored in ``self.headers`` under that header name.
    """
    from requests.auth import HTTPBasicAuth, HTTPDigestAuth

    if path_info is None:
        path_info = self.path_info

    # No auth scheme configured: nothing to prompt for or attach.
    if not self.auth:
        return None

    if self.ask_password and self.password is None:
        self.password = ask_password(path_info.host, path_info.user)

    if self.auth == "basic":
        return HTTPBasicAuth(path_info.user, self.password)

    if self.auth == "digest":
        return HTTPDigestAuth(path_info.user, self.password)

    if self.auth == "custom" and self.custom_auth_header:
        # Custom auth travels as a plain header rather than an auth object.
        self.headers[self.custom_auth_header] = self.password

    return None
139+
77140
@wrap_prop(threading.Lock())
78141
@cached_property
79142
def _session(self):
@@ -100,7 +163,13 @@ def _request(self, method, url, **kwargs):
100163
kwargs.setdefault("timeout", self.REQUEST_TIMEOUT)
101164

102165
try:
103-
res = self._session.request(method, url, **kwargs)
166+
res = self._session.request(
167+
method,
168+
url,
169+
auth=self.auth_method(),
170+
headers=self.headers,
171+
**kwargs,
172+
)
104173

105174
redirect_no_location = (
106175
kwargs["allow_redirects"]

tests/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from dvc.remote.ssh.connection import SSHConnection
7+
from tests.utils.httpd import PushRequestHandler, StaticFileServer
78
from .dir_helpers import * # noqa
89

910

@@ -57,3 +58,9 @@ def _close_pools():
5758

5859
yield
5960
close_pools()
61+
62+
63+
@pytest.fixture
def http_server(tmp_dir):
    """Yield a running ``StaticFileServer`` that uses ``PushRequestHandler``.

    The server is entered as a context manager for the duration of the test
    and exited afterwards. The ``tmp_dir`` fixture is requested but not used
    directly here.
    """
    server = StaticFileServer(handler_class=PushRequestHandler)
    with server as running_server:
        yield running_server

tests/func/test_data_cloud.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
GCP,
3131
GDrive,
3232
HDFS,
33+
HTTP,
3334
Local,
3435
S3,
3536
SSHMocked,
@@ -290,6 +291,20 @@ def _get_cloud_class(self):
290291
return RemoteHDFS
291292

292293

294+
@pytest.mark.usefixtures("http_server")
class TestRemoteHTTP(HTTP, TestDataCloudBase):
    """``TestDataCloudBase`` specialised for ``RemoteHTTP``.

    The remote URL points at the port of the ``http_server`` fixture.
    """

    def _get_cloud_class(self):
        """Return the remote class under test."""
        return RemoteHTTP

    def get_url(self):
        """Return the HTTP URL of the per-test server."""
        port = self.http_server.server_port
        return super().get_url(port)

    @pytest.fixture(autouse=True)
    def setup_method_fixture(self, request, http_server):
        """Record the running server and the current test's name on self."""
        self.method_name = request.function.__name__
        self.http_server = http_server
306+
307+
293308
class TestDataCloudCLIBase(TestDvc):
294309
def main(self, args):
295310
ret = main(args)

tests/remotes.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,11 @@ def get_url():
275275
return "hdfs://{}@127.0.0.1{}".format(
276276
getpass.getuser(), Local.get_storagepath()
277277
)
278+
279+
280+
class HTTP:
    """Test helper describing a local HTTP remote."""

    should_test = always_test

    @staticmethod
    def get_url(port):
        """Return the loopback HTTP URL for ``port``."""
        return "http://127.0.0.1:{port}".format(port=port)

tests/unit/remote/test_http.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,74 @@ def test_download_fails_on_error_code(dvc):
2727

2828
with pytest.raises(HTTPError):
2929
remote._download(URLInfo(url) / "missing.txt", "missing.txt")
30+
31+
32+
def test_public_auth_method(dvc):
    """Without an ``auth`` option, ``auth_method()`` yields no auth object."""
    remote = RemoteHTTP(
        dvc,
        {
            "url": "http://example.com/",
            "path_info": "file.html",
            "user": "",
            "password": "",
        },
    )

    assert remote.auth_method() is None
43+
44+
45+
def test_basic_auth_method(dvc):
    """``auth = basic`` produces an ``HTTPBasicAuth`` with the credentials."""
    from requests.auth import HTTPBasicAuth

    credentials = ("username", "password")
    expected = HTTPBasicAuth(*credentials)

    remote = RemoteHTTP(
        dvc,
        {
            "url": "http://example.com/",
            "path_info": "file.html",
            "auth": "basic",
            "user": credentials[0],
            "password": credentials[1],
        },
    )

    assert remote.auth_method() == expected
    assert isinstance(remote.auth_method(), HTTPBasicAuth)
63+
64+
65+
def test_digest_auth_method(dvc):
    """``auth = digest`` produces an ``HTTPDigestAuth`` with the credentials."""
    from requests.auth import HTTPDigestAuth

    credentials = ("username", "password")
    expected = HTTPDigestAuth(*credentials)

    remote = RemoteHTTP(
        dvc,
        {
            "url": "http://example.com/",
            "path_info": "file.html",
            "auth": "digest",
            "user": credentials[0],
            "password": credentials[1],
        },
    )

    assert remote.auth_method() == expected
    assert isinstance(remote.auth_method(), HTTPDigestAuth)
83+
84+
85+
def test_custom_auth_method(dvc):
    """``auth = custom`` puts the password in the configured header."""
    header_name = "Custom-Header"
    secret = "password"

    remote = RemoteHTTP(
        dvc,
        {
            "url": "http://example.com/",
            "path_info": "file.html",
            "auth": "custom",
            "custom_auth_header": header_name,
            "password": secret,
        },
    )

    # Custom auth returns no auth object; it works through remote.headers.
    assert remote.auth_method() is None
    assert header_name in remote.headers
    assert remote.headers[header_name] == secret

tests/utils/httpd.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import hashlib
22
import os
33
import threading
4-
from http.server import HTTPServer
4+
from http import HTTPStatus
5+
from http.server import HTTPServer, SimpleHTTPRequestHandler
56
from RangeHTTPServer import RangeRequestHandler
67

78

@@ -35,6 +36,35 @@ class ContentMD5Handler(TestRequestHandler):
3536
checksum_header = "Content-MD5"
3637

3738

39+
class PushRequestHandler(SimpleHTTPRequestHandler):
    """Static-file handler that additionally stores POSTed bodies on disk.

    A POST writes the request body to the filesystem path that the request
    URL translates to, creating parent directories as needed. Both
    ``Transfer-Encoding: chunked`` and ``Content-Length`` bodies are handled.
    """

    def _chunks(self):
        """Yield the data of each chunk of a chunked-encoded request body.

        Stops at the zero-size terminating chunk.

        Raises:
            ValueError: if a chunk-size line is not a hexadecimal number.
        """
        while True:
            # Chunk header is "<hex size>[;chunk-extensions]CRLF"; drop any
            # extensions before parsing the size.
            size_line = self.rfile.readline(65537)
            chunk_size = int(size_line.split(b";", 1)[0].strip(), 16)
            if chunk_size == 0:
                return
            yield self.rfile.read(chunk_size)
            # Consume the CRLF that terminates the chunk data.
            self.rfile.read(2)

    def do_POST(self):
        """Write the request body to the translated path and reply.

        Replies 200 on success, 400 if the body framing is malformed
        (bad chunk size or Content-Length) and 500 if the file cannot be
        written. Exactly one response is sent per request.
        """
        chunked = self.headers.get("Transfer-Encoding", "") == "chunked"
        path = self.translate_path(self.path)
        try:
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "wb") as fd:
                if chunked:
                    for chunk in self._chunks():
                        fd.write(chunk)
                else:
                    size = int(self.headers.get("Content-Length", 0))
                    fd.write(self.rfile.read(size))
        except OSError as e:
            self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR, str(e))
            # Without this return a 200 response would follow the error.
            return
        except ValueError as e:
            self.send_error(HTTPStatus.BAD_REQUEST, str(e))
            return
        self.send_response(HTTPStatus.OK)
        self.end_headers()
66+
67+
3868
class StaticFileServer:
3969
_lock = threading.Lock()
4070

0 commit comments

Comments
 (0)