88
99from dvc .utils .compat import str , builtin_str , basestring , is_py2
1010from dvc .utils .compat import pathlib , urlparse
11+ from dvc .utils import relpath
1112
1213
1314# On Python 2.7/Windows sys.getfilesystemencoding() is set to mbcs,
1415# which is lossy, thus we can't use that,
1516# see https://github.com/mcmtroffaes/pathlib2/issues/56.
16- from dvc .utils import relpath
17-
1817if is_py2 :
1918 fs_encoding = "utf-8"
2019
@@ -112,52 +111,80 @@ class PosixPathInfo(PathInfo, pathlib.PurePosixPath):
112111 pass
113112
114113
114+ class _URLPathInfo (PosixPathInfo ):
115+ def __str__ (self ):
116+ return self .__fspath__ ()
117+
118+ __unicode__ = __str__
119+
120+
115121class _URLPathParents (object ):
116- def __init__ (self , pathcls , scheme , netloc , path ):
117- self ._scheme = scheme
118- self ._netloc = netloc
119- self ._parents = path .parents
120- self ._pathcls = pathcls
122+ def __init__ (self , src ):
123+ self .src = src
124+ self ._parents = self .src ._path .parents
121125
122126 def __len__ (self ):
123127 return len (self ._parents )
124128
125129 def __getitem__ (self , idx ):
126- return self ._pathcls .from_parts (
127- scheme = self ._scheme ,
128- netloc = self ._netloc ,
129- path = self ._parents [idx ].fspath ,
130- )
130+ return self .src .replace (path = self ._parents [idx ])
131131
132132 def __repr__ (self ):
133- return "<{}.parents>" .format (self ._pathcls . __name__ )
133+ return "<{}.parents>" .format (self .src )
134134
135135
136136class URLInfo (object ):
137137 DEFAULT_PORTS = {"http" : 80 , "https" : 443 , "ssh" : 22 , "hdfs" : 0 }
138138
139139 def __init__ (self , url ):
140- self .parsed = urlparse (url )
141- assert self .parsed .scheme != "remote"
140+ p = urlparse (url )
141+ assert not p .query and not p .params and not p .fragment
142+ assert p .password is None
143+
144+ self .fill_parts (p .scheme , p .hostname , p .username , p .port , p .path )
142145
143146 @classmethod
144147 def from_parts (
145- cls , scheme = None , netloc = None , host = None , user = None , port = None , path = ""
148+ cls , scheme = None , host = None , user = None , port = None , path = "" , netloc = None
146149 ):
147- assert scheme and (bool (host ) ^ bool (netloc ))
150+ assert bool (host ) ^ bool (netloc )
151+
152+ if netloc is not None :
153+ return cls ("{}://{}{}" .format (scheme , netloc , path ))
154+
155+ obj = cls .__new__ (cls )
156+ obj .fill_parts (scheme , host , user , port , path )
157+ return obj
158+
159+ def fill_parts (self , scheme , host , user , port , path ):
160+ assert scheme != "remote"
161+ assert isinstance (path , (basestring , _URLPathInfo ))
162+
163+ self .scheme , self .host , self .user = scheme , host , user
164+ self .port = int (port ) if port else self .DEFAULT_PORTS .get (self .scheme )
165+
166+ if isinstance (path , _URLPathInfo ):
167+ self ._spath = builtin_str (path )
168+ self ._path = path
169+ else :
170+ if path and path [0 ] != "/" :
171+ path = "/" + path
172+ self ._spath = path
173+
174+ @property
175+ def _base_parts (self ):
176+ return (self .scheme , self .host , self .user , self .port )
177+
178+ @property
179+ def parts (self ):
180+ return self ._base_parts + self ._path .parts
148181
149- if netloc is None :
150- netloc = host
151- if user :
152- netloc = user + "@" + host
153- if port :
154- netloc += ":" + str (port )
155- return cls ("{}://{}{}" .format (scheme , netloc , path ))
182+ def replace (self , path = None ):
183+ return self .from_parts (* self ._base_parts , path = path )
156184
157185 @cached_property
158186 def url (self ):
159- p = self .parsed
160- return "{}://{}{}" .format (p .scheme , self .netloc , p .path )
187+ return "{}://{}{}" .format (self .scheme , self .netloc , self ._spath )
161188
162189 def __str__ (self ):
163190 return self .url
@@ -170,107 +197,73 @@ def __eq__(self, other):
170197 other = self .__class__ (other )
171198 return (
172199 self .__class__ == other .__class__
173- and self .scheme == other .scheme
174- and self .netloc == other .netloc
200+ and self ._base_parts == other ._base_parts
175201 and self ._path == other ._path
176202 )
177203
178204 def __hash__ (self ):
179- return hash (self .url )
205+ return hash (self .parts )
180206
181207 def __div__ (self , other ):
182- p = self .parsed
183- new_path = posixpath .join (p .path , str (other ))
184- if not new_path .startswith ("/" ):
185- new_path = "/" + new_path
186- new_url = "{}://{}{}" .format (p .scheme , p .netloc , new_path )
187- return self .__class__ (new_url )
208+ return self .replace (path = posixpath .join (self ._spath , other ))
188209
189210 __truediv__ = __div__
190211
191- def __getattr__ (self , name ):
192- # When deepcopy is called, it creates and object without __init__,
193- # self.parsed is not initialized and it causes infinite recursion.
194- # More on this special casing here:
195- # https://stackoverflow.com/a/47300262/298182
196- if name .startswith ("__" ):
197- raise AttributeError (name )
198- return getattr (self .parsed , name )
199-
200- @cached_property
201- def netloc (self ):
202- p = self .parsed
203- netloc = p .hostname
204- if p .username :
205- netloc = p .username + "@" + netloc
206- if p .port and int (p .port ) != self .DEFAULT_PORTS .get (p .scheme ):
207- netloc += ":" + str (p .port )
208- return netloc
209-
210212 @property
211- def port (self ):
212- return self .parsed .port or self .DEFAULT_PORTS .get (self .parsed .scheme )
213-
214- @property
215- def host (self ):
216- return self .parsed .hostname
217-
218- @property
219- def user (self ):
220- return self .parsed .username
213+ def path (self ):
214+ return self ._spath
221215
222216 @cached_property
223217 def _path (self ):
224- return PosixPathInfo (self .parsed . path )
218+ return _URLPathInfo (self ._spath )
225219
226220 @property
227221 def name (self ):
228222 return self ._path .name
229223
230- @property
231- def parts (self ):
232- return (self .scheme , self .netloc ) + self ._path .parts
224+ @cached_property
225+ def netloc (self ):
226+ netloc = self .host
227+ if self .user :
228+ netloc = self .user + "@" + netloc
229+ if self .port and int (self .port ) != self .DEFAULT_PORTS .get (self .scheme ):
230+ netloc += ":" + str (self .port )
231+ return netloc
233232
234233 @property
235234 def bucket (self ):
236- return self .parsed . netloc
235+ return self .netloc
237236
238237 @property
239238 def parent (self ):
240- return self .from_parts (
241- scheme = self .scheme ,
242- netloc = self .parsed .netloc ,
243- path = self ._path .parent .fspath ,
244- )
239+ return self .replace (path = self ._path .parent )
245240
246241 @property
247242 def parents (self ):
248- return _URLPathParents (
249- type (self ), self .scheme , self .parsed .netloc , self ._path
250- )
243+ return _URLPathParents (self )
251244
252245 def relative_to (self , other ):
253- if isinstance (other , str ):
254- other = URLInfo (other )
255- if self .scheme != other .scheme or self .netloc != other .netloc :
256- raise ValueError (
257- "'{}' does not start with '{}'" .format (self , other )
258- )
246+ if isinstance (other , basestring ):
247+ other = self .__class__ (other )
248+ if self .__class__ != other .__class__ :
249+ msg = "'{}' has incompatible class with '{}'" .format (self , other )
250+ raise ValueError (msg )
251+ if self ._base_parts != other ._base_parts :
252+ msg = "'{}' does not start with '{}'" .format (self , other )
253+ raise ValueError (msg )
259254 return self ._path .relative_to (other ._path )
260255
261256 def isin (self , other ):
262257 if isinstance (other , basestring ):
263258 other = self .__class__ (other )
264259 elif self .__class__ != other .__class__ :
265260 return False
266- return (
267- self .scheme == other .scheme
268- and self .netloc == other .netloc
269- and self ._path .isin (other ._path )
261+ return self ._base_parts == other ._base_parts and self ._path .isin (
262+ other ._path
270263 )
271264
272265
273266class CloudURLInfo (URLInfo ):
274267 @property
275268 def path (self ):
276- return self .parsed . path .lstrip ("/" )
269+ return self ._spath .lstrip ("/" )
0 commit comments