-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
uri.cr
749 lines (678 loc) · 21.9 KB
/
uri.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
require "./uri/uri_parser"
require "./uri/encoding"
require "./uri/params"
# This class represents a URI reference as defined by [RFC 3986: Uniform Resource Identifier
# (URI): Generic Syntax](https://www.ietf.org/rfc/rfc3986.txt).
#
# This class provides constructors for creating URI instances from
# their components or by parsing their string forms and methods for accessing the various
# components of an instance.
#
# NOTE: To use `URI`, you must explicitly import it with `require "uri"`
#
# Basic example:
#
# ```
# require "uri"
#
# uri = URI.parse "http://foo.com/posts?id=30&limit=5#time=1305298413"
# # => #<URI:0x1003f1e40 @scheme="http", @host="foo.com", @port=nil, @path="/posts", @query="id=30&limit=5", ... >
# uri.scheme # => "http"
# uri.host # => "foo.com"
# uri.query # => "id=30&limit=5"
# uri.to_s # => "http://foo.com/posts?id=30&limit=5#time=1305298413"
# ```
#
# ## Resolution and Relativization
#
# *Resolution* is the process of resolving one URI against another, *base* URI.
# The resulting URI is constructed from components of both URIs in the manner specified by
# [RFC 3986 section 5.2](https://tools.ietf.org/html/rfc3986#section-5.2.2), taking components
# from the base URI for those not specified in the original.
# For hierarchical URIs, the path of the original is resolved against the path of the base
# and then normalized. See `#resolve` for examples.
#
# *Relativization* is the inverse of resolution in that it produces a URI that
# resolves to the original when resolved against the base.
#
# For normalized URIs, the following is true:
#
# ```
# a.relativize(a.resolve(b)) # => b
# a.resolve(a.relativize(b)) # => b
# ```
#
# This operation is often useful when constructing a document containing URIs that must
# be made relative to the base URI of the document wherever possible.
#
# ## URL Encoding
#
# This class provides a number of methods for encoding and decoding strings using
# URL Encoding (also known as Percent Encoding) as defined in [RFC 3986](https://www.ietf.org/rfc/rfc3986.txt)
# as well as [`x-www-form-urlencoded`](https://url.spec.whatwg.org/#urlencoded-serializing).
#
# Each method has two variants, one returns a string, the other writes directly
# to an IO.
#
# * `.decode(string : String, *, plus_to_space : Bool = false) : String`: Decodes a URL-encoded string.
# * `.decode(string : String, io : IO, *, plus_to_space : Bool = false) : Nil`: Decodes a URL-encoded string to an IO.
# * `.encode_path(string : String) : String`: URL-encodes a string.
# * `.encode_path(string : String, io : IO) : Nil`: URL-encodes a string to an IO.
# * `.encode_path_segment(string : String) : String`: URL-encodes a string, escaping `/`.
# * `.encode_path_segment(string : String, io : IO) : Nil`: URL-encodes a string to an IO, escaping `/`.
# * `.decode_www_form(string : String, *, plus_to_space : Bool = true) : String`: Decodes an `x-www-form-urlencoded` string component.
# * `.decode_www_form(string : String, io : IO, *, plus_to_space : Bool = true) : Nil`: Decodes an `x-www-form-urlencoded` string component to an IO.
# * `.encode_www_form(string : String, *, space_to_plus : Bool = true) : String`: Encodes a string as a `x-www-form-urlencoded` component.
# * `.encode_www_form(string : String, io : IO, *, space_to_plus : Bool = true) : Nil`: Encodes a string as a `x-www-form-urlencoded` component to an IO.
#
# `.encode_www_form` encodes white space (` `) as `+`, while `.encode_path`
# and `.encode_path_segment` encode it as `%20`. The decode methods differ regarding
# the handling of `+` characters, respectively.
#
# NOTE: `URI::Params` provides a higher-level API for handling `x-www-form-urlencoded`
# serialized data.
class URI
# Exception type namespaced under `URI`.
#
# NOTE(review): nothing in this file raises it; presumably it is raised by the
# required parser/encoding/params code - confirm against those files.
class Error < Exception
end
# Returns the scheme component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://foo.com").scheme # => "http"
# URI.parse("mailto:alice@example.com").scheme # => "mailto"
# ```
getter scheme : String?
# Sets the scheme component of the URI.
#
# Assigning `nil` removes the scheme, which makes the URI relative (see `#relative?`).
setter scheme : String?
# Returns the host component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://foo.com").host # => "foo.com"
# ```
getter host : String?
# Sets the host component of the URI, or clears it when `nil`.
#
# The value is stored as given; `#hostname` strips the square brackets of a
# bracketed IPv6 literal on read.
setter host : String?
# Returns the port component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://foo.com:5432").port # => 5432
# ```
getter port : Int32?
# Sets the port component of the URI, or clears it when `nil`.
setter port : Int32?
# Returns the path component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://foo.com/bar").path # => "/bar"
# ```
getter path : String
# Sets the path component of the URI.
#
# Unlike the other components the path is never `nil`; use an empty string
# for a URI without a path.
setter path : String
# Returns the query component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://foo.com/bar?q=1").query # => "q=1"
# ```
getter query : String?
# Sets the query component of the URI as a raw string (without the leading `?`),
# or clears it when `nil`.
#
# Use `#query_params=` to assign a `URI::Params` instance instead.
setter query : String?
# Returns the user component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://admin:password@foo.com").user # => "admin"
# ```
getter user : String?
# Sets the user component of the URI, or clears it when `nil`.
#
# The value is stored as given; it is URL-encoded when rendered by `#userinfo`.
setter user : String?
# Returns the password component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://admin:password@foo.com").password # => "password"
# ```
getter password : String?
# Sets the password component of the URI, or clears it when `nil`.
#
# The password is only rendered (by `#userinfo` and `#authority`) when `user` is set.
setter password : String?
# Returns the fragment component of the URI.
#
# ```
# require "uri"
#
# URI.parse("http://foo.com/bar#section1").fragment # => "section1"
# ```
getter fragment : String?
# Sets the fragment component of the URI (without the leading `#`),
# or clears it when `nil`.
setter fragment : String?
# Generates `==` and `hash` from all eight components, so two URIs compare
# equal only when every component is equal.
def_equals_and_hash scheme, host, port, path, query, user, password, fragment
# Creates a URI from its individual components.
#
# Every component is optional. *query* may be given as a raw string or as a
# `URI::Params` instance; it is stored in its string form.
def initialize(
  @scheme = nil, @host = nil, @port = nil, @path = "",
  query : String | Params | Nil = nil,
  @user = nil, @password = nil, @fragment = nil,
)
  @query = query ? query.to_s : nil
end
# Returns the host part of the URI and unwraps the brackets of IPv6 addresses.
#
# ```
# require "uri"
#
# URI.parse("http://[::1]/bar").hostname # => "::1"
# URI.parse("http://[::1]/bar").host # => "[::1]"
# ```
def hostname : String?
  # A missing host stays `nil`; otherwise bracket stripping is delegated
  # to `.unwrap_ipv6`.
  if current = host
    self.class.unwrap_ipv6(current)
  end
end
# Unwraps IPv6 address wrapped in square brackets.
#
# Everything that is not wrapped in square brackets is returned unchanged.
#
# ```
# URI.unwrap_ipv6("[::1]") # => "::1"
# URI.unwrap_ipv6("127.0.0.1") # => "127.0.0.1"
# URI.unwrap_ipv6("example.com") # => "example.com"
# ```
def self.unwrap_ipv6(host) : String
  bracketed = host.starts_with?('[') && host.ends_with?(']')
  return host unless bracketed

  # Both brackets are single-byte characters, so dropping the first and
  # last byte removes exactly the wrapping pair.
  host.byte_slice(1, host.bytesize - 2)
end
# Returns the concatenation of `path` and `query` as it would be used as a
# request target in an HTTP request.
#
# If `path` is empty in a hierarchical URI, `"/"` is used.
#
# ```
# require "uri"
#
# uri = URI.parse "http://example.com/posts?id=30&limit=5#time=1305298413"
# uri.request_target # => "/posts?id=30&limit=5"
#
# uri = URI.new(path: "", query: "foo=bar")
# uri.request_target # => "/?foo=bar"
# ```
def request_target : String
  raw_query = @query

  # Capacity hint for the builder: at least one byte for the implicit "/",
  # plus the query and its `?` designator when a query is present.
  capacity = @path.empty? ? 1 : @path.bytesize
  capacity += raw_query.bytesize + 1 if raw_query

  String.build(capacity) do |io|
    if !@path.empty?
      io << @path
    elsif !opaque?
      # An empty path is only replaced by "/" for non-opaque URIs.
      io << "/"
    end

    # An empty query string is omitted together with its `?`.
    if (q = raw_query) && !q.empty?
      io << '?' << q
    end
  end
end
# Returns `true` if URI has a *scheme* specified.
def absolute? : Bool
  # Any non-nil scheme (even an empty string) counts as "specified".
  !@scheme.nil?
end
# Returns `true` if URI does not have a *scheme* specified.
def relative? : Bool
  # Exact negation of `#absolute?`.
  absolute? ? false : true
end
# Returns `true` if this URI is opaque.
#
# A URI is considered opaque if it has a `scheme` but no hierarchical part,
# i.e. no `host` and the first character of `path` is not a slash (`/`).
def opaque? : Bool
  # Without a scheme, or with a host, the URI is hierarchical.
  return false if @scheme.nil?
  return false unless @host.nil?

  # What remains is decided by the path: a rootless path means opaque.
  !@path.starts_with?('/')
end
# Returns a `URI::Params` of the URI#query.
#
# ```
# require "uri"
#
# uri = URI.parse "http://foo.com?id=30&limit=5#time=1305298413"
# uri.query_params # => URI::Params{"id" => ["30"], "limit" => ["5"]}
# ```
def query_params : URI::Params
  # A missing query is parsed like an empty one.
  raw = @query
  URI::Params.parse(raw.nil? ? "" : raw)
end
# Sets `query` to stringified *params*.
#
# ```
# require "uri"
#
# uri = URI.new
# uri.query_params = URI::Params.parse("foo=bar&foo=baz")
# uri.to_s # => "?foo=bar&foo=baz"
# ```
def query_params=(params : URI::Params)
  # Stored in serialized form; `query` always holds a plain string.
  serialized = params.to_s
  self.query = serialized
end
# Yields the value of `#query_params` and commits any modifications of the
# `URI::Params` instance to `self`.
# Returns `self`.
#
# ```
# require "uri"
# uri = URI.parse("http://foo.com?id=30&limit=5#time=1305298413")
# uri.update_query_params { |params| params.delete_all("limit") } # returns `uri` itself
#
# puts uri.to_s # => "http://foo.com?id=30#time=1305298413"
# ```
def update_query_params(& : URI::Params -> _) : URI
  # Work on a freshly parsed copy of the query, let the caller mutate it,
  # then write the serialized result back.
  current = query_params
  yield current
  self.query = current.to_s
  self
end
# Returns the authority component of this URI.
# It is formatted as `user:pass@host:port` with missing parts being omitted.
#
# If the URI does not have any authority information, the result is `nil`.
#
# ```
# uri = URI.parse "http://user:pass@example.com:80/path?query"
# uri.authority # => "user:pass@example.com:80"
#
# uri = URI.parse("/relative")
# uri.authority # => nil
# ```
def authority : String?
  # No host, user or port means there is no authority component at all.
  return nil if @host.nil? && @user.nil? && @port.nil?

  String.build { |io| authority(io) }
end
# :ditto:
def authority(io : IO) : Nil
  # userinfo "@" - only emitted when a user is set.
  @user.try do |name|
    userinfo(name, io)
    io << '@'
  end

  @host.try do |host_value|
    # https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.2
    #
    #   host = IP-literal / IPv4address / reg-name
    #
    # Bytes left unescaped: unreserved, sub-delims and the IPv6 literal
    # characters ':', '[' and ']'. Everything else is percent-encoded.
    URI.encode(host_value, io) do |byte|
      case byte.unsafe_chr
      when ':', '[', ']'
        true
      else
        URI.unreserved?(byte) || URI.sub_delim?(byte)
      end
    end
  end

  @port.try do |number|
    io << ':' << number
  end
end
# Writes the string form of this URI to *io*:
# `scheme:` `//authority` `path` `?query` `#fragment`, each part only if present.
def to_s(io : IO) : Nil
  if scheme_part = @scheme
    io << scheme_part << ':'
  end

  authority_present = !(@host.nil? && @user.nil? && @port.nil?)
  io << "//" if authority_present
  authority(io)

  if authority_present
    # A non-empty rootless path needs a separator from the authority.
    io << '/' unless @path.empty? || @path.starts_with?('/')
  elsif @path.starts_with?("//")
    # Without an authority, a path starting with "//" would be read back as
    # one; prefixing "/." keeps it a path.
    io << "/."
  end
  io << @path

  if query_part = @query
    io << '?' << query_part
  end

  if fragment_part = @fragment
    io << '#' << fragment_part
  end
end
# Returns a normalized copy of this URI.
#
# See `#normalize!` for details.
def normalize : URI
  # Normalize a copy so the receiver stays untouched.
  copy = dup
  copy.normalize!
end
# Normalizes this URI instance.
#
# The following normalizations are applied to the individual components (if available):
#
# * `scheme` is lowercased.
# * `host` is lowercased.
# * `port` is removed if it is the `.default_port?` of the scheme.
# * `path` is resolved to a minimal, semantic equivalent representation removing
# dot segments `/.` and `/..`.
#
# ```
# uri = URI.parse("HTTP://example.COM:80/./foo/../bar/")
# uri.normalize!
# uri # => "http://example.com/bar/"
# ```
def normalize! : URI
  if current_scheme = @scheme
    @scheme = current_scheme.downcase
  end

  if current_host = @host
    @host = current_host.downcase
  end

  # Drop the port when it is the registered default for the scheme.
  @port = nil if default_port?

  @path = remove_dot_segments(@path)
  self
end
# Resolves *uri* against this URI.
#
# If *uri* is `absolute?`, or if this URI is `opaque?`, then an exact copy of *uri* is returned.
#
# Otherwise the URI is resolved according to the specifications in [RFC 3986 section 5.2](https://tools.ietf.org/html/rfc3986#section-5.2.2).
#
# ```
# URI.parse("http://foo.com/bar/baz").resolve("../quux") # => "http://foo.com/quux"
# URI.parse("http://foo.com/bar/baz").resolve("/quux") # => "http://foo.com/quux"
# URI.parse("http://foo.com/bar/baz").resolve("http://quux.com") # => "http://quux.com"
# URI.parse("http://foo.com/bar/baz").resolve("#quux") # => "http://foo.com/bar/baz#quux"
# ```
#
# This method is the inverse operation to `#relativize` (see [Resolution and Relativization](#Resolution and Relativization)).
def resolve(uri : URI | String) : URI
  # Always work on a private copy so neither the argument nor self is mutated.
  target = uri.is_a?(URI) ? uri.dup : URI.parse(uri)

  # Absolute references, and references against an opaque base, are returned as is.
  return target if target.absolute? || opaque?

  target.scheme = scheme

  # A reference carrying its own authority keeps its own path and query.
  return target if target.host || target.user

  target.host = host
  target.port = port
  target.user = user
  target.password = password

  if target.path.empty?
    # Empty reference path: reuse the base path, and the base query unless
    # the reference brings its own.
    target.path = remove_dot_segments(path)
    target.query ||= query
  else
    base = path
    # An empty base path is treated as "/" once the result has a scheme.
    base = "/" if base.empty? && target.absolute?
    target.path = resolve_path(target.path, base: base)
  end

  target
end
# Merges the reference *path* with the *base* path and removes dot segments
# from the result (RFC 3986 sections 5.2.3 and 5.2.4).
#
# * An absolute *path* (starting with `/`) ignores *base*.
# * An empty *path* yields *base*.
# * Otherwise *path* is appended to *base* up to and including its last `/`.
#   If *base* contains no `/` at all, the whole base is discarded.
private def resolve_path(path : String, base : String) : String
  unless path.starts_with?('/')
    if path.empty?
      path = base
    elsif !base.empty?
      # `rindex` is nil when base has no slash; using it directly as a range
      # end would keep the entire base ("foo" + "bar" => "foobar"), so that
      # case is handled explicitly.
      if last_slash = base.rindex('/')
        path = String.interpolation(base[0..last_slash], path)
      end
    end
  end
  remove_dot_segments(path)
end
# Relativizes *uri* against this URI.
#
# An exact copy of *uri* is returned if
# * this URI or *uri* are `opaque?`, or
# * the scheme and authority (`host`, `port`, `user`, `password`) components are not identical.
#
# Otherwise a new relative hierarchical URI is constructed with `query` and `fragment` components
# from *uri* and with a path component that describes a minimum-difference relative
# path from `#path` to *uri*'s path.
#
# ```
# URI.parse("http://foo.com/bar/baz").relativize("http://foo.com/quux") # => "../quux"
# URI.parse("http://foo.com/bar/baz").relativize("http://foo.com/bar/quux") # => "quux"
# URI.parse("http://foo.com/bar/baz").relativize("http://quux.com") # => "http://quux.com"
# URI.parse("http://foo.com/bar/baz").relativize("http://foo.com/bar/baz#quux") # => "#quux"
# ```
#
# This method is the inverse operation to `#resolve` (see [Resolution and Relativization](#Resolution and Relativization)).
def relativize(uri : URI | String) : URI
  # Always work on a private copy of the argument.
  target = uri.is_a?(URI) ? uri.dup : URI.parse(uri)

  # Scheme and host are compared case-insensitively; port, user and
  # password must match exactly.
  same_origin = target.scheme.try(&.downcase) == @scheme.try(&.downcase) &&
                target.host.try(&.downcase) == @host.try(&.downcase) &&
                target.port == @port &&
                target.user == @user &&
                target.password == @password

  return target if target.opaque? || opaque? || !same_origin

  # A query identical to the base's query is left out of the relative reference.
  relative_query = target.query
  relative_query = nil if relative_query == @query

  URI.new(
    path: relativize_path(@path, target.path),
    query: relative_query,
    fragment: target.fragment
  )
end
# Computes a relative path that leads from *base* to *dst*.
#
# Returns an empty string when both paths are identical.
private def relativize_path(base : String, dst : String) : String
  return "" if base == dst

  # When *base* contains dot segments ("/./", "/../", "/x/../", ...) and
  # *dst* is absolute, *dst* is returned unchanged.
  if dst.starts_with?('/') && base =~ %r{(?:\A|/)\.\.?(?:/|\z)}
    return dst
  end

  base_segments = base.split('/', remove_empty: true)
  dst_segments = dst.split('/', remove_empty: true)

  # A trailing slash on dst is kept as an empty final segment.
  dst_segments << "" if dst.ends_with?('/')
  # The last segment of base is dropped unless base ends with a slash.
  base_segments.pop? unless base.ends_with?('/')

  # Strip the common leading segments, skipping "." on either side.
  until dst_segments.empty? || base_segments.empty?
    dst_head = dst_segments.first
    base_head = base_segments.first

    if dst_head == base_head
      base_segments.shift
      dst_segments.shift
    elsif dst_head == "."
      dst_segments.shift
    elsif base_head == "."
      base_segments.shift
    else
      break
    end
  end

  if !base_segments.empty?
    # Climb out of every remaining base segment, then descend into dst.
    capacity = 3 * base_segments.size + dst_segments.sum(&.bytesize) + dst_segments.size - 1
    String.build(capacity) do |io|
      base_segments.size.times { io << "../" }
      dst_segments.join(io, '/')
    end
  elsif dst_segments.empty? || dst_segments.first.empty?
    "./"
  elsif dst_segments.first.includes?(':')
    # A first segment containing ':' gets a "./" prefix (see RFC2396 Section 5).
    capacity = 1 + dst_segments.sum(&.bytesize) + dst_segments.size
    String.build(capacity) do |io|
      io << "./"
      dst_segments.join(io, '/')
    end
  else
    dst_segments.join('/')
  end
end
# Parses the given *raw_url* into a URI. The *raw_url* may be relative or absolute.
#
# ```
# require "uri"
#
# uri = URI.parse("http://crystal-lang.org") # => #<URI:0x1068a7e40 @scheme="http", @host="crystal-lang.org", ... >
# uri.scheme # => "http"
# uri.host # => "crystal-lang.org"
# ```
def self.parse(raw_url : String) : URI
  # Parsing is delegated entirely to `URI::Parser`.
  parser = URI::Parser.new(raw_url)
  parser.run.uri
end
# Returns the user-information component containing
# the provided username and password.
#
# ```
# require "uri"
#
# uri = URI.parse "http://admin:password@foo.com"
# uri.userinfo # => "admin:password"
# ```
#
# The return value is URL encoded (see `.encode_www_form`).
def userinfo : String?
  # Without a user there is no userinfo, even if a password is set.
  @user.try do |name|
    String.build { |io| userinfo(name, io) }
  end
end
# Writes the encoded *user* to *io*, followed by `:` and the encoded
# password when a password is set.
private def userinfo(user, io)
  URI.encode_www_form(user, io)
  @password.try do |secret|
    io << ':'
    URI.encode_www_form(secret, io)
  end
end
# Removes dot segments from *path* following the algorithm in
# [RFC 3986 section 5.2.4](https://tools.ietf.org/html/rfc3986#section-5.2.4).
private def remove_dot_segments(path : String)
  output = [] of String
  input = path

  until input.empty?
    if input.starts_with?("../")
      # A. Drop a leading "../" ...
      input = input[3..-1]
    elsif input.starts_with?("./")
      # ... or a leading "./".
      input = input[2..-1]
    elsif input.starts_with?("/./")
      # B. "/./" collapses to "/" (keep the second slash).
      input = input[2..-1]
    elsif input == "/."
      input = "/"
    elsif input.starts_with?("/../")
      # C. "/../" collapses to "/" and discards the previous output segment.
      input = input[3..-1]
      output.pop?
    elsif input == "/.."
      input = "/"
      output.pop?
    elsif input == "." || input == ".."
      # D. A lone "." or ".." is dropped.
      input = ""
    else
      # E. Move the first segment (with its leading "/", if any) to the
      #    output, up to but excluding the next "/".
      search_from = input.starts_with?('/') ? 1 : 0
      cut = input.index('/', search_from) || input.size
      output << input[0...cut]
      input = input[cut..-1]
    end
  end

  # A rootless first segment containing ':' gets a "./" prefix.
  if (head = output.first?) && !head.starts_with?('/') && head.includes?(':')
    output.unshift("./")
  end

  output.join
end
# A map of schemes and their respective default ports, seeded
# with some well-known schemes sourced from the IANA [Uniform
# Resource Identifier (URI) Schemes][1] and [Service Name and
# Transport Protocol Port Number Registry][2] via Mahmoud
# Hashemi's [scheme_port_map.json][3].
#
# [1]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
# [2]: https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
# [3]: https://gist.github.com/mahmoud/2fe281a8daaff26cfe9c15d2c5bf5c8b
# Keys are lowercase scheme names: `.default_port` and `.set_default_port`
# downcase their *scheme* argument before reading or writing this table.
@@default_ports = {
"acap" => 674,
"afp" => 548,
"dict" => 2628,
"dns" => 53,
"ftp" => 21,
"ftps" => 990,
"git" => 9418,
"gopher" => 70,
"http" => 80,
"https" => 443,
"imap" => 143,
"ipp" => 631,
"ipps" => 631,
"irc" => 194,
"ircs" => 6697,
"ldap" => 389,
"ldaps" => 636,
"mms" => 1755,
"msrp" => 2855,
"mtqp" => 1038,
"nfs" => 111,
"nntp" => 119,
"nntps" => 563,
"pop" => 110,
"prospero" => 1525,
"redis" => 6379,
"rsync" => 873,
"rtsp" => 554,
"rtsps" => 322,
"rtspu" => 5005,
"scp" => 22,
"sftp" => 22,
"smb" => 445,
"snmp" => 161,
"ssh" => 22,
"svn" => 3690,
"telnet" => 23,
"ventrilo" => 3784,
"vnc" => 5900,
"wais" => 210,
"ws" => 80,
"wss" => 443,
}
# Returns the default port for the given *scheme* if known,
# otherwise returns `nil`.
#
# ```
# require "uri"
#
# URI.default_port "http" # => 80
# URI.default_port "ponzi" # => nil
# ```
def self.default_port(scheme : String) : Int32?
  # Lookups are case-insensitive: the table is keyed by lowercase scheme.
  key = scheme.downcase
  @@default_ports[key]?
end
# Registers the default port for the given *scheme*.
#
# If *port* is `nil`, the existing default port for the
# *scheme*, if any, will be unregistered.
#
# ```
# require "uri"
#
# URI.set_default_port "ponzi", 9999
# ```
def self.set_default_port(scheme : String, port : Int32?) : Nil
  # The table is keyed by lowercase scheme.
  key = scheme.downcase

  if port.nil?
    # `nil` unregisters the scheme; a scheme that is not registered is ignored.
    @@default_ports.delete(key)
  else
    @@default_ports[key] = port
  end
end
# Returns `true` if this URI's *port* is the default port for
# its *scheme*.
private def default_port?
  # `known` is nil when there is no scheme or the scheme has no registered
  # default; in both cases the port cannot be a default port.
  known = @scheme.try { |name| URI.default_port(name) }
  !known.nil? && known == @port
end
end