Skip to content

Commit

Permalink
deps: update ada to 1.0.4
Browse files Browse the repository at this point in the history
PR-URL: #46853
Fixes: #46850
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Mohammed Keyvanzadeh <mohammadkeyvanzade94@gmail.com>
Reviewed-By: Xuguang Mei <meixuguang@gmail.com>
nodejs-github-bot authored and targos committed Mar 14, 2023

Verified

This commit was signed with the committer’s verified signature.
targos Michaël Zasso
1 parent b199acd commit b825e2d
Showing 2 changed files with 391 additions and 72 deletions.
392 changes: 345 additions & 47 deletions deps/ada/ada.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/* auto-generated on 2023-02-22 14:24:01 -0500. Do not edit! */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=ada.cpp
/* auto-generated on 2023-02-26 15:07:41 -0500. Do not edit! */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=ada.cpp
/* begin file src/ada.cpp */
#include "ada.h"
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=checkers.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=checkers.cpp
/* begin file src/checkers.cpp */
#include <algorithm>

@@ -24,10 +24,10 @@ namespace ada::checkers {
}


// for use with path_signature
// for use with path_signature, we include all characters that need percent encoding.
static constexpr uint8_t path_signature_table[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -37,8 +37,28 @@ namespace ada::checkers {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
static_assert(path_signature_table[uint8_t('?')] == 1);
static_assert(path_signature_table[uint8_t('`')] == 1);
static_assert(path_signature_table[uint8_t('{')] == 1);
static_assert(path_signature_table[uint8_t('}')] == 1);
//
static_assert(path_signature_table[uint8_t(' ')] == 1);
static_assert(path_signature_table[uint8_t('?')] == 1);
static_assert(path_signature_table[uint8_t('"')] == 1);
static_assert(path_signature_table[uint8_t('#')] == 1);
static_assert(path_signature_table[uint8_t('<')] == 1);
static_assert(path_signature_table[uint8_t('>')] == 1);
//
static_assert(path_signature_table[0] == 1);
static_assert(path_signature_table[31] == 1);
static_assert(path_signature_table[127] == 1);
static_assert(path_signature_table[128] == 1);
static_assert(path_signature_table[255] == 1);

ada_really_inline constexpr uint8_t path_signature(std::string_view input) noexcept {
// The path percent-encode set is the query percent-encode set and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}).
// The query percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#), U+003C (<), and U+003E (>).
// The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~).
size_t i = 0;
uint8_t accumulator{};
for (; i + 7 < input.size(); i += 8) {
@@ -52,14 +72,14 @@ namespace ada::checkers {
path_signature_table[uint8_t(input[i + 7])]);
}
for (; i < input.size(); i++) {
accumulator |= path_signature_table[uint8_t(input[i])];
accumulator |= uint8_t(path_signature_table[uint8_t(input[i])]);
}
return accumulator;
}


ada_really_inline constexpr bool verify_dns_length(std::string_view input) noexcept {
if(input.back() == '.') {
if(input.back() == '.') {
if(input.size() > 254) return false;
} else if (input.size() > 253) return false;

@@ -79,7 +99,7 @@ namespace ada::checkers {
}
} // namespace ada::checkers
/* end file src/checkers.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=unicode.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=unicode.cpp
/* begin file src/unicode.cpp */

#include <algorithm>
@@ -610,7 +630,7 @@ constexpr static uint8_t is_forbidden_domain_code_point_table[] = {

} // namespace ada::unicode
/* end file src/unicode.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=serializers.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=serializers.cpp
/* begin file src/serializers.cpp */

#include <array>
@@ -683,7 +703,7 @@ namespace ada::serializers {

} // namespace ada::serializers
/* end file src/serializers.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=implementation.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=implementation.cpp
/* begin file src/implementation.cpp */
#include <string_view>

@@ -734,7 +754,7 @@ namespace ada {

} // namespace ada
/* end file src/implementation.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=helpers.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=helpers.cpp
/* begin file src/helpers.cpp */

#include <algorithm>
@@ -829,27 +849,216 @@ namespace ada::helpers {
return pos > input.size() ? std::string_view() : input.substr(pos);
}

ada_really_inline size_t get_host_delimiter_location(const ada::url& url, std::string_view& view, bool& inside_brackets) noexcept {
size_t location = url.is_special() ? view.find_first_of(":[/?\\") : view.find_first_of(":[/?");
// Reverse the byte order of a 64-bit value: the least significant byte
// becomes the most significant one, and so on.
ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept {
  // Three butterfly steps: exchange neighbouring bytes, then neighbouring
  // 16-bit groups, then the two 32-bit halves.
  // performance: byte-reversal patterns are expected to compile down to a
  // single instruction (e.g., bswap) -- worth confirming on the target compiler.
  val = ((val & 0x00ff00ff00ff00ffull) << 8) | ((val >> 8) & 0x00ff00ff00ff00ffull);
  val = ((val & 0x0000ffff0000ffffull) << 16) | ((val >> 16) & 0x0000ffff0000ffffull);
  return (val << 32) | (val >> 32);
}

// Returns val with its bytes reversed when ADA_IS_BIG_ENDIAN is set, and val
// unchanged otherwise.
ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept {
  // performance: on little-endian systems (most systems) nothing is compiled
  // in here, so the function is free (it just returns its input).
#if ADA_IS_BIG_ENDIAN
  val = swap_bytes(val);
#endif
  return val;
}

// Next while loop is almost never taken!
while((location != std::string_view::npos) && (view[location] == '[')) {
location = view.find(']',location);
if(location == std::string_view::npos) {
inside_brackets = true;
/**
* TODO: Ok. So if we arrive here then view has an unclosed [,
* Is the URL valid???
*/
} else {
location = url.is_special() ? view.find_first_of(":[/?\\#", location) : view.find_first_of(":[/?#", location);
// Scans view from index location onward and returns the index of the first
// ':', '/', '\\', '?' or '[' character. Returns view.size() when there is none.
// For use within get_host_delimiter_location.
ada_really_inline size_t find_next_host_delimiter_special(std::string_view view, size_t location) noexcept {
  // performance: if you plan to call this function more than once, you
  // *really* want it to be inlined, because otherwise the constants below
  // may get reloaded each time (bad).
  constexpr uint64_t ones = 0x0101010101010101;
  constexpr uint64_t highs = 0x8080808080808080;
  // Each pattern repeats one delimiter in all eight bytes of a word.
  constexpr uint64_t colons = ones * uint8_t(':');
  constexpr uint64_t slashes = ones * uint8_t('/');
  constexpr uint64_t backslashes = ones * uint8_t('\\');
  constexpr uint64_t questions = ones * uint8_t('?');
  constexpr uint64_t brackets = ones * uint8_t('[');

  // Sets 0x80 in every byte of w that is zero. A byte above a genuine zero
  // byte may be flagged too (borrow propagation), but the lowest flagged byte
  // is always genuine, and it is the only one that gets used.
  const auto zero_bytes = [=](uint64_t w) -> uint64_t {
    return (w - ones) & ~w & highs;
  };
  // XOR with a pattern turns the bytes equal to that delimiter into zero bytes.
  const auto flag_delimiters = [=](uint64_t chunk) -> uint64_t {
    return zero_bytes(chunk ^ colons) | zero_bytes(chunk ^ slashes) |
           zero_bytes(chunk ^ backslashes) | zero_bytes(chunk ^ questions) |
           zero_bytes(chunk ^ brackets);
  };
  // Byte index (0 = least significant) of the lowest flagged byte.
  const auto first_flagged_byte = [=](uint64_t flags) -> size_t {
    return size_t(((((flags - 1) & ones) * ones) >> 56) - 1);
  };

  size_t pos = location;
  // Whole 8-byte words.
  // performance: this loop gets autovectorized under many optimizing
  // compilers, and the fixed-size memcpy translates into a single load.
  for (; pos + 7 < view.size(); pos += 8) {
    uint64_t chunk{};
    memcpy(&chunk, view.data() + pos, sizeof(chunk));
    // performance: free on little-endian systems (most systems).
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }
  // Leftover bytes (fewer than eight): the rest of the word stays zero, and
  // a zero byte never matches because no delimiter is the zero byte.
  if (pos < view.size()) {
    uint64_t chunk{};
    // performance: this variable-size memcpy translates into a function call,
    // but that is difficult to avoid. Might be a bit expensive.
    memcpy(&chunk, view.data() + pos, view.size() - pos);
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }
  return view.size();
}

if (location != std::string_view::npos) {
view.remove_suffix(view.size() - location);
// Scans view from index location onward and returns the index of the first
// ':', '/', '?' or '[' character. Returns view.size() when there is none.
// For use within get_host_delimiter_location.
ada_really_inline size_t find_next_host_delimiter(std::string_view view, size_t location) noexcept {
  // performance: if you plan to call this function more than once, you
  // *really* want it to be inlined, because otherwise the constants below
  // may get reloaded each time (bad).
  constexpr uint64_t ones = 0x0101010101010101;
  constexpr uint64_t highs = 0x8080808080808080;
  // Each pattern repeats one delimiter in all eight bytes of a word.
  constexpr uint64_t colons = ones * uint8_t(':');
  constexpr uint64_t slashes = ones * uint8_t('/');
  constexpr uint64_t questions = ones * uint8_t('?');
  constexpr uint64_t brackets = ones * uint8_t('[');

  // Sets 0x80 in every byte of w that is zero. A byte above a genuine zero
  // byte may be flagged too (borrow propagation), but the lowest flagged byte
  // is always genuine, and it is the only one that gets used.
  const auto zero_bytes = [=](uint64_t w) -> uint64_t {
    return (w - ones) & ~w & highs;
  };
  // XOR with a pattern turns the bytes equal to that delimiter into zero bytes.
  const auto flag_delimiters = [=](uint64_t chunk) -> uint64_t {
    return zero_bytes(chunk ^ colons) | zero_bytes(chunk ^ slashes) |
           zero_bytes(chunk ^ questions) | zero_bytes(chunk ^ brackets);
  };
  // Byte index (0 = least significant) of the lowest flagged byte.
  const auto first_flagged_byte = [=](uint64_t flags) -> size_t {
    return size_t(((((flags - 1) & ones) * ones) >> 56) - 1);
  };

  size_t pos = location;
  // Whole 8-byte words.
  // performance: this loop gets autovectorized under many optimizing
  // compilers, and the fixed-size memcpy translates into a single load.
  for (; pos + 7 < view.size(); pos += 8) {
    uint64_t chunk{};
    memcpy(&chunk, view.data() + pos, sizeof(chunk));
    // performance: free on little-endian systems (most systems).
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }
  // Leftover bytes (fewer than eight): the rest of the word stays zero, and
  // a zero byte never matches because no delimiter is the zero byte.
  if (pos < view.size()) {
    uint64_t chunk{};
    // performance: this variable-size memcpy translates into a function call,
    // but that is difficult to avoid. Might be a bit expensive.
    memcpy(&chunk, view.data() + pos, view.size() - pos);
    // performance: free on little-endian systems (most systems).
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }
  return view.size();
}

// Locates the end of the host inside view.
// - is_special: selects the delimiter set; '\\' only ends the host of special URLs.
// - view: the input starting at the host; the final statements shrink it so
//   that it ends at the returned location.
// Returns {location, found_colon}: location is the index of the first
// delimiter found outside a [...] range, or view.size() if there is none
// (including the case of an unclosed '['); found_colon is true only when
// that delimiter is ':'.
ada_really_inline std::pair<size_t,bool> get_host_delimiter_location(const bool is_special, std::string_view& view) noexcept {
/**
* The spec at https://url.spec.whatwg.org/#hostname-state expects us to compute
* a variable called insideBrackets but this variable is only used once, to check
* whether a ':' character was found outside brackets.
* Exact text:
* "Otherwise, if c is U+003A (:) and insideBrackets is false, then:".
* It is conceptually simpler and arguably more efficient to just return a Boolean
* indicating whether ':' was found outside brackets.
*/
const size_t view_size = view.size();
size_t location = 0;
bool found_colon = false;
/**
* Performance analysis:
*
* We are basically seeking the end of the hostname which can be indicated
* by the end of the view, or by one of the characters ':', '/', '?', '\\' (where '\\' is only
* applicable for special URLs). However, these must appear outside a bracket range. E.g.,
* if you have [something?]fd: then the '?' does not count.
*
* So we can skip ahead to the next delimiter, as long as we include '[' in the set of delimiters,
* and that we handle it first.
*
* So the trick is to have a fast function that locates the next delimiter. Unless we find '[',
* then it only needs to be called once! Ideally, such a function would be provided by the C++
* standard library, but it seems that find_first_of is not very fast, so we are forced to roll
* our own.
*
* We do not break into two loops for speed, but for clarity.
*/
if(is_special) {
// We move to the next delimiter.
location = find_next_host_delimiter_special(view, location);
// Unless we find '[', we only have to call
// find_next_host_delimiter_special once.
for (;location < view_size; location = find_next_host_delimiter_special(view, location)) {
if (view[location] == '[') {
// Jump to the closing bracket; the loop increment then resumes the
// search from there, so delimiters inside the brackets are ignored.
location = view.find(']', location);
if (location == std::string_view::npos) {
// Unclosed '[': the host runs to the end of the view.
// performance: view.find might get translated to a memchr, which
// has no notion of std::string_view::npos, so the code does not
// reflect the assembly.
location = view_size;
break;
}
} else {
// First delimiter outside brackets: record whether it is a colon.
found_colon = view[location] == ':';
break;
}
}
} else {
// We move to the next delimiter.
location = find_next_host_delimiter(view, location);
// Unless we find '[', we only have to call
// find_next_host_delimiter once.
for (;location < view_size; location = find_next_host_delimiter(view, location)) {
if (view[location] == '[') {
// Jump to the closing bracket; the loop increment then resumes the
// search from there, so delimiters inside the brackets are ignored.
location = view.find(']', location);
if (location == std::string_view::npos) {
// Unclosed '[': the host runs to the end of the view.
// performance: view.find might get translated to a memchr, which
// has no notion of std::string_view::npos, so the code does not
// reflect the assembly.
location = view_size;
break;
}
} else {
// First delimiter outside brackets: record whether it is a colon.
found_colon = view[location] == ':';
break;
}
}
}
// NOTE(review): the next `return location;` looks like a leftover from the
// previous size_t-returning version of this function (presumably a removed
// line of the diff). As written it makes the two statements after it
// unreachable -- confirm that it is absent from the real file.
return location;
// performance: remove_suffix may translate into a single instruction.
view.remove_suffix(view_size - location);
return {location, found_colon};
}

ada_really_inline void trim_c0_whitespace(std::string_view& input) noexcept {
@@ -899,11 +1108,11 @@ namespace ada::helpers {
if(path.empty()) { path = '/'; return true; }
// Fast case where we have nothing to do:
if(path.back() == '/') { return true; }
// If you have the path "/joe/myfriend",
// If you have the path "/joe/myfriend",
// then you delete 'myfriend'.
path.resize(path.rfind('/') + 1);
return true;
}
}
path += '/';
if (path_view != ".") {
path.append(path_view);
@@ -981,6 +1190,93 @@ namespace ada::helpers {
if (url.query.has_value()) return;
while (!url.path.empty() && url.path.back() == ' ') { url.path.resize(url.path.size()-1); }
}

// Returns the index of the first '@', '/', '?' or '\\' character in view, or
// view.size() when there is none. The view is examined eight bytes at a time.
ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept {
  constexpr uint64_t ones = 0x0101010101010101;
  constexpr uint64_t highs = 0x8080808080808080;
  // Each pattern repeats one delimiter in all eight bytes of a word.
  constexpr uint64_t ats = ones * uint8_t('@');
  constexpr uint64_t slashes = ones * uint8_t('/');
  constexpr uint64_t questions = ones * uint8_t('?');
  constexpr uint64_t backslashes = ones * uint8_t('\\');

  // Sets 0x80 in every byte of w that is zero. A byte above a genuine zero
  // byte may be flagged too (borrow propagation), but the lowest flagged byte
  // is always genuine, and it is the only one that gets used.
  const auto zero_bytes = [=](uint64_t w) -> uint64_t {
    return (w - ones) & ~w & highs;
  };
  // XOR with a pattern turns the bytes equal to that delimiter into zero bytes.
  const auto flag_delimiters = [=](uint64_t chunk) -> uint64_t {
    return zero_bytes(chunk ^ ats) | zero_bytes(chunk ^ slashes) |
           zero_bytes(chunk ^ questions) | zero_bytes(chunk ^ backslashes);
  };
  // Byte index (0 = least significant) of the lowest flagged byte.
  const auto first_flagged_byte = [=](uint64_t flags) -> size_t {
    return size_t(((((flags - 1) & ones) * ones) >> 56) - 1);
  };

  size_t pos = 0;
  // Whole 8-byte words.
  for (; pos + 7 < view.size(); pos += 8) {
    uint64_t chunk{};
    memcpy(&chunk, view.data() + pos, sizeof(chunk));
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }

  // Leftover bytes (fewer than eight): the rest of the word stays zero, and
  // a zero byte never matches because no delimiter is the zero byte.
  if (pos < view.size()) {
    uint64_t chunk{};
    memcpy(&chunk, view.data() + pos, view.size() - pos);
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }

  return view.size();
}

// Returns the index of the first '@', '/' or '?' character in view, or
// view.size() when there is none. The view is examined eight bytes at a time.
ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept {
  constexpr uint64_t ones = 0x0101010101010101;
  constexpr uint64_t highs = 0x8080808080808080;
  // Each pattern repeats one delimiter in all eight bytes of a word.
  constexpr uint64_t ats = ones * uint8_t('@');
  constexpr uint64_t slashes = ones * uint8_t('/');
  constexpr uint64_t questions = ones * uint8_t('?');

  // Sets 0x80 in every byte of w that is zero. A byte above a genuine zero
  // byte may be flagged too (borrow propagation), but the lowest flagged byte
  // is always genuine, and it is the only one that gets used.
  const auto zero_bytes = [=](uint64_t w) -> uint64_t {
    return (w - ones) & ~w & highs;
  };
  // XOR with a pattern turns the bytes equal to that delimiter into zero bytes.
  const auto flag_delimiters = [=](uint64_t chunk) -> uint64_t {
    return zero_bytes(chunk ^ ats) | zero_bytes(chunk ^ slashes) |
           zero_bytes(chunk ^ questions);
  };
  // Byte index (0 = least significant) of the lowest flagged byte.
  const auto first_flagged_byte = [=](uint64_t flags) -> size_t {
    return size_t(((((flags - 1) & ones) * ones) >> 56) - 1);
  };

  size_t pos = 0;
  // Whole 8-byte words.
  for (; pos + 7 < view.size(); pos += 8) {
    uint64_t chunk{};
    memcpy(&chunk, view.data() + pos, sizeof(chunk));
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }

  // Leftover bytes (fewer than eight): the rest of the word stays zero, and
  // a zero byte never matches because no delimiter is the zero byte.
  if (pos < view.size()) {
    uint64_t chunk{};
    memcpy(&chunk, view.data() + pos, view.size() - pos);
    chunk = swap_bytes_if_big_endian(chunk);
    if (const uint64_t flags = flag_delimiters(chunk)) {
      return pos + first_flagged_byte(flags);
    }
  }

  return view.size();
}
} // namespace ada::helpers

namespace ada {
@@ -989,7 +1285,7 @@ namespace ada {
}
}
/* end file src/helpers.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=url.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=url.cpp
/* begin file src/url.cpp */

#include <numeric>
@@ -1514,7 +1810,7 @@ namespace ada {
}
} // namespace ada
/* end file src/url.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=url-getters.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=url-getters.cpp
/* begin file src/url-getters.cpp */
/**
* @file url-getters.cpp
@@ -1624,7 +1920,7 @@ namespace ada {

} // namespace ada
/* end file src/url-getters.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=url-setters.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=url-setters.cpp
/* begin file src/url-setters.cpp */
/**
* @file url-setters.cpp
@@ -1711,23 +2007,22 @@ namespace ada {
std::optional<std::string> previous_host = host;
std::optional<uint16_t> previous_port = port;

std::string_view::iterator _host_end = std::find(input.begin(), input.end(), '#');
std::string _host(input.data(), std::distance(input.begin(), _host_end));
size_t host_end_pos = input.find('#');
std::string _host(input.data(), host_end_pos != std::string_view::npos ? host_end_pos : input.size());
helpers::remove_ascii_tab_or_newline(_host);
std::string_view new_host(_host);

// If url's scheme is "file", then set state to file host state, instead of host state.
if (get_scheme_type() != ada::scheme::type::FILE) {
std::string_view host_view(_host.data(), _host.length());
bool inside_brackets{false};
size_t location = helpers::get_host_delimiter_location(*this, host_view, inside_brackets);
std::string_view::iterator pointer = (location != std::string_view::npos) ? new_host.begin() + location : new_host.end();
auto [location,found_colon] = helpers::get_host_delimiter_location(is_special(), host_view);

// Otherwise, if c is U+003A (:) and insideBrackets is false, then:
// Note: we cannot access *pointer safely if (pointer == pointer_end).
if ((pointer != new_host.end()) && (*pointer == ':') && !inside_brackets) {
// Note: the 'found_colon' value is true if and only if a colon was encountered
// while not inside brackets.
if (found_colon) {
if (override_hostname) { return false; }
std::string_view buffer(&*(pointer + 1));
std::string_view buffer = new_host.substr(location+1);
if (!buffer.empty()) { set_port(buffer); }
}
// If url is special and host_view is the empty string, validation error, return failure.
@@ -1821,7 +2116,7 @@ namespace ada {

} // namespace ada
/* end file src/url-setters.cpp */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/src, filename=parser.cpp
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/src, filename=parser.cpp
/* begin file src/parser.cpp */

#include <iostream>
@@ -1991,8 +2286,8 @@ namespace ada::parser {
bool password_token_seen{false};
do {
std::string_view view = helpers::substring(url_data, input_position);
size_t location = url.is_special() ? view.find_first_of("@/?\\") : view.find_first_of("@/?");
std::string_view authority_view(view.data(), (location != std::string_view::npos) ? location : view.size());
size_t location = url.is_special() ? helpers::find_authority_delimiter_special(view) : helpers::find_authority_delimiter(view);
std::string_view authority_view(view.data(), location);
size_t end_of_authority = input_position + authority_view.size();
// If c is U+0040 (@), then:
if ((end_of_authority != input_size) && (url_data[end_of_authority] == '@')) {
@@ -2197,11 +2492,12 @@ namespace ada::parser {
ada_log("HOST ", helpers::substring(url_data, input_position));

std::string_view host_view = helpers::substring(url_data, input_position);
bool inside_brackets{false};
size_t location = helpers::get_host_delimiter_location(url, host_view, inside_brackets);
auto [location, found_colon] = helpers::get_host_delimiter_location(url.is_special(), host_view);
input_position = (location != std::string_view::npos) ? input_position + location : input_size;
// Otherwise, if c is U+003A (:) and insideBrackets is false, then:
if ((input_position != input_size) && (url_data[input_position] == ':') && !inside_brackets) {
// Note: the 'found_colon' value is true if and only if a colon was encountered
// while not inside brackets.
if (found_colon) {
// If buffer is the empty string, validation error, return failure.
// Let host be the result of host parsing buffer with url is not special.
ada_log("HOST parsing ", host_view);
@@ -2214,7 +2510,9 @@ namespace ada::parser {
// Otherwise, if one of the following is true:
// - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
// - url is special and c is U+005C (\)
else if (input_position == input_size || url_data[input_position] == '/' || url_data[input_position] == '?' || (url.is_special() && url_data[input_position] == '\\')) {
// The get_host_delimiter_location function either brings us to
// the colon outside of the bracket, or to one of those characters.
else {

// If url is special and host_view is the empty string, validation error, return failure.
if (url.is_special() && host_view.empty()) {
71 changes: 46 additions & 25 deletions deps/ada/ada.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* auto-generated on 2023-02-22 14:24:01 -0500. Do not edit! */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada.h
/* auto-generated on 2023-02-26 15:07:41 -0500. Do not edit! */
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada.h
/* begin file include/ada.h */
/**
* @file ada.h
@@ -8,7 +8,7 @@
#ifndef ADA_H
#define ADA_H

// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/character_sets-inl.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/character_sets-inl.h
/* begin file include/ada/character_sets-inl.h */
/**
* @file character_sets-inl.h
@@ -19,7 +19,7 @@
#ifndef ADA_CHARACTER_SETS_INL_H
#define ADA_CHARACTER_SETS_INL_H

// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/character_sets.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/character_sets.h
/* begin file include/ada/character_sets.h */
/**
* @file character_sets.h
@@ -30,7 +30,7 @@
#ifndef ADA_CHARACTER_SETS_H
#define ADA_CHARACTER_SETS_H

// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/common_defs.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/common_defs.h
/* begin file include/ada/common_defs.h */
/**
* @file common_defs.h
@@ -715,7 +715,7 @@ namespace ada::character_sets {

#endif // ADA_CHARACTER_SETS_H
/* end file include/ada/character_sets-inl.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/checkers-inl.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/checkers-inl.h
/* begin file include/ada/checkers-inl.h */
/**
* @file checkers-inl.h
@@ -770,7 +770,7 @@ namespace ada::checkers {

#endif //ADA_CHECKERS_H
/* end file include/ada/checkers-inl.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/log.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/log.h
/* begin file include/ada/log.h */
/**
* @file log.h
@@ -851,7 +851,7 @@ ada_really_inline void log([[maybe_unused]] T t) {

#endif // ADA_LOG_H
/* end file include/ada/log.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/encoding_type.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/encoding_type.h
/* begin file include/ada/encoding_type.h */
/**
* @file encoding_type.h
@@ -885,7 +885,7 @@ namespace ada {

#endif // ADA_ENCODING_TYPE_H
/* end file include/ada/encoding_type.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/helpers.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/helpers.h
/* begin file include/ada/helpers.h */
/**
* @file helpers.h
@@ -894,7 +894,7 @@ namespace ada {
#ifndef ADA_HELPERS_H
#define ADA_HELPERS_H

// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/url.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/url.h
/* begin file include/ada/url.h */
/**
* @file url.h
@@ -903,7 +903,7 @@ namespace ada {
#ifndef ADA_URL_H
#define ADA_URL_H

// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/checkers.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/checkers.h
/* begin file include/ada/checkers.h */
/**
* @file checkers.h
@@ -1000,7 +1000,7 @@ namespace ada::checkers {

#endif //ADA_CHECKERS_H
/* end file include/ada/checkers.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/scheme.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/scheme.h
/* begin file include/ada/scheme.h */
/**
* @file scheme.h
@@ -1072,7 +1072,7 @@ namespace ada::scheme {

#endif // ADA_SCHEME_H
/* end file include/ada/scheme.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/serializers.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/serializers.h
/* begin file include/ada/serializers.h */
/**
* @file serializers.h
@@ -1115,7 +1115,7 @@ namespace ada::serializers {

#endif // ADA_SERIALIZERS_H
/* end file include/ada/serializers.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/unicode.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/unicode.h
/* begin file include/ada/unicode.h */
/**
* @file unicode.h
@@ -1669,7 +1669,7 @@ namespace ada {

#endif // ADA_URL_H
/* end file include/ada/url.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/state.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/state.h
/* begin file include/ada/state.h */
/**
* @file state.h
@@ -1773,10 +1773,11 @@ namespace ada::helpers {
ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept;

/**
* Returns a host's delimiter location depending on the state of the instance.
* Returns a host's delimiter location depending on the state of the instance, and
* whether a colon was found outside brackets.
* Used by the host parser.
*/
ada_really_inline size_t get_host_delimiter_location(const ada::url& url, std::string_view& view, bool& inside_brackets) noexcept;
ada_really_inline std::pair<size_t,bool> get_host_delimiter_location(const bool is_special, std::string_view& view) noexcept;

/**
* Removes leading and trailing C0 control and whitespace characters from string.
@@ -1788,11 +1789,31 @@ namespace ada::helpers {
*/
ada_really_inline void strip_trailing_spaces_from_opaque_path(ada::url& url) noexcept;

/**
* Reverse the order of the bytes.
*/
ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept;

/**
* Reverse the order of the bytes but only if the system is big endian
*/
ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept;

/**
* Finds the first authority-state delimiter ('@', '/', '?' or '\\') in a view,
* for special URLs. Returns view.size() if no delimiter is found.
*/
ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept;

/**
* Finds the first authority-state delimiter ('@', '/' or '?') in a view,
* for non-special URLs. Returns view.size() if no delimiter is found.
*/
ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept;

} // namespace ada::helpers

#endif // ADA_HELPERS_H
/* end file include/ada/helpers.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/parser.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/parser.h
/* begin file include/ada/parser.h */
/**
* @file parser.h
@@ -1801,7 +1822,7 @@ namespace ada::helpers {
#ifndef ADA_PARSER_H
#define ADA_PARSER_H

// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/expected.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/expected.h
/* begin file include/ada/expected.h */
/**
* @file expected.h
@@ -4197,7 +4218,7 @@ namespace ada::parser {

#endif // ADA_PARSER_H
/* end file include/ada/parser.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/scheme-inl.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/scheme-inl.h
/* begin file include/ada/scheme-inl.h */
/**
* @file scheme-inl.h
@@ -4252,7 +4273,7 @@ namespace ada::scheme {

#endif // ADA_SCHEME_H
/* end file include/ada/scheme-inl.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/url-inl.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/url-inl.h
/* begin file include/ada/url-inl.h */
/**
* @file url-inl.h
@@ -4333,7 +4354,7 @@ namespace ada {
/* end file include/ada/url-inl.h */

// Public API
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/ada_version.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/ada_version.h
/* begin file include/ada/ada_version.h */
/**
* @file ada_version.h
@@ -4342,21 +4363,21 @@ namespace ada {
#ifndef ADA_ADA_VERSION_H
#define ADA_ADA_VERSION_H

#define ADA_VERSION "1.0.3"
#define ADA_VERSION "1.0.4"

namespace ada {

enum {
ADA_VERSION_MAJOR = 1,
ADA_VERSION_MINOR = 0,
ADA_VERSION_REVISION = 3,
ADA_VERSION_REVISION = 4,
};

} // namespace ada

#endif // ADA_ADA_VERSION_H
/* end file include/ada/ada_version.h */
// dofile: invoked with prepath=/Users/yagiz/Developer/url-parser/include, filename=ada/implementation.h
// dofile: invoked with prepath=/Users/dlemire/CVS/github/ada/include, filename=ada/implementation.h
/* begin file include/ada/implementation.h */
/**
* @file implementation.h

0 comments on commit b825e2d

Please sign in to comment.