14template <
class result_type,
bool store_values>
16 const result_type* base_url) {
22 constexpr bool result_type_is_ada_url =
23 std::is_same<ada::url, result_type>::value;
24 constexpr bool result_type_is_ada_url_aggregator =
25 std::is_same<ada::url_aggregator, result_type>::value;
26 static_assert(result_type_is_ada_url ||
27 result_type_is_ada_url_aggregator);
30 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
31 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
39 if (user_input.size() > std::numeric_limits<uint32_t>::max()) {
45 if (base_url !=
nullptr) {
46 url.is_valid &= base_url->is_valid;
51 if constexpr (result_type_is_ada_url_aggregator && store_values) {
63 uint32_t reserve_capacity =
65 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
67 url.reserve(reserve_capacity);
69 std::string tmp_buffer;
70 std::string_view internal_input;
71 if (unicode::has_tabs_or_newline(user_input)) {
72 tmp_buffer = user_input;
75 helpers::remove_ascii_tab_or_newline(tmp_buffer);
76 internal_input = tmp_buffer;
78 internal_input = user_input;
83 std::string_view url_data = internal_input;
84 helpers::trim_c0_whitespace(url_data);
87 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
96 size_t input_position = 0;
97 const size_t input_size = url_data.size();
102 while (input_position <= input_size) {
103 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
107 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
110 if ((input_position != input_size) &&
122 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
125 while ((input_position != input_size) &&
126 (ada::unicode::is_alnum_plus(url_data[input_position]))) {
130 if ((input_position != input_size) &&
131 (url_data[input_position] ==
':')) {
132 ada_log(
"SCHEME the scheme should be ",
133 url_data.substr(0, input_position));
134 if constexpr (result_type_is_ada_url) {
135 if (!url.parse_scheme(url_data.substr(0, input_position))) {
140 if (!url.parse_scheme_with_colon(
141 url_data.substr(0, input_position + 1))) {
145 ada_log(
"SCHEME the scheme is ", url.get_protocol());
148 if (url.type == ada::scheme::type::FILE) {
155 else if (url.is_special() && base_url !=
nullptr &&
156 base_url->type == url.type) {
162 else if (url.is_special()) {
167 else if (input_position + 1 < input_size &&
168 url_data[input_position + 1] ==
'/') {
190 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
193 if (base_url ==
nullptr ||
194 (base_url->has_opaque_path && !fragment.has_value())) {
195 ada_log(
"NO_SCHEME validation error");
196 url.is_valid =
false;
202 else if (base_url->has_opaque_path && fragment.has_value() &&
203 input_position == input_size) {
204 ada_log(
"NO_SCHEME opaque base with fragment");
205 url.copy_scheme(*base_url);
206 url.has_opaque_path = base_url->has_opaque_path;
208 if constexpr (result_type_is_ada_url) {
209 url.path = base_url->path;
210 url.query = base_url->query;
212 url.update_base_pathname(base_url->get_pathname());
213 url.update_base_search(base_url->get_search());
215 url.update_unencoded_base_hash(*fragment);
220 else if (base_url->type != ada::scheme::type::FILE) {
221 ada_log(
"NO_SCHEME non-file relative path");
226 ada_log(
"NO_SCHEME file base type");
232 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
241 const bool contains_ampersand =
242 (url_data.find(
'@', input_position) != std::string_view::npos);
244 if (!contains_ampersand) {
248 bool at_sign_seen{
false};
249 bool password_token_seen{
false};
256 std::string_view view = helpers::substring(url_data, input_position);
259 url.is_special() ? helpers::find_authority_delimiter_special(view)
260 : helpers::find_authority_delimiter(view);
261 std::string_view authority_view(view.data(), location);
262 size_t end_of_authority = input_position + authority_view.size();
264 if ((end_of_authority != input_size) &&
265 (url_data[end_of_authority] ==
'@')) {
268 if (password_token_seen) {
269 if constexpr (result_type_is_ada_url) {
270 url.password +=
"%40";
272 url.append_base_password(
"%40");
275 if constexpr (result_type_is_ada_url) {
276 url.username +=
"%40";
278 url.append_base_username(
"%40");
285 if (!password_token_seen) {
286 size_t password_token_location = authority_view.find(
':');
287 password_token_seen =
288 password_token_location != std::string_view::npos;
290 if constexpr (store_values) {
291 if (!password_token_seen) {
292 if constexpr (result_type_is_ada_url) {
293 url.username += unicode::percent_encode(
297 url.append_base_username(unicode::percent_encode(
302 if constexpr (result_type_is_ada_url) {
303 url.username += unicode::percent_encode(
304 authority_view.substr(0, password_token_location),
306 url.password += unicode::percent_encode(
307 authority_view.substr(password_token_location + 1),
310 url.append_base_username(unicode::percent_encode(
311 authority_view.substr(0, password_token_location),
313 url.append_base_password(unicode::percent_encode(
314 authority_view.substr(password_token_location + 1),
319 }
else if constexpr (store_values) {
320 if constexpr (result_type_is_ada_url) {
321 url.password += unicode::percent_encode(
324 url.append_base_password(unicode::percent_encode(
332 else if (end_of_authority == input_size ||
333 url_data[end_of_authority] ==
'/' ||
334 url_data[end_of_authority] ==
'?' ||
335 (url.is_special() && url_data[end_of_authority] ==
'\\')) {
338 if (at_sign_seen && authority_view.empty()) {
339 url.is_valid =
false;
345 if (end_of_authority == input_size) {
346 if constexpr (store_values) {
347 if (fragment.has_value()) {
348 url.update_unencoded_base_hash(*fragment);
353 input_position = end_of_authority + 1;
359 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
360 helpers::substring(url_data, input_position));
365 std::string_view view = helpers::substring(url_data, input_position);
379 helpers::substring(url_data, input_position));
382 if ((input_position != input_size) &&
383 (url_data[input_position] ==
'/')) {
395 helpers::substring(url_data, input_position));
398 url.copy_scheme(*base_url);
401 if ((input_position != input_size) &&
402 (url_data[input_position] ==
'/')) {
404 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
407 }
else if (url.is_special() && (input_position != input_size) &&
408 (url_data[input_position] ==
'\\')) {
412 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
413 "error, set state to relative slash state");
416 ada_log(
"RELATIVE_SCHEME otherwise");
421 if constexpr (result_type_is_ada_url) {
422 url.username = base_url->username;
423 url.password = base_url->password;
424 url.host = base_url->host;
425 url.port = base_url->port;
427 url.has_opaque_path = base_url->has_opaque_path;
428 url.path = base_url->path;
429 url.query = base_url->query;
431 url.update_base_authority(base_url->get_href(),
432 base_url->get_components());
435 url.set_hostname(base_url->get_hostname());
436 url.update_base_port(base_url->retrieve_base_port());
438 url.has_opaque_path = base_url->has_opaque_path;
439 url.update_base_pathname(base_url->get_pathname());
440 url.update_base_search(base_url->get_search());
443 url.has_opaque_path = base_url->has_opaque_path;
447 if ((input_position != input_size) &&
448 (url_data[input_position] ==
'?')) {
452 else if (input_position != input_size) {
455 if constexpr (result_type_is_ada_url) {
457 helpers::shorten_path(url.path, url.type);
459 std::string_view path = url.get_pathname();
460 if (helpers::shorten_path(path, url.type)) {
461 url.update_base_pathname(std::string(path));
474 helpers::substring(url_data, input_position));
477 if (url.is_special() && (input_position != input_size) &&
478 (url_data[input_position] ==
'/' ||
479 url_data[input_position] ==
'\\')) {
484 else if ((input_position != input_size) &&
485 (url_data[input_position] ==
'/')) {
495 if constexpr (result_type_is_ada_url) {
496 url.username = base_url->username;
497 url.password = base_url->password;
498 url.host = base_url->host;
499 url.port = base_url->port;
501 url.update_base_authority(base_url->get_href(),
502 base_url->get_components());
505 url.set_hostname(base_url->get_hostname());
506 url.update_base_port(base_url->retrieve_base_port());
516 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
517 helpers::substring(url_data, input_position));
522 std::string_view view = helpers::substring(url_data, input_position);
530 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
531 helpers::substring(url_data, input_position));
535 while ((input_position != input_size) &&
536 ((url_data[input_position] ==
'/') ||
537 (url_data[input_position] ==
'\\'))) {
545 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
546 if constexpr (store_values) {
549 const uint8_t* query_percent_encode_set =
556 url.update_base_search(helpers::substring(url_data, input_position),
557 query_percent_encode_set);
558 ada_log(
"QUERY update_base_search completed ");
559 if (fragment.has_value()) {
560 url.update_unencoded_base_hash(*fragment);
566 ada_log(
"HOST ", helpers::substring(url_data, input_position));
568 std::string_view host_view =
569 helpers::substring(url_data, input_position);
570 auto [location, found_colon] =
571 helpers::get_host_delimiter_location(url.is_special(), host_view);
572 input_position = (location != std::string_view::npos)
573 ? input_position + location
582 ada_log(
"HOST parsing ", host_view);
583 if (!url.parse_host(host_view)) {
586 ada_log(
"HOST parsing results in ", url.get_hostname());
600 if (url.is_special() && host_view.empty()) {
601 url.is_valid =
false;
604 ada_log(
"HOST parsing ", host_view,
" href=", url.get_href());
607 if (host_view.empty()) {
608 url.update_base_hostname(
"");
609 }
else if (!url.parse_host(host_view)) {
612 ada_log(
"HOST parsing results in ", url.get_hostname(),
613 " href=", url.get_href());
622 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
623 std::string_view view = helpers::substring(url_data, input_position);
626 size_t location = view.find(
'?');
627 if (location != std::string_view::npos) {
628 view.remove_suffix(view.size() - location);
630 input_position += location + 1;
632 input_position = input_size + 1;
634 url.has_opaque_path =
true;
637 url.update_base_pathname(unicode::percent_encode(
642 ada_log(
"PORT ", helpers::substring(url_data, input_position));
643 std::string_view port_view =
644 helpers::substring(url_data, input_position);
645 size_t consumed_bytes = url.parse_port(port_view,
true);
646 input_position += consumed_bytes;
654 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
657 if (url.is_special()) {
663 if (input_position == input_size) {
664 if constexpr (store_values) {
665 url.update_base_pathname(
"/");
666 if (fragment.has_value()) {
667 url.update_unencoded_base_hash(*fragment);
675 if ((url_data[input_position] !=
'/') &&
676 (url_data[input_position] !=
'\\')) {
682 else if ((input_position != input_size) &&
683 (url_data[input_position] ==
'?')) {
687 else if (input_position != input_size) {
692 if (url_data[input_position] !=
'/') {
701 std::string_view view = helpers::substring(url_data, input_position);
702 ada_log(
"PATH ", helpers::substring(url_data, input_position));
706 size_t locofquestionmark = view.find(
'?');
707 if (locofquestionmark != std::string_view::npos) {
709 view.remove_suffix(view.size() - locofquestionmark);
710 input_position += locofquestionmark + 1;
712 input_position = input_size + 1;
714 if constexpr (store_values) {
715 if constexpr (result_type_is_ada_url) {
716 helpers::parse_prepared_path(view, url.type, url.path);
718 url.consume_prepared_path(view);
725 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
728 if ((input_position != input_size) &&
729 (url_data[input_position] ==
'/' ||
730 url_data[input_position] ==
'\\')) {
731 ada_log(
"FILE_SLASH c is U+002F or U+005C");
736 ada_log(
"FILE_SLASH otherwise");
740 if (base_url !=
nullptr &&
741 base_url->type == ada::scheme::type::FILE) {
743 if constexpr (result_type_is_ada_url) {
744 url.host = base_url->host;
747 url.set_host(base_url->get_host());
753 if (!base_url->get_pathname().empty()) {
755 helpers::substring(url_data, input_position))) {
756 std::string_view first_base_url_path =
757 base_url->get_pathname().substr(1);
758 size_t loc = first_base_url_path.find(
'/');
759 if (loc != std::string_view::npos) {
760 helpers::resize(first_base_url_path, loc);
763 first_base_url_path)) {
764 if constexpr (result_type_is_ada_url) {
766 url.path += first_base_url_path;
768 url.append_base_pathname(
769 helpers::concat(
"/", first_base_url_path));
783 std::string_view view = helpers::substring(url_data, input_position);
784 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
786 size_t location = view.find_first_of(
"/\\?");
787 std::string_view file_host_buffer(
789 (location != std::string_view::npos) ? location : view.size());
793 }
else if (file_host_buffer.empty()) {
795 if constexpr (result_type_is_ada_url) {
798 url.update_base_hostname(
"");
803 size_t consumed_bytes = file_host_buffer.size();
804 input_position += consumed_bytes;
807 if (!url.parse_host(file_host_buffer)) {
811 if constexpr (result_type_is_ada_url) {
813 if (url.host.has_value() && url.host.value() ==
"localhost") {
817 if (url.get_hostname() ==
"localhost") {
818 url.update_base_hostname(
"");
829 ada_log(
"FILE ", helpers::substring(url_data, input_position));
830 std::string_view file_view =
831 helpers::substring(url_data, input_position);
833 url.set_protocol_as_file();
834 if constexpr (result_type_is_ada_url) {
838 url.update_base_hostname(
"");
841 if (input_position != input_size &&
842 (url_data[input_position] ==
'/' ||
843 url_data[input_position] ==
'\\')) {
844 ada_log(
"FILE c is U+002F or U+005C");
849 else if (base_url !=
nullptr &&
850 base_url->type == ada::scheme::type::FILE) {
854 if constexpr (result_type_is_ada_url) {
855 url.host = base_url->host;
856 url.path = base_url->path;
857 url.query = base_url->query;
861 url.set_hostname(base_url->get_hostname());
862 url.update_base_pathname(base_url->get_pathname());
863 url.update_base_search(base_url->get_search());
865 url.has_opaque_path = base_url->has_opaque_path;
869 if (input_position != input_size && url_data[input_position] ==
'?') {
873 else if (input_position != input_size) {
879 if constexpr (result_type_is_ada_url) {
880 helpers::shorten_path(url.path, url.type);
882 std::string_view path = url.get_pathname();
883 if (helpers::shorten_path(path, url.type)) {
884 url.update_base_pathname(std::string(path));
891 url.clear_pathname();
892 url.has_opaque_path =
true;
914 if constexpr (store_values) {
915 if (fragment.has_value()) {
916 url.update_unencoded_base_hash(*fragment);
923 const url* base_url =
nullptr);
925 std::string_view user_input,
const url_aggregator* base_url =
nullptr);
927template <
class result_type>
928result_type
parse_url(std::string_view user_input,
929 const result_type* base_url) {
934 const url* base_url =
nullptr);
936 std::string_view user_input,
const url_aggregator* base_url =
nullptr);
Definitions of the character sets used by unicode functions.
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32]
constexpr uint8_t USERINFO_PERCENT_ENCODE[32]
constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32]
constexpr uint8_t QUERY_PERCENT_ENCODE[32]
constexpr bool is_normalized_windows_drive_letter(std::string_view input) noexcept
constexpr bool is_windows_drive_letter(std::string_view input) noexcept
constexpr bool is_alpha(char x) noexcept
ada_really_inline bool begins_with(std::string_view view, std::string_view prefix)
Includes the definitions for supported parsers.
template url parse_url< url >(std::string_view user_input, const url *base_url)
result_type parse_url(std::string_view user_input, const result_type *base_url=nullptr)
template url_aggregator parse_url< url_aggregator >(std::string_view user_input, const url_aggregator *base_url)
result_type parse_url_impl(std::string_view user_input, const result_type *base_url=nullptr)
ada_warn_unused std::string to_string(encoding_type type)
@ SPECIAL_RELATIVE_OR_AUTHORITY
@ SPECIAL_AUTHORITY_SLASHES
@ SPECIAL_AUTHORITY_IGNORE_SLASHES
Definitions for the parser.
Definitions for all unicode specific functions.