| 
									
										
										
										
											2020-01-18 09:38:21 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org> | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |  * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch> | 
					
						
							| 
									
										
										
										
											2020-01-18 09:38:21 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2021-04-22 01:24:48 -07:00
										 |  |  |  * SPDX-License-Identifier: BSD-2-Clause | 
					
						
							| 
									
										
										
										
											2020-01-18 09:38:21 +01:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-27 21:05:07 +02:00
										 |  |  | #include <AK/Debug.h>
 | 
					
						
							| 
									
										
										
										
											2020-05-26 14:52:44 +03:00
										 |  |  | #include <AK/LexicalPath.h>
 | 
					
						
							| 
									
										
										
										
											2019-08-10 17:27:56 +02:00
										 |  |  | #include <AK/StringBuilder.h>
 | 
					
						
							|  |  |  | #include <AK/URL.h>
 | 
					
						
							| 
									
										
										
										
											2021-05-27 21:05:07 +02:00
										 |  |  | #include <AK/URLParser.h>
 | 
					
						
							| 
									
										
										
										
											2021-05-25 13:50:03 +02:00
										 |  |  | #include <AK/Utf8View.h>
 | 
					
						
							| 
									
										
										
										
											2019-08-10 17:27:56 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | namespace AK { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-25 13:50:03 +02:00
										 // Returns true if code_point is an ASCII letter, i.e. lies in 'a'..'z' or 'A'..'Z'.
										 constexpr bool is_ascii_alpha(u32 code_point)
										 {
										     bool const is_lowercase_letter = code_point >= 'a' && code_point <= 'z';
										     bool const is_uppercase_letter = code_point >= 'A' && code_point <= 'Z';
										     return is_lowercase_letter || is_uppercase_letter;
										 }
					
						
							|  |  |  | 
 | 
					
						
							// Returns true if code_point is an ASCII decimal digit, i.e. lies in '0'..'9'.
							constexpr bool is_ascii_digit(u32 code_point)
							{
							    if (code_point < '0')
							        return false;
							    return code_point <= '9';
							}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | constexpr bool is_ascii_alphanumeric(u32 code_point) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return is_ascii_alpha(code_point) || is_ascii_digit(code_point); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | constexpr bool is_ascii_hex_digit(u32 code_point) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return is_ascii_digit(code_point) || (code_point >= 'a' && code_point <= 'f') || (code_point >= 'A' && code_point <= 'F'); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-27 21:05:07 +02:00
										 |  |  | // FIXME: It could make sense to force users of URL to use URLParser::parse() explicitly instead of using a constructor.
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | URL::URL(StringView const& string) | 
					
						
							| 
									
										
										
										
											2021-05-27 21:05:07 +02:00
										 |  |  |     : URL(URLParser::parse({}, string)) | 
					
						
							| 
									
										
										
										
											2019-08-10 17:27:56 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-05-27 21:05:07 +02:00
										 |  |  |     if constexpr (URL_PARSER_DEBUG) { | 
					
						
							|  |  |  |         if (m_valid) | 
					
						
							|  |  |  |             dbgln("URL constructor: Parsed URL to be '{}'.", serialize()); | 
					
						
							|  |  |  |         else | 
					
						
							|  |  |  |             dbgln("URL constructor: Parsed URL to be invalid."); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2019-08-10 17:27:56 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  | String URL::path() const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (cannot_be_a_base_url()) | 
					
						
							|  |  |  |         return paths()[0]; | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  |     for (auto& path : m_paths) { | 
					
						
							|  |  |  |         builder.append('/'); | 
					
						
							|  |  |  |         builder.append(path); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return builder.to_string(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | URL URL::complete_url(String const& string) const | 
					
						
							| 
									
										
										
										
											2019-11-18 22:04:39 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2020-06-07 18:23:33 +02:00
										 |  |  |     if (!is_valid()) | 
					
						
							|  |  |  |         return {}; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-27 21:05:07 +02:00
										 |  |  |     return URLParser::parse({}, string, this); | 
					
						
							| 
									
										
										
										
											2019-11-18 22:04:39 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_scheme(String scheme) | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_scheme = move(scheme); | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  |     m_valid = compute_validity(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_username(String username) | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_username = move(username); | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  |     m_valid = compute_validity(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_password(String password) | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_password = move(password); | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  |     m_valid = compute_validity(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_host(String host) | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_host = move(host); | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  |     m_valid = compute_validity(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_port(u16 port) | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  |     if (port == default_port_for_scheme(m_scheme)) { | 
					
						
							|  |  |  |         m_port = 0; | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |     m_port = port; | 
					
						
							|  |  |  |     m_valid = compute_validity(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_paths(Vector<String> paths) | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_paths = move(paths); | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  |     m_valid = compute_validity(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_query(String query) | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_query = move(query); | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | void URL::set_fragment(String fragment) | 
					
						
							| 
									
										
										
										
											2020-04-11 23:38:13 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  |     m_fragment = move(fragment); | 
					
						
							| 
									
										
										
										
											2020-04-11 23:38:13 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-29 20:46:49 +02:00
										 |  |  | // FIXME: This is by no means complete.
 | 
					
						
							|  |  |  | // NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong.
 | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  | bool URL::compute_validity() const | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (m_scheme.is_empty()) | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  |         return false; | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (m_scheme == "data") { | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         if (m_data_mime_type.is_empty()) | 
					
						
							| 
									
										
										
										
											2020-04-19 11:36:56 +03:00
										 |  |  |             return false; | 
					
						
							| 
									
										
										
										
											2021-05-29 20:46:49 +02:00
										 |  |  |         if (m_data_payload_is_base64) { | 
					
						
							|  |  |  |             if (m_data_payload.length() % 4 != 0) | 
					
						
							|  |  |  |                 return false; | 
					
						
							|  |  |  |             for (auto character : m_data_payload) { | 
					
						
							|  |  |  |                 if (!is_ascii_alphanumeric(character) || character == '+' || character == '/' || character == '=') | 
					
						
							|  |  |  |                     return false; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } else if (m_cannot_be_a_base_url) { | 
					
						
							|  |  |  |         if (m_paths.size() != 1) | 
					
						
							|  |  |  |             return false; | 
					
						
							|  |  |  |         if (m_paths[0].is_empty()) | 
					
						
							|  |  |  |             return false; | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         if (m_scheme.is_one_of("about", "mailto")) | 
					
						
							|  |  |  |             return false; | 
					
						
							|  |  |  |         // NOTE: Maybe it is allowed to have a zero-segment path.
 | 
					
						
							|  |  |  |         if (m_paths.size() == 0) | 
					
						
							|  |  |  |             return false; | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-29 20:46:49 +02:00
										 |  |  |     // NOTE: A file URL's host should be the empty string for localhost, not null.
 | 
					
						
							|  |  |  |     if (m_scheme == "file" && m_host.is_null()) | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         return false; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-11 23:07:23 +02:00
										 |  |  |     return true; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | bool URL::scheme_requires_port(StringView const& scheme) | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     return (default_port_for_scheme(scheme) != 0); | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | u16 URL::default_port_for_scheme(StringView const& scheme) | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "http") | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         return 80; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "https") | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         return 443; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "gemini") | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         return 1965; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "irc") | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         return 6667; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "ircs") | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |         return 6697; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "ws") | 
					
						
							| 
									
										
										
										
											2021-04-16 15:21:03 +02:00
										 |  |  |         return 80; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     if (scheme == "wss") | 
					
						
							| 
									
										
										
										
											2021-04-16 15:21:03 +02:00
										 |  |  |         return 443; | 
					
						
							| 
									
										
										
										
											2020-11-04 06:20:20 +00:00
										 |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | URL URL::create_with_file_scheme(String const& path, String const& fragment, String const& hostname) | 
					
						
							| 
									
										
										
										
											2020-04-18 22:02:04 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-05-27 21:40:02 +02:00
										 |  |  |     LexicalPath lexical_path(path); | 
					
						
							|  |  |  |     if (!lexical_path.is_valid() || !lexical_path.is_absolute()) | 
					
						
							|  |  |  |         return {}; | 
					
						
							| 
									
										
										
										
											2021-05-29 21:57:20 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-18 22:02:04 +02:00
										 |  |  |     URL url; | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     url.set_scheme("file"); | 
					
						
							| 
									
										
										
										
											2021-05-29 21:57:20 +02:00
										 |  |  |     // NOTE: If the hostname is localhost (or null, which implies localhost), it should be set to the empty string.
 | 
					
						
							|  |  |  |     //       This is because a file URL always needs a non-null hostname.
 | 
					
						
							|  |  |  |     url.set_host(hostname.is_null() || hostname == "localhost" ? String::empty() : hostname); | 
					
						
							| 
									
										
										
										
											2021-05-27 21:40:02 +02:00
										 |  |  |     url.set_paths(lexical_path.parts()); | 
					
						
							|  |  |  |     // NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
 | 
					
						
							|  |  |  |     if (path.ends_with('/')) | 
					
						
							|  |  |  |         url.append_path(""); | 
					
						
							| 
									
										
										
										
											2021-03-01 23:24:34 +02:00
										 |  |  |     url.set_fragment(fragment); | 
					
						
							| 
									
										
										
										
											2020-04-18 22:02:04 +02:00
										 |  |  |     return url; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | URL URL::create_with_url_or_path(String const& url_or_path) | 
					
						
							| 
									
										
										
										
											2020-04-19 11:55:59 +03:00
										 |  |  | { | 
					
						
							|  |  |  |     URL url = url_or_path; | 
					
						
							|  |  |  |     if (url.is_valid()) | 
					
						
							|  |  |  |         return url; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-26 14:52:44 +03:00
										 |  |  |     String path = LexicalPath::canonicalized_path(url_or_path); | 
					
						
							| 
									
										
										
										
											2021-05-23 23:31:16 +02:00
										 |  |  |     return URL::create_with_file_scheme(path); | 
					
						
							| 
									
										
										
										
											2020-04-19 11:55:59 +03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-25 22:05:01 +02:00
										 |  |  | // https://url.spec.whatwg.org/#special-scheme
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | bool URL::is_special_scheme(StringView const& scheme) | 
					
						
							| 
									
										
										
										
											2021-05-25 22:05:01 +02:00
										 |  |  | { | 
					
						
							|  |  |  |     return scheme.is_one_of("ftp", "file", "http", "https", "ws", "wss"); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-25 22:32:39 +02:00
										 |  |  | String URL::serialize_data_url() const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     VERIFY(m_scheme == "data"); | 
					
						
							|  |  |  |     VERIFY(!m_data_mime_type.is_null()); | 
					
						
							|  |  |  |     VERIFY(!m_data_payload.is_null()); | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  |     builder.append(m_scheme); | 
					
						
							|  |  |  |     builder.append(':'); | 
					
						
							|  |  |  |     builder.append(m_data_mime_type); | 
					
						
							|  |  |  |     if (m_data_payload_is_base64) | 
					
						
							|  |  |  |         builder.append(";base64"); | 
					
						
							|  |  |  |     builder.append(','); | 
					
						
							|  |  |  |     // NOTE: The specification does not say anything about encoding this, but we should encode at least control and non-ASCII
 | 
					
						
							|  |  |  |     //       characters (since this is also a valid representation of the same data URL).
 | 
					
						
							|  |  |  |     builder.append(URL::percent_encode(m_data_payload, PercentEncodeSet::C0Control)); | 
					
						
							|  |  |  |     return builder.to_string(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // https://url.spec.whatwg.org/#concept-url-serializer
 | 
					
						
							|  |  |  | String URL::serialize(ExcludeFragment exclude_fragment) const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (m_scheme == "data") | 
					
						
							|  |  |  |         return serialize_data_url(); | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  |     builder.append(m_scheme); | 
					
						
							|  |  |  |     builder.append(':'); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!m_host.is_null()) { | 
					
						
							|  |  |  |         builder.append("//"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (includes_credentials()) { | 
					
						
							|  |  |  |             builder.append(percent_encode(m_username, PercentEncodeSet::Userinfo)); | 
					
						
							|  |  |  |             if (!m_password.is_empty()) { | 
					
						
							|  |  |  |                 builder.append(':'); | 
					
						
							|  |  |  |                 builder.append(percent_encode(m_password, PercentEncodeSet::Userinfo)); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             builder.append('@'); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         builder.append(m_host); | 
					
						
							|  |  |  |         if (m_port != 0) | 
					
						
							|  |  |  |             builder.appendff(":{}", m_port); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (cannot_be_a_base_url()) { | 
					
						
							|  |  |  |         builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path)); | 
					
						
							|  |  |  |     } else { | 
					
						
							| 
									
										
										
										
											2021-05-27 21:40:02 +02:00
										 |  |  |         if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty()) | 
					
						
							|  |  |  |             builder.append("/."); | 
					
						
							|  |  |  |         for (auto& segment : m_paths) { | 
					
						
							|  |  |  |             builder.append('/'); | 
					
						
							|  |  |  |             builder.append(percent_encode(segment, PercentEncodeSet::Path)); | 
					
						
							| 
									
										
										
										
											2021-05-25 22:32:39 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!m_query.is_null()) { | 
					
						
							|  |  |  |         builder.append('?'); | 
					
						
							|  |  |  |         builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (exclude_fragment == ExcludeFragment::No && !m_fragment.is_null()) { | 
					
						
							|  |  |  |         builder.append('#'); | 
					
						
							|  |  |  |         builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return builder.to_string(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // https://url.spec.whatwg.org/#url-rendering
 | 
					
						
							|  |  |  | // NOTE: This does e.g. not display credentials.
 | 
					
						
							|  |  |  | // FIXME: Parts of the URL other than the host should have their sequences of percent-encoded bytes replaced with code points
 | 
					
						
							|  |  |  | //        resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible.
 | 
					
						
							|  |  |  | String URL::serialize_for_display() const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     VERIFY(m_valid); | 
					
						
							|  |  |  |     if (m_scheme == "data") | 
					
						
							|  |  |  |         return serialize_data_url(); | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  |     builder.append(m_scheme); | 
					
						
							|  |  |  |     builder.append(':'); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!m_host.is_null()) { | 
					
						
							|  |  |  |         builder.append("//"); | 
					
						
							|  |  |  |         builder.append(m_host); | 
					
						
							|  |  |  |         if (m_port != 0) | 
					
						
							|  |  |  |             builder.appendff(":{}", m_port); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (cannot_be_a_base_url()) { | 
					
						
							|  |  |  |         builder.append(percent_encode(m_paths[0], PercentEncodeSet::Path)); | 
					
						
							|  |  |  |     } else { | 
					
						
							| 
									
										
										
										
											2021-05-27 21:40:02 +02:00
										 |  |  |         if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty()) | 
					
						
							|  |  |  |             builder.append("/."); | 
					
						
							|  |  |  |         for (auto& segment : m_paths) { | 
					
						
							|  |  |  |             builder.append('/'); | 
					
						
							|  |  |  |             builder.append(percent_encode(segment, PercentEncodeSet::Path)); | 
					
						
							| 
									
										
										
										
											2021-05-25 22:32:39 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!m_query.is_null()) { | 
					
						
							|  |  |  |         builder.append('?'); | 
					
						
							|  |  |  |         builder.append(percent_encode(m_query, is_special() ? URL::PercentEncodeSet::SpecialQuery : URL::PercentEncodeSet::Query)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!m_fragment.is_null()) { | 
					
						
							|  |  |  |         builder.append('#'); | 
					
						
							|  |  |  |         builder.append(percent_encode(m_fragment, PercentEncodeSet::Fragment)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return builder.to_string(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const | 
					
						
							| 
									
										
										
										
											2021-05-27 21:38:16 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-06-01 11:14:30 +02:00
										 |  |  |     if (this == &other) | 
					
						
							|  |  |  |         return true; | 
					
						
							| 
									
										
										
										
											2021-05-27 21:38:16 +02:00
										 |  |  |     if (!m_valid || !other.m_valid) | 
					
						
							|  |  |  |         return false; | 
					
						
							|  |  |  |     return serialize(exclude_fragments) == other.serialize(exclude_fragments); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-05 23:56:35 +02:00
										 |  |  | String URL::basename() const | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (!m_valid) | 
					
						
							|  |  |  |         return {}; | 
					
						
							| 
									
										
										
										
											2021-05-25 21:32:20 +02:00
										 |  |  |     if (m_paths.is_empty()) | 
					
						
							|  |  |  |         return {}; | 
					
						
							|  |  |  |     return m_paths.last(); | 
					
						
							| 
									
										
										
										
											2020-05-05 23:56:35 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-25 13:50:03 +02:00
										 |  |  | void URL::append_percent_encoded(StringBuilder& builder, u32 code_point) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (code_point <= 0x7f) | 
					
						
							|  |  |  |         builder.appendff("%{:02X}", code_point); | 
					
						
							|  |  |  |     else if (code_point <= 0x07ff) | 
					
						
							|  |  |  |         builder.appendff("%{:02X}%{:02X}", ((code_point >> 6) & 0x1f) | 0xc0, (code_point & 0x3f) | 0x80); | 
					
						
							|  |  |  |     else if (code_point <= 0xffff) | 
					
						
							|  |  |  |         builder.appendff("%{:02X}%{:02X}%{:02X}", ((code_point >> 12) & 0x0f) | 0xe0, ((code_point >> 6) & 0x3f) | 0x80, (code_point & 0x3f) | 0x80); | 
					
						
							|  |  |  |     else if (code_point <= 0x10ffff) | 
					
						
							|  |  |  |         builder.appendff("%{:02X}%{:02X}%{:02X}%{:02X}", ((code_point >> 18) & 0x07) | 0xf0, ((code_point >> 12) & 0x3f) | 0x80, ((code_point >> 6) & 0x3f) | 0x80, (code_point & 0x3f) | 0x80); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |         VERIFY_NOT_REACHED(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // https://url.spec.whatwg.org/#c0-control-percent-encode-set
 | 
					
						
							|  |  |  | constexpr bool code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncodeSet set) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     switch (set) { | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::C0Control: | 
					
						
							|  |  |  |         return code_point < 0x20 || code_point > 0x7E; | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::Fragment: | 
					
						
							|  |  |  |         return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"<>`"sv.contains(code_point); | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::Query: | 
					
						
							|  |  |  |         return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::C0Control) || " \"#<>"sv.contains(code_point); | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::SpecialQuery: | 
					
						
							|  |  |  |         return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || code_point == '\''; | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::Path: | 
					
						
							|  |  |  |         return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Query) || "?`{}"sv.contains(code_point); | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::Userinfo: | 
					
						
							|  |  |  |         return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Path) || "/:;=@[\\]^|"sv.contains(code_point); | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::Component: | 
					
						
							|  |  |  |         return code_point_is_in_percent_encode_set(code_point, URL::PercentEncodeSet::Userinfo) || "$%&+,"sv.contains(code_point); | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::ApplicationXWWWFormUrlencoded: | 
					
						
							|  |  |  |         return code_point >= 0x7E || !(is_ascii_alphanumeric(code_point) || "!'()~"sv.contains(code_point)); | 
					
						
							|  |  |  |     case URL::PercentEncodeSet::EncodeURI: | 
					
						
							|  |  |  |         // NOTE: This is the same percent encode set that JS encodeURI() uses.
 | 
					
						
							|  |  |  |         // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
 | 
					
						
							|  |  |  |         return code_point >= 0x7E || (!is_ascii_alphanumeric(code_point) && !";,/?:@&=+$-_.!~*'()#"sv.contains(code_point)); | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |         VERIFY_NOT_REACHED(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void URL::append_percent_encoded_if_necessary(StringBuilder& builder, u32 code_point, URL::PercentEncodeSet set) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (code_point_is_in_percent_encode_set(code_point, set)) | 
					
						
							|  |  |  |         append_percent_encoded(builder, code_point); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |         builder.append_code_point(code_point); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | String URL::percent_encode(StringView const& input, URL::PercentEncodeSet set) | 
					
						
							| 
									
										
										
										
											2021-05-25 13:50:03 +02:00
										 |  |  | { | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  |     for (auto code_point : Utf8View(input)) { | 
					
						
							|  |  |  |         append_percent_encoded_if_necessary(builder, code_point, set); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return builder.to_string(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | constexpr u8 parse_hex_digit(u8 digit) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (digit >= '0' && digit <= '9') | 
					
						
							|  |  |  |         return digit - '0'; | 
					
						
							|  |  |  |     if (digit >= 'a' && digit <= 'f') | 
					
						
							|  |  |  |         return digit - 'a' + 10; | 
					
						
							|  |  |  |     if (digit >= 'A' && digit <= 'F') | 
					
						
							|  |  |  |         return digit - 'A' + 10; | 
					
						
							|  |  |  |     VERIFY_NOT_REACHED(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-01 10:58:27 +02:00
										 |  |  | String URL::percent_decode(StringView const& input) | 
					
						
							| 
									
										
										
										
											2021-05-25 13:50:03 +02:00
										 |  |  | { | 
					
						
							|  |  |  |     if (!input.contains('%')) | 
					
						
							|  |  |  |         return input; | 
					
						
							|  |  |  |     StringBuilder builder; | 
					
						
							|  |  |  |     Utf8View utf8_view(input); | 
					
						
							|  |  |  |     for (auto it = utf8_view.begin(); !it.done(); ++it) { | 
					
						
							|  |  |  |         if (*it != '%') { | 
					
						
							|  |  |  |             builder.append_code_point(*it); | 
					
						
							|  |  |  |         } else if (!is_ascii_hex_digit(it.peek(1).value_or(0)) || !is_ascii_hex_digit(it.peek(2).value_or(0))) { | 
					
						
							|  |  |  |             builder.append_code_point(*it); | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             ++it; | 
					
						
							|  |  |  |             u8 byte = parse_hex_digit(*it) << 4; | 
					
						
							|  |  |  |             ++it; | 
					
						
							|  |  |  |             byte += parse_hex_digit(*it); | 
					
						
							|  |  |  |             builder.append(byte); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return builder.to_string(); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-10 17:27:56 +02:00
										 |  |  | } |