mirror of
				https://github.com/LadybirdBrowser/ladybird.git
				synced 2025-10-25 18:34:14 +00:00 
			
		
		
		
	 dbf4b189a4
			
		
	
	
		dbf4b189a4
		
	
	
	
	
		
			
			This is to prepare for custom search engines. If we use AK::format, it
would be trivial for a user (or bad actor) to come up with a template
search engine URL that ultimately crashes the browser due to internal
assertions in AK::format. For example:
    https://example.com/crash={1}
Rather than coming up with a complicated pre-format validator, let's
just not use AK::format. Custom URLs will signify their template query
parameters with "%s". So we can do the same with our built-in engines.
When it comes time to format the URL, we will do a simple string
replacement.
		
	
			
		
			
				
	
	
		
			200 lines
		
	
	
	
		
			9.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
	
		
			9.9 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
 | |
|  * Copyright (c) 2025, Manuel Zahariev <manuel@duck.com>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #include <LibTest/TestCase.h>
 | |
| #include <LibWebView/SearchEngine.h>
 | |
| #include <LibWebView/URL.h>
 | |
| 
 | |
| static WebView::SearchEngine s_test_engine {
 | |
|     .name = "Test"_string,
 | |
|     .query_url = "https://ecosia.org/search?q=%s"_string
 | |
| };
 | |
| 
 | |
| static void compare_url_parts(StringView url, WebView::URLParts const& expected)
 | |
| {
 | |
|     auto result = WebView::break_url_into_parts(url);
 | |
|     VERIFY(result.has_value());
 | |
| 
 | |
|     EXPECT_EQ(result->scheme_and_subdomain, expected.scheme_and_subdomain);
 | |
|     EXPECT_EQ(result->effective_tld_plus_one, expected.effective_tld_plus_one);
 | |
|     EXPECT_EQ(result->remainder, expected.remainder);
 | |
| }
 | |
| 
 | |
| static bool is_sanitized_url_the_same(StringView url)
 | |
| {
 | |
|     auto sanitized_url = WebView::sanitize_url(url);
 | |
|     if (!sanitized_url.has_value())
 | |
|         return false;
 | |
|     return sanitized_url->to_string() == url;
 | |
| }
 | |
| 
 | |
| static void expect_url_equals_sanitized_url(StringView test_url, StringView url, WebView::AppendTLD append_tld = WebView::AppendTLD::No)
 | |
| {
 | |
|     auto sanitized_url = WebView::sanitize_url(url, s_test_engine, append_tld);
 | |
| 
 | |
|     EXPECT(sanitized_url.has_value());
 | |
|     EXPECT_EQ(sanitized_url->to_string(), test_url);
 | |
| }
 | |
| 
 | |
| static void expect_search_url_equals_sanitized_url(StringView url)
 | |
| {
 | |
|     auto search_url = s_test_engine.format_search_query_for_navigation(url);
 | |
|     auto sanitized_url = WebView::sanitize_url(url, s_test_engine);
 | |
| 
 | |
|     EXPECT(sanitized_url.has_value());
 | |
|     EXPECT_EQ(sanitized_url->to_string(), search_url);
 | |
| }
 | |
| 
 | |
| TEST_CASE(invalid_url)
 | |
| {
 | |
|     EXPECT(!WebView::break_url_into_parts(""sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts(":"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts(":/"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("://"sv).has_value());
 | |
| 
 | |
|     EXPECT(!WebView::break_url_into_parts("/"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("//"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("/h"sv).has_value());
 | |
| 
 | |
|     EXPECT(!WebView::break_url_into_parts("f"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("fi"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("fil"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("file"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("file:"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("file:/"sv).has_value());
 | |
| 
 | |
|     EXPECT(!WebView::break_url_into_parts("h"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("ht"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("htt"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("http"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("http:"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("http:/"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("http://"sv).has_value());
 | |
| 
 | |
|     EXPECT(!WebView::break_url_into_parts("https"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("https:"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("https:/"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("https://"sv).has_value());
 | |
| 
 | |
|     EXPECT(!WebView::break_url_into_parts("a"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("ab"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("abo"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("abou"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("about"sv).has_value());
 | |
| 
 | |
|     EXPECT(!WebView::break_url_into_parts("d"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("da"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("dat"sv).has_value());
 | |
|     EXPECT(!WebView::break_url_into_parts("data"sv).has_value());
 | |
| }
 | |
| 
 | |
| TEST_CASE(file_url)
 | |
| {
 | |
|     compare_url_parts("file://"sv, { "file://"sv, ""sv, {} });
 | |
|     compare_url_parts("file://a"sv, { "file://"sv, "a"sv, {} });
 | |
|     compare_url_parts("file:///a"sv, { "file://"sv, "/a"sv, {} });
 | |
|     compare_url_parts("file:///abc"sv, { "file://"sv, "/abc"sv, {} });
 | |
| }
 | |
| 
 | |
| TEST_CASE(http_url)
 | |
| {
 | |
|     compare_url_parts("http://a"sv, { "http://"sv, "a"sv, {} });
 | |
|     compare_url_parts("http://abc"sv, { "http://"sv, "abc"sv, {} });
 | |
|     compare_url_parts("http://com"sv, { "http://"sv, "com"sv, {} });
 | |
|     compare_url_parts("http://abc."sv, { "http://"sv, "abc."sv, {} });
 | |
|     compare_url_parts("http://abc.c"sv, { "http://"sv, "abc.c"sv, {} });
 | |
|     compare_url_parts("http://abc.com"sv, { "http://"sv, "abc.com"sv, {} });
 | |
|     compare_url_parts("http://abc.com."sv, { "http://"sv, "abc.com."sv, {} });
 | |
|     compare_url_parts("http://abc.com."sv, { "http://"sv, "abc.com."sv, {} });
 | |
|     compare_url_parts("http://abc.com.org"sv, { "http://abc."sv, "com.org"sv, {} });
 | |
|     compare_url_parts("http://abc.com.org.gov"sv, { "http://abc.com."sv, "org.gov"sv, {} });
 | |
| 
 | |
|     compare_url_parts("http://abc/path"sv, { "http://"sv, "abc"sv, "/path"sv });
 | |
|     compare_url_parts("http://abc#anchor"sv, { "http://"sv, "abc"sv, "#anchor"sv });
 | |
|     compare_url_parts("http://abc?query"sv, { "http://"sv, "abc"sv, "?query"sv });
 | |
| 
 | |
|     compare_url_parts("http://abc.def.com"sv, { "http://abc."sv, "def.com"sv, {} });
 | |
|     compare_url_parts("http://abc.def.com/path"sv, { "http://abc."sv, "def.com"sv, "/path"sv });
 | |
|     compare_url_parts("http://abc.def.com#anchor"sv, { "http://abc."sv, "def.com"sv, "#anchor"sv });
 | |
|     compare_url_parts("http://abc.def.com?query"sv, { "http://abc."sv, "def.com"sv, "?query"sv });
 | |
| }
 | |
| 
 | |
| TEST_CASE(about_url)
 | |
| {
 | |
|     compare_url_parts("about:"sv, { "about:"sv, {}, {} });
 | |
|     compare_url_parts("about:a"sv, { "about:"sv, "a"sv, {} });
 | |
|     compare_url_parts("about:ab"sv, { "about:"sv, "ab"sv, {} });
 | |
|     compare_url_parts("about:abc"sv, { "about:"sv, "abc"sv, {} });
 | |
|     compare_url_parts("about:abc/def"sv, { "about:"sv, "abc/def"sv, {} });
 | |
| 
 | |
|     EXPECT(!is_sanitized_url_the_same("about"sv));
 | |
|     EXPECT(!is_sanitized_url_the_same("about blabla:"sv));
 | |
|     EXPECT(!is_sanitized_url_the_same("blabla about:"sv));
 | |
| 
 | |
|     EXPECT(is_sanitized_url_the_same("about:about"sv));
 | |
|     EXPECT(is_sanitized_url_the_same("about:version"sv));
 | |
| }
 | |
| 
 | |
| TEST_CASE(data_url)
 | |
| {
 | |
|     compare_url_parts("data:"sv, { "data:"sv, {}, {} });
 | |
|     compare_url_parts("data:a"sv, { "data:"sv, "a"sv, {} });
 | |
|     compare_url_parts("data:ab"sv, { "data:"sv, "ab"sv, {} });
 | |
|     compare_url_parts("data:abc"sv, { "data:"sv, "abc"sv, {} });
 | |
|     compare_url_parts("data:abc/def"sv, { "data:"sv, "abc/def"sv, {} });
 | |
| 
 | |
|     EXPECT(is_sanitized_url_the_same("data:text/html"sv));
 | |
| 
 | |
|     EXPECT(!is_sanitized_url_the_same("data text/html"sv));
 | |
|     EXPECT(!is_sanitized_url_the_same("text/html data:"sv));
 | |
| }
 | |
| 
 | |
| TEST_CASE(location_to_search_or_url)
 | |
| {
 | |
|     expect_search_url_equals_sanitized_url("hello"sv); // Search.
 | |
|     expect_search_url_equals_sanitized_url("hello world"sv);
 | |
|     expect_search_url_equals_sanitized_url("\"example.org\""sv);
 | |
|     expect_search_url_equals_sanitized_url("\"example.org"sv);
 | |
|     expect_search_url_equals_sanitized_url("\"http://example.org\""sv);
 | |
|     expect_search_url_equals_sanitized_url("example.org hello"sv);
 | |
|     expect_search_url_equals_sanitized_url("http://example.org and example sites"sv);
 | |
|     expect_search_url_equals_sanitized_url("ftp://example.org"sv); // ftp:// is not in SUPPORTED_SCHEMES
 | |
|     expect_search_url_equals_sanitized_url("https://exa\"mple.com/what"sv);
 | |
| 
 | |
|     // If it can feed create_with_url_or_path -- it is a url.
 | |
|     expect_url_equals_sanitized_url("https://example.com/%20some%20cool%20page"sv, "https://example.com/ some cool page"sv);
 | |
|     expect_url_equals_sanitized_url("https://example.com/some%20cool%20page"sv, "https://example.com/some cool page"sv);
 | |
|     expect_url_equals_sanitized_url("https://example.com/%22what%22"sv, "https://example.com/\"what\""sv);
 | |
| 
 | |
|     expect_url_equals_sanitized_url("https://example.org/"sv, "example.org"sv);            // Valid domain.
 | |
|     expect_url_equals_sanitized_url("https://example.abc/"sv, "example.abc"sv);            // .abc is a recognized TLD.
 | |
|     expect_url_equals_sanitized_url("https://example.test/path"sv, "example.test/path"sv); // Reserved TLDs.
 | |
|     expect_url_equals_sanitized_url("https://example.example/path"sv, "example.example/path"sv);
 | |
|     expect_url_equals_sanitized_url("https://example.invalid/path"sv, "example.invalid/path"sv);
 | |
|     expect_url_equals_sanitized_url("https://example.localhost/path"sv, "example.localhost/path"sv);
 | |
| 
 | |
|     expect_search_url_equals_sanitized_url("example.def"sv); // Invalid domain but no scheme: search (Like Firefox or Chrome).
 | |
| 
 | |
|     expect_url_equals_sanitized_url("https://example.org/"sv, "https://example.org"sv); // Scheme.
 | |
|     // Respect the user if the url has a valid scheme but not a public suffix (.def is not a recognized TLD).
 | |
|     expect_url_equals_sanitized_url("https://example.def/"sv, "https://example.def"sv);
 | |
| 
 | |
|     expect_url_equals_sanitized_url("https://localhost/"sv, "localhost"sv); // Respect localhost.
 | |
|     expect_url_equals_sanitized_url("https://localhost/hello"sv, "localhost/hello"sv);
 | |
|     expect_url_equals_sanitized_url("https://localhost/hello.world"sv, "localhost/hello.world"sv);
 | |
|     expect_url_equals_sanitized_url("https://localhost/hello.world?query=123"sv, "localhost/hello.world?query=123"sv);
 | |
| 
 | |
|     expect_url_equals_sanitized_url("https://example.com/"sv, "example"sv, WebView::AppendTLD::Yes); // User holds down the Ctrl key.
 | |
|     expect_url_equals_sanitized_url("https://example.def.com/"sv, "example.def"sv, WebView::AppendTLD::Yes);
 | |
|     expect_url_equals_sanitized_url("https://com.com/"sv, "com"sv, WebView::AppendTLD::Yes);
 | |
|     expect_url_equals_sanitized_url("https://example.com/index.html"sv, "example/index.html"sv, WebView::AppendTLD::Yes);
 | |
| 
 | |
|     expect_search_url_equals_sanitized_url("whatever:example.com"sv);     // Invalid scheme.
 | |
|     expect_search_url_equals_sanitized_url("mailto:hello@example.com"sv); // For now, unsupported scheme.
 | |
|     // FIXME: Add support for opening mailto: scheme (below). Firefox opens mailto: locations
 | |
|     // expect_url_equals_sanitized_url("mailto:hello@example.com"sv, "mailto:hello@example.com"sv);
 | |
| }
 |