2020-01-18 09:38:21 +01:00
/*
2024-10-04 13:19:50 +02:00
* Copyright ( c ) 2018 - 2020 , Andreas Kling < andreas @ ladybird . org >
2021-05-23 23:31:16 +02:00
* Copyright ( c ) 2021 , Max Wipfli < mail @ maxwipfli . ch >
2025-02-22 21:52:44 +13:00
* Copyright ( c ) 2023 - 2025 , Shannon Booth < shannon @ serenityos . org >
2020-01-18 09:38:21 +01:00
*
2021-04-22 01:24:48 -07:00
* SPDX - License - Identifier : BSD - 2 - Clause
2020-01-18 09:38:21 +01:00
*/
2019-08-10 17:27:56 +02:00
# pragma once
2023-12-16 17:49:34 +03:30
# include <AK/ByteString.h>
2024-09-10 11:05:56 +02:00
# include <AK/CopyOnWrite.h>
2023-02-28 21:35:41 +01:00
# include <AK/String.h>
2019-08-10 17:27:56 +02:00
# include <AK/StringView.h>
2021-11-10 11:05:21 +01:00
# include <AK/Vector.h>
2024-10-05 16:14:27 +13:00
# include <LibURL/Host.h>
2024-10-05 16:54:27 +13:00
# include <LibURL/Origin.h>
2019-08-10 17:27:56 +02:00
2022-09-25 20:54:06 +02:00
// On Linux distros that use mlibc `basename` is defined as a macro that expands to `__mlibc_gnu_basename` or `__mlibc_gnu_basename_c`, so we undefine it.
# if defined(AK_OS_LINUX) && defined(basename)
# undef basename
# endif
2024-03-18 16:22:27 +13:00
namespace URL {
// Selects the set of code points that the percent-encoding helpers in this header
// (code_point_is_in_percent_encode_set(), append_percent_encoded_if_necessary(), percent_encode())
// check a code point against.
// NOTE(review): the enumerator names look like the percent-encode sets of the URL Standard
// (https://url.spec.whatwg.org/#percent-encoded-bytes); confirm against the implementation.
enum class PercentEncodeSet {
    C0Control,
    Fragment,
    Query,
    SpecialQuery,
    Path,
    Userinfo,
    Component,
    ApplicationXWWWFormUrlencoded,
    EncodeURI,
};
// Option accepted by URL::serialize() and URL::equals(); both default to No.
// NOTE(review): judging by the name, Yes asks them to leave the fragment out; confirm in the implementation.
enum class ExcludeFragment {
    No,
    Yes,
};
2024-05-05 20:32:20 +12:00
// https://w3c.github.io/FileAPI/#blob-url-entry
struct BlobURLEntry {
2025-01-19 18:12:46 +13:00
// This represents the raw bytes behind a 'Blob' (and does not yet support a MediaSourceQuery).
struct Object {
String type ;
ByteBuffer data ;
} ;
// This represents the parts of HTML::Environment that we need for a BlobURL entry.
struct Environment {
Origin origin ;
} ;
Object object ;
Environment environment ;
2024-05-05 20:32:20 +12:00
} ;
2024-03-18 16:22:27 +13:00
void append_percent_encoded_if_necessary ( StringBuilder & , u32 code_point , PercentEncodeSet set = PercentEncodeSet : : Userinfo ) ;
void append_percent_encoded ( StringBuilder & , u32 code_point ) ;
bool code_point_is_in_percent_encode_set ( u32 code_point , PercentEncodeSet ) ;
Optional < u16 > default_port_for_scheme ( StringView ) ;
2025-03-18 19:22:16 +13:00
ReadonlySpan < StringView > special_schemes ( ) ;
2024-03-18 16:22:27 +13:00
bool is_special_scheme ( StringView ) ;
// Option accepted by percent_encode(); its default there is No.
// NOTE(review): presumably Yes makes a space come out as '+' rather than a percent escape; confirm in the implementation.
enum class SpaceAsPlus {
    No,  // default used by percent_encode()
    Yes, // opt-in alternative
};
2024-08-10 13:12:19 +12:00
String percent_encode ( StringView input , PercentEncodeSet set = PercentEncodeSet : : Userinfo , SpaceAsPlus = SpaceAsPlus : : No ) ;
2024-03-18 16:22:27 +13:00
ByteString percent_decode ( StringView input ) ;
2019-08-10 17:27:56 +02:00
2023-07-23 20:10:32 +12:00
// https://url.spec.whatwg.org/#url-representation
// A URL is a struct that represents a universal identifier. To disambiguate from a valid URL string it can also be referred to as a URL record.
2019-08-10 17:27:56 +02:00
class URL {
2024-03-18 16:22:27 +13:00
friend class Parser ;
2021-05-25 22:13:15 +02:00
2019-08-10 17:27:56 +02:00
public :
2025-04-19 17:11:24 +12:00
// FIXME: We should get rid of the default constructor, all URLs should be constructed through the Parser.
2021-01-10 16:29:28 -07:00
URL ( ) = default ;
2019-08-10 17:27:56 +02:00
2024-08-02 15:23:49 +02:00
String const & scheme ( ) const { return m_data - > scheme ; }
2024-08-04 22:02:02 +12:00
String const & username ( ) const { return m_data - > username ; }
String const & password ( ) const { return m_data - > password ; }
2024-11-27 12:48:28 +00:00
Optional < Host > const & host ( ) const { return m_data - > host ; }
2024-11-28 14:32:07 +00:00
String serialized_host ( ) const ;
2023-12-16 17:49:34 +03:30
ByteString basename ( ) const ;
2024-08-02 15:23:49 +02:00
Optional < String > const & query ( ) const { return m_data - > query ; }
Optional < String > const & fragment ( ) const { return m_data - > fragment ; }
Optional < u16 > port ( ) const { return m_data - > port ; }
2023-12-16 17:49:34 +03:30
ByteString path_segment_at_index ( size_t index ) const ;
2024-08-02 15:23:49 +02:00
size_t path_segment_count ( ) const { return m_data - > paths . size ( ) ; }
2023-04-13 23:06:58 +01:00
2024-08-02 15:23:49 +02:00
u16 port_or_default ( ) const { return m_data - > port . value_or ( default_port_for_scheme ( m_data - > scheme ) . value_or ( 0 ) ) ; }
2025-03-07 19:08:44 +13:00
// https://url.spec.whatwg.org/#url-opaque-path
// A URL has an opaque path if its path is a URL path segment.
bool has_an_opaque_path ( ) const { return m_data - > has_an_opaque_path ; }
2023-07-26 20:54:36 +12:00
bool cannot_have_a_username_or_password_or_port ( ) const ;
2019-08-10 17:27:56 +02:00
2024-08-02 15:23:49 +02:00
bool includes_credentials ( ) const { return ! m_data - > username . is_empty ( ) | | ! m_data - > password . is_empty ( ) ; }
bool is_special ( ) const { return is_special_scheme ( m_data - > scheme ) ; }
2021-05-25 22:05:01 +02:00
2023-08-12 16:52:41 +12:00
void set_scheme ( String ) ;
2024-08-10 13:12:19 +12:00
void set_username ( StringView ) ;
void set_password ( StringView ) ;
2023-07-27 21:40:41 +12:00
void set_host ( Host ) ;
2021-09-13 23:12:16 +03:00
void set_port ( Optional < u16 > ) ;
2023-12-16 17:49:34 +03:30
void set_paths ( Vector < ByteString > const & ) ;
2024-08-05 15:14:00 +12:00
Vector < String > const & paths ( ) const { return m_data - > paths ; }
2024-08-02 15:23:49 +02:00
void set_query ( Optional < String > query ) { m_data - > query = move ( query ) ; }
void set_fragment ( Optional < String > fragment ) { m_data - > fragment = move ( fragment ) ; }
2025-03-07 19:08:44 +13:00
void set_has_an_opaque_path ( bool value ) { m_data - > has_an_opaque_path = value ; }
2023-08-06 16:32:44 +12:00
void append_path ( StringView ) ;
2023-04-09 14:21:00 +01:00
void append_slash ( )
{
// NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment.
2024-08-02 15:23:49 +02:00
m_data - > paths . append ( String { } ) ;
2023-04-09 14:21:00 +01:00
}
2019-10-05 10:14:42 +02:00
2024-08-05 16:55:39 +12:00
String serialize_path ( ) const ;
2025-02-14 15:31:43 +05:00
ByteString file_path ( ) const ;
2024-12-03 22:31:33 +13:00
String serialize ( ExcludeFragment = ExcludeFragment : : No ) const ;
2023-12-16 17:49:34 +03:30
ByteString serialize_for_display ( ) const ;
2024-12-03 22:31:33 +13:00
ByteString to_byte_string ( ) const { return serialize ( ) . to_byte_string ( ) ; }
String to_string ( ) const { return serialize ( ) ; }
2021-05-27 21:38:16 +02:00
2024-10-05 17:03:51 +13:00
Origin origin ( ) const ;
2021-09-13 22:18:14 +03:00
2021-06-01 10:58:27 +02:00
bool equals ( URL const & other , ExcludeFragment = ExcludeFragment : : No ) const ;
2021-05-27 21:38:16 +02:00
2025-02-15 22:55:46 +13:00
Optional < URL > complete_url ( StringView ) const ;
2019-11-18 22:04:39 +01:00
2024-08-02 15:23:49 +02:00
[ [ nodiscard ] ] bool operator = = ( URL const & other ) const
{
if ( m_data . ptr ( ) = = other . m_data . ptr ( ) )
return true ;
return equals ( other , ExcludeFragment : : No ) ;
}
2020-06-01 21:50:07 +02:00
2024-08-02 15:23:49 +02:00
Optional < BlobURLEntry > const & blob_url_entry ( ) const { return m_data - > blob_url_entry ; }
void set_blob_url_entry ( Optional < BlobURLEntry > entry ) { m_data - > blob_url_entry = move ( entry ) ; }
2024-05-05 20:32:20 +12:00
2025-02-15 23:45:40 +13:00
static URL about ( String path ) ;
2019-08-10 17:27:56 +02:00
private :
2024-08-02 15:23:49 +02:00
struct Data : public RefCounted < Data > {
2025-04-19 17:11:24 +12:00
NonnullRefPtr < Data > clone ( ) const
2024-08-02 15:23:49 +02:00
{
auto clone = adopt_ref ( * new Data ) ;
clone - > scheme = scheme ;
clone - > username = username ;
clone - > password = password ;
clone - > host = host ;
clone - > port = port ;
clone - > paths = paths ;
clone - > query = query ;
clone - > fragment = fragment ;
2025-03-07 19:08:44 +13:00
clone - > has_an_opaque_path = has_an_opaque_path ;
2024-08-02 15:23:49 +02:00
clone - > blob_url_entry = blob_url_entry ;
return clone ;
}
// A URL’ s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string.
String scheme ;
// A URL’ s username is an ASCII string identifying a username. It is initially the empty string.
String username ;
// A URL’ s password is an ASCII string identifying a password. It is initially the empty string.
String password ;
// A URL’ s host is null or a host. It is initially null.
2024-11-27 12:48:28 +00:00
Optional < Host > host ;
2024-08-02 15:23:49 +02:00
// A URL’ s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null.
Optional < u16 > port ;
// A URL’ s path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ».
// A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning.
Vector < String > paths ;
// A URL’ s query is either null or an ASCII string. It is initially null.
Optional < String > query ;
// A URL’ s fragment is either null or an ASCII string that can be used for further processing on the resource the URL’ s other components identify. It is initially null.
Optional < String > fragment ;
2025-03-07 19:08:44 +13:00
bool has_an_opaque_path { false } ;
2024-08-02 15:23:49 +02:00
// https://url.spec.whatwg.org/#concept-url-blob-entry
// A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null.
Optional < BlobURLEntry > blob_url_entry ;
} ;
2024-09-10 11:05:56 +02:00
AK : : CopyOnWrite < Data > m_data ;
2019-08-10 17:27:56 +02:00
} ;
2025-04-19 16:43:17 +12:00
Optional < URL > create_with_url_or_path ( ByteString const & ) ;
Optional < URL > create_with_file_scheme ( ByteString const & path , ByteString const & fragment = { } , ByteString const & hostname = { } ) ;
2024-03-18 16:22:27 +13:00
URL create_with_data ( StringView mime_type , StringView payload , bool is_base64 = false ) ;
2024-11-26 16:27:08 +00:00
bool is_public_suffix ( StringView host ) ;
2025-03-09 11:11:35 -04:00
Optional < String > get_registrable_domain ( StringView host ) ;
2024-11-26 16:27:08 +00:00
2025-02-15 23:45:40 +13:00
// Shorthands for commonly used URL::about() results; each one forwards a fixed path string.

inline URL about_blank()
{
    return URL::about("blank"_string);
}

inline URL about_srcdoc()
{
    return URL::about("srcdoc"_string);
}

inline URL about_error()
{
    return URL::about("error"_string);
}

inline URL about_version()
{
    return URL::about("version"_string);
}

inline URL about_newtab()
{
    return URL::about("newtab"_string);
}
2024-03-18 16:22:27 +13:00
}
2020-10-04 13:29:47 +02:00
template < >
2024-03-18 16:22:27 +13:00
struct AK : : Formatter < URL : : URL > : AK : : Formatter < StringView > {
ErrorOr < void > format ( FormatBuilder & builder , URL : : URL const & value )
2020-10-04 13:29:47 +02:00
{
2021-11-16 01:15:21 +01:00
return Formatter < StringView > : : format ( builder , value . serialize ( ) ) ;
2020-10-04 13:29:47 +02:00
}
} ;
2020-06-01 21:50:07 +02:00
template < >
2024-03-18 16:22:27 +13:00
struct AK : : Traits < URL : : URL > : public AK : : DefaultTraits < URL : : URL > {
2024-12-03 22:31:33 +13:00
static unsigned hash ( URL : : URL const & url ) { return url . to_string ( ) . hash ( ) ; }
2020-06-01 21:50:07 +02:00
} ;