mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-10-25 10:24:13 +00:00
440 lines
14 KiB
C++
440 lines
14 KiB
C++
|
|
/*
|
||
|
|
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org>
|
||
|
|
*
|
||
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include <AK/GenericLexer.h>
|
||
|
|
#include <LibWeb/ContentSecurityPolicy/Directives/KeywordSources.h>
|
||
|
|
#include <LibWeb/ContentSecurityPolicy/Directives/SourceExpression.h>
|
||
|
|
|
||
|
|
namespace Web::ContentSecurityPolicy::Directives {
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#source-expression
|
||
|
|
// Hand-written parser for the CSP source-expression grammar. Each parse_* member tries to match one grammar
// production at the current lexer position and stores the matched substrings in the parse result.
class SourceExpressionParser {
|
||
|
|
public:
|
||
|
|
// Creates a parser positioned at the start of `input`, with an empty parse result.
explicit SourceExpressionParser(StringView input)
    : m_input(input)
    , m_state(State { .lexer = GenericLexer { input }, .parse_result = {} })
{
}
|
||
|
|
|
||
|
|
// Read-only view of the lexer in its current state.
[[nodiscard]] GenericLexer const& lexer() const
{
    return m_state.lexer;
}
|
||
|
|
// Read-only view of the pieces recorded by the parse_* members so far.
[[nodiscard]] SourceExpressionParseResult const& parse_result() const
{
    return m_state.parse_result;
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-scheme-source
|
||
|
|
[[nodiscard]] bool parse_scheme_source()
|
||
|
|
{
|
||
|
|
// ; Schemes: "https:" / "custom-scheme:" / "another.custom-scheme:"
|
||
|
|
// scheme-source = scheme-part ":"
|
||
|
|
if (!parse_scheme_part())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return m_state.lexer.consume_specific(':');
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-scheme-part
|
||
|
|
[[nodiscard]] bool parse_scheme_part()
|
||
|
|
{
|
||
|
|
// scheme-part = scheme
|
||
|
|
// ; scheme is defined in section 3.1 of RFC 3986.
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
if (!parse_scheme())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
m_state.parse_result.scheme_part = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
|
||
|
|
[[nodiscard]] bool parse_scheme()
|
||
|
|
{
|
||
|
|
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
||
|
|
if (!m_state.lexer.consume_specific_with_predicate(is_ascii_alpha))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
(void)m_state.lexer.consume_while([](char ch) {
|
||
|
|
return is_ascii_alpha(ch) || is_ascii_digit(ch) || ch == '+' || ch == '-' || ch == '.';
|
||
|
|
});
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-host-source
|
||
|
|
[[nodiscard]] bool parse_host_source()
|
||
|
|
{
|
||
|
|
// ; Hosts: "example.com" / "*.example.com" / "https://*.example.com:12/path/to/file.js"
|
||
|
|
// host-source = [ scheme-part "://" ] host-part [ ":" port-part ] [ path-part ]
|
||
|
|
auto parse_scheme = [&] {
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
if (!parse_scheme_part())
|
||
|
|
return;
|
||
|
|
|
||
|
|
if (!m_state.lexer.consume_specific("://"sv)) {
|
||
|
|
m_state.parse_result.scheme_part = OptionalNone {};
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
transaction.commit();
|
||
|
|
};
|
||
|
|
|
||
|
|
parse_scheme();
|
||
|
|
|
||
|
|
if (!parse_host_part())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (m_state.lexer.consume_specific(':')) {
|
||
|
|
if (!parse_port_part())
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
(void)parse_path_part();
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-host-part
|
||
|
|
[[nodiscard]] bool parse_host_part()
|
||
|
|
{
|
||
|
|
// host-part = "*" / [ "*." ] 1*host-char *( "." 1*host-char ) [ "." ]
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
if (m_state.lexer.consume_specific('*') && !m_state.lexer.consume_specific('.')) {
|
||
|
|
m_state.parse_result.host_part = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!parse_host_char())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
while (parse_host_char())
|
||
|
|
;
|
||
|
|
|
||
|
|
while (m_state.lexer.consume_specific('.')) {
|
||
|
|
if (parse_host_char()) {
|
||
|
|
while (parse_host_char())
|
||
|
|
;
|
||
|
|
} else {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
m_state.parse_result.host_part = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-host-char
|
||
|
|
[[nodiscard]] bool parse_host_char()
|
||
|
|
{
|
||
|
|
// host-char = ALPHA / DIGIT / "-"
|
||
|
|
return m_state.lexer.consume_specific_with_predicate(is_ascii_alpha)
|
||
|
|
|| m_state.lexer.consume_specific_with_predicate(is_ascii_digit)
|
||
|
|
|| m_state.lexer.consume_specific('-');
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-port-part
|
||
|
|
[[nodiscard]] bool parse_port_part()
|
||
|
|
{
|
||
|
|
// port-part = 1*DIGIT / "*"
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
if (m_state.lexer.consume_specific('*')) {
|
||
|
|
m_state.parse_result.port_part = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!m_state.lexer.consume_specific_with_predicate(is_ascii_digit))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
(void)m_state.lexer.consume_while(is_ascii_digit);
|
||
|
|
|
||
|
|
m_state.parse_result.port_part = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-path-part
|
||
|
|
[[nodiscard]] bool parse_path_part()
|
||
|
|
{
|
||
|
|
// path-part = path-absolute (but not including ";" or ",")
|
||
|
|
// ; path-absolute is defined in section 3.3 of RFC 3986.
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
if (!parse_path_absolute())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
m_state.parse_result.path_part = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
|
||
|
|
[[nodiscard]] bool parse_path_absolute()
|
||
|
|
{
|
||
|
|
// path-absolute = "/" [ segment-nz *( "/" segment ) ]
|
||
|
|
if (!m_state.lexer.consume_specific('/'))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (parse_segment_non_zero()) {
|
||
|
|
while (m_state.lexer.consume_specific('/')) {
|
||
|
|
parse_segment();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
|
||
|
|
void parse_segment()
|
||
|
|
{
|
||
|
|
// segment = *pchar
|
||
|
|
while (parse_path_character())
|
||
|
|
;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
|
||
|
|
[[nodiscard]] bool parse_segment_non_zero()
|
||
|
|
{
|
||
|
|
// segment-nz = 1*pchar
|
||
|
|
if (!parse_path_character())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
while (parse_path_character())
|
||
|
|
;
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
|
||
|
|
[[nodiscard]] bool parse_path_character()
|
||
|
|
{
|
||
|
|
// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
|
||
|
|
return parse_unreserved()
|
||
|
|
|| parse_percent_encoded()
|
||
|
|
|| parse_sub_delims()
|
||
|
|
|| m_state.lexer.consume_specific_with_predicate(is_any_of(":@"sv));
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
|
||
|
|
[[nodiscard]] bool parse_unreserved()
|
||
|
|
{
|
||
|
|
// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||
|
|
return m_state.lexer.consume_specific_with_predicate(is_ascii_alpha)
|
||
|
|
|| m_state.lexer.consume_specific_with_predicate(is_ascii_digit)
|
||
|
|
|| m_state.lexer.consume_specific_with_predicate(is_any_of("-._~"sv));
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.1
|
||
|
|
[[nodiscard]] bool parse_percent_encoded()
|
||
|
|
{
|
||
|
|
// pct-encoded = "%" HEXDIG HEXDIG
|
||
|
|
// "The uppercase hexadecimal digits 'A' through 'F' are equivalent to
|
||
|
|
// the lowercase digits 'a' through 'f', respectively. If two URIs
|
||
|
|
// differ only in the case of hexadecimal digits used in percent-encoded
|
||
|
|
// octets, they are equivalent. For consistency, URI producers and
|
||
|
|
// normalizers should use uppercase hexadecimal digits for all percent-
|
||
|
|
// encodings."
|
||
|
|
return m_state.lexer.consume_specific('%')
|
||
|
|
&& m_state.lexer.consume_specific_with_predicate(is_ascii_hex_digit)
|
||
|
|
&& m_state.lexer.consume_specific_with_predicate(is_ascii_hex_digit);
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
|
||
|
|
[[nodiscard]] bool parse_sub_delims()
|
||
|
|
{
|
||
|
|
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
||
|
|
// / "*" / "+" / "," / ";" / "="
|
||
|
|
// NOTE: This does not contain ';' and ',' as per the requirement specified in parse_path_part.
|
||
|
|
return m_state.lexer.consume_specific_with_predicate(is_any_of("!$&'()*+="sv));
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-keyword-source
|
||
|
|
[[nodiscard]] bool parse_keyword_source()
|
||
|
|
{
|
||
|
|
// ; Keywords:
|
||
|
|
// keyword-source = "'self'" / "'unsafe-inline'" / "'unsafe-eval'"
|
||
|
|
// / "'strict-dynamic'" / "'unsafe-hashes'" /
|
||
|
|
// / "'report-sample'" / "'unsafe-allow-redirects'"
|
||
|
|
// / "'wasm-unsafe-eval'"
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
#define __ENUMERATE_KEYWORD_SOURCE(_, value) \
|
||
|
|
if (m_state.lexer.consume_specific(value##sv)) { \
|
||
|
|
m_state.parse_result.keyword_source = transaction.parsed_string_view(); \
|
||
|
|
transaction.commit(); \
|
||
|
|
return true; \
|
||
|
|
}
|
||
|
|
ENUMERATE_KEYWORD_SOURCES
|
||
|
|
#undef __ENUMERATE_KEYWORD_SOURCE
|
||
|
|
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-nonce-source
|
||
|
|
[[nodiscard]] bool parse_nonce_source()
|
||
|
|
{
|
||
|
|
// ; Nonces: 'nonce-[nonce goes here]'
|
||
|
|
// nonce-source = "'nonce-" base64-value "'"
|
||
|
|
auto prefix = m_state.lexer.consume(7);
|
||
|
|
if (prefix.length() != 7)
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (!prefix.equals_ignoring_ascii_case("'nonce-"sv))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (!parse_base64_value())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return m_state.lexer.consume_specific('\'');
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-base64-value
|
||
|
|
[[nodiscard]] bool parse_base64_value()
|
||
|
|
{
|
||
|
|
// base64-value = 1*( ALPHA / DIGIT / "+" / "/" / "-" / "_" )*2( "=" )
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
auto is_main_part = [](char ch) {
|
||
|
|
return is_ascii_alpha(ch) || is_ascii_digit(ch) || ch == '+' || ch == '/' || ch == '-' || ch == '_';
|
||
|
|
};
|
||
|
|
|
||
|
|
if (!m_state.lexer.consume_specific_with_predicate(is_main_part))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
(void)m_state.lexer.consume_while(is_main_part);
|
||
|
|
(void)m_state.lexer.consume_specific('=');
|
||
|
|
(void)m_state.lexer.consume_specific('=');
|
||
|
|
|
||
|
|
m_state.parse_result.base64_value = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-hash-source
|
||
|
|
[[nodiscard]] bool parse_hash_source()
|
||
|
|
{
|
||
|
|
// ; Digests: 'sha256-[digest goes here]'
|
||
|
|
// hash-source = "'" hash-algorithm "-" base64-value "'"
|
||
|
|
if (!m_state.lexer.consume_specific('\''))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (!parse_hash_algorithm())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (!m_state.lexer.consume_specific('-'))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (!parse_base64_value())
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return m_state.lexer.consume_specific('\'');
|
||
|
|
}
|
||
|
|
|
||
|
|
// https://w3c.github.io/webappsec-csp/#grammardef-hash-algorithm
|
||
|
|
[[nodiscard]] bool parse_hash_algorithm()
|
||
|
|
{
|
||
|
|
// hash-algorithm = "sha256" / "sha384" / "sha512"
|
||
|
|
StateTransaction transaction { *this };
|
||
|
|
|
||
|
|
auto hash_algorithm = m_state.lexer.consume(6);
|
||
|
|
if (hash_algorithm.length() != 6)
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (hash_algorithm.equals_ignoring_ascii_case("sha256"sv)) {
|
||
|
|
m_state.parse_result.hash_algorithm = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (hash_algorithm.equals_ignoring_ascii_case("sha384"sv)) {
|
||
|
|
m_state.parse_result.hash_algorithm = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (hash_algorithm.equals_ignoring_ascii_case("sha512"sv)) {
|
||
|
|
m_state.parse_result.hash_algorithm = transaction.parsed_string_view();
|
||
|
|
transaction.commit();
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
struct State {
|
||
|
|
GenericLexer lexer;
|
||
|
|
SourceExpressionParseResult parse_result;
|
||
|
|
};
|
||
|
|
|
||
|
|
struct StateTransaction {
|
||
|
|
explicit StateTransaction(SourceExpressionParser& parser)
|
||
|
|
: m_parser(parser)
|
||
|
|
, m_saved_state(parser.m_state)
|
||
|
|
, m_start_index(parser.m_state.lexer.tell())
|
||
|
|
{
|
||
|
|
}
|
||
|
|
|
||
|
|
~StateTransaction()
|
||
|
|
{
|
||
|
|
if (!m_commit)
|
||
|
|
m_parser.m_state = move(m_saved_state);
|
||
|
|
}
|
||
|
|
|
||
|
|
void commit() { m_commit = true; }
|
||
|
|
StringView parsed_string_view() const
|
||
|
|
{
|
||
|
|
return m_parser.m_input.substring_view(m_start_index, m_parser.m_state.lexer.tell() - m_start_index);
|
||
|
|
}
|
||
|
|
|
||
|
|
private:
|
||
|
|
SourceExpressionParser& m_parser;
|
||
|
|
State m_saved_state;
|
||
|
|
size_t m_start_index { 0 };
|
||
|
|
bool m_commit { false };
|
||
|
|
};
|
||
|
|
|
||
|
|
// The complete input handed to the constructor; StateTransaction slices parsed substrings out of it.
StringView m_input;
|
||
|
|
// Current lexer position and the parse result recorded so far; snapshotted and restored by StateTransaction.
State m_state;
|
||
|
|
};
|
||
|
|
|
||
|
|
// Pairs each Production enumerator with the SourceExpressionParser member that parses it.
#define ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSERS                                   \
    __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(SchemeSource, parse_scheme_source)   \
    __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(HostSource, parse_host_source)       \
    __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(KeywordSource, parse_keyword_source) \
    __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(NonceSource, parse_nonce_source)     \
    __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(HashSource, parse_hash_source)
|
||
|
|
|
||
|
|
// Parses `input` as the requested source-expression production. Returns the recorded pieces when the whole
// input matches the production, and an empty Optional otherwise.
Optional<SourceExpressionParseResult> parse_source_expression(Production production, StringView input)
{
    SourceExpressionParser source_parser { input };
    bool production_matched = false;

    // Dispatch to the parser member registered for this production.
    switch (production) {
#define __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER(ProductionName, parse_production) \
    case Production::ProductionName:                                                      \
        production_matched = source_parser.parse_production();                            \
        break;
        ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSERS
#undef __ENUMERATE_SOURCE_EXPRESSION_PRODUCTION_PARSER
    default:
        VERIFY_NOT_REACHED();
    }

    // A match that stops before the end of the input means the string doesn't match the given production.
    if (!production_matched || !source_parser.lexer().is_eof())
        return {};

    return source_parser.parse_result();
}
|
||
|
|
|
||
|
|
}
|