| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2021-04-28 22:46:44 +02:00
										 |  |  |  * Copyright (c) 2020-2021, the SerenityOS developers. | 
					
						
							| 
									
										
										
										
											2023-02-15 11:24:38 +00:00
										 |  |  |  * Copyright (c) 2021-2023, Sam Atkins <atkinssj@serenityos.org> | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2021-04-22 01:24:48 -07:00
										 |  |  |  * SPDX-License-Identifier: BSD-2-Clause | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #pragma once
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <AK/Optional.h>
 | 
					
						
							|  |  |  | #include <AK/StringView.h>
 | 
					
						
							|  |  |  | #include <AK/Types.h>
 | 
					
						
							|  |  |  | #include <AK/Utf8View.h>
 | 
					
						
							|  |  |  | #include <LibWeb/CSS/Parser/Token.h>
 | 
					
						
							|  |  |  | #include <LibWeb/Forward.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-12 15:10:08 +01:00
										 |  |  | namespace Web::CSS::Parser { | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | class U32Twin { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     void set(size_t index, u32 value) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         if (index == 0) | 
					
						
							|  |  |  |             first = value; | 
					
						
							|  |  |  |         if (index == 1) | 
					
						
							|  |  |  |             second = value; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     u32 first {}; | 
					
						
							|  |  |  |     u32 second {}; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class U32Triplet { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     void set(size_t index, u32 value) | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         if (index == 0) | 
					
						
							|  |  |  |             first = value; | 
					
						
							|  |  |  |         if (index == 1) | 
					
						
							|  |  |  |             second = value; | 
					
						
							|  |  |  |         if (index == 2) | 
					
						
							|  |  |  |             third = value; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     U32Twin to_twin_12() | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return { first, second }; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     U32Twin to_twin_23() | 
					
						
							|  |  |  |     { | 
					
						
							|  |  |  |         return { second, third }; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     u32 first {}; | 
					
						
							|  |  |  |     u32 second {}; | 
					
						
							|  |  |  |     u32 third {}; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Tokenizer { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2024-07-26 15:11:57 +01:00
										 |  |  |     static Vector<Token> tokenize(StringView input, StringView encoding); | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-03 14:00:41 +01:00
										 |  |  |     [[nodiscard]] static Token create_eof_token(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | private: | 
					
						
							| 
									
										
										
										
											2023-03-06 14:19:39 +00:00
										 |  |  |     explicit Tokenizer(String decoded_input); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-26 15:11:57 +01:00
										 |  |  |     [[nodiscard]] Vector<Token> tokenize(); | 
					
						
							| 
									
										
										
										
											2023-03-06 14:19:39 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-03-22 15:25:20 +00:00
										 |  |  |     size_t current_byte_offset() const; | 
					
						
							| 
									
										
										
										
											2024-07-26 15:11:57 +01:00
										 |  |  |     String input_since(size_t offset) const; | 
					
						
							| 
									
										
										
										
											2023-03-22 15:25:20 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-02 19:44:43 +01:00
										 |  |  |     [[nodiscard]] u32 next_code_point(); | 
					
						
							|  |  |  |     [[nodiscard]] u32 peek_code_point(size_t offset = 0) const; | 
					
						
							|  |  |  |     [[nodiscard]] U32Twin peek_twin() const; | 
					
						
							|  |  |  |     [[nodiscard]] U32Triplet peek_triplet() const; | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-24 16:08:35 +00:00
										 |  |  |     [[nodiscard]] U32Twin start_of_input_stream_twin(); | 
					
						
							|  |  |  |     [[nodiscard]] U32Triplet start_of_input_stream_triplet(); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-09 20:54:06 +01:00
										 |  |  |     [[nodiscard]] static Token create_new_token(Token::Type); | 
					
						
							| 
									
										
										
										
											2023-03-22 15:25:20 +00:00
										 |  |  |     [[nodiscard]] static Token create_value_token(Token::Type, FlyString&& value, String&& representation); | 
					
						
							|  |  |  |     [[nodiscard]] static Token create_value_token(Token::Type, u32 value, String&& representation); | 
					
						
							| 
									
										
										
										
											2024-07-26 15:11:57 +01:00
										 |  |  |     [[nodiscard]] Token consume_a_token(); | 
					
						
							|  |  |  |     [[nodiscard]] Token consume_string_token(u32 ending_code_point); | 
					
						
							|  |  |  |     [[nodiscard]] Token consume_a_numeric_token(); | 
					
						
							|  |  |  |     [[nodiscard]] Token consume_an_ident_like_token(); | 
					
						
							| 
									
										
										
										
											2022-03-21 21:01:27 +00:00
										 |  |  |     [[nodiscard]] Number consume_a_number(); | 
					
						
							| 
									
										
										
										
											2023-08-20 13:07:43 +01:00
										 |  |  |     [[nodiscard]] double convert_a_string_to_a_number(StringView); | 
					
						
							| 
									
										
										
										
											2024-07-26 15:11:57 +01:00
										 |  |  |     [[nodiscard]] FlyString consume_an_ident_sequence(); | 
					
						
							| 
									
										
										
										
											2021-06-01 10:01:11 +02:00
										 |  |  |     [[nodiscard]] u32 consume_escaped_code_point(); | 
					
						
							| 
									
										
										
										
											2024-07-26 15:11:57 +01:00
										 |  |  |     [[nodiscard]] Token consume_a_url_token(); | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  |     void consume_the_remnants_of_a_bad_url(); | 
					
						
							|  |  |  |     void consume_comments(); | 
					
						
							| 
									
										
										
										
											2021-10-21 16:53:49 +01:00
										 |  |  |     void consume_as_much_whitespace_as_possible(); | 
					
						
							| 
									
										
										
										
											2021-06-01 10:01:11 +02:00
										 |  |  |     void reconsume_current_input_code_point(); | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  |     [[nodiscard]] static bool is_valid_escape_sequence(U32Twin); | 
					
						
							| 
									
										
										
										
											2022-10-03 16:05:07 +01:00
										 |  |  |     [[nodiscard]] static bool would_start_an_ident_sequence(U32Triplet); | 
					
						
							| 
									
										
										
										
											2021-10-21 16:49:12 +01:00
										 |  |  |     [[nodiscard]] static bool would_start_a_number(U32Triplet); | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-15 11:24:38 +00:00
										 |  |  |     String m_decoded_input; | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  |     Utf8View m_utf8_view; | 
					
						
							| 
									
										
										
										
											2021-06-01 09:45:52 +02:00
										 |  |  |     AK::Utf8CodePointIterator m_utf8_iterator; | 
					
						
							|  |  |  |     AK::Utf8CodePointIterator m_prev_utf8_iterator; | 
					
						
							| 
									
										
										
										
											2021-10-21 21:25:14 +01:00
										 |  |  |     Token::Position m_position; | 
					
						
							|  |  |  |     Token::Position m_prev_position; | 
					
						
							| 
									
										
										
										
											2021-03-09 17:18:08 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | } |