| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2021-02-16 17:31:22 +01:00
										 |  |  |  * Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org> | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  |  * | 
					
						
							| 
									
										
										
										
											2021-04-22 01:24:48 -07:00
										 |  |  |  * SPDX-License-Identifier: BSD-2-Clause | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #pragma once
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <AK/Forward.h>
 | 
					
						
							| 
									
										
										
										
											2021-08-29 11:44:28 +00:00
										 |  |  | #include <AK/Function.h>
 | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | namespace TextCodec { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) = 0; | 
					
						
							|  |  |  |     virtual String to_utf8(StringView); | 
					
						
							| 
									
										
										
										
											2021-04-15 10:43:29 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | protected: | 
					
						
							|  |  |  |     virtual ~Decoder() = default; | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class UTF8Decoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							|  |  |  |     virtual String to_utf8(StringView) override; | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-02-16 17:31:22 +01:00
										 |  |  | class UTF16BEDecoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							|  |  |  |     virtual String to_utf8(StringView) override; | 
					
						
							| 
									
										
										
										
											2021-02-16 17:31:22 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | class Latin1Decoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-27 22:44:38 +01:00
										 |  |  | class Latin2Decoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							| 
									
										
										
										
											2020-12-27 22:44:38 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:32 +03:00
										 |  |  | class HebrewDecoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:32 +03:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-01 18:18:26 +03:00
										 |  |  | class CyrillicDecoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							| 
									
										
										
										
											2021-05-01 18:18:26 +03:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-14 01:19:56 +01:00
										 |  |  | class Koi8RDecoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							|  |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-15 16:07:56 +03:00
										 |  |  | class Latin9Decoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							| 
									
										
										
										
											2021-06-15 16:07:56 +03:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-23 16:18:50 +03:00
										 |  |  | class TurkishDecoder final : public Decoder { | 
					
						
							|  |  |  | public: | 
					
						
							| 
									
										
										
										
											2021-11-11 00:55:02 +01:00
										 |  |  |     virtual void process(StringView, Function<void(u32)> on_code_point) override; | 
					
						
							| 
									
										
										
										
											2021-06-23 16:18:50 +03:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-29 11:44:28 +00:00
										 |  |  | Decoder* decoder_for(String const& encoding); | 
					
						
							| 
									
										
										
										
											2021-05-11 15:52:25 +02:00
										 |  |  | Optional<String> get_standardized_encoding(const String& encoding); | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-11 20:58:06 +00:00
										 |  |  | // This returns the appropriate Unicode decoder for the sniffed BOM or nullptr if there is no appropriate decoder.
 | 
					
						
							|  |  |  | Decoder* bom_sniff_to_decoder(StringView); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-03 22:41:34 +02:00
										 |  |  | } |