mirror of
				https://github.com/LadybirdBrowser/ladybird.git
				synced 2025-10-31 21:30:58 +00:00 
			
		
		
		
	 fe676585f5
			
		
	
	
		fe676585f5
		
	
	
	
	
		
			
			This is a strictly UTF-16 string with some optimizations for ASCII.
* If created from a short UTF-8 or UTF-16 string that is also ASCII,
  then the string is stored in an inlined byte buffer.
* If created with a long UTF-8 or UTF-16 string that is also ASCII,
  then the string is stored in an outlined char buffer.
* If created with a short or long UTF-8 or UTF-16 string that is not
  ASCII, then the string is stored in an outlined char16 buffer.
We do not store short non-ASCII text in the inlined buffer to avoid
confusion with operations such as `length_in_code_units` and
`code_unit_at`. For example, "😀" would be stored as 4 UTF-8 bytes
in short string form. But we still want `length_in_code_units` to
be 2, and `code_unit_at(0)` to be 0xD83D.
		
	
			
		
			
				
	
	
		
			157 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			157 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #include <AK/StringView.h>
 | |
| #include <LibJS/Runtime/Utf16String.h>
 | |
| #include <LibJS/Runtime/VM.h>
 | |
| 
 | |
| namespace JS {
 | |
| namespace Detail {
 | |
| 
 | |
| static NonnullRefPtr<Utf16StringImpl> the_empty_utf16_string()
 | |
| {
 | |
|     static NonnullRefPtr<Utf16StringImpl> empty_string = Utf16StringImpl::create();
 | |
|     return empty_string;
 | |
| }
 | |
| 
 | |
| Utf16StringImpl::Utf16StringImpl(Utf16Data string)
 | |
|     : m_string(move(string))
 | |
| {
 | |
| }
 | |
| 
 | |
| NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create()
 | |
| {
 | |
|     return adopt_ref(*new Utf16StringImpl);
 | |
| }
 | |
| 
 | |
| NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
 | |
| {
 | |
|     return adopt_ref(*new Utf16StringImpl(move(string)));
 | |
| }
 | |
| 
 | |
| NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
 | |
| {
 | |
|     auto result = MUST(utf8_to_utf16(string));
 | |
|     auto impl = create(move(result.data));
 | |
|     impl->m_cached_view.unsafe_set_code_point_length(result.code_point_count);
 | |
|     return impl;
 | |
| }
 | |
| 
 | |
| NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
 | |
| {
 | |
|     Utf16Data string;
 | |
|     string.ensure_capacity(view.length_in_code_units());
 | |
| 
 | |
|     if (view.has_ascii_storage()) {
 | |
|         for (size_t i = 0; i < view.length_in_code_units(); ++i)
 | |
|             string.unchecked_append(static_cast<char16_t>(view.code_unit_at(i)));
 | |
|     } else {
 | |
|         string.unchecked_append(view.utf16_span().data(), view.length_in_code_units());
 | |
|     }
 | |
| 
 | |
|     auto impl = create(move(string));
 | |
|     if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
 | |
|         impl->m_cached_view.unsafe_set_code_point_length(*length_in_code_points);
 | |
| 
 | |
|     return impl;
 | |
| }
 | |
| 
 | |
| Utf16Data const& Utf16StringImpl::string() const
 | |
| {
 | |
|     return m_string;
 | |
| }
 | |
| 
 | |
| Utf16View Utf16StringImpl::view() const
 | |
| {
 | |
|     return m_cached_view;
 | |
| }
 | |
| 
 | |
| u32 Utf16StringImpl::compute_hash() const
 | |
| {
 | |
|     if (m_string.is_empty())
 | |
|         return 0;
 | |
|     return string_hash((char const*)m_string.data(), m_string.size() * sizeof(u16));
 | |
| }
 | |
| 
 | |
| }
 | |
| 
 | |
| Utf16String Utf16String::create()
 | |
| {
 | |
|     return Utf16String { Detail::the_empty_utf16_string() };
 | |
| }
 | |
| 
 | |
| Utf16String Utf16String::create(Utf16Data string)
 | |
| {
 | |
|     return Utf16String { Detail::Utf16StringImpl::create(move(string)) };
 | |
| }
 | |
| 
 | |
| Utf16String Utf16String::create(StringView string)
 | |
| {
 | |
|     return Utf16String { Detail::Utf16StringImpl::create(string) };
 | |
| }
 | |
| 
 | |
| Utf16String Utf16String::create(Utf16View const& string)
 | |
| {
 | |
|     return Utf16String { Detail::Utf16StringImpl::create(string) };
 | |
| }
 | |
| 
 | |
| Utf16String Utf16String::invalid()
 | |
| {
 | |
|     static auto invalid = Utf16String {};
 | |
|     return invalid;
 | |
| }
 | |
| 
 | |
| Utf16String::Utf16String(NonnullRefPtr<Detail::Utf16StringImpl> string)
 | |
|     : m_string(move(string))
 | |
| {
 | |
| }
 | |
| 
 | |
| Utf16Data const& Utf16String::string() const
 | |
| {
 | |
|     return m_string->string();
 | |
| }
 | |
| 
 | |
| Utf16View Utf16String::view() const
 | |
| {
 | |
|     return m_string->view();
 | |
| }
 | |
| 
 | |
| Utf16View Utf16String::substring_view(size_t code_unit_offset, size_t code_unit_length) const
 | |
| {
 | |
|     return view().substring_view(code_unit_offset, code_unit_length);
 | |
| }
 | |
| 
 | |
| Utf16View Utf16String::substring_view(size_t code_unit_offset) const
 | |
| {
 | |
|     return view().substring_view(code_unit_offset);
 | |
| }
 | |
| 
 | |
| String Utf16String::to_utf8() const
 | |
| {
 | |
|     return MUST(view().to_utf8());
 | |
| }
 | |
| 
 | |
| ByteString Utf16String::to_byte_string() const
 | |
| {
 | |
|     return MUST(view().to_byte_string());
 | |
| }
 | |
| 
 | |
| u16 Utf16String::code_unit_at(size_t index) const
 | |
| {
 | |
|     return view().code_unit_at(index);
 | |
| }
 | |
| 
 | |
| size_t Utf16String::length_in_code_units() const
 | |
| {
 | |
|     return view().length_in_code_units();
 | |
| }
 | |
| 
 | |
| bool Utf16String::is_empty() const
 | |
| {
 | |
|     return view().is_empty();
 | |
| }
 | |
| 
 | |
| }
 |