2020-01-18 09:38:21 +01:00
|
|
|
/*
|
2024-10-04 13:19:50 +02:00
|
|
|
* Copyright (c) 2018-2020, Andreas Kling <andreas@ladybird.org>
|
2020-01-18 09:38:21 +01:00
|
|
|
*
|
2021-04-22 01:24:48 -07:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2020-01-18 09:38:21 +01:00
|
|
|
*/
|
|
|
|
|
2020-11-15 13:11:21 +01:00
|
|
|
#include <AK/ByteBuffer.h>
|
2023-12-16 17:49:34 +03:30
|
|
|
#include <AK/ByteString.h>
|
2025-04-06 09:45:05 -04:00
|
|
|
#include <AK/Enumerate.h>
|
2025-04-01 16:49:30 +02:00
|
|
|
#include <AK/FlyString.h>
|
2020-09-22 13:27:40 +02:00
|
|
|
#include <AK/Format.h>
|
2022-02-25 16:46:32 +01:00
|
|
|
#include <AK/Function.h>
|
2019-09-06 15:34:26 +02:00
|
|
|
#include <AK/StdLibExtras.h>
|
2020-03-23 13:45:10 +01:00
|
|
|
#include <AK/StringView.h>
|
2020-02-14 21:41:10 +01:00
|
|
|
#include <AK/Vector.h>
|
2018-10-10 11:53:07 +02:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Equality against another ByteString: true when both share the same impl,
// otherwise decided by comparing the two views.
bool ByteString::operator==(ByteString const& other) const
{
    // Same backing impl: no need to look at the characters at all.
    if (m_impl == other.impl())
        return true;

    return view() == other.view();
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Equality against a StringView. A null view only compares equal to an empty string.
bool ByteString::operator==(StringView other) const
{
    return other.is_null() ? is_empty() : (view() == other);
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Ordering is delegated to the less-than comparison of the two views.
bool ByteString::operator<(ByteString const& other) const
{
    auto lhs = view();
    auto rhs = other.view();
    return lhs < rhs;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Ordering is delegated to the greater-than comparison of the two views.
bool ByteString::operator>(ByteString const& other) const
{
    auto lhs = view();
    auto rhs = other.view();
    return lhs > rhs;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
bool ByteString::copy_characters_to_buffer(char* buffer, size_t buffer_size) const
|
2020-08-25 17:23:18 +03:00
|
|
|
{
|
|
|
|
// We must fit at least the NUL-terminator.
|
2021-02-23 20:42:32 +01:00
|
|
|
VERIFY(buffer_size > 0);
|
2020-08-25 17:23:18 +03:00
|
|
|
|
|
|
|
size_t characters_to_copy = min(length(), buffer_size - 1);
|
|
|
|
__builtin_memcpy(buffer, characters(), characters_to_copy);
|
|
|
|
buffer[characters_to_copy] = 0;
|
|
|
|
|
|
|
|
return characters_to_copy == length();
|
|
|
|
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns a new ByteString holding `length` characters beginning at `start`.
// A zero `length` yields the empty string; otherwise the requested range must
// lie within this string (enforced with VERIFY).
ByteString ByteString::substring(size_t start, size_t length) const
{
    if (length == 0)
        return ByteString::empty();

    // Guard the addition first so that the range check below cannot wrap around.
    VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
    size_t const range_end = start + length;
    VERIFY(range_end <= m_impl->length());

    return { characters() + start, length };
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns a new ByteString holding everything from `start` to the end.
// `start` may equal the length (giving zero characters) but must not exceed it.
ByteString ByteString::substring(size_t start) const
{
    size_t const total_length = length();
    VERIFY(start <= total_length);

    size_t const remaining = total_length - start;
    return { characters() + start, remaining };
}
|
|
|
|
|
2024-04-03 22:01:43 -04:00
|
|
|
// Returns a view of `length` characters beginning at `start`.
// The requested range must lie within this string (enforced with VERIFY).
StringView ByteString::substring_view(size_t start, size_t length) const&
{
    // Guard the addition first so that the range check below cannot wrap around.
    VERIFY(!Checked<size_t>::addition_would_overflow(start, length));
    size_t const range_end = start + length;
    VERIFY(range_end <= m_impl->length());

    return { characters() + start, length };
}
|
|
|
|
|
2024-04-03 22:01:43 -04:00
|
|
|
// Returns a view of everything from `start` to the end.
// `start` may equal the length (giving zero characters) but must not exceed it.
StringView ByteString::substring_view(size_t start) const&
{
    size_t const total_length = length();
    VERIFY(start <= total_length);

    size_t const remaining = total_length - start;
    return { characters() + start, remaining };
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Splits on `separator` by forwarding to split_limit() with a limit of 0.
Vector<ByteString> ByteString::split(char separator, SplitBehavior split_behavior) const
{
    constexpr size_t limit = 0;
    return split_limit(separator, limit, split_behavior);
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Splits this string on `separator` into owned ByteStrings.
//
// Scanning stops once the number of collected parts plus one equals `limit`;
// whatever remains after the last consumed separator is then appended as the
// final part. An empty string yields an empty vector.
//
// SplitBehavior::KeepEmpty keeps zero-length parts, and
// SplitBehavior::KeepTrailingSeparator includes the separator at the end of
// each part that was terminated by one.
Vector<ByteString> ByteString::split_limit(char separator, size_t limit, SplitBehavior split_behavior) const
{
    if (is_empty())
        return {};

    bool const keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty);
    bool const keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator);

    Vector<ByteString> parts;
    size_t part_start = 0;

    for (size_t index = 0; index < length(); ++index) {
        // Only one slot left: leave the rest of the string for the tail part below.
        if (parts.size() + 1 == limit)
            break;

        if (characters()[index] != separator)
            continue;

        size_t const part_length = index - part_start;
        if (part_length != 0 || keep_empty) {
            size_t const length_to_take = keep_separator ? part_length + 1 : part_length;
            parts.append(substring(part_start, length_to_take));
        }
        part_start = index + 1;
    }

    size_t const tail_length = length() - part_start;
    if (tail_length != 0 || keep_empty)
        parts.append(substring(part_start, tail_length));

    return parts;
}
|
|
|
|
|
2024-04-03 22:01:43 -04:00
|
|
|
// Splits this string into views, treating every character for which
// `separator` returns true as a split point. An empty string yields an empty
// vector.
//
// SplitBehavior::KeepEmpty keeps zero-length parts, and
// SplitBehavior::KeepTrailingSeparator includes the separator at the end of
// each part that was terminated by one.
Vector<StringView> ByteString::split_view(Function<bool(char)> separator, SplitBehavior split_behavior) const&
{
    if (is_empty())
        return {};

    bool const keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty);
    bool const keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator);

    Vector<StringView> parts;
    size_t part_start = 0;

    for (size_t index = 0; index < length(); ++index) {
        if (!separator(characters()[index]))
            continue;

        size_t const part_length = index - part_start;
        if (part_length != 0 || keep_empty) {
            size_t const length_to_take = keep_separator ? part_length + 1 : part_length;
            parts.append(substring_view(part_start, length_to_take));
        }
        part_start = index + 1;
    }

    // Whatever follows the last separator forms the final part.
    size_t const tail_length = length() - part_start;
    if (tail_length != 0 || keep_empty)
        parts.append(substring_view(part_start, tail_length));

    return parts;
}
|
|
|
|
|
2024-04-03 22:01:43 -04:00
|
|
|
// Single-character convenience overload: wraps `separator` in a predicate and
// forwards to the predicate-based split_view().
Vector<StringView> ByteString::split_view(char const separator, SplitBehavior split_behavior) const&
{
    auto matches_separator = [separator](char candidate) {
        return candidate == separator;
    };
    return split_view(move(matches_separator), split_behavior);
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns a ByteBuffer produced by ByteBuffer::copy() from this string's bytes.
ByteBuffer ByteString::to_byte_buffer() const
{
    // FIXME: Handle OOM failure.
    auto buffer_or_error = ByteBuffer::copy(bytes());
    return buffer_or_error.release_value_but_fixme_should_propagate_errors();
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::starts_with() with this string as the first argument.
bool ByteString::starts_with(StringView str, CaseSensitivity case_sensitivity) const
{
    bool const has_prefix = StringUtils::starts_with(*this, str, case_sensitivity);
    return has_prefix;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
bool ByteString::starts_with(char ch) const
|
2020-02-15 13:04:00 +13:00
|
|
|
{
|
|
|
|
if (is_empty())
|
|
|
|
return false;
|
|
|
|
return characters()[0] == ch;
|
|
|
|
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::ends_with() with this string as the first argument.
bool ByteString::ends_with(StringView str, CaseSensitivity case_sensitivity) const
{
    bool const has_suffix = StringUtils::ends_with(*this, str, case_sensitivity);
    return has_suffix;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
bool ByteString::ends_with(char ch) const
|
2020-02-15 13:04:00 +13:00
|
|
|
{
|
|
|
|
if (is_empty())
|
|
|
|
return false;
|
|
|
|
return characters()[length() - 1] == ch;
|
|
|
|
}
|
2021-05-23 16:14:48 -07:00
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Builds a string of `count` copies of `ch`. A zero `count` yields the empty string.
ByteString ByteString::repeated(char ch, size_t count)
{
    if (count == 0)
        return empty();

    // create_uninitialized() hands back the writable storage through `storage`.
    char* storage;
    auto impl = ByteStringImpl::create_uninitialized(count, storage);
    memset(storage, ch, count);

    return impl;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Builds a string consisting of `string` repeated `count` times back to back.
// Returns the empty string when `count` is 0 or `string` is empty.
// The total length `count * string.length()` must fit in a size_t (enforced with VERIFY).
ByteString ByteString::repeated(StringView string, size_t count)
{
    if (!count || string.is_empty())
        return empty();

    size_t const piece_length = string.length();

    // A wrapped-around product would request a buffer that is too small for the
    // copy loop below, so refuse to continue rather than write out of bounds.
    VERIFY(!Checked<size_t>::multiplication_would_overflow(count, piece_length));

    char* buffer = nullptr;
    auto impl = ByteStringImpl::create_uninitialized(count * piece_length, buffer);

    auto const* piece = string.characters_without_null_termination();
    for (size_t i = 0; i < count; i++)
        __builtin_memcpy(buffer + i * piece_length, piece, piece_length);

    return impl;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::matches(), handing it `mask_spans` by pointer as the
// final argument.
bool ByteString::matches(StringView mask, Vector<MaskSpan>& mask_spans, CaseSensitivity case_sensitivity) const
{
    auto* spans = &mask_spans;
    return StringUtils::matches(*this, mask, case_sensitivity, spans);
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::matches() with this string as the first argument.
bool ByteString::matches(StringView mask, CaseSensitivity case_sensitivity) const
{
    bool const is_match = StringUtils::matches(*this, mask, case_sensitivity);
    return is_match;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::contains() with this string as the first argument.
bool ByteString::contains(StringView needle, CaseSensitivity case_sensitivity) const
{
    bool const found = StringUtils::contains(*this, needle, case_sensitivity);
    return found;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
bool ByteString::contains(char needle, CaseSensitivity case_sensitivity) const
|
2021-08-11 20:49:32 +02:00
|
|
|
{
|
|
|
|
return StringUtils::contains(*this, StringView(&needle, 1), case_sensitivity);
|
|
|
|
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::equals_ignoring_ascii_case() with this string's view.
bool ByteString::equals_ignoring_ascii_case(StringView other) const
{
    auto self = view();
    return StringUtils::equals_ignoring_ascii_case(self, other);
}
|
2019-10-28 18:47:48 +01:00
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns a new string with this string's characters in reverse order.
ByteString ByteString::reverse() const
{
    size_t const size = length();
    StringBuilder builder(size);

    // Walk from the last character down to the first.
    for (size_t offset = 1; offset <= size; ++offset)
        builder.append(characters()[size - offset]);

    return builder.to_byte_string();
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns a copy of `html` in which the characters '<', '>', '&' and '"' are
// replaced by their named HTML entities. All other characters are copied as-is.
ByteString escape_html_entities(StringView html)
{
    StringBuilder builder;
    for (auto ch : html) {
        switch (ch) {
        case '<':
            builder.append("&lt;"sv);
            break;
        case '>':
            builder.append("&gt;"sv);
            break;
        case '&':
            builder.append("&amp;"sv);
            break;
        case '"':
            builder.append("&quot;"sv);
            break;
        default:
            builder.append(ch);
            break;
        }
    }
    return builder.to_byte_string();
}
|
2019-12-30 14:52:04 +01:00
|
|
|
|
2025-04-01 16:49:30 +02:00
|
|
|
// Constructs a ByteString whose impl is created from the bytes of `string`.
ByteString::ByteString(FlyString const& string)
    : m_impl(ByteStringImpl::create(string.bytes()))
{
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns this string with every character passed through to_ascii_lowercase().
// When no byte is an ASCII uppercase letter, the string itself is returned.
ByteString ByteString::to_lowercase() const
{
    bool const has_uppercase = any_of(bytes(), is_ascii_upper_alpha);
    if (!has_uppercase)
        return *this;

    auto source = view();
    char* output = nullptr;
    auto impl = ByteStringImpl::create_uninitialized(length(), output);

    for (size_t index = 0; index < source.length(); ++index)
        output[index] = static_cast<char>(to_ascii_lowercase(source[index]));

    return impl;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Returns this string with every character passed through to_ascii_uppercase().
// When no byte is an ASCII lowercase letter, the string itself is returned.
ByteString ByteString::to_uppercase() const
{
    bool const has_lowercase = any_of(bytes(), is_ascii_lower_alpha);
    if (!has_lowercase)
        return *this;

    auto source = view();
    char* output = nullptr;
    auto impl = ByteStringImpl::create_uninitialized(length(), output);

    for (size_t index = 0; index < source.length(); ++index)
        output[index] = static_cast<char>(to_ascii_uppercase(source[index]));

    return impl;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::to_snakecase() with this string as the argument.
ByteString ByteString::to_snakecase() const
{
    auto converted = StringUtils::to_snakecase(*this);
    return converted;
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
bool ByteString::operator==(char const* cstring) const
|
2020-03-23 13:45:10 +01:00
|
|
|
{
|
2023-10-10 15:00:58 +03:30
|
|
|
if (!cstring)
|
|
|
|
return is_empty();
|
|
|
|
|
2022-01-29 16:00:27 +01:00
|
|
|
return view() == cstring;
|
2020-03-23 13:45:10 +01:00
|
|
|
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Runs vformat() with `fmtstr` and `params` into a StringBuilder and returns
// the built string. A formatting error is not propagated; it trips MUST().
ByteString ByteString::vformatted(StringView fmtstr, TypeErasedFormatParams& params)
{
    StringBuilder output;
    MUST(vformat(output, fmtstr, params));

    return output.to_byte_string();
}
|
|
|
|
|
2023-12-16 17:49:34 +03:30
|
|
|
// Forwards to StringUtils::find_all() with this string as the first argument.
Vector<size_t> ByteString::find_all(StringView needle) const
{
    auto positions = StringUtils::find_all(*this, needle);
    return positions;
}
|
|
|
|
|
2020-02-13 08:46:00 +01:00
|
|
|
}
|