ladybird/Libraries/LibJS/Runtime/JSONObject.cpp
Andreas Kling 5e0ee26e8b LibJS: Use simdjson for JSON.parse
Replace the custom AK JSON parser with simdjson for parsing JSON in
LibJS. This eliminates the intermediate AK::JsonValue object graph,
going directly from JSON text to JS::Value.

simdjson's on-demand API parses at ~4GB/s and only materializes values
as they are accessed, making this both faster and more memory efficient
than the previous approach.

The AK JSON parser is still used elsewhere (WebDriver protocol, config
files, etc.) but LibJS now uses simdjson exclusively for JSON.parse()
and JSON.rawJSON().
2026-01-12 13:53:28 -05:00

904 lines
35 KiB
C++

/*
* Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Function.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
#include <AK/StringConversions.h>
#include <AK/TypeCasts.h>
#include <AK/Utf16View.h>
#include <AK/Utf8View.h>
#include <LibJS/Runtime/AbstractOperations.h>
#include <LibJS/Runtime/Array.h>
#include <LibJS/Runtime/BigIntObject.h>
#include <LibJS/Runtime/BooleanObject.h>
#include <LibJS/Runtime/Error.h>
#include <LibJS/Runtime/FunctionObject.h>
#include <LibJS/Runtime/GlobalObject.h>
#include <LibJS/Runtime/JSONObject.h>
#include <LibJS/Runtime/NumberObject.h>
#include <LibJS/Runtime/Object.h>
#include <LibJS/Runtime/RawJSONObject.h>
#include <LibJS/Runtime/StringObject.h>
#include <LibJS/Runtime/ValueInlines.h>
#include <simdjson.h>
namespace JS {
// NOTE(review): GC_DEFINE_ALLOCATOR is a project macro; presumably it defines the GC cell allocator for
// JSONObject - confirm against the macro's definition.
GC_DEFINE_ALLOCATOR(JSONObject);
// Constructs the JSON namespace object, using the realm's intrinsic object_prototype() as its prototype.
// Properties are installed later by initialize().
JSONObject::JSONObject(Realm& realm)
: Object(ConstructWithPrototypeTag::Tag, realm.intrinsics().object_prototype())
{
}
// Installs the JSON namespace's own properties: the four native functions and the @@toStringTag string.
void JSONObject::initialize(Realm& realm)
{
    auto& vm = this->vm();
    Base::initialize(realm);

    // The functions are writable and configurable; the Enumerable bit is deliberately left unset.
    u8 const function_attributes = Attribute::Writable | Attribute::Configurable;
    define_native_function(realm, vm.names.stringify, stringify, 3, function_attributes);
    define_native_function(realm, vm.names.parse, parse, 2, function_attributes);
    define_native_function(realm, vm.names.rawJSON, raw_json, 1, function_attributes);
    define_native_function(realm, vm.names.isRawJSON, is_raw_json, 1, function_attributes);

    // 25.5.3 JSON [ @@toStringTag ], https://tc39.es/ecma262/#sec-json-@@tostringtag
    auto to_string_tag = PrimitiveString::create(vm, "JSON"_string);
    define_direct_property(vm.well_known_symbol_to_string_tag(), to_string_tag, Attribute::Configurable);
}
// 25.5.2 JSON.stringify ( value [ , replacer [ , space ] ] ), https://tc39.es/ecma262/#sec-json.stringify
// Shared implementation behind JSON.stringify.
// - replacer: a callable becomes state.replacer_function; an array becomes the de-duplicated
//   state.property_list (strings, numbers, and String/Number wrapper objects only); anything else is ignored.
// - space: a Number (or Number object) yields up to 10 spaces of indentation, a String (or String object)
//   yields at most its first 10 bytes, anything else yields no indentation.
// Returns the serialized text, or an empty Optional when serialize_json_property() produces no value.
// Propagates any exception thrown while inspecting the arguments or while serializing.
ThrowCompletionOr<Optional<String>> JSONObject::stringify_impl(VM& vm, Value value, Value replacer, Value space)
{
    auto& realm = *vm.current_realm();
    StringifyState state;

    if (replacer.is_object()) {
        if (replacer.as_object().is_function()) {
            state.replacer_function = &replacer.as_function();
        } else {
            auto is_array = TRY(replacer.is_array(vm));
            if (is_array) {
                auto& replacer_object = replacer.as_object();
                auto replacer_length = TRY(length_of_array_like(vm, replacer_object));
                Vector<Utf16String> list;
                for (size_t i = 0; i < replacer_length; ++i) {
                    auto replacer_value = TRY(replacer_object.get(i));
                    Optional<Utf16String> item;
                    if (replacer_value.is_string()) {
                        item = replacer_value.as_string().utf16_string();
                    } else if (replacer_value.is_number()) {
                        item = MUST(replacer_value.to_utf16_string(vm));
                    } else if (replacer_value.is_object()) {
                        auto& value_object = replacer_value.as_object();
                        if (is<StringObject>(value_object) || is<NumberObject>(value_object))
                            item = TRY(replacer_value.to_utf16_string(vm));
                    }
                    // Each property name is listed at most once, in first-seen order.
                    if (item.has_value() && !list.contains_slow(*item)) {
                        list.append(*item);
                    }
                }
                state.property_list = move(list);
            }
        }
    }

    // Unwrap Number/String wrapper objects so the checks below only deal with primitives.
    if (space.is_object()) {
        auto& space_object = space.as_object();
        if (is<NumberObject>(space_object))
            space = TRY(space.to_number(vm));
        else if (is<StringObject>(space_object))
            space = TRY(space.to_primitive_string(vm));
    }

    if (space.is_number()) {
        auto space_mv = MUST(space.to_integer_or_infinity(vm));
        // Clamp with both operands as double: to_integer_or_infinity() can yield +/-Infinity or values far
        // outside the int range, and those must never be narrowed to an integer type before the comparison.
        space_mv = min(10.0, space_mv);
        // After clamping, space_mv is at most 10, so the cast is only performed on values in [1, 10].
        state.gap = space_mv < 1 ? String {} : MUST(String::repeated(' ', static_cast<size_t>(space_mv)));
    } else if (space.is_string()) {
        auto string = space.as_string().utf8_string();
        // NOTE(review): the limit is applied to UTF-8 bytes, not UTF-16 code units, and the cut at byte 10
        // may land inside a multi-byte sequence - confirm how substring_from_byte_offset() handles that.
        if (string.bytes().size() <= 10)
            state.gap = string;
        else
            state.gap = MUST(string.substring_from_byte_offset(0, 10));
    } else {
        state.gap = String {};
    }

    // Serialization always starts from a holder object that stores `value` under the empty key.
    auto wrapper = Object::create(realm, realm.intrinsics().object_prototype());
    MUST(wrapper->create_data_property_or_throw(Utf16String {}, value));
    return serialize_json_property(vm, state, Utf16String {}, wrapper);
}
// 25.5.2 JSON.stringify ( value [ , replacer [ , space ] ] ), https://tc39.es/ecma262/#sec-json.stringify
// Native entry point: forwards the first three arguments to stringify_impl() and wraps the resulting text
// in a PrimitiveString. Yields undefined when called without arguments or when nothing was serialized.
JS_DEFINE_NATIVE_FUNCTION(JSONObject::stringify)
{
    if (vm.argument_count() == 0)
        return js_undefined();

    auto serialized = TRY(stringify_impl(vm, vm.argument(0), vm.argument(1), vm.argument(2)));
    if (serialized.has_value())
        return PrimitiveString::create(vm, serialized.release_value());

    return js_undefined();
}
// 25.5.2.1 SerializeJSONProperty ( state, key, holder ), https://tc39.es/ecma262/#sec-serializejsonproperty
// 1.4.1 SerializeJSONProperty ( state, key, holder ), https://tc39.es/proposal-json-parse-with-source/#sec-serializejsonproperty
// Serializes holder[key]. Returns an empty Optional when the value has no JSON representation
// (step 12), and throws a TypeError for BigInt values.
ThrowCompletionOr<Optional<String>> JSONObject::serialize_json_property(VM& vm, StringifyState& state, PropertyKey const& key, Object* holder)
{
    // 1. Let value be ? Get(holder, key).
    auto value = TRY(holder->get(key));

    // 2. If Type(value) is Object or BigInt, then
    if (value.is_object() || value.is_bigint()) {
        // a. Let toJSON be ? GetV(value, "toJSON").
        auto to_json = TRY(value.get(vm, vm.names.toJSON));

        // b. If IsCallable(toJSON) is true, then
        //     i. Set value to ? Call(toJSON, value, « key »).
        if (to_json.is_function())
            value = TRY(call(vm, to_json.as_function(), value, PrimitiveString::create(vm, key.to_string())));
    }

    // 3. If state.[[ReplacerFunction]] is not undefined, then
    //     a. Set value to ? Call(state.[[ReplacerFunction]], holder, « key, value »).
    if (state.replacer_function)
        value = TRY(call(vm, *state.replacer_function, holder, PrimitiveString::create(vm, key.to_string()), value));

    // 4. If Type(value) is Object, then
    if (value.is_object()) {
        auto& object = value.as_object();

        // a. If value has an [[IsRawJSON]] internal slot, then
        //     i. Return ! Get(value, "rawJSON").
        if (is<RawJSONObject>(object))
            return MUST(object.get(vm.names.rawJSON)).as_string().utf8_string();

        if (is<NumberObject>(object)) {
            // b. If value has a [[NumberData]] internal slot, then
            //     i. Set value to ? ToNumber(value).
            value = TRY(value.to_number(vm));
        } else if (is<StringObject>(object)) {
            // c. Else if value has a [[StringData]] internal slot, then
            //     i. Set value to ? ToString(value).
            value = TRY(value.to_primitive_string(vm));
        } else if (is<BooleanObject>(object)) {
            // d. Else if value has a [[BooleanData]] internal slot, then
            //     i. Set value to value.[[BooleanData]].
            value = Value(static_cast<BooleanObject&>(object).boolean());
        } else if (is<BigIntObject>(object)) {
            // e. Else if value has a [[BigIntData]] internal slot, then
            //     i. Set value to value.[[BigIntData]].
            value = Value(&static_cast<BigIntObject&>(object).bigint());
        }
    }

    // 5. If value is null, return "null".
    if (value.is_null())
        return "null"_string;

    // 6. If value is true, return "true".
    // 7. If value is false, return "false".
    if (value.is_boolean()) {
        if (value.as_bool())
            return "true"_string;
        return "false"_string;
    }

    // 8. If Type(value) is String, return QuoteJSONString(value).
    if (value.is_string())
        return quote_json_string(value.as_string().utf16_string_view());

    // 9. If Type(value) is Number, then
    if (value.is_number()) {
        // b. Return "null".
        if (!value.is_finite_number())
            return "null"_string;

        // a. If value is finite, return ! ToString(value).
        return MUST(value.to_string(vm));
    }

    // 10. If Type(value) is BigInt, throw a TypeError exception.
    if (value.is_bigint())
        return vm.throw_completion<TypeError>(ErrorType::JsonBigInt);

    // 12. Return undefined.
    if (!value.is_object() || value.is_function())
        return Optional<String> {};

    // 11. If Type(value) is Object and IsCallable(value) is false, then
    //     a. Let isArray be ? IsArray(value).
    auto is_array = TRY(value.is_array(vm));

    //     b. If isArray is true, return ? SerializeJSONArray(state, value).
    if (is_array)
        return TRY(serialize_json_array(vm, state, value.as_object()));

    //     c. Return ? SerializeJSONObject(state, value).
    return TRY(serialize_json_object(vm, state, value.as_object()));
}
// 25.5.2.4 SerializeJSONObject ( state, value ), https://tc39.es/ecma262/#sec-serializejsonobject
// Serializes `object` as a JSON object literal, honouring state.property_list (if present) and state.gap.
// Throws a TypeError (JsonCircular) if `object` is already being serialized further up the call chain.
// state.indent is widened by state.gap for the duration of the call and restored before returning normally.
ThrowCompletionOr<String> JSONObject::serialize_json_object(VM& vm, StringifyState& state, Object& object)
{
    if (state.seen_objects.contains(&object))
        return vm.throw_completion<TypeError>(ErrorType::JsonCircular);

    state.seen_objects.set(&object);
    String previous_indent = state.indent;
    state.indent = MUST(String::formatted("{}{}", state.indent, state.gap));
    Vector<String> property_strings;

    // Serializes one member as `"key":value` (with a space after the colon when indenting).
    // Symbol keys and members without a JSON representation are skipped.
    auto process_property = [&](PropertyKey const& key) -> ThrowCompletionOr<void> {
        if (key.is_symbol())
            return {};
        auto serialized_property_string = TRY(serialize_json_property(vm, state, key, &object));
        if (serialized_property_string.has_value()) {
            property_strings.append(MUST(String::formatted(
                "{}:{}{}",
                quote_json_string(key.to_string()),
                state.gap.is_empty() ? "" : " ",
                serialized_property_string)));
        }
        return {};
    };

    if (state.property_list.has_value()) {
        // Iterate the list in place: nothing in this function modifies state.property_list,
        // so there is no need to copy the whole vector for every serialized object.
        auto& property_list = state.property_list.value();
        for (auto& property : property_list)
            TRY(process_property(property));
    } else {
        auto property_list = TRY(object.enumerable_own_property_names(PropertyKind::Key));
        for (auto& property : property_list)
            TRY(process_property(property.as_string().utf16_string()));
    }

    StringBuilder builder;
    if (property_strings.is_empty()) {
        builder.append("{}"sv);
    } else {
        bool first = true;
        builder.append('{');
        if (state.gap.is_empty()) {
            // Compact form: {"a":1,"b":2}
            for (auto& property_string : property_strings) {
                if (!first)
                    builder.append(',');
                first = false;
                builder.append(property_string);
            }
        } else {
            // Indented form: one member per line at the new indent, closing brace at the previous indent.
            builder.append('\n');
            builder.append(state.indent);
            auto separator = MUST(String::formatted(",\n{}", state.indent));
            for (auto& property_string : property_strings) {
                if (!first)
                    builder.append(separator);
                first = false;
                builder.append(property_string);
            }
            builder.append('\n');
            builder.append(previous_indent);
        }
        builder.append('}');
    }

    state.seen_objects.remove(&object);
    state.indent = previous_indent;
    return builder.to_string_without_validation();
}
// 25.5.2.5 SerializeJSONArray ( state, value ), https://tc39.es/ecma262/#sec-serializejsonarray
// Serializes `object` as a JSON array literal. Elements without a JSON representation become "null".
// Throws a TypeError (JsonCircular) if `object` is already being serialized further up the call chain.
ThrowCompletionOr<String> JSONObject::serialize_json_array(VM& vm, StringifyState& state, Object& object)
{
    if (state.seen_objects.contains(&object))
        return vm.throw_completion<TypeError>(ErrorType::JsonCircular);
    state.seen_objects.set(&object);

    // Nested values are rendered one gap deeper; the outer indent is kept for the closing bracket.
    String outer_indent = state.indent;
    state.indent = MUST(String::formatted("{}{}", state.indent, state.gap));

    Vector<String> elements;
    auto length = TRY(length_of_array_like(vm, object));

    // Optimization
    elements.ensure_capacity(length);

    for (size_t index = 0; index < length; ++index) {
        auto serialized = TRY(serialize_json_property(vm, state, index, &object));
        if (serialized.has_value())
            elements.append(serialized.release_value());
        else
            elements.append("null"_string);
    }

    StringBuilder builder;
    if (elements.is_empty()) {
        builder.append("[]"sv);
    } else if (state.gap.is_empty()) {
        // Compact form: [1,2,3]
        builder.append('[');
        for (size_t index = 0; index < elements.size(); ++index) {
            if (index != 0)
                builder.append(',');
            builder.append(elements[index]);
        }
        builder.append(']');
    } else {
        // Indented form: one element per line at the new indent, closing bracket at the outer indent.
        auto separator = MUST(String::formatted(",\n{}", state.indent));
        builder.append("[\n"sv);
        builder.append(state.indent);
        for (size_t index = 0; index < elements.size(); ++index) {
            if (index != 0)
                builder.append(separator);
            builder.append(elements[index]);
        }
        builder.append('\n');
        builder.append(outer_indent);
        builder.append(']');
    }

    state.seen_objects.remove(&object);
    state.indent = outer_indent;
    return builder.to_string_without_validation();
}
// 25.5.2.2 QuoteJSONString ( value ), https://tc39.es/ecma262/#sec-quotejsonstring
// Wraps `string` in double quotes, escaping the Table 70 characters, other control characters below
// U+0020, and surrogate code points.
String JSONObject::quote_json_string(Utf16View const& string)
{
    // Two-character escape for the code points listed in Table 70; an empty view means "not listed".
    auto table_escape_for = [](u32 code_point) -> StringView {
        switch (code_point) {
        case '\b':
            return "\\b"sv;
        case '\t':
            return "\\t"sv;
        case '\n':
            return "\\n"sv;
        case '\f':
            return "\\f"sv;
        case '\r':
            return "\\r"sv;
        case '"':
            return "\\\""sv;
        case '\\':
            return "\\\\"sv;
        default:
            return {};
        }
    };

    // 1. Let product be the String value consisting solely of the code unit 0x0022 (QUOTATION MARK).
    StringBuilder product;
    product.append('"');

    // 2. For each code point C of StringToCodePoints(value), do
    for (auto code_point : string) {
        // a. If C is listed in the “Code Point” column of Table 70, then
        //     i. Set product to the string-concatenation of product and the escape sequence for C as
        //        specified in the “Escape Sequence” column of the corresponding row.
        auto table_escape = table_escape_for(code_point);
        if (!table_escape.is_empty()) {
            product.append(table_escape);
            continue;
        }

        // b. Else if C has a numeric value less than 0x0020 (SPACE), or if C has the same numeric value as
        //    a leading surrogate or trailing surrogate, then
        //     i. Let unit be the code unit whose numeric value is that of C.
        //     ii. Set product to the string-concatenation of product and UnicodeEscape(unit).
        if (code_point < 0x20 || is_unicode_surrogate(code_point)) {
            product.appendff("\\u{:04x}", code_point);
            continue;
        }

        // c. Else,
        //     i. Set product to the string-concatenation of product and UTF16EncodeCodePoint(C).
        product.append_code_point(code_point);
    }

    // 3. Set product to the string-concatenation of product and the code unit 0x0022 (QUOTATION MARK).
    product.append('"');

    // 4. Return product.
    return product.to_string_without_validation();
}
// 25.5.1 JSON.parse ( text [ , reviver ] ), https://tc39.es/ecma262/#sec-json.parse
// Parses `text` as JSON. When `reviver` is callable, the parsed value is passed through it
// bottom-up by internalize_json_property().
JS_DEFINE_NATIVE_FUNCTION(JSONObject::parse)
{
    auto& realm = *vm.current_realm();
    auto text = vm.argument(0);
    auto reviver = vm.argument(1);

    // 1. Let jsonString be ? ToString(text).
    auto json_string = TRY(text.to_string(vm));

    // 2. Let unfiltered be ? ParseJSON(jsonString).
    auto unfiltered = TRY(parse_json(vm, json_string));

    // 4. Else,
    //     a. Return unfiltered.
    if (!reviver.is_function())
        return unfiltered;

    // 3. If IsCallable(reviver) is true, then
    //     a. Let root be OrdinaryObjectCreate(%Object.prototype%).
    auto root = Object::create(realm, realm.intrinsics().object_prototype());

    //     b. Let rootName be the empty String.
    Utf16String root_name;

    //     c. Perform ! CreateDataPropertyOrThrow(root, rootName, unfiltered).
    MUST(root->create_data_property_or_throw(root_name, unfiltered));

    //     d. Return ? InternalizeJSONProperty(root, rootName, reviver).
    return internalize_json_property(vm, root, root_name, reviver.as_function());
}
// Decodes the contents of a JSON string literal (the text between the quotes) into UTF-16.
// Each \uXXXX escape is emitted as a single code unit, so lone surrogates survive. This is done by hand
// because simdjson validates UTF-8 strictly and rejects lone surrogates, although JSON allows them.
// Returns {} on a malformed escape sequence, a truncated multi-byte sequence, or an invalid lead byte.
static Optional<Utf16String> unescape_json_string(StringView raw)
{
    StringBuilder builder(StringBuilder::Mode::UTF16, raw.length());
    GenericLexer lexer { raw };

    // Numeric value of one hexadecimal digit, or {} if `ch` is not a hex digit.
    auto hex_digit_value = [](char ch) -> Optional<u8> {
        if (ch >= '0' && ch <= '9')
            return static_cast<u8>(ch - '0');
        if (ch >= 'a' && ch <= 'f')
            return static_cast<u8>(ch - 'a' + 10);
        if (ch >= 'A' && ch <= 'F')
            return static_cast<u8>(ch - 'A' + 10);
        return {};
    };

    // Reads the four hex digits that follow "\u" and combines them into one UTF-16 code unit.
    auto consume_hex4 = [&]() -> Optional<u16> {
        if (lexer.tell_remaining() < 4)
            return {};
        u16 code_unit = 0;
        for (int digit_index = 0; digit_index < 4; ++digit_index) {
            auto digit = hex_digit_value(lexer.consume());
            if (!digit.has_value())
                return {};
            code_unit = static_cast<u16>((code_unit << 4) | *digit);
        }
        return code_unit;
    };

    // Code unit produced by a single-character escape such as \n, or {} for an unknown escape.
    auto simple_escape = [](char escaped) -> Optional<u16> {
        switch (escaped) {
        case '"':
            return static_cast<u16>('"');
        case '\\':
            return static_cast<u16>('\\');
        case '/':
            return static_cast<u16>('/');
        case 'b':
            return static_cast<u16>('\b');
        case 'f':
            return static_cast<u16>('\f');
        case 'n':
            return static_cast<u16>('\n');
        case 'r':
            return static_cast<u16>('\r');
        case 't':
            return static_cast<u16>('\t');
        default:
            return {};
        }
    };

    while (!lexer.is_eof()) {
        if (lexer.consume_specific('\\')) {
            // A backslash must be followed by an escape character.
            if (lexer.is_eof())
                return {};
            auto escaped = lexer.consume();
            auto code_unit = escaped == 'u' ? consume_hex4() : simple_escape(escaped);
            if (!code_unit.has_value())
                return {};
            builder.append_code_unit(*code_unit);
            continue;
        }

        // Non-escaped character - copy UTF-8 code point to UTF-16
        auto lead = lexer.consume();
        if ((lead & 0x80) == 0) {
            // ASCII
            builder.append_code_unit(lead);
            continue;
        }

        // The lead byte determines how many continuation bytes follow and which payload bits it carries.
        size_t continuation_count = 0;
        u32 code_point = 0;
        if ((lead & 0xE0) == 0xC0) {
            // 2-byte UTF-8
            continuation_count = 1;
            code_point = lead & 0x1F;
        } else if ((lead & 0xF0) == 0xE0) {
            // 3-byte UTF-8
            continuation_count = 2;
            code_point = lead & 0x0F;
        } else if ((lead & 0xF8) == 0xF0) {
            // 4-byte UTF-8 (needs surrogate pair)
            continuation_count = 3;
            code_point = lead & 0x07;
        } else {
            return {};
        }

        if (lexer.tell_remaining() < continuation_count)
            return {};
        for (size_t i = 0; i < continuation_count; ++i)
            code_point = (code_point << 6) | static_cast<u32>(lexer.consume() & 0x3F);

        // Only 4-byte sequences go through append_code_point(); shorter ones are written as one code unit.
        if (continuation_count == 3)
            builder.append_code_point(code_point);
        else
            builder.append_code_unit(code_point);
    }

    return builder.to_utf16_string();
}
// For a top-level simdjson document, throws a SyntaxError unless the document has been consumed to its end.
// For any other T (nested values) this is a no-op.
template<typename T>
static ALWAYS_INLINE ThrowCompletionOr<void> ensure_simdjson_fully_parsed(VM& vm, T& value)
{
    if constexpr (IsSame<T, simdjson::ondemand::document>) {
        if (value.at_end())
            return {};
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
    } else {
        return {};
    }
}

// Declared up front because the array/object helpers below recurse into it.
static ThrowCompletionOr<Value> parse_simdjson_value(VM&, simdjson::ondemand::value);
// Converts a simdjson number (root document or nested value) into a JS Number.
// `raw_sv` is the raw token text for `value`; it is used to enforce the JSON number grammar and as the
// input of the overflow fallback. Throws a SyntaxError (JsonMalformed) for anything that is not a valid
// JSON number.
template<typename T>
static ThrowCompletionOr<Value> parse_simdjson_number(VM& vm, T& value, StringView raw_sv)
{
    // simdjson's raw_json_token() runs from the start of the token up to the next token (or EOF), so the
    // view can carry trailing JSON whitespace (e.g. "1e309 " in "[1e309 ]"). Strip it here so the
    // exact-length check in the overflow fallback below does not reject valid input.
    raw_sv = raw_sv.trim(" \t\n\r"sv, TrimMode::Right);

    // Validate JSON number format (simdjson is more lenient than spec)
    // - No leading zeros (except "0" or "0.xxx")
    // - No trailing decimal point (e.g., "1." is invalid)
    size_t i = 0;
    if (i < raw_sv.length() && raw_sv[i] == '-')
        ++i;
    if (i < raw_sv.length() && raw_sv[i] == '0' && i + 1 < raw_sv.length() && is_ascii_digit(raw_sv[i + 1]))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed); // Leading zero
    while (i < raw_sv.length() && is_ascii_digit(raw_sv[i]))
        ++i;
    if (i < raw_sv.length() && raw_sv[i] == '.') {
        ++i;
        if (i >= raw_sv.length() || !is_ascii_digit(raw_sv[i]))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed); // Trailing decimal
    }

    double double_value;
    auto error = value.get_double().get(double_value);
    if (!error) {
        // For a root document, nothing may follow the number.
        TRY(ensure_simdjson_fully_parsed(vm, value));
        return Value(double_value);
    }

    // Handle overflow to infinity (e.g., 1e309)
    // simdjson returns NUMBER_ERROR for numbers that overflow double
    // Use parse_first_number as fallback - it handles overflow correctly
    if (error == simdjson::NUMBER_ERROR) {
        auto result = parse_first_number<double>(raw_sv, TrimWhitespace::No);
        // Only accept the fallback when it consumed the entire token.
        if (result.has_value() && result->characters_parsed == raw_sv.length())
            return Value(result->value);
    }

    return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
}
// Converts a simdjson string (root document or nested value) into a PrimitiveString.
// Throws a SyntaxError (JsonMalformed) if the value is not a string or contains a malformed escape.
template<typename T>
static ThrowCompletionOr<Value> parse_simdjson_string(VM& vm, T& value)
{
    // Use get_raw_json_string() to get the raw JSON string content (without quotes, with escapes),
    // then unescape ourselves to properly handle lone surrogates like \uD800 which simdjson rejects.
    simdjson::ondemand::raw_json_string raw_string;
    auto error = value.get_raw_json_string().get(raw_string);
    if (error)
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    // Find the length by looking for the closing quote (simdjson validated the structure).
    // A backslash always escapes the following character, so that character is stepped over as well.
    char const* content = raw_string.raw();
    size_t content_length = 0;
    for (; content[content_length] != '"'; ++content_length) {
        if (content[content_length] == '\\')
            ++content_length;
    }

    auto unescaped = unescape_json_string({ content, content_length });
    if (unescaped.has_value())
        return PrimitiveString::create(vm, unescaped.release_value());

    return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
}
// Converts a simdjson array (root document or nested value) into a JS Array, parsing every element
// recursively. Throws a SyntaxError (JsonMalformed) on any simdjson error.
template<typename T>
static ThrowCompletionOr<Value> parse_simdjson_array(VM& vm, T& value)
{
    simdjson::ondemand::array source_array;
    if (value.get_array().get(source_array))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    auto& realm = *vm.current_realm();
    auto result = MUST(Array::create(realm, 0));

    size_t next_index = 0;
    for (auto element_or_error : source_array) {
        simdjson::ondemand::value element;
        if (element_or_error.get(element))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

        auto parsed_element = TRY(parse_simdjson_value(vm, element));
        result->define_direct_property(next_index, parsed_element, default_attributes);
        ++next_index;
    }

    // For a root document, nothing may follow the closing bracket.
    TRY(ensure_simdjson_fully_parsed(vm, value));
    return result;
}
// Converts a simdjson object (root document or nested value) into an ordinary JS object, parsing every
// member value recursively. Throws a SyntaxError (JsonMalformed) on any simdjson error or malformed key.
template<typename T>
static ThrowCompletionOr<Value> parse_simdjson_object(VM& vm, T& value)
{
    simdjson::ondemand::object source_object;
    if (value.get_object().get(source_object))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    auto& realm = *vm.current_realm();
    auto result = Object::create(realm, realm.intrinsics().object_prototype());

    for (auto field : source_object) {
        // Use escaped_key() to get the raw JSON key (with escapes), then unescape ourselves
        std::string_view escaped_key;
        if (field.escaped_key().get(escaped_key))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

        auto key = unescape_json_string({ escaped_key.data(), escaped_key.size() });
        if (!key.has_value())
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

        simdjson::ondemand::value member;
        if (field.value().get(member))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

        auto parsed_member = TRY(parse_simdjson_value(vm, member));
        result->define_direct_property(key.release_value(), parsed_member, default_attributes);
    }

    // For a root document, nothing may follow the closing brace.
    TRY(ensure_simdjson_fully_parsed(vm, value));
    return result;
}
// Converts a nested simdjson value (an array element or object member) into a JS value by dispatching
// on its JSON type. Throws a SyntaxError (JsonMalformed) if the value is not valid JSON.
static ThrowCompletionOr<Value> parse_simdjson_value(VM& vm, simdjson::ondemand::value value)
{
    simdjson::ondemand::json_type type;
    if (value.type().get(type))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    switch (type) {
    case simdjson::ondemand::json_type::null: {
        // type() does not validate or consume the token; is_null() is what confirms that it really is
        // the literal `null`. Without this check, input such as "[nul]" would be accepted as [null].
        bool is_null = false;
        if (value.is_null().get(is_null) || !is_null)
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return js_null();
    }
    case simdjson::ondemand::json_type::boolean: {
        bool boolean_value;
        if (value.get_bool().get(boolean_value))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return Value(boolean_value);
    }
    case simdjson::ondemand::json_type::number: {
        // The raw token text is needed for grammar validation and the overflow fallback.
        auto raw = value.raw_json_token();
        StringView raw_sv { raw.data(), raw.size() };
        return parse_simdjson_number(vm, value, raw_sv);
    }
    case simdjson::ondemand::json_type::string:
        return parse_simdjson_string(vm, value);
    case simdjson::ondemand::json_type::array:
        return parse_simdjson_array(vm, value);
    case simdjson::ondemand::json_type::object:
        return parse_simdjson_object(vm, value);
    }
    VERIFY_NOT_REACHED();
}
// Converts the root of a simdjson document into a JS value by dispatching on its JSON type.
// Throws a SyntaxError (JsonMalformed) if the root value is invalid; the null and boolean cases also
// reject trailing content here.
static ThrowCompletionOr<Value> parse_simdjson_document(VM& vm, simdjson::ondemand::document& document)
{
    simdjson::ondemand::json_type root_type;
    if (document.type().get(root_type))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    switch (root_type) {
    case simdjson::ondemand::json_type::null: {
        // Validate the literal first, then make sure nothing follows it.
        if (document.is_null().error() || !document.at_end())
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return js_null();
    }
    case simdjson::ondemand::json_type::boolean: {
        bool boolean_value;
        if (document.get_bool().get(boolean_value) || !document.at_end())
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return Value(boolean_value);
    }
    case simdjson::ondemand::json_type::number: {
        // Get raw token first in case get_double fails (e.g., overflow)
        std::string_view raw_token;
        if (document.raw_json_token().get(raw_token))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        StringView raw_token_view { raw_token.data(), raw_token.size() };
        return parse_simdjson_number(vm, document, raw_token_view.trim_whitespace());
    }
    case simdjson::ondemand::json_type::string:
        return parse_simdjson_string(vm, document);
    case simdjson::ondemand::json_type::array:
        return parse_simdjson_array(vm, document);
    case simdjson::ondemand::json_type::object:
        return parse_simdjson_object(vm, document);
    }
    VERIFY_NOT_REACHED();
}
// 25.5.1.1 ParseJSON ( text ), https://tc39.es/ecma262/#sec-ParseJSON
// Parses `text` with simdjson and builds the corresponding JS value directly.
// Throws a SyntaxError (JsonMalformed) if `text` is not valid JSON.
ThrowCompletionOr<Value> JSONObject::parse_json(VM& vm, StringView text)
{
    // 1. If StringToCodePoints(text) is not a valid JSON text as specified in ECMA-404, throw a SyntaxError exception.
    // NB: Per ECMA-404, the BOM is not valid JSON whitespace. simdjson silently skips it, so we must reject it explicitly.
    auto const utf8_byte_order_mark = "\xEF\xBB\xBF"sv;
    if (text.starts_with(utf8_byte_order_mark))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    // simdjson reads from a padded copy of the input.
    simdjson::padded_string padded_text(text.characters_without_null_termination(), text.length());
    simdjson::ondemand::parser parser;
    simdjson::ondemand::document document;
    auto iterate_error = parser.iterate(padded_text).get(document);
    if (iterate_error)
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    // 2. Let scriptString be the string-concatenation of "(", text, and ");".
    // 3. Let script be ParseText(scriptString, Script).
    // 4. NOTE: The early error rules defined in 13.2.5.1 have special handling for the above invocation of ParseText.
    // 5. Assert: script is a Parse Node.
    // 6. Let result be ! Evaluation of script.
    // 7. NOTE: The PropertyDefinitionEvaluation semantics defined in 13.2.5.5 have special handling for the above evaluation.
    // 8. Assert: result is either a String, a Number, a Boolean, an Object that is defined by either an ArrayLiteral or an ObjectLiteral, or null.
    // 9. Return result.
    return parse_simdjson_document(vm, document);
}
// 25.5.1.2 InternalizeJSONProperty ( holder, name, reviver ), https://tc39.es/ecma262/#sec-internalizejsonproperty
// Applies `reviver` to holder[name] depth-first: every element/member of an object value is revived
// (and replaced, or deleted when the reviver returns undefined) before the reviver is called for the
// value itself. Returns the reviver's result for holder[name].
ThrowCompletionOr<Value> JSONObject::internalize_json_property(VM& vm, Object* holder, PropertyKey const& name, FunctionObject& reviver)
{
    auto value = TRY(holder->get(name));

    if (value.is_object()) {
        auto is_array = TRY(value.is_array(vm));
        auto& object = value.as_object();

        // Revives object[key] and stores the outcome back; an undefined outcome removes the property.
        auto revive_child = [&](PropertyKey const& key) -> ThrowCompletionOr<void> {
            auto revived = TRY(internalize_json_property(vm, &object, key, reviver));
            if (!revived.is_undefined()) {
                TRY(object.create_data_property(key, revived));
                return {};
            }
            TRY(object.internal_delete(key));
            return {};
        };

        if (!is_array) {
            auto keys = TRY(object.enumerable_own_property_names(Object::PropertyKind::Key));
            for (auto& key : keys)
                TRY(revive_child(key.as_string().utf16_string()));
        } else {
            auto length = TRY(length_of_array_like(vm, object));
            for (size_t index = 0; index < length; ++index)
                TRY(revive_child(index));
        }
    }

    auto name_string = PrimitiveString::create(vm, name.to_string());
    return TRY(call(vm, reviver, holder, name_string, value));
}
// 1.3 JSON.rawJSON ( text ), https://tc39.es/proposal-json-parse-with-source/#sec-json.rawjson
// Validates that `text` is the JSON text of a single primitive value (no surrounding whitespace, no
// object or array) and returns a frozen, null-prototype RawJSONObject whose "rawJSON" property holds it.
// Throws a SyntaxError for anything else.
JS_DEFINE_NATIVE_FUNCTION(JSONObject::raw_json)
{
    auto& realm = *vm.current_realm();

    // 1. Let jsonString be ? ToString(text).
    auto json_string = TRY(vm.argument(0).to_string(vm));

    // 2. Throw a SyntaxError exception if jsonString is the empty String, or if either the first or last code unit of
    //    jsonString is any of 0x0009 (CHARACTER TABULATION), 0x000A (LINE FEED), 0x000D (CARRIAGE RETURN), or
    //    0x0020 (SPACE).
    auto bytes = json_string.bytes_as_string_view();
    if (bytes.is_empty())
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    static constexpr AK::Array invalid_code_points { 0x09, 0x0A, 0x0D, 0x20 };
    auto first_char = bytes[0];
    auto last_char = bytes[bytes.length() - 1];
    if (invalid_code_points.contains_slow(first_char) || invalid_code_points.contains_slow(last_char))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    // 3. Parse StringToCodePoints(jsonString) as a JSON text as specified in ECMA-404. Throw a SyntaxError exception
    //    if it is not a valid JSON text as defined in that specification, or if its outermost value is an object or
    //    array as defined in that specification.
    // NB: Per ECMA-404, the BOM is not valid JSON whitespace. simdjson silently skips it, so we must reject it
    //     explicitly, exactly as JSON.parse does.
    if (bytes.length() >= 3
        && static_cast<u8>(bytes[0]) == 0xEF
        && static_cast<u8>(bytes[1]) == 0xBB
        && static_cast<u8>(bytes[2]) == 0xBF) {
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
    }

    simdjson::ondemand::parser parser;
    simdjson::padded_string padded(bytes.characters_without_null_termination(), bytes.length());
    simdjson::ondemand::document doc;
    if (parser.iterate(padded).get(doc))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    simdjson::ondemand::json_type type;
    if (doc.type().get(type))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    if (type == simdjson::ondemand::json_type::object || type == simdjson::ondemand::json_type::array)
        return vm.throw_completion<SyntaxError>(ErrorType::JsonRawJSONNonPrimitive);

    // Consume the value to advance past it, then check for trailing content
    // NOTE(review): the getter results are discarded on purpose; this appears to rely on a failed getter
    // leaving the document un-advanced so that the at_end() check below rejects it - confirm against simdjson.
    switch (type) {
    case simdjson::ondemand::json_type::null:
        (void)doc.is_null();
        break;
    case simdjson::ondemand::json_type::boolean:
        (void)doc.get_bool();
        break;
    case simdjson::ondemand::json_type::number:
        (void)doc.get_double();
        break;
    case simdjson::ondemand::json_type::string:
        (void)doc.get_string();
        break;
    default:
        VERIFY_NOT_REACHED();
    }

    if (!doc.at_end())
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    // 4. Let internalSlotsList be « [[IsRawJSON]] ».
    // 5. Let obj be OrdinaryObjectCreate(null, internalSlotsList).
    auto object = RawJSONObject::create(realm, nullptr);

    // 6. Perform ! CreateDataPropertyOrThrow(obj, "rawJSON", jsonString).
    MUST(object->create_data_property_or_throw(vm.names.rawJSON, PrimitiveString::create(vm, json_string)));

    // 7. Perform ! SetIntegrityLevel(obj, frozen).
    MUST(object->set_integrity_level(Object::IntegrityLevel::Frozen));

    // 8. Return obj.
    return object;
}
// 1.1 JSON.isRawJSON ( O ), https://tc39.es/proposal-json-parse-with-source/#sec-json.israwjson
// Reports whether the first argument is an object created by JSON.rawJSON.
JS_DEFINE_NATIVE_FUNCTION(JSONObject::is_raw_json)
{
    auto candidate = vm.argument(0);

    // 1. If Type(O) is Object and O has an [[IsRawJSON]] internal slot, return true.
    // 2. Return false.
    bool const has_raw_json_slot = candidate.is_object() && is<RawJSONObject>(candidate.as_object());
    return Value(has_raw_json_slot);
}
}