mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2026-04-19 02:10:26 +00:00
Replace the custom AK JSON parser with simdjson for parsing JSON in LibJS. This eliminates the intermediate AK::JsonValue object graph, going directly from JSON text to JS::Value. simdjson's on-demand API parses at ~4GB/s and only materializes values as they are accessed, making this both faster and more memory efficient than the previous approach. The AK JSON parser is still used elsewhere (WebDriver protocol, config files, etc.) but LibJS now uses simdjson exclusively for JSON.parse() and JSON.rawJSON().
904 lines
35 KiB
C++
904 lines
35 KiB
C++
/*
|
|
* Copyright (c) 2020, Matthew Olsson <mattco@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/Function.h>
|
|
#include <AK/GenericLexer.h>
|
|
#include <AK/StringBuilder.h>
|
|
#include <AK/StringConversions.h>
|
|
#include <AK/TypeCasts.h>
|
|
#include <AK/Utf16View.h>
|
|
#include <AK/Utf8View.h>
|
|
#include <LibJS/Runtime/AbstractOperations.h>
|
|
#include <LibJS/Runtime/Array.h>
|
|
#include <LibJS/Runtime/BigIntObject.h>
|
|
#include <LibJS/Runtime/BooleanObject.h>
|
|
#include <LibJS/Runtime/Error.h>
|
|
#include <LibJS/Runtime/FunctionObject.h>
|
|
#include <LibJS/Runtime/GlobalObject.h>
|
|
#include <LibJS/Runtime/JSONObject.h>
|
|
#include <LibJS/Runtime/NumberObject.h>
|
|
#include <LibJS/Runtime/Object.h>
|
|
#include <LibJS/Runtime/RawJSONObject.h>
|
|
#include <LibJS/Runtime/StringObject.h>
|
|
#include <LibJS/Runtime/ValueInlines.h>
|
|
|
|
#include <simdjson.h>
|
|
|
|
namespace JS {
|
|
|
|
GC_DEFINE_ALLOCATOR(JSONObject);
|
|
|
|
// Constructs the JSON namespace object, using the realm's intrinsic object prototype as its prototype.
JSONObject::JSONObject(Realm& realm)
    : Object(ConstructWithPrototypeTag::Tag, realm.intrinsics().object_prototype())
{
}
|
|
|
|
// Installs the JSON functions (stringify, parse, rawJSON, isRawJSON) and the @@toStringTag property.
void JSONObject::initialize(Realm& realm)
{
    auto& vm = this->vm();
    Base::initialize(realm);

    // The functions are writable and configurable; the Enumerable flag is deliberately not set.
    u8 function_attributes = Attribute::Writable | Attribute::Configurable;
    define_native_function(realm, vm.names.stringify, stringify, 3, function_attributes);
    define_native_function(realm, vm.names.parse, parse, 2, function_attributes);
    define_native_function(realm, vm.names.rawJSON, raw_json, 1, function_attributes);
    define_native_function(realm, vm.names.isRawJSON, is_raw_json, 1, function_attributes);

    // 25.5.3 JSON [ @@toStringTag ], https://tc39.es/ecma262/#sec-json-@@tostringtag
    auto to_string_tag = PrimitiveString::create(vm, "JSON"_string);
    define_direct_property(vm.well_known_symbol_to_string_tag(), to_string_tag, Attribute::Configurable);
}
|
|
|
|
// 25.5.2 JSON.stringify ( value [ , replacer [ , space ] ] ), https://tc39.es/ecma262/#sec-json.stringify
|
|
ThrowCompletionOr<Optional<String>> JSONObject::stringify_impl(VM& vm, Value value, Value replacer, Value space)
|
|
{
|
|
auto& realm = *vm.current_realm();
|
|
|
|
StringifyState state;
|
|
|
|
if (replacer.is_object()) {
|
|
if (replacer.as_object().is_function()) {
|
|
state.replacer_function = &replacer.as_function();
|
|
} else {
|
|
auto is_array = TRY(replacer.is_array(vm));
|
|
if (is_array) {
|
|
auto& replacer_object = replacer.as_object();
|
|
auto replacer_length = TRY(length_of_array_like(vm, replacer_object));
|
|
Vector<Utf16String> list;
|
|
for (size_t i = 0; i < replacer_length; ++i) {
|
|
auto replacer_value = TRY(replacer_object.get(i));
|
|
Optional<Utf16String> item;
|
|
if (replacer_value.is_string()) {
|
|
item = replacer_value.as_string().utf16_string();
|
|
} else if (replacer_value.is_number()) {
|
|
item = MUST(replacer_value.to_utf16_string(vm));
|
|
} else if (replacer_value.is_object()) {
|
|
auto& value_object = replacer_value.as_object();
|
|
if (is<StringObject>(value_object) || is<NumberObject>(value_object))
|
|
item = TRY(replacer_value.to_utf16_string(vm));
|
|
}
|
|
if (item.has_value() && !list.contains_slow(*item)) {
|
|
list.append(*item);
|
|
}
|
|
}
|
|
state.property_list = move(list);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (space.is_object()) {
|
|
auto& space_object = space.as_object();
|
|
if (is<NumberObject>(space_object))
|
|
space = TRY(space.to_number(vm));
|
|
else if (is<StringObject>(space_object))
|
|
space = TRY(space.to_primitive_string(vm));
|
|
}
|
|
|
|
if (space.is_number()) {
|
|
auto space_mv = MUST(space.to_integer_or_infinity(vm));
|
|
space_mv = min(10, space_mv);
|
|
state.gap = space_mv < 1 ? String {} : MUST(String::repeated(' ', space_mv));
|
|
} else if (space.is_string()) {
|
|
auto string = space.as_string().utf8_string();
|
|
if (string.bytes().size() <= 10)
|
|
state.gap = string;
|
|
else
|
|
state.gap = MUST(string.substring_from_byte_offset(0, 10));
|
|
} else {
|
|
state.gap = String {};
|
|
}
|
|
|
|
auto wrapper = Object::create(realm, realm.intrinsics().object_prototype());
|
|
MUST(wrapper->create_data_property_or_throw(Utf16String {}, value));
|
|
return serialize_json_property(vm, state, Utf16String {}, wrapper);
|
|
}
|
|
|
|
// 25.5.2 JSON.stringify ( value [ , replacer [ , space ] ] ), https://tc39.es/ecma262/#sec-json.stringify
|
|
JS_DEFINE_NATIVE_FUNCTION(JSONObject::stringify)
|
|
{
|
|
if (!vm.argument_count())
|
|
return js_undefined();
|
|
|
|
auto value = vm.argument(0);
|
|
auto replacer = vm.argument(1);
|
|
auto space = vm.argument(2);
|
|
|
|
auto maybe_string = TRY(stringify_impl(vm, value, replacer, space));
|
|
if (!maybe_string.has_value())
|
|
return js_undefined();
|
|
|
|
return PrimitiveString::create(vm, maybe_string.release_value());
|
|
}
|
|
|
|
// 25.5.2.1 SerializeJSONProperty ( state, key, holder ), https://tc39.es/ecma262/#sec-serializejsonproperty
|
|
// 1.4.1 SerializeJSONProperty ( state, key, holder ), https://tc39.es/proposal-json-parse-with-source/#sec-serializejsonproperty
|
|
ThrowCompletionOr<Optional<String>> JSONObject::serialize_json_property(VM& vm, StringifyState& state, PropertyKey const& key, Object* holder)
|
|
{
|
|
// 1. Let value be ? Get(holder, key).
|
|
auto value = TRY(holder->get(key));
|
|
|
|
// 2. If Type(value) is Object or BigInt, then
|
|
if (value.is_object() || value.is_bigint()) {
|
|
// a. Let toJSON be ? GetV(value, "toJSON").
|
|
auto to_json = TRY(value.get(vm, vm.names.toJSON));
|
|
|
|
// b. If IsCallable(toJSON) is true, then
|
|
if (to_json.is_function()) {
|
|
// i. Set value to ? Call(toJSON, value, « key »).
|
|
value = TRY(call(vm, to_json.as_function(), value, PrimitiveString::create(vm, key.to_string())));
|
|
}
|
|
}
|
|
|
|
// 3. If state.[[ReplacerFunction]] is not undefined, then
|
|
if (state.replacer_function) {
|
|
// a. Set value to ? Call(state.[[ReplacerFunction]], holder, « key, value »).
|
|
value = TRY(call(vm, *state.replacer_function, holder, PrimitiveString::create(vm, key.to_string()), value));
|
|
}
|
|
|
|
// 4. If Type(value) is Object, then
|
|
if (value.is_object()) {
|
|
auto& value_object = value.as_object();
|
|
|
|
// a. If value has an [[IsRawJSON]] internal slot, then
|
|
if (is<RawJSONObject>(value_object)) {
|
|
// i. Return ! Get(value, "rawJSON").
|
|
return MUST(value_object.get(vm.names.rawJSON)).as_string().utf8_string();
|
|
}
|
|
// b. If value has a [[NumberData]] internal slot, then
|
|
if (is<NumberObject>(value_object)) {
|
|
// i. Set value to ? ToNumber(value).
|
|
value = TRY(value.to_number(vm));
|
|
}
|
|
// c. Else if value has a [[StringData]] internal slot, then
|
|
else if (is<StringObject>(value_object)) {
|
|
// i. Set value to ? ToString(value).
|
|
value = TRY(value.to_primitive_string(vm));
|
|
}
|
|
// d. Else if value has a [[BooleanData]] internal slot, then
|
|
else if (is<BooleanObject>(value_object)) {
|
|
// i. Set value to value.[[BooleanData]].
|
|
value = Value(static_cast<BooleanObject&>(value_object).boolean());
|
|
}
|
|
// e. Else if value has a [[BigIntData]] internal slot, then
|
|
else if (is<BigIntObject>(value_object)) {
|
|
// i. Set value to value.[[BigIntData]].
|
|
value = Value(&static_cast<BigIntObject&>(value_object).bigint());
|
|
}
|
|
}
|
|
|
|
// 5. If value is null, return "null".
|
|
if (value.is_null())
|
|
return "null"_string;
|
|
|
|
// 6. If value is true, return "true".
|
|
// 7. If value is false, return "false".
|
|
if (value.is_boolean())
|
|
return value.as_bool() ? "true"_string : "false"_string;
|
|
|
|
// 8. If Type(value) is String, return QuoteJSONString(value).
|
|
if (value.is_string())
|
|
return quote_json_string(value.as_string().utf16_string_view());
|
|
|
|
// 9. If Type(value) is Number, then
|
|
if (value.is_number()) {
|
|
// a. If value is finite, return ! ToString(value).
|
|
if (value.is_finite_number())
|
|
return MUST(value.to_string(vm));
|
|
|
|
// b. Return "null".
|
|
return "null"_string;
|
|
}
|
|
|
|
// 10. If Type(value) is BigInt, throw a TypeError exception.
|
|
if (value.is_bigint())
|
|
return vm.throw_completion<TypeError>(ErrorType::JsonBigInt);
|
|
|
|
// 11. If Type(value) is Object and IsCallable(value) is false, then
|
|
if (value.is_object() && !value.is_function()) {
|
|
// a. Let isArray be ? IsArray(value).
|
|
auto is_array = TRY(value.is_array(vm));
|
|
|
|
// b. If isArray is true, return ? SerializeJSONArray(state, value).
|
|
if (is_array)
|
|
return TRY(serialize_json_array(vm, state, value.as_object()));
|
|
|
|
// c. Return ? SerializeJSONObject(state, value).
|
|
return TRY(serialize_json_object(vm, state, value.as_object()));
|
|
}
|
|
|
|
// 12. Return undefined.
|
|
return Optional<String> {};
|
|
}
|
|
|
|
// 25.5.2.4 SerializeJSONObject ( state, value ), https://tc39.es/ecma262/#sec-serializejsonobject
|
|
ThrowCompletionOr<String> JSONObject::serialize_json_object(VM& vm, StringifyState& state, Object& object)
|
|
{
|
|
if (state.seen_objects.contains(&object))
|
|
return vm.throw_completion<TypeError>(ErrorType::JsonCircular);
|
|
|
|
state.seen_objects.set(&object);
|
|
String previous_indent = state.indent;
|
|
state.indent = MUST(String::formatted("{}{}", state.indent, state.gap));
|
|
Vector<String> property_strings;
|
|
|
|
auto process_property = [&](PropertyKey const& key) -> ThrowCompletionOr<void> {
|
|
if (key.is_symbol())
|
|
return {};
|
|
auto serialized_property_string = TRY(serialize_json_property(vm, state, key, &object));
|
|
if (serialized_property_string.has_value()) {
|
|
property_strings.append(MUST(String::formatted(
|
|
"{}:{}{}",
|
|
quote_json_string(key.to_string()),
|
|
state.gap.is_empty() ? "" : " ",
|
|
serialized_property_string)));
|
|
}
|
|
return {};
|
|
};
|
|
|
|
if (state.property_list.has_value()) {
|
|
auto property_list = state.property_list.value();
|
|
for (auto& property : property_list)
|
|
TRY(process_property(property));
|
|
} else {
|
|
auto property_list = TRY(object.enumerable_own_property_names(PropertyKind::Key));
|
|
for (auto& property : property_list)
|
|
TRY(process_property(property.as_string().utf16_string()));
|
|
}
|
|
StringBuilder builder;
|
|
if (property_strings.is_empty()) {
|
|
builder.append("{}"sv);
|
|
} else {
|
|
bool first = true;
|
|
builder.append('{');
|
|
if (state.gap.is_empty()) {
|
|
for (auto& property_string : property_strings) {
|
|
if (!first)
|
|
builder.append(',');
|
|
first = false;
|
|
builder.append(property_string);
|
|
}
|
|
} else {
|
|
builder.append('\n');
|
|
builder.append(state.indent);
|
|
auto separator = MUST(String::formatted(",\n{}", state.indent));
|
|
for (auto& property_string : property_strings) {
|
|
if (!first)
|
|
builder.append(separator);
|
|
first = false;
|
|
builder.append(property_string);
|
|
}
|
|
builder.append('\n');
|
|
builder.append(previous_indent);
|
|
}
|
|
builder.append('}');
|
|
}
|
|
|
|
state.seen_objects.remove(&object);
|
|
state.indent = previous_indent;
|
|
return builder.to_string_without_validation();
|
|
}
|
|
|
|
// 25.5.2.5 SerializeJSONArray ( state, value ), https://tc39.es/ecma262/#sec-serializejsonarray
|
|
ThrowCompletionOr<String> JSONObject::serialize_json_array(VM& vm, StringifyState& state, Object& object)
|
|
{
|
|
if (state.seen_objects.contains(&object))
|
|
return vm.throw_completion<TypeError>(ErrorType::JsonCircular);
|
|
|
|
state.seen_objects.set(&object);
|
|
String previous_indent = state.indent;
|
|
state.indent = MUST(String::formatted("{}{}", state.indent, state.gap));
|
|
Vector<String> property_strings;
|
|
|
|
auto length = TRY(length_of_array_like(vm, object));
|
|
|
|
// Optimization
|
|
property_strings.ensure_capacity(length);
|
|
|
|
for (size_t i = 0; i < length; ++i) {
|
|
auto serialized_property_string = TRY(serialize_json_property(vm, state, i, &object));
|
|
if (!serialized_property_string.has_value()) {
|
|
property_strings.append("null"_string);
|
|
} else {
|
|
property_strings.append(serialized_property_string.release_value());
|
|
}
|
|
}
|
|
|
|
StringBuilder builder;
|
|
if (property_strings.is_empty()) {
|
|
builder.append("[]"sv);
|
|
} else {
|
|
if (state.gap.is_empty()) {
|
|
builder.append('[');
|
|
bool first = true;
|
|
for (auto& property_string : property_strings) {
|
|
if (!first)
|
|
builder.append(',');
|
|
first = false;
|
|
builder.append(property_string);
|
|
}
|
|
builder.append(']');
|
|
} else {
|
|
builder.append("[\n"sv);
|
|
builder.append(state.indent);
|
|
auto separator = MUST(String::formatted(",\n{}", state.indent));
|
|
bool first = true;
|
|
for (auto& property_string : property_strings) {
|
|
if (!first)
|
|
builder.append(separator);
|
|
first = false;
|
|
builder.append(property_string);
|
|
}
|
|
builder.append('\n');
|
|
builder.append(previous_indent);
|
|
builder.append(']');
|
|
}
|
|
}
|
|
|
|
state.seen_objects.remove(&object);
|
|
state.indent = previous_indent;
|
|
return builder.to_string_without_validation();
|
|
}
|
|
|
|
// 25.5.2.2 QuoteJSONString ( value ), https://tc39.es/ecma262/#sec-quotejsonstring
|
|
String JSONObject::quote_json_string(Utf16View const& string)
|
|
{
|
|
// 1. Let product be the String value consisting solely of the code unit 0x0022 (QUOTATION MARK).
|
|
StringBuilder builder;
|
|
builder.append('"');
|
|
|
|
// 2. For each code point C of StringToCodePoints(value), do
|
|
for (auto code_point : string) {
|
|
// a. If C is listed in the “Code Point” column of Table 70, then
|
|
// i. Set product to the string-concatenation of product and the escape sequence for C as specified in the “Escape Sequence” column of the corresponding row.
|
|
switch (code_point) {
|
|
case '\b':
|
|
builder.append("\\b"sv);
|
|
break;
|
|
case '\t':
|
|
builder.append("\\t"sv);
|
|
break;
|
|
case '\n':
|
|
builder.append("\\n"sv);
|
|
break;
|
|
case '\f':
|
|
builder.append("\\f"sv);
|
|
break;
|
|
case '\r':
|
|
builder.append("\\r"sv);
|
|
break;
|
|
case '"':
|
|
builder.append("\\\""sv);
|
|
break;
|
|
case '\\':
|
|
builder.append("\\\\"sv);
|
|
break;
|
|
default:
|
|
// b. Else if C has a numeric value less than 0x0020 (SPACE), or if C has the same numeric value as a leading surrogate or trailing surrogate, then
|
|
if (code_point < 0x20 || is_unicode_surrogate(code_point)) {
|
|
// i. Let unit be the code unit whose numeric value is that of C.
|
|
// ii. Set product to the string-concatenation of product and UnicodeEscape(unit).
|
|
builder.appendff("\\u{:04x}", code_point);
|
|
}
|
|
// c. Else,
|
|
else {
|
|
// i. Set product to the string-concatenation of product and UTF16EncodeCodePoint(C).
|
|
builder.append_code_point(code_point);
|
|
}
|
|
}
|
|
}
|
|
|
|
// 3. Set product to the string-concatenation of product and the code unit 0x0022 (QUOTATION MARK).
|
|
builder.append('"');
|
|
|
|
// 4. Return product.
|
|
return builder.to_string_without_validation();
|
|
}
|
|
|
|
// 25.5.1 JSON.parse ( text [ , reviver ] ), https://tc39.es/ecma262/#sec-json.parse
|
|
JS_DEFINE_NATIVE_FUNCTION(JSONObject::parse)
|
|
{
|
|
auto& realm = *vm.current_realm();
|
|
|
|
auto text = vm.argument(0);
|
|
auto reviver = vm.argument(1);
|
|
|
|
// 1. Let jsonString be ? ToString(text).
|
|
auto json_string = TRY(text.to_string(vm));
|
|
|
|
// 2. Let unfiltered be ? ParseJSON(jsonString).
|
|
auto unfiltered = TRY(parse_json(vm, json_string));
|
|
|
|
// 3. If IsCallable(reviver) is true, then
|
|
if (reviver.is_function()) {
|
|
// a. Let root be OrdinaryObjectCreate(%Object.prototype%).
|
|
auto root = Object::create(realm, realm.intrinsics().object_prototype());
|
|
|
|
// b. Let rootName be the empty String.
|
|
Utf16String root_name;
|
|
|
|
// c. Perform ! CreateDataPropertyOrThrow(root, rootName, unfiltered).
|
|
MUST(root->create_data_property_or_throw(root_name, unfiltered));
|
|
|
|
// d. Return ? InternalizeJSONProperty(root, rootName, reviver).
|
|
return internalize_json_property(vm, root, root_name, reviver.as_function());
|
|
}
|
|
// 4. Else,
|
|
// a. Return unfiltered.
|
|
return unfiltered;
|
|
}
|
|
|
|
// Unescape a JSON string, properly handling \uXXXX escape sequences including lone surrogates.
|
|
// simdjson validates UTF-8 strictly and rejects lone surrogates, but JSON allows them.
|
|
// Returns {} on malformed escape sequences.
|
|
static Optional<Utf16String> unescape_json_string(StringView raw)
|
|
{
|
|
StringBuilder builder(StringBuilder::Mode::UTF16, raw.length());
|
|
|
|
GenericLexer lexer { raw };
|
|
|
|
auto consume_hex4 = [&]() -> Optional<u16> {
|
|
if (lexer.tell_remaining() < 4)
|
|
return {};
|
|
u16 value = 0;
|
|
for (int i = 0; i < 4; ++i) {
|
|
auto ch = lexer.consume();
|
|
value <<= 4;
|
|
if (ch >= '0' && ch <= '9')
|
|
value |= ch - '0';
|
|
else if (ch >= 'a' && ch <= 'f')
|
|
value |= ch - 'a' + 10;
|
|
else if (ch >= 'A' && ch <= 'F')
|
|
value |= ch - 'A' + 10;
|
|
else
|
|
return {};
|
|
}
|
|
return value;
|
|
};
|
|
|
|
while (!lexer.is_eof()) {
|
|
if (lexer.consume_specific('\\')) {
|
|
if (lexer.is_eof())
|
|
return {};
|
|
auto escaped = lexer.consume();
|
|
switch (escaped) {
|
|
case '"':
|
|
builder.append_code_unit('"');
|
|
break;
|
|
case '\\':
|
|
builder.append_code_unit('\\');
|
|
break;
|
|
case '/':
|
|
builder.append_code_unit('/');
|
|
break;
|
|
case 'b':
|
|
builder.append_code_unit('\b');
|
|
break;
|
|
case 'f':
|
|
builder.append_code_unit('\f');
|
|
break;
|
|
case 'n':
|
|
builder.append_code_unit('\n');
|
|
break;
|
|
case 'r':
|
|
builder.append_code_unit('\r');
|
|
break;
|
|
case 't':
|
|
builder.append_code_unit('\t');
|
|
break;
|
|
case 'u': {
|
|
auto code_unit = consume_hex4();
|
|
if (!code_unit.has_value())
|
|
return {};
|
|
builder.append_code_unit(*code_unit);
|
|
break;
|
|
}
|
|
default:
|
|
return {};
|
|
}
|
|
} else {
|
|
// Non-escaped character - copy UTF-8 code point to UTF-16
|
|
auto ch = lexer.consume();
|
|
if ((ch & 0x80) == 0) {
|
|
// ASCII
|
|
builder.append_code_unit(ch);
|
|
} else if ((ch & 0xE0) == 0xC0) {
|
|
// 2-byte UTF-8
|
|
if (lexer.is_eof())
|
|
return {};
|
|
auto ch2 = lexer.consume();
|
|
u32 code_point = ((ch & 0x1F) << 6) | (ch2 & 0x3F);
|
|
builder.append_code_unit(code_point);
|
|
} else if ((ch & 0xF0) == 0xE0) {
|
|
// 3-byte UTF-8
|
|
if (lexer.tell_remaining() < 2)
|
|
return {};
|
|
auto ch2 = lexer.consume();
|
|
auto ch3 = lexer.consume();
|
|
u32 code_point = ((ch & 0x0F) << 12) | ((ch2 & 0x3F) << 6) | (ch3 & 0x3F);
|
|
builder.append_code_unit(code_point);
|
|
} else if ((ch & 0xF8) == 0xF0) {
|
|
// 4-byte UTF-8 (needs surrogate pair)
|
|
if (lexer.tell_remaining() < 3)
|
|
return {};
|
|
auto ch2 = lexer.consume();
|
|
auto ch3 = lexer.consume();
|
|
auto ch4 = lexer.consume();
|
|
u32 code_point = ((ch & 0x07) << 18) | ((ch2 & 0x3F) << 12) | ((ch3 & 0x3F) << 6) | (ch4 & 0x3F);
|
|
builder.append_code_point(code_point);
|
|
} else {
|
|
return {};
|
|
}
|
|
}
|
|
}
|
|
|
|
return builder.to_utf16_string();
|
|
}
|
|
|
|
template<typename T>
|
|
static ALWAYS_INLINE ThrowCompletionOr<void> ensure_simdjson_fully_parsed(VM& vm, T& value)
|
|
{
|
|
if constexpr (IsSame<T, simdjson::ondemand::document>) {
|
|
if (!value.at_end())
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
}
|
|
return {};
|
|
}
|
|
|
|
static ThrowCompletionOr<Value> parse_simdjson_value(VM&, simdjson::ondemand::value);
|
|
|
|
template<typename T>
|
|
static ThrowCompletionOr<Value> parse_simdjson_number(VM& vm, T& value, StringView raw_sv)
|
|
{
|
|
// Validate JSON number format (simdjson is more lenient than spec)
|
|
// - No leading zeros (except "0" or "0.xxx")
|
|
// - No trailing decimal point (e.g., "1." is invalid)
|
|
size_t i = 0;
|
|
if (i < raw_sv.length() && raw_sv[i] == '-')
|
|
++i;
|
|
if (i < raw_sv.length() && raw_sv[i] == '0' && i + 1 < raw_sv.length() && is_ascii_digit(raw_sv[i + 1]))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed); // Leading zero
|
|
while (i < raw_sv.length() && is_ascii_digit(raw_sv[i]))
|
|
++i;
|
|
if (i < raw_sv.length() && raw_sv[i] == '.') {
|
|
++i;
|
|
if (i >= raw_sv.length() || !is_ascii_digit(raw_sv[i]))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed); // Trailing decimal
|
|
}
|
|
|
|
double double_value;
|
|
auto error = value.get_double().get(double_value);
|
|
if (!error) {
|
|
TRY(ensure_simdjson_fully_parsed(vm, value));
|
|
return Value(double_value);
|
|
}
|
|
|
|
// Handle overflow to infinity (e.g., 1e309)
|
|
// simdjson returns NUMBER_ERROR for numbers that overflow double
|
|
// Use parse_first_number as fallback - it handles overflow correctly
|
|
if (error == simdjson::NUMBER_ERROR) {
|
|
auto result = parse_first_number<double>(raw_sv, TrimWhitespace::No);
|
|
if (result.has_value() && result->characters_parsed == raw_sv.length())
|
|
return Value(result->value);
|
|
}
|
|
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
}
|
|
|
|
template<typename T>
|
|
static ThrowCompletionOr<Value> parse_simdjson_string(VM& vm, T& value)
|
|
{
|
|
// Use get_raw_json_string() to get the raw JSON string content (without quotes, with escapes),
|
|
// then unescape ourselves to properly handle lone surrogates like \uD800 which simdjson rejects.
|
|
simdjson::ondemand::raw_json_string raw_string;
|
|
if (value.get_raw_json_string().get(raw_string))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
char const* raw = raw_string.raw();
|
|
// Find the length by looking for the closing quote (simdjson validated the structure)
|
|
size_t length = 0;
|
|
while (raw[length] != '"') {
|
|
if (raw[length] == '\\')
|
|
++length; // Skip escaped character
|
|
++length;
|
|
}
|
|
auto unescaped = unescape_json_string({ raw, length });
|
|
if (!unescaped.has_value())
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
return PrimitiveString::create(vm, unescaped.release_value());
|
|
}
|
|
|
|
template<typename T>
|
|
static ThrowCompletionOr<Value> parse_simdjson_array(VM& vm, T& value)
|
|
{
|
|
auto& realm = *vm.current_realm();
|
|
|
|
simdjson::ondemand::array simdjson_array;
|
|
if (value.get_array().get(simdjson_array))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
auto array = MUST(Array::create(realm, 0));
|
|
size_t index = 0;
|
|
|
|
for (auto element : simdjson_array) {
|
|
simdjson::ondemand::value element_value;
|
|
if (element.get(element_value))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
auto parsed = TRY(parse_simdjson_value(vm, element_value));
|
|
array->define_direct_property(index++, parsed, default_attributes);
|
|
}
|
|
|
|
TRY(ensure_simdjson_fully_parsed(vm, value));
|
|
return array;
|
|
}
|
|
|
|
template<typename T>
|
|
static ThrowCompletionOr<Value> parse_simdjson_object(VM& vm, T& value)
|
|
{
|
|
auto& realm = *vm.current_realm();
|
|
|
|
simdjson::ondemand::object simdjson_object;
|
|
if (value.get_object().get(simdjson_object))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
auto object = Object::create(realm, realm.intrinsics().object_prototype());
|
|
|
|
for (auto field : simdjson_object) {
|
|
// Use escaped_key() to get the raw JSON key (with escapes), then unescape ourselves
|
|
std::string_view raw_key;
|
|
if (field.escaped_key().get(raw_key))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
auto unescaped_key = unescape_json_string({ raw_key.data(), raw_key.size() });
|
|
if (!unescaped_key.has_value())
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
simdjson::ondemand::value field_value;
|
|
if (field.value().get(field_value))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
auto parsed = TRY(parse_simdjson_value(vm, field_value));
|
|
object->define_direct_property(unescaped_key.release_value(), parsed, default_attributes);
|
|
}
|
|
|
|
TRY(ensure_simdjson_fully_parsed(vm, value));
|
|
return object;
|
|
}
|
|
|
|
// Converts a nested simdjson value (an array element or an object member) into a JS value by dispatching
// on its JSON type. Throws a SyntaxError if simdjson reports an error or the value is malformed.
static ThrowCompletionOr<Value> parse_simdjson_value(VM& vm, simdjson::ondemand::value value)
{
    simdjson::ondemand::json_type type;
    if (value.type().get(type))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    switch (type) {
    case simdjson::ondemand::json_type::null: {
        // Per the simdjson documentation, type() neither validates nor consumes the value; is_null() must
        // still be called to confirm that the token really is the literal `null` (and to consume it).
        bool is_null = false;
        if (value.is_null().get(is_null) || !is_null)
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return js_null();
    }
    case simdjson::ondemand::json_type::boolean: {
        bool boolean_value;
        if (value.get_bool().get(boolean_value))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return Value(boolean_value);
    }
    case simdjson::ondemand::json_type::number: {
        // Hand the raw token text to the number parser for its own validation and overflow fallback.
        auto raw = value.raw_json_token();
        StringView raw_sv { raw.data(), raw.size() };
        return parse_simdjson_number(vm, value, raw_sv);
    }
    case simdjson::ondemand::json_type::string:
        return parse_simdjson_string(vm, value);
    case simdjson::ondemand::json_type::array:
        return parse_simdjson_array(vm, value);
    case simdjson::ondemand::json_type::object:
        return parse_simdjson_object(vm, value);
    }
    VERIFY_NOT_REACHED();
}
|
|
|
|
// Converts the root value of a simdjson document into a JS value by dispatching on its JSON type.
// Scalars are read directly from the document and the document must then be at its end; strings, arrays
// and objects are delegated to the shared helpers. Throws a SyntaxError on any failure.
static ThrowCompletionOr<Value> parse_simdjson_document(VM& vm, simdjson::ondemand::document& document)
{
    simdjson::ondemand::json_type root_type;
    if (document.type().get(root_type))
        return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);

    switch (root_type) {
    case simdjson::ondemand::json_type::null: {
        // The literal must be valid and nothing may follow it.
        if (document.is_null().error() || !document.at_end())
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return js_null();
    }
    case simdjson::ondemand::json_type::boolean: {
        bool root_boolean;
        if (document.get_bool().get(root_boolean) || !document.at_end())
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        return Value(root_boolean);
    }
    case simdjson::ondemand::json_type::number: {
        // Get raw token first in case get_double fails (e.g., overflow)
        std::string_view raw_token;
        if (document.raw_json_token().get(raw_token))
            return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
        auto number_text = StringView { raw_token.data(), raw_token.size() }.trim_whitespace();
        return parse_simdjson_number(vm, document, number_text);
    }
    case simdjson::ondemand::json_type::string:
        return parse_simdjson_string(vm, document);
    case simdjson::ondemand::json_type::array:
        return parse_simdjson_array(vm, document);
    case simdjson::ondemand::json_type::object:
        return parse_simdjson_object(vm, document);
    }
    VERIFY_NOT_REACHED();
}
|
|
|
|
// 25.5.1.1 ParseJSON ( text ), https://tc39.es/ecma262/#sec-ParseJSON
|
|
ThrowCompletionOr<Value> JSONObject::parse_json(VM& vm, StringView text)
|
|
{
|
|
// 1. If StringToCodePoints(text) is not a valid JSON text as specified in ECMA-404, throw a SyntaxError exception.
|
|
// NB: Per ECMA-404, the BOM is not valid JSON whitespace. simdjson silently skips it, so we must reject it explicitly.
|
|
if (text.length() >= 3
|
|
&& static_cast<u8>(text[0]) == 0xEF
|
|
&& static_cast<u8>(text[1]) == 0xBB
|
|
&& static_cast<u8>(text[2]) == 0xBF) {
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
}
|
|
|
|
simdjson::ondemand::parser parser;
|
|
simdjson::padded_string padded(text.characters_without_null_termination(), text.length());
|
|
|
|
simdjson::ondemand::document document;
|
|
if (parser.iterate(padded).get(document))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
// 2. Let scriptString be the string-concatenation of "(", text, and ");".
|
|
// 3. Let script be ParseText(scriptString, Script).
|
|
// 4. NOTE: The early error rules defined in 13.2.5.1 have special handling for the above invocation of ParseText.
|
|
// 5. Assert: script is a Parse Node.
|
|
// 6. Let result be ! Evaluation of script.
|
|
auto result = TRY(parse_simdjson_document(vm, document));
|
|
|
|
// 7. NOTE: The PropertyDefinitionEvaluation semantics defined in 13.2.5.5 have special handling for the above evaluation.
|
|
// 8. Assert: result is either a String, a Number, a Boolean, an Object that is defined by either an ArrayLiteral or an ObjectLiteral, or null.
|
|
|
|
// 9. Return result.
|
|
return result;
|
|
}
|
|
|
|
// 25.5.1.1 InternalizeJSONProperty ( holder, name, reviver ), https://tc39.es/ecma262/#sec-internalizejsonproperty
|
|
ThrowCompletionOr<Value> JSONObject::internalize_json_property(VM& vm, Object* holder, PropertyKey const& name, FunctionObject& reviver)
|
|
{
|
|
auto value = TRY(holder->get(name));
|
|
if (value.is_object()) {
|
|
auto is_array = TRY(value.is_array(vm));
|
|
|
|
auto& value_object = value.as_object();
|
|
auto process_property = [&](PropertyKey const& key) -> ThrowCompletionOr<void> {
|
|
auto element = TRY(internalize_json_property(vm, &value_object, key, reviver));
|
|
if (element.is_undefined())
|
|
TRY(value_object.internal_delete(key));
|
|
else
|
|
TRY(value_object.create_data_property(key, element));
|
|
return {};
|
|
};
|
|
|
|
if (is_array) {
|
|
auto length = TRY(length_of_array_like(vm, value_object));
|
|
for (size_t i = 0; i < length; ++i)
|
|
TRY(process_property(i));
|
|
} else {
|
|
auto property_list = TRY(value_object.enumerable_own_property_names(Object::PropertyKind::Key));
|
|
for (auto& property_key : property_list)
|
|
TRY(process_property(property_key.as_string().utf16_string()));
|
|
}
|
|
}
|
|
|
|
return TRY(call(vm, reviver, holder, PrimitiveString::create(vm, name.to_string()), value));
|
|
}
|
|
|
|
// 1.3 JSON.rawJSON ( text ), https://tc39.es/proposal-json-parse-with-source/#sec-json.rawjson
|
|
JS_DEFINE_NATIVE_FUNCTION(JSONObject::raw_json)
|
|
{
|
|
auto& realm = *vm.current_realm();
|
|
|
|
// 1. Let jsonString be ? ToString(text).
|
|
auto json_string = TRY(vm.argument(0).to_string(vm));
|
|
|
|
// 2. Throw a SyntaxError exception if jsonString is the empty String, or if either the first or last code unit of
|
|
// jsonString is any of 0x0009 (CHARACTER TABULATION), 0x000A (LINE FEED), 0x000D (CARRIAGE RETURN), or
|
|
// 0x0020 (SPACE).
|
|
auto bytes = json_string.bytes_as_string_view();
|
|
if (bytes.is_empty())
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
static constexpr AK::Array invalid_code_points { 0x09, 0x0A, 0x0D, 0x20 };
|
|
auto first_char = bytes[0];
|
|
auto last_char = bytes[bytes.length() - 1];
|
|
|
|
if (invalid_code_points.contains_slow(first_char) || invalid_code_points.contains_slow(last_char))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
// 3. Parse StringToCodePoints(jsonString) as a JSON text as specified in ECMA-404. Throw a SyntaxError exception
|
|
// if it is not a valid JSON text as defined in that specification, or if its outermost value is an object or
|
|
// array as defined in that specification.
|
|
simdjson::ondemand::parser parser;
|
|
simdjson::padded_string padded(json_string.bytes_as_string_view().characters_without_null_termination(), json_string.bytes_as_string_view().length());
|
|
|
|
simdjson::ondemand::document doc;
|
|
if (parser.iterate(padded).get(doc))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
simdjson::ondemand::json_type type;
|
|
if (doc.type().get(type))
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
if (type == simdjson::ondemand::json_type::object || type == simdjson::ondemand::json_type::array)
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonRawJSONNonPrimitive);
|
|
|
|
// Consume the value to advance past it, then check for trailing content
|
|
switch (type) {
|
|
case simdjson::ondemand::json_type::null:
|
|
(void)doc.is_null();
|
|
break;
|
|
case simdjson::ondemand::json_type::boolean:
|
|
(void)doc.get_bool();
|
|
break;
|
|
case simdjson::ondemand::json_type::number:
|
|
(void)doc.get_double();
|
|
break;
|
|
case simdjson::ondemand::json_type::string:
|
|
(void)doc.get_string();
|
|
break;
|
|
default:
|
|
VERIFY_NOT_REACHED();
|
|
}
|
|
|
|
if (!doc.at_end())
|
|
return vm.throw_completion<SyntaxError>(ErrorType::JsonMalformed);
|
|
|
|
// 4. Let internalSlotsList be « [[IsRawJSON]] ».
|
|
// 5. Let obj be OrdinaryObjectCreate(null, internalSlotsList).
|
|
auto object = RawJSONObject::create(realm, nullptr);
|
|
|
|
// 6. Perform ! CreateDataPropertyOrThrow(obj, "rawJSON", jsonString).
|
|
MUST(object->create_data_property_or_throw(vm.names.rawJSON, PrimitiveString::create(vm, json_string)));
|
|
|
|
// 7. Perform ! SetIntegrityLevel(obj, frozen).
|
|
MUST(object->set_integrity_level(Object::IntegrityLevel::Frozen));
|
|
|
|
// 8. Return obj.
|
|
return object;
|
|
}
|
|
|
|
// 1.1 JSON.isRawJSON ( O ), https://tc39.es/proposal-json-parse-with-source/#sec-json.israwjson
|
|
JS_DEFINE_NATIVE_FUNCTION(JSONObject::is_raw_json)
|
|
{
|
|
// 1. If Type(O) is Object and O has an [[IsRawJSON]] internal slot, return true.
|
|
if (vm.argument(0).is_object() && is<RawJSONObject>(vm.argument(0).as_object()))
|
|
return Value(true);
|
|
|
|
// 2. Return false.
|
|
return Value(false);
|
|
}
|
|
|
|
}
|