GDScript: Add raw string literals (r-strings)

This commit is contained in:
Danil Alexeev 2023-08-28 13:00:33 +03:00
parent 221884e6bc
commit 2964c7d51c
No known key found for this signature in database
GPG key ID: 124453E157DA8DC7
13 changed files with 250 additions and 132 deletions

View file

@ -857,10 +857,14 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
STRING_NODEPATH,
};
bool is_raw = false;
bool is_multiline = false;
StringType type = STRING_REGULAR;
if (_peek(-1) == '&') {
if (_peek(-1) == 'r') {
is_raw = true;
_advance();
} else if (_peek(-1) == '&') {
type = STRING_NAME;
_advance();
} else if (_peek(-1) == '^') {
@ -890,7 +894,12 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
char32_t ch = _peek();
if (ch == 0x200E || ch == 0x200F || (ch >= 0x202A && ch <= 0x202E) || (ch >= 0x2066 && ch <= 0x2069)) {
Token error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
Token error;
if (is_raw) {
error = make_error("Invisible text direction control character present in the string, use regular string literal instead of r-string.");
} else {
error = make_error("Invisible text direction control character present in the string, escape it (\"\\u" + String::num_int64(ch, 16) + "\") to avoid confusion.");
}
error.start_column = column;
error.leftmost_column = error.start_column;
error.end_column = column + 1;
@ -905,144 +914,164 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
return make_error("Unterminated string.");
}
// Grab escape character.
char32_t code = _peek();
_advance();
if (_is_at_end()) {
return make_error("Unterminated string.");
}
if (is_raw) {
if (_peek() == quote_char) {
_advance();
if (_is_at_end()) {
return make_error("Unterminated string.");
}
result += '\\';
result += quote_char;
} else if (_peek() == '\\') { // For `\\\"`.
_advance();
if (_is_at_end()) {
return make_error("Unterminated string.");
}
result += '\\';
result += '\\';
} else {
result += '\\';
}
} else {
// Grab escape character.
char32_t code = _peek();
_advance();
if (_is_at_end()) {
return make_error("Unterminated string.");
}
char32_t escaped = 0;
bool valid_escape = true;
char32_t escaped = 0;
bool valid_escape = true;
switch (code) {
case 'a':
escaped = '\a';
break;
case 'b':
escaped = '\b';
break;
case 'f':
escaped = '\f';
break;
case 'n':
escaped = '\n';
break;
case 'r':
escaped = '\r';
break;
case 't':
escaped = '\t';
break;
case 'v':
escaped = '\v';
break;
case '\'':
escaped = '\'';
break;
case '\"':
escaped = '\"';
break;
case '\\':
escaped = '\\';
break;
case 'U':
case 'u': {
// Hexadecimal sequence.
int hex_len = (code == 'U') ? 6 : 4;
for (int j = 0; j < hex_len; j++) {
if (_is_at_end()) {
return make_error("Unterminated string.");
switch (code) {
case 'a':
escaped = '\a';
break;
case 'b':
escaped = '\b';
break;
case 'f':
escaped = '\f';
break;
case 'n':
escaped = '\n';
break;
case 'r':
escaped = '\r';
break;
case 't':
escaped = '\t';
break;
case 'v':
escaped = '\v';
break;
case '\'':
escaped = '\'';
break;
case '\"':
escaped = '\"';
break;
case '\\':
escaped = '\\';
break;
case 'U':
case 'u': {
// Hexadecimal sequence.
int hex_len = (code == 'U') ? 6 : 4;
for (int j = 0; j < hex_len; j++) {
if (_is_at_end()) {
return make_error("Unterminated string.");
}
char32_t digit = _peek();
char32_t value = 0;
if (is_digit(digit)) {
value = digit - '0';
} else if (digit >= 'a' && digit <= 'f') {
value = digit - 'a';
value += 10;
} else if (digit >= 'A' && digit <= 'F') {
value = digit - 'A';
value += 10;
} else {
// Make error, but keep parsing the string.
Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
error.start_column = column;
error.leftmost_column = error.start_column;
error.end_column = column + 1;
error.rightmost_column = error.end_column;
push_error(error);
valid_escape = false;
break;
}
escaped <<= 4;
escaped |= value;
_advance();
}
char32_t digit = _peek();
char32_t value = 0;
if (is_digit(digit)) {
value = digit - '0';
} else if (digit >= 'a' && digit <= 'f') {
value = digit - 'a';
value += 10;
} else if (digit >= 'A' && digit <= 'F') {
value = digit - 'A';
value += 10;
} else {
// Make error, but keep parsing the string.
Token error = make_error("Invalid hexadecimal digit in unicode escape sequence.");
error.start_column = column;
error.leftmost_column = error.start_column;
error.end_column = column + 1;
error.rightmost_column = error.end_column;
push_error(error);
valid_escape = false;
} break;
case '\r':
if (_peek() != '\n') {
// Carriage return without newline in string. (???)
// Just add it to the string and keep going.
result += ch;
_advance();
break;
}
escaped <<= 4;
escaped |= value;
_advance();
}
} break;
case '\r':
if (_peek() != '\n') {
// Carriage return without newline in string. (???)
// Just add it to the string and keep going.
result += ch;
_advance();
[[fallthrough]];
case '\n':
// Escaping newline.
newline(false);
valid_escape = false; // Don't add to the string.
break;
}
[[fallthrough]];
case '\n':
// Escaping newline.
newline(false);
valid_escape = false; // Don't add to the string.
break;
default:
Token error = make_error("Invalid escape in string.");
error.start_column = column - 2;
error.leftmost_column = error.start_column;
push_error(error);
valid_escape = false;
break;
}
// Parse UTF-16 pair.
if (valid_escape) {
if ((escaped & 0xfffffc00) == 0xd800) {
if (prev == 0) {
prev = escaped;
prev_pos = column - 2;
continue;
} else {
Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
default:
Token error = make_error("Invalid escape in string.");
error.start_column = column - 2;
error.leftmost_column = error.start_column;
push_error(error);
valid_escape = false;
prev = 0;
break;
}
// Parse UTF-16 pair.
if (valid_escape) {
if ((escaped & 0xfffffc00) == 0xd800) {
if (prev == 0) {
prev = escaped;
prev_pos = column - 2;
continue;
} else {
Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
error.start_column = column - 2;
error.leftmost_column = error.start_column;
push_error(error);
valid_escape = false;
prev = 0;
}
} else if ((escaped & 0xfffffc00) == 0xdc00) {
if (prev == 0) {
Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate.");
error.start_column = column - 2;
error.leftmost_column = error.start_column;
push_error(error);
valid_escape = false;
} else {
escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
prev = 0;
}
}
} else if ((escaped & 0xfffffc00) == 0xdc00) {
if (prev == 0) {
Token error = make_error("Invalid UTF-16 sequence in string, unpaired trail surrogate");
error.start_column = column - 2;
if (prev != 0) {
Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate.");
error.start_column = prev_pos;
error.leftmost_column = error.start_column;
push_error(error);
valid_escape = false;
} else {
escaped = (prev << 10UL) + escaped - ((0xd800 << 10UL) + 0xdc00 - 0x10000);
prev = 0;
}
}
if (prev != 0) {
Token error = make_error("Invalid UTF-16 sequence in string, unpaired lead surrogate");
error.start_column = prev_pos;
error.leftmost_column = error.start_column;
push_error(error);
prev = 0;
}
}
if (valid_escape) {
result += escaped;
if (valid_escape) {
result += escaped;
}
}
} else if (ch == quote_char) {
if (prev != 0) {
@ -1416,6 +1445,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
if (is_digit(c)) {
return number();
} else if (c == 'r' && (_peek() == '"' || _peek() == '\'')) {
// Raw string literals.
return string();
} else if (is_unicode_identifier_start(c)) {
return potential_identifier();
}