GDScript: Reintroduce binary tokenization on export

This adds back a function available in 3.x: exporting the GDScript
files in a binary form by converting the tokens recognized by the
tokenizer into a data format.

It is enabled by default on export but can be manually disabled. The
format helps with loading times, since the tokens are easily
reconstructed, and with hiding the source code, since recovering it
would require a specialized tool. Code comments are not stored in this
format.

The `--test` command can also include a `--use-binary-tokens` flag,
which will run the GDScript tests with the binary format instead of the
regular source code by converting the test scripts in-memory before the test runs.
This commit is contained in:
George Marques 2024-01-22 11:31:55 -03:00
parent 41564aaf77
commit b4d0a09f15
No known key found for this signature in database
GPG key ID: 046BD46A3201E43D
26 changed files with 1010 additions and 119 deletions

View file

@ -256,7 +256,7 @@ String GDScriptTokenizer::get_token_name(Token::Type p_token_type) {
return token_names[p_token_type];
}
void GDScriptTokenizer::set_source_code(const String &p_source_code) {
void GDScriptTokenizerText::set_source_code(const String &p_source_code) {
source = p_source_code;
if (source.is_empty()) {
_source = U"";
@ -270,34 +270,34 @@ void GDScriptTokenizer::set_source_code(const String &p_source_code) {
position = 0;
}
void GDScriptTokenizer::set_cursor_position(int p_line, int p_column) {
void GDScriptTokenizerText::set_cursor_position(int p_line, int p_column) {
cursor_line = p_line;
cursor_column = p_column;
}
void GDScriptTokenizer::set_multiline_mode(bool p_state) {
void GDScriptTokenizerText::set_multiline_mode(bool p_state) {
multiline_mode = p_state;
}
void GDScriptTokenizer::push_expression_indented_block() {
void GDScriptTokenizerText::push_expression_indented_block() {
indent_stack_stack.push_back(indent_stack);
}
void GDScriptTokenizer::pop_expression_indented_block() {
void GDScriptTokenizerText::pop_expression_indented_block() {
ERR_FAIL_COND(indent_stack_stack.size() == 0);
indent_stack = indent_stack_stack.back()->get();
indent_stack_stack.pop_back();
}
int GDScriptTokenizer::get_cursor_line() const {
int GDScriptTokenizerText::get_cursor_line() const {
return cursor_line;
}
int GDScriptTokenizer::get_cursor_column() const {
int GDScriptTokenizerText::get_cursor_column() const {
return cursor_column;
}
bool GDScriptTokenizer::is_past_cursor() const {
bool GDScriptTokenizerText::is_past_cursor() const {
if (line < cursor_line) {
return false;
}
@ -310,7 +310,7 @@ bool GDScriptTokenizer::is_past_cursor() const {
return true;
}
char32_t GDScriptTokenizer::_advance() {
char32_t GDScriptTokenizerText::_advance() {
if (unlikely(_is_at_end())) {
return '\0';
}
@ -329,11 +329,11 @@ char32_t GDScriptTokenizer::_advance() {
return _peek(-1);
}
void GDScriptTokenizer::push_paren(char32_t p_char) {
void GDScriptTokenizerText::push_paren(char32_t p_char) {
paren_stack.push_back(p_char);
}
bool GDScriptTokenizer::pop_paren(char32_t p_expected) {
bool GDScriptTokenizerText::pop_paren(char32_t p_expected) {
if (paren_stack.is_empty()) {
return false;
}
@ -343,13 +343,13 @@ bool GDScriptTokenizer::pop_paren(char32_t p_expected) {
return actual == p_expected;
}
GDScriptTokenizer::Token GDScriptTokenizer::pop_error() {
GDScriptTokenizer::Token GDScriptTokenizerText::pop_error() {
Token error = error_stack.back()->get();
error_stack.pop_back();
return error;
}
GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) {
GDScriptTokenizer::Token GDScriptTokenizerText::make_token(Token::Type p_type) {
Token token(p_type);
token.start_line = start_line;
token.end_line = line;
@ -408,35 +408,35 @@ GDScriptTokenizer::Token GDScriptTokenizer::make_token(Token::Type p_type) {
return token;
}
GDScriptTokenizer::Token GDScriptTokenizer::make_literal(const Variant &p_literal) {
GDScriptTokenizer::Token GDScriptTokenizerText::make_literal(const Variant &p_literal) {
Token token = make_token(Token::LITERAL);
token.literal = p_literal;
return token;
}
GDScriptTokenizer::Token GDScriptTokenizer::make_identifier(const StringName &p_identifier) {
GDScriptTokenizer::Token GDScriptTokenizerText::make_identifier(const StringName &p_identifier) {
Token identifier = make_token(Token::IDENTIFIER);
identifier.literal = p_identifier;
return identifier;
}
GDScriptTokenizer::Token GDScriptTokenizer::make_error(const String &p_message) {
GDScriptTokenizer::Token GDScriptTokenizerText::make_error(const String &p_message) {
Token error = make_token(Token::ERROR);
error.literal = p_message;
return error;
}
void GDScriptTokenizer::push_error(const String &p_message) {
void GDScriptTokenizerText::push_error(const String &p_message) {
Token error = make_error(p_message);
error_stack.push_back(error);
}
void GDScriptTokenizer::push_error(const Token &p_error) {
void GDScriptTokenizerText::push_error(const Token &p_error) {
error_stack.push_back(p_error);
}
GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(char32_t p_paren) {
GDScriptTokenizer::Token GDScriptTokenizerText::make_paren_error(char32_t p_paren) {
if (paren_stack.is_empty()) {
return make_error(vformat("Closing \"%c\" doesn't have an opening counterpart.", p_paren));
}
@ -445,7 +445,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::make_paren_error(char32_t p_paren) {
return error;
}
GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, Token::Type p_double_type) {
GDScriptTokenizer::Token GDScriptTokenizerText::check_vcs_marker(char32_t p_test, Token::Type p_double_type) {
const char32_t *next = _current + 1;
int chars = 2; // Two already matched.
@ -469,7 +469,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::check_vcs_marker(char32_t p_test, To
}
}
GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
GDScriptTokenizer::Token GDScriptTokenizerText::annotation() {
if (is_unicode_identifier_start(_peek())) {
_advance(); // Consume start character.
} else {
@ -550,7 +550,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::annotation() {
#define MAX_KEYWORD_LENGTH 10
#ifdef DEBUG_ENABLED
void GDScriptTokenizer::make_keyword_list() {
void GDScriptTokenizerText::make_keyword_list() {
#define KEYWORD_LINE(keyword, token_type) keyword,
#define KEYWORD_GROUP_IGNORE(group)
keyword_list = {
@ -561,7 +561,7 @@ void GDScriptTokenizer::make_keyword_list() {
}
#endif // DEBUG_ENABLED
GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
GDScriptTokenizer::Token GDScriptTokenizerText::potential_identifier() {
bool only_ascii = _peek(-1) < 128;
// Consume all identifier characters.
@ -611,7 +611,9 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
static_assert(keyword_length <= MAX_KEYWORD_LENGTH, "There's a keyword longer than the defined maximum length"); \
static_assert(keyword_length >= MIN_KEYWORD_LENGTH, "There's a keyword shorter than the defined minimum length"); \
if (keyword_length == len && name == keyword) { \
return make_token(token_type); \
Token kw = make_token(token_type); \
kw.literal = name; \
return kw; \
} \
}
@ -646,7 +648,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() {
#undef MIN_KEYWORD_LENGTH
#undef KEYWORDS
void GDScriptTokenizer::newline(bool p_make_token) {
void GDScriptTokenizerText::newline(bool p_make_token) {
// Don't overwrite previous newline, nor create if we want a line continuation.
if (p_make_token && !pending_newline && !line_continuation) {
Token newline(Token::NEWLINE);
@ -667,7 +669,7 @@ void GDScriptTokenizer::newline(bool p_make_token) {
leftmost_column = 1;
}
GDScriptTokenizer::Token GDScriptTokenizer::number() {
GDScriptTokenizer::Token GDScriptTokenizerText::number() {
int base = 10;
bool has_decimal = false;
bool has_exponent = false;
@ -868,7 +870,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::number() {
}
}
GDScriptTokenizer::Token GDScriptTokenizer::string() {
GDScriptTokenizer::Token GDScriptTokenizerText::string() {
enum StringType {
STRING_REGULAR,
STRING_NAME,
@ -1154,7 +1156,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::string() {
return make_literal(string);
}
void GDScriptTokenizer::check_indent() {
void GDScriptTokenizerText::check_indent() {
ERR_FAIL_COND_MSG(column != 1, "Checking tokenizer indentation in the middle of a line.");
if (_is_at_end()) {
@ -1323,13 +1325,13 @@ void GDScriptTokenizer::check_indent() {
}
}
String GDScriptTokenizer::_get_indent_char_name(char32_t ch) {
String GDScriptTokenizerText::_get_indent_char_name(char32_t ch) {
ERR_FAIL_COND_V(ch != ' ' && ch != '\t', String(&ch, 1).c_escape());
return ch == ' ' ? "space" : "tab";
}
void GDScriptTokenizer::_skip_whitespace() {
void GDScriptTokenizerText::_skip_whitespace() {
if (pending_indents != 0) {
// Still have some indent/dedent tokens to give.
return;
@ -1391,7 +1393,7 @@ void GDScriptTokenizer::_skip_whitespace() {
}
}
GDScriptTokenizer::Token GDScriptTokenizer::scan() {
GDScriptTokenizer::Token GDScriptTokenizerText::scan() {
if (has_error()) {
return pop_error();
}
@ -1453,6 +1455,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
if (_peek() != '\n') {
return make_error("Expected new line after \"\\\".");
}
continuation_lines.push_back(line);
_advance();
newline(false);
line_continuation = true;
@ -1673,7 +1676,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::scan() {
}
}
GDScriptTokenizer::GDScriptTokenizer() {
GDScriptTokenizerText::GDScriptTokenizerText() {
#ifdef TOOLS_ENABLED
if (EditorSettings::get_singleton()) {
tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");