GDScript: Reintroduce binary tokenization on export

This adds back a function available in 3.x: exporting the GDScript files in a binary form by converting the tokens recognized by the tokenizer into a data format. It is enabled by default on export but can be manually disabled. The format helps with loading times since, the tokens are easily reconstructed, and with hiding the source code, since recovering it would require a specialized tool. Code comments are not stored in this format. The `--test` command can also include a `--use-binary-tokens` flag which will run the GDScript tests with the binary format instead of the regular source code by converting them in-memory before the test runs.
2025-12-08 06:09:55 +00:00 · 2024-01-22 11:31:55 -03:00 · 2024-01-22 11:31:55 -03:00 · b4d0a09f15
commit b4d0a09f15
parent 41564aaf77
26 changed files with 1010 additions and 119 deletions
--- a/modules/gdscript/tests/test_gdscript.cpp
+++ b/modules/gdscript/tests/test_gdscript.cpp
@ -34,6 +34,7 @@
 #include "../gdscript_compiler.h"
 #include "../gdscript_parser.h"
 #include "../gdscript_tokenizer.h"
+#include "../gdscript_tokenizer_buffer.h"

 #include "core/config/project_settings.h"
 #include "core/io/file_access.h"
@ -50,7 +51,7 @@
 namespace GDScriptTests {

 static void test_tokenizer(const String &p_code, const Vector<String> &p_lines) {
-	GDScriptTokenizer tokenizer;
+	GDScriptTokenizerText tokenizer;
 	tokenizer.set_source_code(p_code);

 	int tab_size = 4;
@ -107,6 +108,53 @@ static void test_tokenizer(const String &p_code, const Vector<String> &p_lines)
 	print_line(current.get_name()); // Should be EOF
 }

+static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines);
+
+static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) {
+	Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code);
+	test_tokenizer_buffer(binary, p_lines);
+}
+
+static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines) {
+	GDScriptTokenizerBuffer tokenizer;
+	tokenizer.set_code_buffer(p_buffer);
+
+	int tab_size = 4;
+#ifdef TOOLS_ENABLED
+	if (EditorSettings::get_singleton()) {
+		tab_size = EditorSettings::get_singleton()->get_setting("text_editor/behavior/indent/size");
+	}
+#endif // TOOLS_ENABLED
+	String tab = String(" ").repeat(tab_size);
+
+	GDScriptTokenizer::Token current = tokenizer.scan();
+	while (current.type != GDScriptTokenizer::Token::TK_EOF) {
+		StringBuilder token;
+		token += " --> "; // Padding for line number.
+
+		for (int l = current.start_line; l <= current.end_line && l <= p_lines.size(); l++) {
+			print_line(vformat("%04d %s", l, p_lines[l - 1]).replace("\t", tab));
+		}
+
+		token += current.get_name();
+
+		if (current.type == GDScriptTokenizer::Token::ERROR || current.type == GDScriptTokenizer::Token::LITERAL || current.type == GDScriptTokenizer::Token::IDENTIFIER || current.type == GDScriptTokenizer::Token::ANNOTATION) {
+			token += "(";
+			token += Variant::get_type_name(current.literal.get_type());
+			token += ") ";
+			token += current.literal;
+		}
+
+		print_line(token.as_string());
+
+		print_line("-------------------------------------------------------");
+
+		current = tokenizer.scan();
+	}
+
+	print_line(current.get_name()); // Should be EOF
+}
+
 static void test_parser(const String &p_code, const String &p_script_path, const Vector<String> &p_lines) {
 	GDScriptParser parser;
 	Error err = parser.parse(p_code, p_script_path, false);
@ -119,7 +167,7 @@ static void test_parser(const String &p_code, const String &p_script_path, const
 	}

 	GDScriptAnalyzer analyzer(&parser);
-	analyzer.analyze();
+	err = analyzer.analyze();

 	if (err != OK) {
 		const List<GDScriptParser::ParserError> &errors = parser.get_errors();
@ -212,7 +260,7 @@ void test(TestType p_type) {
 	}

 	String test = cmdlargs.back()->get();
-	if (!test.ends_with(".gd")) {
+	if (!test.ends_with(".gd") && !test.ends_with(".gdc")) {
 		print_line("This test expects a path to a GDScript file as its last parameter. Got: " + test);
 		return;
 	}
@ -255,6 +303,13 @@ void test(TestType p_type) {
 		case TEST_TOKENIZER:
 			test_tokenizer(code, lines);
 			break;
+		case TEST_TOKENIZER_BUFFER:
+			if (test.ends_with(".gdc")) {
+				test_tokenizer_buffer(buf, lines);
+			} else {
+				test_tokenizer_buffer(code, lines);
+			}
+			break;
 		case TEST_PARSER:
 			test_parser(code, test, lines);
 			break;