Skip to content

Commit

Permalink
GDScript: Enable compression on export
Browse files Browse the repository at this point in the history
Besides the regular option to export GDScript as binary tokens, this
also includes a compression option on top of it. The binary format
needs to encode some information which generally makes it bigger than
the source text. This option reduces that difference by using Zstandard
compression on the buffer.
  • Loading branch information
vnen committed Feb 8, 2024
1 parent b4d0a09 commit 72e5f8c
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 44 deletions.
2 changes: 1 addition & 1 deletion editor/export/editor_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ void EditorExport::load_config() {
preset->set_include_filter(config->get_value(section, "include_filter"));
preset->set_exclude_filter(config->get_value(section, "exclude_filter"));
preset->set_export_path(config->get_value(section, "export_path", ""));
preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS));
preset->set_script_export_mode(config->get_value(section, "script_export_mode", EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED));

if (config->has_section_key(section, "encrypt_pck")) {
preset->set_enc_pck(config->get_value(section, "encrypt_pck"));
Expand Down
3 changes: 2 additions & 1 deletion editor/export/editor_export_preset.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ class EditorExportPreset : public RefCounted {
// Selects how GDScript files are written into an exported project.
// The export dialog lists these as "Text", "Binary tokens" and
// "Compressed binary tokens".
enum ScriptExportMode {
// Export the script as text.
MODE_SCRIPT_TEXT,
// Export the script as binary tokens, without compression.
MODE_SCRIPT_BINARY_TOKENS,
// Export the script as binary tokens with Zstandard compression applied.
MODE_SCRIPT_BINARY_TOKENS_COMPRESSED,
};

private:
Expand Down Expand Up @@ -89,7 +90,7 @@ class EditorExportPreset : public RefCounted {
bool enc_directory = false;

String script_key;
int script_mode = MODE_SCRIPT_BINARY_TOKENS;
int script_mode = MODE_SCRIPT_BINARY_TOKENS_COMPRESSED;

protected:
bool _set(const StringName &p_name, const Variant &p_value);
Expand Down
2 changes: 2 additions & 0 deletions editor/export/project_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1398,7 +1398,9 @@ ProjectExportDialog::ProjectExportDialog() {
script_vb->add_margin_child(TTR("GDScript Export Mode:"), script_mode);
script_mode->add_item(TTR("Text (easier debugging)"), (int)EditorExportPreset::MODE_SCRIPT_TEXT);
script_mode->add_item(TTR("Binary tokens (faster loading)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS);
script_mode->add_item(TTR("Compressed binary tokens (smaller files)"), (int)EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED);
script_mode->connect("item_selected", callable_mp(this, &ProjectExportDialog::_script_export_mode_changed));

sections->add_child(script_vb);

sections->connect("tab_changed", callable_mp(this, &ProjectExportDialog::_tab_changed));
Expand Down
2 changes: 1 addition & 1 deletion modules/gdscript/gdscript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,7 @@ const Vector<uint8_t> &GDScript::get_binary_tokens_source() const {

// Runs this script's `source` through GDScriptTokenizerBuffer::parse_code_string()
// and returns the resulting byte buffer.
// NOTE(review): the two return statements are the removed/added pair of this
// one-line change; as written only the first is reachable. The second one
// explicitly requests an uncompressed buffer (COMPRESS_NONE).
Vector<uint8_t> GDScript::get_as_binary_tokens() const {
GDScriptTokenizerBuffer tokenizer;
return tokenizer.parse_code_string(source);
return tokenizer.parse_code_string(source, GDScriptTokenizerBuffer::COMPRESS_NONE);
}

const HashMap<StringName, GDScriptFunction *> &GDScript::debug_get_member_functions() const {
Expand Down
3 changes: 3 additions & 0 deletions modules/gdscript/gdscript_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,7 @@ Error GDScriptParser::parse(const String &p_source_code, const String &p_script_
pop_multiline();

memdelete(text_tokenizer);
tokenizer = nullptr;

#ifdef DEBUG_ENABLED
if (multiline_stack.size() > 0) {
Expand All @@ -384,6 +385,7 @@ Error GDScriptParser::parse_binary(const Vector<uint8_t> &p_binary, const String
Error err = buffer_tokenizer->set_code_buffer(p_binary);

if (err) {
memdelete(buffer_tokenizer);
return err;
}

Expand All @@ -404,6 +406,7 @@ Error GDScriptParser::parse_binary(const Vector<uint8_t> &p_binary, const String
pop_multiline();

memdelete(buffer_tokenizer);
tokenizer = nullptr;

if (errors.is_empty()) {
return OK;
Expand Down
2 changes: 1 addition & 1 deletion modules/gdscript/gdscript_tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ void GDScriptTokenizerText::push_expression_indented_block() {
}

// Restores `indent_stack` from the most recently saved entry of
// `indent_stack_stack` and removes that entry.
// Bails out through ERR_FAIL_COND when there is no saved entry to restore.
void GDScriptTokenizerText::pop_expression_indented_block() {
// NOTE(review): removed/added pair of this one-line change; both guards test
// the same "no saved indent stack" condition.
ERR_FAIL_COND(indent_stack_stack.size() == 0);
ERR_FAIL_COND(indent_stack_stack.is_empty());
// Take the last saved stack as the current one, then drop it from the saves.
indent_stack = indent_stack_stack.back()->get();
indent_stack_stack.pop_back();
}
Expand Down
106 changes: 71 additions & 35 deletions modules/gdscript/gdscript_tokenizer_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "gdscript_tokenizer_buffer.h"

#include "core/io/compression.h"
#include "core/io/marshalls.h"

#define TOKENIZER_VERSION 100
Expand Down Expand Up @@ -139,19 +140,31 @@ GDScriptTokenizer::Token GDScriptTokenizerBuffer::_binary_to_token(const uint8_t

Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer) {
const uint8_t *buf = p_buffer.ptr();
int total_len = p_buffer.size();
ERR_FAIL_COND_V(p_buffer.size() < 24 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);
ERR_FAIL_COND_V(p_buffer.size() < 12 || p_buffer[0] != 'G' || p_buffer[1] != 'D' || p_buffer[2] != 'S' || p_buffer[3] != 'C', ERR_INVALID_DATA);

int version = decode_uint32(&buf[4]);
ERR_FAIL_COND_V_MSG(version > TOKENIZER_VERSION, ERR_INVALID_DATA, "Binary GDScript is too recent! Please use a newer engine version.");

uint32_t identifier_count = decode_uint32(&buf[8]);
uint32_t constant_count = decode_uint32(&buf[12]);
uint32_t token_line_count = decode_uint32(&buf[16]);
uint32_t token_count = decode_uint32(&buf[20]);
int decompressed_size = decode_uint32(&buf[8]);

const uint8_t *b = &buf[24];
total_len -= 24;
Vector<uint8_t> contents;
if (decompressed_size == 0) {
contents = p_buffer.slice(12);
} else {
contents.resize(decompressed_size);
int result = Compression::decompress(contents.ptrw(), contents.size(), &buf[12], p_buffer.size() - 12, Compression::MODE_ZSTD);
ERR_FAIL_COND_V_MSG(result != decompressed_size, ERR_INVALID_DATA, "Error decompressing GDScript tokenizer buffer.");
}

int total_len = contents.size();
buf = contents.ptr();
uint32_t identifier_count = decode_uint32(&buf[0]);
uint32_t constant_count = decode_uint32(&buf[4]);
uint32_t token_line_count = decode_uint32(&buf[8]);
uint32_t token_count = decode_uint32(&buf[16]);

const uint8_t *b = &buf[20];
total_len -= 20;

identifiers.resize(identifier_count);
for (uint32_t i = 0; i < identifier_count; i++) {
Expand Down Expand Up @@ -226,9 +239,7 @@ Error GDScriptTokenizerBuffer::set_code_buffer(const Vector<uint8_t> &p_buffer)
return OK;
}

Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code) {
Vector<uint8_t> buf;

Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code, CompressMode p_compress_mode) {
HashMap<StringName, uint32_t> identifier_map;
HashMap<Variant, uint32_t, VariantHasher, VariantComparator> constant_map;
Vector<uint8_t> token_buffer;
Expand Down Expand Up @@ -280,36 +291,31 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code)
}
}

// Save header.
buf.resize(24);
buf.write[0] = 'G';
buf.write[1] = 'D';
buf.write[2] = 'S';
buf.write[3] = 'C';
encode_uint32(TOKENIZER_VERSION, &buf.write[4]);
encode_uint32(identifier_map.size(), &buf.write[8]);
encode_uint32(constant_map.size(), &buf.write[12]);
encode_uint32(token_lines.size(), &buf.write[16]);
encode_uint32(token_counter, &buf.write[20]);
Vector<uint8_t> contents;
contents.resize(20);
encode_uint32(identifier_map.size(), &contents.write[0]);
encode_uint32(constant_map.size(), &contents.write[4]);
encode_uint32(token_lines.size(), &contents.write[8]);
encode_uint32(token_counter, &contents.write[16]);

int buf_pos = 24;
int buf_pos = 20;

// Save identifiers.
for (const StringName &id : rev_identifier_map) {
String s = id.operator String();
int len = s.length();

buf.resize(buf_pos + (len + 1) * 4);
contents.resize(buf_pos + (len + 1) * 4);

encode_uint32(len, &buf.write[buf_pos]);
encode_uint32(len, &contents.write[buf_pos]);
buf_pos += 4;

for (int i = 0; i < len; i++) {
uint8_t tmp[4];
encode_uint32(s[i], tmp);

for (int b = 0; b < 4; b++) {
buf.write[buf_pos + b] = tmp[b] ^ 0xb6;
contents.write[buf_pos + b] = tmp[b] ^ 0xb6;
}

buf_pos += 4;
Expand All @@ -322,28 +328,58 @@ Vector<uint8_t> GDScriptTokenizerBuffer::parse_code_string(const String &p_code)
// Objects cannot be constant, never encode objects.
Error err = encode_variant(v, nullptr, len, false);
ERR_FAIL_COND_V_MSG(err != OK, Vector<uint8_t>(), "Error when trying to encode Variant.");
buf.resize(buf_pos + len);
encode_variant(v, &buf.write[buf_pos], len, false);
contents.resize(buf_pos + len);
encode_variant(v, &contents.write[buf_pos], len, false);
buf_pos += len;
}

// Save lines and columns.
buf.resize(buf_pos + token_lines.size() * 16);
contents.resize(buf_pos + token_lines.size() * 16);
for (const KeyValue<uint32_t, uint32_t> &e : token_lines) {
encode_uint32(e.key, &buf.write[buf_pos]);
encode_uint32(e.key, &contents.write[buf_pos]);
buf_pos += 4;
encode_uint32(e.value, &buf.write[buf_pos]);
encode_uint32(e.value, &contents.write[buf_pos]);
buf_pos += 4;
}
for (const KeyValue<uint32_t, uint32_t> &e : token_columns) {
encode_uint32(e.key, &buf.write[buf_pos]);
encode_uint32(e.key, &contents.write[buf_pos]);
buf_pos += 4;
encode_uint32(e.value, &buf.write[buf_pos]);
encode_uint32(e.value, &contents.write[buf_pos]);
buf_pos += 4;
}

// Store tokens.
buf.append_array(token_buffer);
contents.append_array(token_buffer);

Vector<uint8_t> buf;

// Save header.
buf.resize(12);
buf.write[0] = 'G';
buf.write[1] = 'D';
buf.write[2] = 'S';
buf.write[3] = 'C';
encode_uint32(TOKENIZER_VERSION, &buf.write[4]);

switch (p_compress_mode) {
case COMPRESS_NONE:
encode_uint32(0u, &buf.write[8]);
buf.append_array(contents);
break;

case COMPRESS_ZSTD: {
encode_uint32(contents.size(), &buf.write[8]);
Vector<uint8_t> compressed;
int max_size = Compression::get_max_compressed_buffer_size(contents.size(), Compression::MODE_ZSTD);
compressed.resize(max_size);

int compressed_size = Compression::compress(compressed.ptrw(), contents.ptr(), contents.size(), Compression::MODE_ZSTD);
ERR_FAIL_COND_V_MSG(compressed_size < 0, Vector<uint8_t>(), "Error compressing GDScript tokenizer buffer.");
compressed.resize(compressed_size);

buf.append_array(compressed);
} break;
}

return buf;
}
Expand Down Expand Up @@ -372,7 +408,7 @@ void GDScriptTokenizerBuffer::push_expression_indented_block() {
}

// Restores `indent_stack` from the most recently saved entry of
// `indent_stack_stack` and removes that entry.
// Bails out through ERR_FAIL_COND when there is no saved entry to restore.
void GDScriptTokenizerBuffer::pop_expression_indented_block() {
// NOTE(review): removed/added pair of this one-line change; both guards test
// the same "no saved indent stack" condition.
ERR_FAIL_COND(indent_stack_stack.size() == 0);
ERR_FAIL_COND(indent_stack_stack.is_empty());
// Take the last saved stack as the current one, then drop it from the saves.
indent_stack = indent_stack_stack.back()->get();
indent_stack_stack.pop_back();
}
Expand Down
8 changes: 7 additions & 1 deletion modules/gdscript/gdscript_tokenizer_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@
#include "gdscript_tokenizer.h"

class GDScriptTokenizerBuffer : public GDScriptTokenizer {
public:
// Selects whether parse_code_string() compresses the serialized contents
// that follow the 12-byte "GDSC" header.
enum CompressMode {
// Contents are appended as-is; the header's size field is written as 0.
COMPRESS_NONE,
// Contents are Zstandard-compressed; the header's size field holds the
// size of the contents before compression.
COMPRESS_ZSTD,
};

enum {
TOKEN_BYTE_MASK = 0x80,
TOKEN_BITS = 8,
Expand Down Expand Up @@ -64,7 +70,7 @@ class GDScriptTokenizerBuffer : public GDScriptTokenizer {

public:
Error set_code_buffer(const Vector<uint8_t> &p_buffer);
static Vector<uint8_t> parse_code_string(const String &p_code);
static Vector<uint8_t> parse_code_string(const String &p_code, CompressMode p_compress_mode);

virtual int get_cursor_line() const override;
virtual int get_cursor_column() const override;
Expand Down
5 changes: 3 additions & 2 deletions modules/gdscript/register_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class EditorExportGDScript : public EditorExportPlugin {

public:
virtual void _export_file(const String &p_path, const String &p_type, const HashSet<String> &p_features) override {
int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS;
int script_mode = EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED;

const Ref<EditorExportPreset> &preset = get_export_preset();

Expand All @@ -103,7 +103,8 @@ class EditorExportGDScript : public EditorExportPlugin {

String source;
source.parse_utf8(reinterpret_cast<const char *>(file.ptr()), file.size());
file = GDScriptTokenizerBuffer::parse_code_string(source);
GDScriptTokenizerBuffer::CompressMode compress_mode = script_mode == EditorExportPreset::MODE_SCRIPT_BINARY_TOKENS_COMPRESSED ? GDScriptTokenizerBuffer::COMPRESS_ZSTD : GDScriptTokenizerBuffer::COMPRESS_NONE;
file = GDScriptTokenizerBuffer::parse_code_string(source, compress_mode);
if (file.is_empty()) {
return;
}
Expand Down
2 changes: 1 addition & 1 deletion modules/gdscript/tests/gdscript_test_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ GDScriptTest::TestResult GDScriptTest::execute_test_code(bool p_is_generating) {
} else {
String code = FileAccess::get_file_as_string(source_file, &err);
if (!err) {
Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(code);
Vector<uint8_t> buffer = GDScriptTokenizerBuffer::parse_code_string(code, GDScriptTokenizerBuffer::COMPRESS_ZSTD);
script->set_binary_tokens_source(buffer);
}
}
Expand Down
2 changes: 1 addition & 1 deletion modules/gdscript/tests/test_gdscript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ static void test_tokenizer(const String &p_code, const Vector<String> &p_lines)
static void test_tokenizer_buffer(const Vector<uint8_t> &p_buffer, const Vector<String> &p_lines);

// Converts `p_code` into a binary token buffer with parse_code_string() and
// forwards it, together with `p_lines`, to the Vector<uint8_t> overload of
// test_tokenizer_buffer().
static void test_tokenizer_buffer(const String &p_code, const Vector<String> &p_lines) {
// NOTE(review): removed/added pair of this one-line change; the two
// declarations of `binary` cannot coexist. The second one explicitly requests
// an uncompressed buffer (COMPRESS_NONE).
Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code);
Vector<uint8_t> binary = GDScriptTokenizerBuffer::parse_code_string(p_code, GDScriptTokenizerBuffer::COMPRESS_NONE);
test_tokenizer_buffer(binary, p_lines);
}

Expand Down

0 comments on commit 72e5f8c

Please sign in to comment.