Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement OP_WRITE_RAW on the instructions buffer #86

Merged
merged 8 commits into from
Nov 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 20 additions & 14 deletions ext/liquid_c/block.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "liquid.h"
#include "block.h"
#include "intutil.h"
#include "tokenizer.h"
#include "stringutil.h"
#include "vm.h"
Expand Down Expand Up @@ -32,7 +33,6 @@ typedef struct parse_context {
// Marks the Ruby values reachable from a block body: first the value stored
// in body->source, then whatever vm_assembler_gc_mark marks for body->code.
// ptr is the block_body_t passed in as an untyped pointer.
// NOTE(review): this text comes from a diff view without +/- markers; the
// body->source line may be one that the change removes -- confirm against
// the repository before relying on it.
static void block_body_mark(void *ptr)
{
block_body_t *body = ptr;
rb_gc_mark(body->source);
vm_assembler_gc_mark(&body->code);
}

Expand Down Expand Up @@ -66,7 +66,6 @@ static VALUE block_body_allocate(VALUE klass)
vm_assembler_init(&body->code);
vm_assembler_add_leave(&body->code);
body->obj = obj;
body->source = Qnil;
body->render_score = 0;
body->blank = true;
body->nodelist = Qundef;
Expand Down Expand Up @@ -230,11 +229,6 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte
BlockBody_Get_Struct(self, body);

ensure_not_parsing(body);
if (body->source == Qnil) {
body->source = parse_context.tokenizer->source;
} else if (body->source != parse_context.tokenizer->source) {
rb_raise(rb_eArgError, "Liquid::C::BlockBody#parse must be passed the same tokenizer when called multiple times");
}
vm_assembler_remove_leave(&body->code); // to extend block

tag_markup_t unknown_tag = internal_block_body_parse(body, &parse_context);
Expand Down Expand Up @@ -272,17 +266,21 @@ static VALUE block_body_remove_blank_strings(VALUE self)
ensure_not_parsing(body);

size_t *const_ptr = (size_t *)body->code.constants.data;
const uint8_t *ip = body->code.instructions.data;
uint8_t *ip = (uint8_t *)body->code.instructions.data;

while (*ip != OP_LEAVE) {
if (*ip == OP_WRITE_RAW) {
size_t *size_ptr = &const_ptr[1];
if (*size_ptr) {
*size_ptr = 0; // effectively a no-op
if (ip[1]) { // if (size != 0)
ip[0] = OP_JUMP_FWD; // effectively a no-op
body->render_score--;
}
} else if (*ip == OP_WRITE_RAW_W) {
if (ip[1] || ip[2] || ip[3]) { // if (size != 0)
ip[0] = OP_JUMP_FWD_W; // effectively a no-op
body->render_score--;
}
}
liquid_vm_next_instruction(&ip, (const size_t **)&const_ptr);
liquid_vm_next_instruction((const uint8_t **)&ip, (const size_t **)&const_ptr);
}

return Qnil;
Expand Down Expand Up @@ -316,10 +314,18 @@ static VALUE block_body_nodelist(VALUE self)
switch (*ip) {
case OP_LEAVE:
goto loop_break;
case OP_WRITE_RAW_W:
case OP_WRITE_RAW:
{
const char *text = (const char *)const_ptr[0];
size_t size = const_ptr[1];
const char *text;
size_t size;
if (*ip == OP_WRITE_RAW_W) {
size = bytes_to_uint24(&ip[1]);
text = (const char *)&ip[4];
} else {
size = ip[1];
text = (const char *)&ip[2];
}
VALUE string = rb_enc_str_new(text, size, utf8_encoding);
rb_ary_push(nodelist, string);
break;
Expand Down
1 change: 0 additions & 1 deletion ext/liquid_c/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
typedef struct block_body {
VALUE obj;
vm_assembler_t code;
VALUE source; // hold a reference to the ruby object that OP_WRITE_RAW points to
bool blank;
int render_score;
VALUE nodelist;
Expand Down
22 changes: 22 additions & 0 deletions ext/liquid_c/intutil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef LIQUID_INTUTIL_H
#define LIQUID_INTUTIL_H

/*
 * Helpers for 24-bit unsigned integers stored as three bytes, most
 * significant byte first.
 */

#include <assert.h> /* uint24_to_bytes uses assert(); keep this header self-contained */
#include <stdint.h>

/*
 * Decode the three bytes at `bytes` (most significant first) into an
 * unsigned integer in the range [0, 2^24).
 */
static inline unsigned int bytes_to_uint24(const uint8_t *bytes)
{
    /* Widen to unsigned before shifting so the arithmetic is never signed. */
    return ((unsigned int)bytes[0] << 16) | ((unsigned int)bytes[1] << 8) | (unsigned int)bytes[2];
}

/*
 * Encode `num` into the three bytes at `bytes`, most significant first.
 *
 * `num` must be below 2^24. This is checked with assert() only, so when
 * assertions are compiled out a larger value is silently truncated to its
 * low 24 bits.
 */
static inline void uint24_to_bytes(unsigned int num, uint8_t *bytes)
{
    assert(num < (1u << 24));

    bytes[0] = (uint8_t)(num >> 16);
    bytes[1] = (uint8_t)(num >> 8);
    bytes[2] = (uint8_t)num;

    /* Round-trip check: decoding must give back the value just encoded. */
    assert(bytes_to_uint24(bytes) == num);
}

#endif
45 changes: 41 additions & 4 deletions ext/liquid_c/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "resource_limits.h"
#include "context.h"
#include "variable_lookup.h"
#include "intutil.h"

ID id_render_node;
ID id_ivar_interrupts;
Expand Down Expand Up @@ -355,14 +356,38 @@ static VALUE vm_render_until_error(VALUE uncast_args)

// Rendering instructions

case OP_WRITE_RAW_W:
case OP_WRITE_RAW:
{
const char *text = (const char *)*const_ptr++;
size_t size = *const_ptr++;
const char *text;
size_t size;
if (ip[-1] == OP_WRITE_RAW_W) {
size = bytes_to_uint24(ip);
text = (const char *)&ip[3];
ip += 3 + size;
} else {
size = *ip;
text = (const char *)&ip[1];
ip += 1 + size;
}
rb_str_cat(output, text, size);
resource_limits_increment_write_score(vm->resource_limits, output);
break;
}
case OP_JUMP_FWD_W:
{
size_t size = bytes_to_uint24(ip);
ip += 3 + size;
break;
}

case OP_JUMP_FWD:
{
uint8_t size = *ip;
ip += 1 + size;
break;
}

case OP_WRITE_NODE:
rb_funcall(cLiquidBlockBody, id_render_node, 3, args->context, output, (VALUE)*const_ptr++);
if (RARRAY_LEN(vm->interrupts)) {
Expand Down Expand Up @@ -459,9 +484,21 @@ void liquid_vm_next_instruction(const uint8_t **ip_ptr, const size_t **const_ptr
(*const_ptr_ptr)++;
break;

case OP_WRITE_RAW_W:
case OP_JUMP_FWD_W:
{
size_t size = bytes_to_uint24(ip);
ip += 3 + size;
break;
}

case OP_WRITE_RAW:
(*const_ptr_ptr) += 2;
case OP_JUMP_FWD:
{
uint8_t size = *ip;
ip += 1 + size;
break;
}

default:
rb_bug("invalid opcode: %u", ip[-1]);
Expand Down Expand Up @@ -506,7 +543,7 @@ static VALUE vm_render_rescue(VALUE uncast_args, VALUE exception)

VALUE line_number = Qnil;
if (render_args->node_line_number) {
unsigned int node_line_number = decode_node_line_number(render_args->node_line_number);
unsigned int node_line_number = bytes_to_uint24(render_args->node_line_number);
if (node_line_number != 0) {
line_number = UINT2NUM(node_line_number);
}
Expand Down
84 changes: 26 additions & 58 deletions ext/liquid_c/vm_assembler.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,57 +21,7 @@ void vm_assembler_free(vm_assembler_t *code)

// Marks the Ruby values referenced by the assembled code.
//
// The loop walks the instruction buffer from instructions.data up to
// instructions.data_end. For each opcode it advances ip past that opcode's
// operand bytes and, where the opcode consumes a constant, calls rb_gc_mark
// on that constant and advances const_ptr. After the loop,
// c_buffer_rb_gc_mark is called on the constants buffer.
//
// NOTE(review): this text comes from a diff view without +/- markers; the
// instruction-walking loop and the final c_buffer_rb_gc_mark call look like
// the removed and the added implementation respectively -- confirm against
// the repository.
void vm_assembler_gc_mark(vm_assembler_t *code)
{
size_t *const_ptr = (size_t *)code->constants.data;
const uint8_t *ip = code->instructions.data;
// Don't rely on a terminating OP_LEAVE instruction
// since this could be called in the middle of parsing
const uint8_t *end_ip = code->instructions.data_end;
while (ip < end_ip) {
switch (*ip++) {
// Opcodes with no operand bytes and no constant.
case OP_LEAVE:
case OP_POP_WRITE:
case OP_PUSH_NIL:
case OP_PUSH_TRUE:
case OP_PUSH_FALSE:
case OP_FIND_VAR:
case OP_LOOKUP_KEY:
case OP_NEW_INT_RANGE:
break;

// One operand byte, no constant.
case OP_HASH_NEW:
case OP_PUSH_INT8:
ip++;
break;

// Two operand bytes, no constant.
case OP_PUSH_INT16:
ip += 2;
break;

// Three operand bytes, no constant.
case OP_RENDER_VARIABLE_RESCUE:
ip += 3;
break;

// Two constant slots are skipped without being marked.
case OP_WRITE_RAW:
const_ptr += 2;
break;

// One constant, which is marked.
case OP_WRITE_NODE:
case OP_PUSH_CONST:
case OP_FIND_STATIC_VAR:
case OP_LOOKUP_CONST_KEY:
case OP_LOOKUP_COMMAND:
rb_gc_mark(*const_ptr++);
break;

// One operand byte plus one constant, which is marked.
case OP_FILTER:
ip++;
rb_gc_mark(*const_ptr++);
break;

default:
rb_bug("invalid opcode: %u", ip[-1]);
}
}
c_buffer_rb_gc_mark(&code->constants);
}

VALUE vm_assembler_disassemble(vm_assembler_t *code)
Expand Down Expand Up @@ -138,12 +88,23 @@ VALUE vm_assembler_disassemble(vm_assembler_t *code)
break;
dylanahsmith marked this conversation as resolved.
Show resolved Hide resolved
}

case OP_WRITE_RAW_W:
case OP_WRITE_RAW:
{
const char *text = (const char *)const_ptr[0];
size_t size = const_ptr[1];
const char *text;
size_t size;
const char *name;
if (*ip == OP_WRITE_RAW_W) {
name = "write_raw_w";
size = bytes_to_uint24(&ip[1]);
text = (const char *)&ip[4];
} else {
name = "write_raw";
size = ip[1];
text = (const char *)&ip[2];
}
VALUE string = rb_enc_str_new(text, size, utf8_encoding);
rb_str_catf(output, "write_raw(%+"PRIsVALUE")\n", string);
rb_str_catf(output, "%s(%+"PRIsVALUE")\n", name, string);
break;
}

Expand Down Expand Up @@ -202,10 +163,17 @@ void vm_assembler_require_stack_args(vm_assembler_t *code, unsigned int count)

// Appends an instruction that writes `size` bytes of raw text starting at
// `string`.
//
// NOTE(review): this text comes from a diff view without +/- markers; the
// first four statements (opcode plus two constant slots) and the if/else
// plus c_buffer_write look like the removed and the added implementation
// respectively -- confirm against the repository.
void vm_assembler_add_write_raw(vm_assembler_t *code, const char *string, size_t size)
{
// Emits OP_WRITE_RAW and stores the text pointer and the size in two
// consecutive constant slots.
vm_assembler_write_opcode(code, OP_WRITE_RAW);
VALUE *constants = c_buffer_extend_for_write(&code->constants, 2 * sizeof(VALUE));
constants[0] = (size_t)string;
constants[1] = size;
// Picks the encoding by size: sizes above UINT8_MAX use OP_WRITE_RAW_W with
// a 3-byte size, anything else uses OP_WRITE_RAW with a 1-byte size.
if (size > UINT8_MAX) {
uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 4);
instructions[0] = OP_WRITE_RAW_W;
// NOTE(review): size is narrowed to unsigned int here and there is no
// explicit upper-bound check in this function -- confirm how sizes of
// 2^24 or more are meant to be handled.
uint24_to_bytes((unsigned int)size, &instructions[1]);
} else {
uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 2);
instructions[0] = OP_WRITE_RAW;
instructions[1] = size;
}

// Writes the text bytes into the instruction buffer, after the header
// emitted above.
c_buffer_write(&code->instructions, (char *)string, size);
}

void vm_assembler_add_write_node(vm_assembler_t *code, VALUE node)
Expand Down
11 changes: 7 additions & 4 deletions ext/liquid_c/vm_assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
#include <assert.h>
#include "liquid.h"
#include "c_buffer.h"
#include "intutil.h"

enum opcode {
OP_LEAVE = 0,
OP_WRITE_RAW = 1,
OP_WRITE_RAW_W = 1,
OP_WRITE_NODE = 2,
OP_POP_WRITE,
OP_WRITE_RAW_SKIP,
OP_PUSH_CONST,
OP_PUSH_NIL,
OP_PUSH_TRUE,
Expand All @@ -25,6 +27,9 @@ enum opcode {
OP_HASH_NEW, // rb_hash_new & rb_hash_bulk_insert
OP_FILTER,
OP_RENDER_VARIABLE_RESCUE, // setup state to rescue variable rendering
OP_WRITE_RAW,
OP_JUMP_FWD_W,
OP_JUMP_FWD,
};

typedef struct vm_assembler {
Expand Down Expand Up @@ -214,9 +219,7 @@ static inline void vm_assembler_add_render_variable_rescue(vm_assembler_t *code,
{
uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 4);
instructions[0] = OP_RENDER_VARIABLE_RESCUE;
instructions[1] = node_line_number >> 16;
instructions[2] = node_line_number >> 8;
instructions[3] = node_line_number;
uint24_to_bytes((unsigned int)node_line_number, &instructions[1]);
}

#endif
36 changes: 26 additions & 10 deletions test/unit/block_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ def test_write_unicode_characters
assert_equal("üñ", template.render!({ 'unicode_char' => 'ñ' }, output: output))
end

# A 2**8-byte raw template is one byte longer than a one-byte size can
# describe; it must still render back unchanged.
def test_op_write_raw_w
  raw_text = "a" * 2**8
  rendered = Liquid::Template.parse(raw_text).render!
  assert_equal(raw_text, rendered)
end

# A 2**8-byte raw template disassembles to a single write_raw_w instruction
# at offset 0x0000, with the leave instruction following at 0x0104.
def test_disassemble_raw_w
  raw_text = "a" * 2**8
  body = Liquid::Template.parse(raw_text).root.body
  expected_listing = <<~ASM
    0x0000: write_raw_w("#{raw_text}")
    0x0104: leave
  ASM
  assert_equal(expected_listing, body.disassemble)
end

def test_disassemble
source = <<~LIQUID
raw
Expand All @@ -37,16 +53,16 @@ def test_disassemble
assert_instance_of(Liquid::Increment, increment_node)
assert_equal(<<~ASM, block_body.disassemble)
0x0000: write_raw("raw")
0x0001: render_variable_rescue(line_number: 2)
0x0005: find_static_var("var")
0x0006: push_const("none")
0x0007: push_const("allow_false")
0x0008: push_true
0x0009: hash_new(1)
0x000b: filter(name: :default, num_args: 3)
0x000d: pop_write
0x000e: write_node(#{increment_node.inspect})
0x000f: leave
0x0005: render_variable_rescue(line_number: 2)
0x0009: find_static_var("var")
0x000a: push_const("none")
0x000b: push_const("allow_false")
0x000c: push_true
0x000d: hash_new(1)
0x000f: filter(name: :default, num_args: 3)
0x0011: pop_write
0x0012: write_node(#{increment_node.inspect})
0x0013: leave
ASM
end
end