Skip to content

Commit

Permalink
Merge pull request #86 from Shopify/pz-immediate-raw
Browse files Browse the repository at this point in the history
Implement OP_WRITE_RAW on the instructions buffer
  • Loading branch information
peterzhu2118 authored Nov 2, 2020
2 parents 1dc9459 + 971611f commit ac408fe
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 91 deletions.
34 changes: 20 additions & 14 deletions ext/liquid_c/block.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "liquid.h"
#include "block.h"
#include "intutil.h"
#include "tokenizer.h"
#include "stringutil.h"
#include "vm.h"
Expand Down Expand Up @@ -32,7 +33,6 @@ typedef struct parse_context {
// Marks the Ruby values reachable from a block body so they are kept alive.
// `ptr` is treated as a block_body_t * with no further checking.
static void block_body_mark(void *ptr)
{
block_body_t *body = ptr;
// NOTE(review): this listing is a rendered diff without +/- markers; this line
// looks like the one removed by the commit (the block.h hunk drops the
// `source` field) -- confirm against the final file.
rb_gc_mark(body->source);
// Delegate marking of everything referenced by the assembled code.
vm_assembler_gc_mark(&body->code);
}

Expand Down Expand Up @@ -66,7 +66,6 @@ static VALUE block_body_allocate(VALUE klass)
vm_assembler_init(&body->code);
vm_assembler_add_leave(&body->code);
body->obj = obj;
body->source = Qnil;
body->render_score = 0;
body->blank = true;
body->nodelist = Qundef;
Expand Down Expand Up @@ -230,11 +229,6 @@ static VALUE block_body_parse(VALUE self, VALUE tokenizer_obj, VALUE parse_conte
BlockBody_Get_Struct(self, body);

ensure_not_parsing(body);
if (body->source == Qnil) {
body->source = parse_context.tokenizer->source;
} else if (body->source != parse_context.tokenizer->source) {
rb_raise(rb_eArgError, "Liquid::C::BlockBody#parse must be passed the same tokenizer when called multiple times");
}
vm_assembler_remove_leave(&body->code); // to extend block

tag_markup_t unknown_tag = internal_block_body_parse(body, &parse_context);
Expand Down Expand Up @@ -272,17 +266,21 @@ static VALUE block_body_remove_blank_strings(VALUE self)
ensure_not_parsing(body);

size_t *const_ptr = (size_t *)body->code.constants.data;
const uint8_t *ip = body->code.instructions.data;
uint8_t *ip = (uint8_t *)body->code.instructions.data;

while (*ip != OP_LEAVE) {
if (*ip == OP_WRITE_RAW) {
size_t *size_ptr = &const_ptr[1];
if (*size_ptr) {
*size_ptr = 0; // effectively a no-op
if (ip[1]) { // if (size != 0)
ip[0] = OP_JUMP_FWD; // effectively a no-op
body->render_score--;
}
} else if (*ip == OP_WRITE_RAW_W) {
if (ip[1] || ip[2] || ip[3]) { // if (size != 0)
ip[0] = OP_JUMP_FWD_W; // effectively a no-op
body->render_score--;
}
}
liquid_vm_next_instruction(&ip, (const size_t **)&const_ptr);
liquid_vm_next_instruction((const uint8_t **)&ip, (const size_t **)&const_ptr);
}

return Qnil;
Expand Down Expand Up @@ -316,10 +314,18 @@ static VALUE block_body_nodelist(VALUE self)
switch (*ip) {
case OP_LEAVE:
goto loop_break;
case OP_WRITE_RAW_W:
case OP_WRITE_RAW:
{
const char *text = (const char *)const_ptr[0];
size_t size = const_ptr[1];
const char *text;
size_t size;
if (*ip == OP_WRITE_RAW_W) {
size = bytes_to_uint24(&ip[1]);
text = (const char *)&ip[4];
} else {
size = ip[1];
text = (const char *)&ip[2];
}
VALUE string = rb_enc_str_new(text, size, utf8_encoding);
rb_ary_push(nodelist, string);
break;
Expand Down
1 change: 0 additions & 1 deletion ext/liquid_c/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
typedef struct block_body {
VALUE obj;
vm_assembler_t code;
VALUE source; // hold a reference to the ruby object that OP_WRITE_RAW points to
bool blank;
int render_score;
VALUE nodelist;
Expand Down
22 changes: 22 additions & 0 deletions ext/liquid_c/intutil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef LIQUID_INTUTIL_H
#define LIQUID_INTUTIL_H

/* assert() is used below, so include its header here rather than relying on
 * every includer having pulled in <assert.h> first. */
#include <assert.h>
#include <stdint.h>

/*
 * Decode a 24-bit unsigned integer from three consecutive bytes.
 *
 * bytes: pointer to at least 3 readable bytes, most significant byte first.
 * Returns the decoded value, in the range [0, 2^24).
 */
static inline unsigned int bytes_to_uint24(const uint8_t *bytes)
{
    /* Widen to unsigned before shifting so the arithmetic never happens on a
     * promoted (signed) int. */
    return ((unsigned int)bytes[0] << 16) | ((unsigned int)bytes[1] << 8) | (unsigned int)bytes[2];
}

/*
 * Encode a 24-bit unsigned integer into three consecutive bytes, most
 * significant byte first (the inverse of bytes_to_uint24).
 *
 * num:   value to encode; must be < 2^24 (checked with assert only).
 * bytes: pointer to at least 3 writable bytes.
 */
static inline void uint24_to_bytes(unsigned int num, uint8_t *bytes)
{
    assert(num < (1u << 24));

    /* Explicit narrowing: each store keeps only the low 8 bits of the shifted value. */
    bytes[0] = (uint8_t)(num >> 16);
    bytes[1] = (uint8_t)(num >> 8);
    bytes[2] = (uint8_t)num;

    /* Round-trip sanity check. */
    assert(bytes_to_uint24(bytes) == num);
}

#endif
45 changes: 41 additions & 4 deletions ext/liquid_c/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "resource_limits.h"
#include "context.h"
#include "variable_lookup.h"
#include "intutil.h"

ID id_render_node;
ID id_ivar_interrupts;
Expand Down Expand Up @@ -355,14 +356,38 @@ static VALUE vm_render_until_error(VALUE uncast_args)

// Rendering instructions

case OP_WRITE_RAW_W:
case OP_WRITE_RAW:
{
const char *text = (const char *)*const_ptr++;
size_t size = *const_ptr++;
const char *text;
size_t size;
if (ip[-1] == OP_WRITE_RAW_W) {
size = bytes_to_uint24(ip);
text = (const char *)&ip[3];
ip += 3 + size;
} else {
size = *ip;
text = (const char *)&ip[1];
ip += 1 + size;
}
rb_str_cat(output, text, size);
resource_limits_increment_write_score(vm->resource_limits, output);
break;
}
case OP_JUMP_FWD_W:
{
size_t size = bytes_to_uint24(ip);
ip += 3 + size;
break;
}

case OP_JUMP_FWD:
{
uint8_t size = *ip;
ip += 1 + size;
break;
}

case OP_WRITE_NODE:
rb_funcall(cLiquidBlockBody, id_render_node, 3, args->context, output, (VALUE)*const_ptr++);
if (RARRAY_LEN(vm->interrupts)) {
Expand Down Expand Up @@ -459,9 +484,21 @@ void liquid_vm_next_instruction(const uint8_t **ip_ptr, const size_t **const_ptr
(*const_ptr_ptr)++;
break;

case OP_WRITE_RAW_W:
case OP_JUMP_FWD_W:
{
size_t size = bytes_to_uint24(ip);
ip += 3 + size;
break;
}

case OP_WRITE_RAW:
(*const_ptr_ptr) += 2;
case OP_JUMP_FWD:
{
uint8_t size = *ip;
ip += 1 + size;
break;
}

default:
rb_bug("invalid opcode: %u", ip[-1]);
Expand Down Expand Up @@ -506,7 +543,7 @@ static VALUE vm_render_rescue(VALUE uncast_args, VALUE exception)

VALUE line_number = Qnil;
if (render_args->node_line_number) {
unsigned int node_line_number = decode_node_line_number(render_args->node_line_number);
unsigned int node_line_number = bytes_to_uint24(render_args->node_line_number);
if (node_line_number != 0) {
line_number = UINT2NUM(node_line_number);
}
Expand Down
84 changes: 26 additions & 58 deletions ext/liquid_c/vm_assembler.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,57 +21,7 @@ void vm_assembler_free(vm_assembler_t *code)

// Marks the Ruby values referenced by an assembler's code.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The
// instruction walk below appears to be the pre-change body and the single
// c_buffer_rb_gc_mark() call at the end its replacement -- confirm against
// the final file before relying on both being present.
void vm_assembler_gc_mark(vm_assembler_t *code)
{
// Constants are consumed in lockstep with the instructions that use them.
size_t *const_ptr = (size_t *)code->constants.data;
const uint8_t *ip = code->instructions.data;
// Don't rely on a terminating OP_LEAVE instruction
// since this could be called in the middle of parsing
const uint8_t *end_ip = code->instructions.data_end;
while (ip < end_ip) {
switch (*ip++) {
// Opcodes with no inline operand bytes and no constant.
case OP_LEAVE:
case OP_POP_WRITE:
case OP_PUSH_NIL:
case OP_PUSH_TRUE:
case OP_PUSH_FALSE:
case OP_FIND_VAR:
case OP_LOOKUP_KEY:
case OP_NEW_INT_RANGE:
break;

// One inline operand byte.
case OP_HASH_NEW:
case OP_PUSH_INT8:
ip++;
break;

// Two inline operand bytes.
case OP_PUSH_INT16:
ip += 2;
break;

// Three inline operand bytes.
case OP_RENDER_VARIABLE_RESCUE:
ip += 3;
break;

// Skips two constant slots without marking them.
case OP_WRITE_RAW:
const_ptr += 2;
break;

// One constant slot, marked as a Ruby value.
case OP_WRITE_NODE:
case OP_PUSH_CONST:
case OP_FIND_STATIC_VAR:
case OP_LOOKUP_CONST_KEY:
case OP_LOOKUP_COMMAND:
rb_gc_mark(*const_ptr++);
break;

// One inline operand byte plus one marked constant.
case OP_FILTER:
ip++;
rb_gc_mark(*const_ptr++);
break;

default:
rb_bug("invalid opcode: %u", ip[-1]);
}
}
// Hand the whole constants buffer to c_buffer_rb_gc_mark.
c_buffer_rb_gc_mark(&code->constants);
}

VALUE vm_assembler_disassemble(vm_assembler_t *code)
Expand Down Expand Up @@ -138,12 +88,23 @@ VALUE vm_assembler_disassemble(vm_assembler_t *code)
break;
}

case OP_WRITE_RAW_W:
case OP_WRITE_RAW:
{
const char *text = (const char *)const_ptr[0];
size_t size = const_ptr[1];
const char *text;
size_t size;
const char *name;
if (*ip == OP_WRITE_RAW_W) {
name = "write_raw_w";
size = bytes_to_uint24(&ip[1]);
text = (const char *)&ip[4];
} else {
name = "write_raw";
size = ip[1];
text = (const char *)&ip[2];
}
VALUE string = rb_enc_str_new(text, size, utf8_encoding);
rb_str_catf(output, "write_raw(%+"PRIsVALUE")\n", string);
rb_str_catf(output, "%s(%+"PRIsVALUE")\n", name, string);
break;
}

Expand Down Expand Up @@ -202,10 +163,17 @@ void vm_assembler_require_stack_args(vm_assembler_t *code, unsigned int count)

// Appends a "write raw text" instruction for `size` bytes starting at `string`.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The first
// four statements (opcode write plus two constants) appear to be the
// pre-change implementation and the rest its replacement -- confirm against
// the final file.
void vm_assembler_add_write_raw(vm_assembler_t *code, const char *string, size_t size)
{
vm_assembler_write_opcode(code, OP_WRITE_RAW);
// Stores the text pointer and its length as two constant slots.
VALUE *constants = c_buffer_extend_for_write(&code->constants, 2 * sizeof(VALUE));
constants[0] = (size_t)string;
constants[1] = size;
if (size > UINT8_MAX) {
// Wide form: opcode byte followed by a 3-byte size.
// NOTE(review): size is narrowed to unsigned int and uint24_to_bytes only
// asserts that the value is below 2^24, so larger sizes are not rejected
// when assertions are compiled out -- confirm callers bound the size.
uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 4);
instructions[0] = OP_WRITE_RAW_W;
uint24_to_bytes((unsigned int)size, &instructions[1]);
} else {
// Short form: opcode byte followed by a 1-byte size.
uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 2);
instructions[0] = OP_WRITE_RAW;
instructions[1] = size;
}

// The text bytes are copied into the instruction buffer right after the size.
c_buffer_write(&code->instructions, (char *)string, size);
}

void vm_assembler_add_write_node(vm_assembler_t *code, VALUE node)
Expand Down
11 changes: 7 additions & 4 deletions ext/liquid_c/vm_assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
#include <assert.h>
#include "liquid.h"
#include "c_buffer.h"
#include "intutil.h"

enum opcode {
OP_LEAVE = 0,
OP_WRITE_RAW = 1,
OP_WRITE_RAW_W = 1,
OP_WRITE_NODE = 2,
OP_POP_WRITE,
OP_WRITE_RAW_SKIP,
OP_PUSH_CONST,
OP_PUSH_NIL,
OP_PUSH_TRUE,
Expand All @@ -25,6 +27,9 @@ enum opcode {
OP_HASH_NEW, // rb_hash_new & rb_hash_bulk_insert
OP_FILTER,
OP_RENDER_VARIABLE_RESCUE, // setup state to rescue variable rendering
OP_WRITE_RAW,
OP_JUMP_FWD_W,
OP_JUMP_FWD,
};

typedef struct vm_assembler {
Expand Down Expand Up @@ -214,9 +219,7 @@ static inline void vm_assembler_add_render_variable_rescue(vm_assembler_t *code,
{
uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 4);
instructions[0] = OP_RENDER_VARIABLE_RESCUE;
instructions[1] = node_line_number >> 16;
instructions[2] = node_line_number >> 8;
instructions[3] = node_line_number;
uint24_to_bytes((unsigned int)node_line_number, &instructions[1]);
}

#endif
36 changes: 26 additions & 10 deletions test/unit/block_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ def test_write_unicode_characters
assert_equal("üñ", template.render!({ 'unicode_char' => 'ñ' }, output: output))
end

def test_op_write_raw_w
  # 256 bytes of raw text is the smallest size that no longer fits in a
  # one-byte length, so rendering must round-trip through the wide opcode.
  long_text = "a" * 256
  rendered = Liquid::Template.parse(long_text).render!
  assert_equal(long_text, rendered)
end

def test_disassemble_raw_w
  # 256 bytes of raw text: opcode (1) + size (3) + text (256) puts the
  # following instruction at offset 0x0104.
  source = "a" * 256
  body = Liquid::Template.parse(source).root.body
  expected = <<~ASM
    0x0000: write_raw_w("#{source}")
    0x0104: leave
  ASM
  assert_equal(expected, body.disassemble)
end

def test_disassemble
source = <<~LIQUID
raw
Expand All @@ -37,16 +53,16 @@ def test_disassemble
assert_instance_of(Liquid::Increment, increment_node)
assert_equal(<<~ASM, block_body.disassemble)
0x0000: write_raw("raw")
0x0001: render_variable_rescue(line_number: 2)
0x0005: find_static_var("var")
0x0006: push_const("none")
0x0007: push_const("allow_false")
0x0008: push_true
0x0009: hash_new(1)
0x000b: filter(name: :default, num_args: 3)
0x000d: pop_write
0x000e: write_node(#{increment_node.inspect})
0x000f: leave
0x0005: render_variable_rescue(line_number: 2)
0x0009: find_static_var("var")
0x000a: push_const("none")
0x000b: push_const("allow_false")
0x000c: push_true
0x000d: hash_new(1)
0x000f: filter(name: :default, num_args: 3)
0x0011: pop_write
0x0012: write_node(#{increment_node.inspect})
0x0013: leave
ASM
end
end

0 comments on commit ac408fe

Please sign in to comment.