From 41ccd193acceb1532ab1372433351c0a1eac59c2 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sat, 6 Aug 2022 14:10:23 +0200 Subject: [PATCH 1/5] compiler: don't treat offset 0 special at syntax errors If a compile error is raised at offset 0, try to resolve line and character position anyway. Signed-off-by: Jo-Philipp Wich --- compiler.c | 6 ++---- tests/custom/04_modules/06_export_errors | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/compiler.c b/compiler.c index 0042c15f..7fd4da20 100644 --- a/compiler.c +++ b/compiler.c @@ -187,10 +187,8 @@ uc_compiler_syntax_error(uc_compiler_t *compiler, size_t off, const char *fmt, . off = uc_program_function_srcpos(compiler->function, uc_compiler_current_chunk(compiler)->count); - if (off) { - byte = off; - line = uc_source_get_line(source, &byte); - } + byte = off; + line = uc_source_get_line(source, &byte); va_start(ap, fmt); len = xvasprintf(&s, fmt, ap); diff --git a/tests/custom/04_modules/06_export_errors b/tests/custom/04_modules/06_export_errors index 5c9f6764..83227b16 100644 --- a/tests/custom/04_modules/06_export_errors +++ b/tests/custom/04_modules/06_export_errors @@ -10,6 +10,7 @@ export let x = 1; -- Expect stderr -- Syntax error: Exports may only appear at top level of a module +In line 1, byte 1: `export let x = 1;` ^-- Near here From a486adc4e377611f1bcd957afe8c3ff6643e75c4 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sat, 6 Aug 2022 14:13:14 +0200 Subject: [PATCH 2/5] vm: don't treat offset 0 special for exceptions Try to resolve the source offset to line and character position and only fall back to report the location as instruction offset if we weren't able to determine the line number. 
Signed-off-by: Jo-Philipp Wich --- vm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vm.c b/vm.c index 3a6e39ea..1cf24ab3 100644 --- a/vm.c +++ b/vm.c @@ -902,11 +902,11 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) static uc_value_t * uc_vm_get_error_context(uc_vm_t *vm) { + size_t offset, i, byte, line; uc_value_t *stacktrace; uc_callframe_t *frame; uc_stringbuf_t *buf; uc_chunk_t *chunk; - size_t offset, i; /* skip to first non-native function call frame */ for (i = vm->callframes.count; i > 1; i--) @@ -924,7 +924,10 @@ uc_vm_get_error_context(uc_vm_t *vm) buf = ucv_stringbuf_new(); - if (offset) + byte = offset; + line = uc_source_get_line(uc_program_function_source(frame->closure->function), &byte); + + if (line) uc_error_context_format(buf, uc_vm_frame_source(frame), stacktrace, offset); else if (frame->ip != chunk->entries) ucv_stringbuf_printf(buf, "At instruction %zu", (frame->ip - chunk->entries) - 1); From b6fd8a2f825ba3d38137bcfbf44ef7dd09161cd2 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sat, 6 Aug 2022 14:16:19 +0200 Subject: [PATCH 3/5] lib: internally expose new uc_require_library() helper Break out the core logic of the uc_require() stl function into a new uc_require_library() helper function and make it available for usage outside of lib.c. Also add a new boolean parameter to the helper function which allows restricting runtime require operations of modules to dynamic libraries only. 
Signed-off-by: Jo-Philipp Wich --- include/ucode/lib.h | 1 + lib.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/ucode/lib.h b/include/ucode/lib.h index 4c7a3b0c..416fea55 100644 --- a/include/ucode/lib.h +++ b/include/ucode/lib.h @@ -34,6 +34,7 @@ uc_cfn_ptr_t uc_stdlib_function(const char *name); __hidden bool uc_source_context_format(uc_stringbuf_t *buf, uc_source_t *src, size_t off, bool compact); __hidden bool uc_error_context_format(uc_stringbuf_t *buf, uc_source_t *src, uc_value_t *stacktrace, size_t off); +__hidden uc_value_t *uc_require_library(uc_vm_t *vm, uc_value_t *nameval, bool so_only); /* vm helper */ diff --git a/lib.c b/lib.c index 779e3f5e..7a04cddf 100644 --- a/lib.c +++ b/lib.c @@ -1674,7 +1674,7 @@ uc_require_ucode(uc_vm_t *vm, const char *path, uc_value_t *scope, uc_value_t ** } static bool -uc_require_path(uc_vm_t *vm, const char *path_template, const char *name, uc_value_t **res) +uc_require_path(uc_vm_t *vm, const char *path_template, const char *name, uc_value_t **res, bool so_only) { uc_stringbuf_t *buf = xprintbuf_new(); const char *p, *q, *last; @@ -1715,7 +1715,7 @@ uc_require_path(uc_vm_t *vm, const char *path_template, const char *name, uc_val if (!strcmp(p + 1, ".so")) rv = uc_require_so(vm, buf->buf, res); - else if (!strcmp(p + 1, ".uc")) + else if (!strcmp(p + 1, ".uc") && !so_only) rv = uc_require_ucode(vm, buf->buf, NULL, res, true); if (rv) @@ -1727,18 +1727,17 @@ uc_require_path(uc_vm_t *vm, const char *path_template, const char *name, uc_val return rv; } -static uc_value_t * -uc_require(uc_vm_t *vm, size_t nargs) +uc_value_t * +uc_require_library(uc_vm_t *vm, uc_value_t *nameval, bool so_only) { - uc_value_t *val = uc_fn_arg(0); uc_value_t *search, *se, *res; size_t arridx, arrlen; const char *name; - if (ucv_type(val) != UC_STRING) + if (ucv_type(nameval) != UC_STRING) return NULL; - name = ucv_string_get(val); + name = ucv_string_get(nameval); search = 
ucv_property_get(uc_vm_scope_get(vm), "REQUIRE_SEARCH_PATH"); if (ucv_type(search) != UC_ARRAY) { @@ -1754,7 +1753,7 @@ uc_require(uc_vm_t *vm, size_t nargs) if (ucv_type(se) != UC_STRING) continue; - if (uc_require_path(vm, ucv_string_get(se), name, &res)) + if (uc_require_path(vm, ucv_string_get(se), name, &res, so_only)) return res; } @@ -1764,6 +1763,12 @@ uc_require(uc_vm_t *vm, size_t nargs) return NULL; } +static uc_value_t * +uc_require(uc_vm_t *vm, size_t nargs) +{ + return uc_require_library(vm, uc_fn_arg(0), false); +} + static uc_value_t * uc_iptoarr(uc_vm_t *vm, size_t nargs) { From c9442f12ee056fd50f314408052917cc5f359bb4 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sat, 6 Aug 2022 14:19:33 +0200 Subject: [PATCH 4/5] vm: introduce new I_DYNLOAD opcode The I_DYNLOAD opcode is basically a bytecode level instruction for uc_require() with semantics similar to I_IMPORT. It allows loading a dynamic extension library at runtime and treating values from the resulting module context object like exports from a compile time source module. For example the statement `import { readfile, writefile } from "fs"` would import the readfile() and writefile() functions of fs.so as readonly live bindings into the current file scope. 
Signed-off-by: Jo-Philipp Wich --- include/ucode/vm.h | 3 +- vm.c | 89 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/include/ucode/vm.h b/include/ucode/vm.h index cc57fdb2..8562524b 100644 --- a/include/ucode/vm.h +++ b/include/ucode/vm.h @@ -97,7 +97,8 @@ __insn(NEXTK) \ __insn(NEXTKV) \ __insn(DELETE) \ __insn(IMPORT) \ -__insn(EXPORT) +__insn(EXPORT) \ +__insn(DYNLOAD) #undef __insn diff --git a/vm.c b/vm.c index 1cf24ab3..c08f9cec 100644 --- a/vm.c +++ b/vm.c @@ -75,7 +75,8 @@ static const int8_t insn_operand_bytes[__I_MAX] = { [I_QMCALL] = 4, [I_IMPORT] = 4, - [I_EXPORT] = 4 + [I_EXPORT] = 4, + [I_DYNLOAD] = 4 }; static const char *exception_type_strings[] = { @@ -2425,6 +2426,88 @@ uc_vm_insn_export(uc_vm_t *vm, uc_vm_insn_t insn) ucv_get(&ref->header); } +static void +uc_vm_insn_dynload(uc_vm_t *vm, uc_vm_insn_t insn) +{ + uc_callframe_t *frame = uc_vm_current_frame(vm); + uc_value_t *name, *export, *modscope, *modobj; + uint16_t count = vm->arg.u32 & 0xffff; + uint16_t to = vm->arg.u32 >> 16; + uint32_t cidx; + bool found; + + /* instruction is followed by u32 containing the constant index of the + * module name string to import and `count` times u32 values containing + * the import name constant indexes */ + + cidx = ( + frame->ip[0] * 0x1000000UL + + frame->ip[1] * 0x10000UL + + frame->ip[2] * 0x100UL + + frame->ip[3] + ); + + frame->ip += 4; + + /* push module name onto stack, then attempt to load module and pop + * name value again. Will raise exception on error */ + name = uc_program_get_constant(uc_vm_current_program(vm), cidx); + modscope = uc_require_library(vm, name, true); + ucv_put(name); + + if (!modscope) + return; + + /* If count is zero, we're doing a wildcard import. Shallow copy module + * object, mark it constant and patch into the target upvalue. 
*/ + if (count == 0) { + modobj = ucv_object_new(vm); + + ucv_object_foreach(modscope, k, v) + ucv_object_add(modobj, k, ucv_get(v)); + + ucv_set_constant(modobj, true); + + uc_vm_stack_push(vm, modobj); + } + + /* ... otherwise we're importing a specific list of names */ + else { + while (count > 0) { + cidx = ( + frame->ip[0] * 0x1000000UL + + frame->ip[1] * 0x10000UL + + frame->ip[2] * 0x100UL + + frame->ip[3] + ); + + frame->ip += 4; + + name = uc_program_get_constant(uc_vm_current_program(vm), cidx); + export = ucv_object_get(modscope, ucv_string_get(name), &found); + + if (!found) { + uc_vm_raise_exception(vm, EXCEPTION_REFERENCE, + "Module does not export %s", + ucv_string_get(name)); + + ucv_put(name); + + return; + } + + ucv_put(name); + + frame->closure->upvals[to] = (uc_upvalref_t *)ucv_upvalref_new(0); + frame->closure->upvals[to]->closed = true; + frame->closure->upvals[to]->value = ucv_get(export); + + count--; + to++; + } + } +} + static uc_value_t * uc_vm_callframe_pop(uc_vm_t *vm) { @@ -2720,6 +2803,10 @@ uc_vm_execute_chunk(uc_vm_t *vm) uc_vm_insn_export(vm, insn); break; + case I_DYNLOAD: + uc_vm_insn_dynload(vm, insn); + break; + default: uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "unknown opcode %d", insn); break; From fcc49e6944ab29ab48e8363d2d72e9ca10d3fb76 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sat, 6 Aug 2022 14:23:44 +0200 Subject: [PATCH 5/5] compiler: add import statement support for dynamic extensions Utilize the new I_DYNLOAD vm opcode to support import statements referring to dynamic extension modules. During compilation, the compiler will try to infer the type of the imported module from the resolved file path; if it ends with `.so`, the module is assumed to be a dynamic extension and loading/binding of the module is deferred to runtime using I_DYNLOAD opcodes. 
Additionally, the `-c` cli option gained support for a new compiler flag `dynlink=...` which allows forcing a particular module name expression to be treated as dynamic extension. This is useful to e.g. force resolving `import { x } from "foo"` to a dynamic extension `foo.so` loaded at runtime even if a plain `foo.uc` exists in the search path during compilation or if no such module is available at build time. Signed-off-by: Jo-Philipp Wich --- compiler.c | 70 +++++++++++++++++++++++++++++++++++++++-- include/ucode/types.h | 1 + main.c | 14 +++++++-- tests/cram/test_basic.t | 3 +- 4 files changed, 82 insertions(+), 6 deletions(-) diff --git a/compiler.c b/compiler.c index 7fd4da20..1199d827 100644 --- a/compiler.c +++ b/compiler.c @@ -3329,6 +3329,68 @@ uc_compiler_acquire_source(uc_compiler_t *compiler, const char *path) return uc_source_new_file(path); } +static bool +uc_compiler_compile_dynload(uc_compiler_t *compiler, const char *name, uc_value_t *imports) +{ + uc_value_t *modname = ucv_string_new(name); + size_t i, n_imports; + uc_value_t *import; + + for (i = 0, n_imports = 0; i < ucv_array_length(imports); i++) { + import = ucv_array_get(imports, i); + + if (ucv_boolean_get(import)) { + uc_compiler_emit_insn(compiler, 0, I_DYNLOAD); + uc_compiler_emit_u32(compiler, 0, 0); + uc_compiler_emit_constant_index(compiler, 0, modname); + } + else { + n_imports++; + } + } + + if (n_imports > 0) { + uc_compiler_emit_insn(compiler, 0, I_DYNLOAD); + uc_compiler_emit_u32(compiler, 0, n_imports | ((compiler->upvals.count - n_imports) << 16)); + uc_compiler_emit_constant_index(compiler, 0, modname); + + for (i = 0; i < ucv_array_length(imports); i++) { + import = ucv_get(ucv_array_get(imports, i)); + + if (!import) + import = ucv_string_new("default"); + + if (!ucv_boolean_get(import)) + uc_compiler_emit_constant_index(compiler, 0, import); + + ucv_put(import); + } + } + + ucv_put(modname); + + return true; +} + +static bool +uc_compiler_is_dynlink_module(uc_compiler_t 
*compiler, const char *name, const char *path) +{ + uc_search_path_t *dynlink_list = &compiler->parser->config->force_dynlink_list; + size_t i; + char *p; + + for (i = 0; i < dynlink_list->count; i++) + if (!strcmp(dynlink_list->entries[i], name)) + return true; + + if (!path) + return false; + + p = strrchr(path, '.'); + + return (p && !strcmp(p, ".so")); +} + static bool uc_compiler_compile_module(uc_compiler_t *compiler, const char *name, uc_value_t *imports) { @@ -3341,7 +3403,10 @@ uc_compiler_compile_module(uc_compiler_t *compiler, const char *name, uc_value_t path = uc_compiler_resolve_module_path(compiler, name); - if (path) { + if (uc_compiler_is_dynlink_module(compiler, name, path)) { + res = uc_compiler_compile_dynload(compiler, name, imports); + } + else if (path) { source = uc_compiler_acquire_source(compiler, path); if (source) { @@ -3361,6 +3426,8 @@ uc_compiler_compile_module(uc_compiler_t *compiler, const char *name, uc_value_t res = false; } + + uc_source_put(source); } else { uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, @@ -3369,7 +3436,6 @@ uc_compiler_compile_module(uc_compiler_t *compiler, const char *name, uc_value_t return false; } - uc_source_put(source); free(path); return res; diff --git a/include/ucode/types.h b/include/ucode/types.h index 636d6e45..e20f3d64 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -227,6 +227,7 @@ typedef struct { bool strict_declarations; bool raw_mode; uc_search_path_t module_search_path; + uc_search_path_t force_dynlink_list; } uc_parse_config_t; extern uc_parse_config_t uc_default_parse_config; diff --git a/main.c b/main.c index 087efff5..02df3b1a 100644 --- a/main.c +++ b/main.c @@ -90,7 +90,8 @@ print_usage(const char *app) "-c[flag,flag,...]\n" " Compile the given source file(s) to bytecode instead of executing them.\n" " Supported flags: no-interp (omit interpreter line), interp=... 
(over-\n" - " ride interpreter line with ...)\n\n" + " ride interpreter line with ...), dynlink=... (force import from ... to\n" + " be treated as shared extensions loaded at runtime).\n\n" "-o path\n" " Output file path when compiling. If omitted, the compiled byte code\n" @@ -204,7 +205,7 @@ parse_template_modeflags(char *opt, uc_parse_config_t *config) } static void -parse_compile_flags(char *opt, char **interp) +parse_compile_flags(char *opt, char **interp, uc_search_path_t *dynlink_list) { char *p, *k, *v; @@ -230,6 +231,12 @@ parse_compile_flags(char *opt, char **interp) else *interp = v; } + else if (!strcmp(k, "dynlink")) { + if (!v) + fprintf(stderr, "Compile flag \"%s\" requires a value, ignoring\n", k); + else + uc_vector_push(dynlink_list, v); + } else { fprintf(stderr, "Unrecognized -c flag \"%s\", ignoring\n", k); } @@ -577,7 +584,7 @@ main(int argc, char **argv) case 'c': outfile = "./uc.out"; - parse_compile_flags(optarg, &interp); + parse_compile_flags(optarg, &interp, &config.force_dynlink_list); break; case 's': @@ -640,6 +647,7 @@ main(int argc, char **argv) out: uc_search_path_free(&config.module_search_path); + uc_vector_clear(&config.force_dynlink_list); uc_source_put(source); diff --git a/tests/cram/test_basic.t b/tests/cram/test_basic.t index 7308002e..5911ac99 100644 --- a/tests/cram/test_basic.t +++ b/tests/cram/test_basic.t @@ -61,7 +61,8 @@ check that ucode provides exepected help: -c[flag,flag,...] Compile the given source file(s) to bytecode instead of executing them. Supported flags: no-interp (omit interpreter line), interp=... (over- - ride interpreter line with ...) + ride interpreter line with ...), dynlink=... (force import from ... to + be treated as shared extensions loaded at runtime). -o path Output file path when compiling. If omitted, the compiled byte code