aboutsummaryrefslogtreecommitdiffstats
path: root/compile.c
diff options
context:
space:
mode:
authorKoichi Sasada <ko1@atdot.net>2019-11-07 16:58:00 +0900
committerKoichi Sasada <ko1@atdot.net>2019-11-08 09:09:29 +0900
commit46acd0075d80c2f886498f089fde1e9d795d50c4 (patch)
treea00dfbf124cd7e158e125549efa65cbfba394416 /compile.c
parentdddf5afb7947f5aba1ff875e9f5eb163f8c3d6c7 (diff)
downloadruby-46acd0075d80c2f886498f089fde1e9d795d50c4.tar.gz
support builtin features with Ruby and C.
Support loading builtin features written in Ruby, which are implemented with C builtin functions. [Feature #16254]

Several features:

(1) Load .rb files at boot time from a native binary. Currently, prelude.rb is loaded at boot time. However, this file is embedded in the interpreter in text format and has to be compiled at startup. This patch adds a feature to load it from a binary format.

(2) `__builtin_func()` in Ruby calls `func()` written in C. In a Ruby file, we can write `__builtin_func()` like a method call. However, this is not a method call, but special syntax to call a function `func()` written in C. The C functions should be defined in the file (the same compile unit) which loads this .rb file. Functions (`func` in the above example) should be defined with (a) 1st parameter: `rb_execution_context_t *ec`, (b) the rest of the parameters (0 to 15), and (c) a VALUE return type. These are very similar to the requirements for functions used by rb_define_method(); however, `rb_execution_context_t *ec` is a new requirement.

(3) Automatic C code generation from .rb files. tool/mk_builtin_loader.rb creates C code to load the .rb files needed by the miniruby and ruby commands. This script is run by BASERUBY, so *.rb should be written in BASERUBY-compatible syntax. This script loads a .rb file, finds all method calls with the `__builtin_` prefix, and generates a part of the C code to export the functions. tool/mk_builtin_binary.rb creates C code which contains the binary-compiled Ruby files needed by the ruby command.
Diffstat (limited to 'compile.c')
-rw-r--r--compile.c256
1 files changed, 229 insertions, 27 deletions
diff --git a/compile.c b/compile.c
index 0b90a360a0..e0a3c891d1 100644
--- a/compile.c
+++ b/compile.c
@@ -18,6 +18,7 @@
#include "vm_core.h"
#include "vm_debug.h"
+#include "builtin.h"
#include "iseq.h"
#include "insns.inc"
#include "insns_info.inc"
@@ -2237,6 +2238,9 @@ iseq_set_sequence(rb_iseq_t *iseq, LINK_ANCHOR *const anchor)
case TS_FUNCPTR:
generated_iseq[code_index + 1 + j] = operands[j];
break;
+ case TS_BUILTIN:
+ generated_iseq[code_index + 1 + j] = operands[j];
+ break;
default:
BADINSN_ERROR(iseq, iobj->insn_info.line_no,
"unknown operand type: %c", type);
@@ -3214,6 +3218,14 @@ iseq_peephole_optimize(rb_iseq_t *iseq, LINK_ELEMENT *list, const int do_tailcal
}
}
+ if (IS_INSN_ID(iobj, opt_invokebuiltin_delegate)) {
+ if (IS_TRACE(iobj->link.next)) {
+ if (IS_NEXT_INSN_ID(iobj->link.next, leave)) {
+ iobj->insn_id = BIN(opt_invokebuiltin_delegate_leave);
+ }
+ }
+ }
+
return COMPILE_OK;
}
@@ -6718,6 +6730,77 @@ compile_call_precheck_freeze(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE
}
+/* Return non-zero when the compile data of `iseq` has a builtin function
+ * table attached, zero otherwise. */
static int
+iseq_has_builtin_function_table(const rb_iseq_t *iseq)
+{
+    const struct rb_builtin_function *table = ISEQ_COMPILE_DATA(iseq)->builtin_function_table;
+
+    return table != NULL;
+}
+
+/*
+ * Look up `name` in the builtin function table held by the compile data of
+ * `iseq`.  The table is scanned linearly and ends at the first entry whose
+ * name is NULL.  Returns the matching entry, or NULL when no entry matches.
+ * The table pointer itself is dereferenced unconditionally, so it must not
+ * be NULL.
+ */
+static const struct rb_builtin_function *
+iseq_builtin_function_lookup(const rb_iseq_t *iseq, const char *name)
+{
+    const struct rb_builtin_function *entry = ISEQ_COMPILE_DATA(iseq)->builtin_function_table;
+
+    while (entry->name != NULL) {
+        if (strcmp(entry->name, name) == 0) {
+            return entry;
+        }
+        entry++;
+    }
+    return NULL;
+}
+
+/*
+ * If the name of `mid` starts with "__builtin_", return a pointer to the
+ * part of the name that follows the prefix; otherwise return NULL.
+ * NULL is also returned when rb_id2name() yields no name for `mid`.
+ * The returned pointer points into the string returned by rb_id2name().
+ */
+static const char *
+iseq_builtin_function_name(ID mid)
+{
+    static const char prefix[] = "__builtin_";
+    const size_t prefix_len = sizeof(prefix) - 1; /* exclude the terminating NUL */
+    const char *name = rb_id2name(mid);
+
+    /* name may be NULL; never hand NULL to strncmp() */
+    if (UNLIKELY(name != NULL && strncmp(prefix, name, prefix_len) == 0)) {
+        return &name[prefix_len];
+    }
+    return NULL;
+}
+
+/*
+ * Decide whether a builtin call's arguments can be delegated as-is.
+ *
+ * Returns TRUE when argc is 0, or when argc equals iseq->body->param.size
+ * and the first argc elements of `args` are `getlocal` instructions at
+ * level 0 whose computed local table position equals their argument
+ * position (0, 1, 2, ...).  Returns FALSE in every other case.
+ */
+static int
+delegate_call_p(const rb_iseq_t *iseq, unsigned int argc, const LINK_ANCHOR *args)
+{
+    const LINK_ELEMENT *cur;
+    unsigned int pos;
+
+    if (argc == 0) {
+        return TRUE;
+    }
+    if (argc != iseq->body->param.size) {
+        return FALSE;
+    }
+
+    cur = FIRST_ELEMENT(args);
+    for (pos = 0; pos < argc; pos++, cur = cur->next) {
+        if (cur->type != ISEQ_ELEMENT_INSN || INSN_OF(cur) != BIN(getlocal)) {
+            return FALSE;
+        }
+
+        int local_index = FIX2INT(OPERAND_AT(cur, 0));
+        int local_level = FIX2INT(OPERAND_AT(cur, 1));
+        if (local_level != 0) {
+            return FALSE;
+        }
+
+        /* position in the local table derived from the getlocal index operand */
+        unsigned int table_pos = iseq->body->local_table_size - (local_index - VM_ENV_DATA_SIZE + 1);
+        if (table_pos != pos) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+static int
compile_call(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int type, int line, int popped)
{
/* call: obj.method(...)
@@ -6802,6 +6885,51 @@ compile_call(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, in
}
}
#endif
+ const char *builtin_func;
+ if (UNLIKELY(iseq_has_builtin_function_table(iseq)) &&
+ (builtin_func = iseq_builtin_function_name(mid)) != NULL) {
+
+ if (parent_block != NULL) {
+ COMPILE_ERROR(iseq, line, "should not call builtins here.");
+ return COMPILE_NG;
+ }
+ else {
+ const struct rb_builtin_function *bf = iseq_builtin_function_lookup(iseq, builtin_func);
+
+ if (bf == NULL) {
+ if (1) {
+ rb_bug("can't find builtin function:%s", builtin_func);
+ }
+ else {
+ COMPILE_ERROR(ERROR_ARGS "can't find builtin function:%s", builtin_func);
+ }
+ return COMPILE_NG;
+ }
+
+ // fprintf(stderr, "func_name:%s -> %p\n", builtin_func, bf->func_ptr);
+
+ argc = setup_args(iseq, args, node->nd_args, &flag, &keywords);
+
+ if (FIX2INT(argc) != bf->argc) {
+ COMPILE_ERROR(ERROR_ARGS "argc is not match for builtin function:%s (expect %d but %d)",
+ builtin_func, bf->argc, FIX2INT(argc));
+ return COMPILE_NG;
+ }
+
+ if (delegate_call_p(iseq, FIX2INT(argc), args)) {
+ ADD_INSN1(ret, line, opt_invokebuiltin_delegate, bf);
+ }
+ else {
+ ADD_SEQ(ret, args);
+ ADD_INSN1(ret,line, invokebuiltin, bf);
+ }
+
+ if (popped) ADD_INSN(ret, line, pop);
+ return COMPILE_OK;
+ }
+ }
+
+
/* receiver */
if (type == NODE_CALL || type == NODE_OPCALL || type == NODE_QCALL) {
int idx, level;
@@ -8475,6 +8603,9 @@ insn_data_to_s_detail(INSN *iobj)
rb_str_catf(str, "<%p>", func);
}
break;
+ case TS_BUILTIN:
+ rb_bug("unsupported: TS_BUILTIN");
+ break;
default:{
rb_raise(rb_eSyntaxError, "unknown operand type: %c", type);
}
@@ -9395,6 +9526,14 @@ ibf_dump_overwrite(struct ibf_dump *dump, void *buff, unsigned int size, long of
memcpy(ptr + offset, buff, size);
}
+/*
+ * Return a pointer to the bytes at position *offset of the loader's
+ * current buffer, then advance *offset by `size`.
+ * No bounds check is performed here.
+ */
+static const void *
+ibf_load_ptr(const struct ibf_load *load, ibf_offset_t *offset, int size)
+{
+    const void *ptr = load->current_buffer->buff + *offset;
+
+    *offset += size;
+    return ptr;
+}
+
static void *
ibf_load_alloc(const struct ibf_load *load, ibf_offset_t offset, size_t x, size_t y)
{
@@ -9603,6 +9742,42 @@ ibf_load_small_value(const struct ibf_load *load, ibf_offset_t *offset)
return x;
}
+/*
+ * Write a reference to the builtin function `bf` into `dump`:
+ *   small value: bf->index
+ *   small value: length of bf->name (terminating NUL not counted)
+ *   bytes:       bf->name (without the terminating NUL)
+ * argc is omitted; only the name is written for verification.
+ */
+static void
+ibf_dump_builtin(struct ibf_dump *dump, const struct rb_builtin_function *bf)
+{
+    const char *const name = bf->name;
+    const size_t name_len = strlen(name);
+
+    ibf_dump_write_small_value(dump, (VALUE)bf->index);
+    ibf_dump_write_small_value(dump, (VALUE)name_len);
+    ibf_dump_write(dump, name, name_len);
+}
+
+/*
+ * Read a builtin function reference from the binary: an index, a name
+ * length and the name bytes (not NUL-terminated), in that order.
+ * The index is resolved against the VM's builtin function table and the
+ * entry's name must be exactly equal to the stored name.
+ *
+ * Returns the table entry.  Calls rb_bug() when the VM has no table, when
+ * the stored index or length is negative, or when the names differ.
+ * NOTE: the table length is not available here, so an index beyond the
+ * end of the table cannot be detected.
+ */
+static const struct rb_builtin_function *
+ibf_load_builtin(const struct ibf_load *load, ibf_offset_t *offset)
+{
+    int i = (int)ibf_load_small_value(load, offset);
+    int len = (int)ibf_load_small_value(load, offset);
+
+    if (i < 0 || len < 0) {
+        rb_bug("%s mismatch", __func__);
+    }
+
+    const char *name = (const char *)ibf_load_ptr(load, offset, len);
+
+    const struct rb_builtin_function *table = GET_VM()->builtin_function_table;
+    if (table == NULL) rb_bug("%s", __func__);
+
+    /* Require an exact match: equal length and equal bytes.  A bare
+     * strncmp() over `len` bytes would also accept a table entry whose
+     * name only starts with the stored name. */
+    const char *expected = table[i].name;
+    if (expected == NULL ||
+        strlen(expected) != (size_t)len ||
+        memcmp(expected, name, (size_t)len) != 0) {
+        rb_bug("%s mismatch", __func__);
+    }
+
+    return &table[i];
+}
+
static ibf_offset_t
ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq)
{
@@ -9625,16 +9800,15 @@ ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq)
/* operands */
for (op_index=0; types[op_index]; op_index++, code_index++) {
VALUE op = orig_code[code_index];
+ VALUE wv;
+
switch (types[op_index]) {
case TS_CDHASH:
case TS_VALUE:
- ibf_dump_write_small_value(dump, ibf_dump_object(dump, op));
+ wv = ibf_dump_object(dump, op);
break;
case TS_ISEQ:
- {
- VALUE index = (VALUE)ibf_dump_iseq(dump, (const rb_iseq_t *)op);
- ibf_dump_write_small_value(dump, index);
- }
+ wv = (VALUE)ibf_dump_iseq(dump, (const rb_iseq_t *)op);
break;
case TS_IC:
case TS_ISE:
@@ -9645,29 +9819,34 @@ ibf_dump_code(struct ibf_dump *dump, const rb_iseq_t *iseq)
break;
}
}
- ibf_dump_write_small_value(dump, (VALUE)i);
+ wv = (VALUE)i;
}
break;
case TS_CALLDATA:
{
- VALUE callinfo = ibf_dump_calldata(dump, (const struct rb_call_data *)op);
/* ibf_dump_calldata() always returns either Qtrue or Qfalse */
- ibf_dump_write_byte(dump, callinfo == Qtrue);
+ char c = ibf_dump_calldata(dump, (const struct rb_call_data *)op) == Qtrue; // 1 or 0
+ ibf_dump_write_byte(dump, c);
+ goto skip_wv;
}
- break;
case TS_ID:
- ibf_dump_write_small_value(dump, ibf_dump_id(dump, (ID)op));
+ wv = ibf_dump_id(dump, (ID)op);
break;
case TS_GENTRY:
- ibf_dump_write_small_value(dump, ibf_dump_gentry(dump, (const struct rb_global_entry *)op));
+ wv = ibf_dump_gentry(dump, (const struct rb_global_entry *)op);
break;
case TS_FUNCPTR:
rb_raise(rb_eRuntimeError, "TS_FUNCPTR is not supported");
- break;
+ goto skip_wv;
+ case TS_BUILTIN:
+ ibf_dump_builtin(dump, (const struct rb_builtin_function *)op);
+ goto skip_wv;
default:
- ibf_dump_write_small_value(dump, op);
+ wv = op;
break;
}
+ ibf_dump_write_small_value(dump, wv);
+ skip_wv:;
}
assert(insn_len(insn) == op_index+1);
}
@@ -9749,6 +9928,9 @@ ibf_load_code(const struct ibf_load *load, const rb_iseq_t *iseq, ibf_offset_t b
case TS_FUNCPTR:
rb_raise(rb_eRuntimeError, "TS_FUNCPTR is not supported");
break;
+ case TS_BUILTIN:
+ code[code_index] = (VALUE)ibf_load_builtin(load, &reading_pos);
+ break;
default:
code[code_index] = ibf_load_small_value(load, &reading_pos);
continue;
@@ -11244,21 +11426,10 @@ ibf_load_iseq(const struct ibf_load *load, const rb_iseq_t *index_iseq)
}
static void
-ibf_load_setup(struct ibf_load *load, VALUE loader_obj, VALUE str)
+ibf_load_setup_cstr(struct ibf_load *load, VALUE loader_obj, const char *cstr, size_t size)
{
- rb_check_safe_obj(str);
-
- if (RSTRING_LENINT(str) < (int)sizeof(struct ibf_header)) {
- rb_raise(rb_eRuntimeError, "broken binary format");
- }
-
-#if USE_LAZY_LOAD
- str = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
-#endif
-
- RB_OBJ_WRITE(loader_obj, &load->str, str);
load->loader_obj = loader_obj;
- load->global_buffer.buff = StringValuePtr(str);
+ load->global_buffer.buff = cstr;
load->header = (struct ibf_header *)load->global_buffer.buff;
load->global_buffer.size = load->header->size;
load->global_buffer.obj_list_offset = load->header->global_object_list_offset;
@@ -11270,7 +11441,7 @@ ibf_load_setup(struct ibf_load *load, VALUE loader_obj, VALUE str)
load->current_buffer = &load->global_buffer;
- if (RSTRING_LENINT(str) < (int)load->header->size) {
+ if (size < load->header->size) {
rb_raise(rb_eRuntimeError, "broken binary format");
}
if (strncmp(load->header->magic, "YARB", 4) != 0) {
@@ -11295,6 +11466,23 @@ ibf_load_setup(struct ibf_load *load, VALUE loader_obj, VALUE str)
}
+/*
+ * Prepare `load` for reading the binary iseq data held in the Ruby
+ * string `str`.
+ *
+ * Raises RuntimeError ("broken binary format") when the string is shorter
+ * than struct ibf_header.  With USE_LAZY_LOAD the bytes are copied into a
+ * fresh string first.  The string that is actually used is stored in
+ * load->str through a write barrier on loader_obj.
+ */
static void
+ibf_load_setup(struct ibf_load *load, VALUE loader_obj, VALUE str)
+{
+    rb_check_safe_obj(str);
+
+    /* Make sure str is a String before any RSTRING_* macro touches it;
+     * those macros do not check the type themselves. */
+    StringValue(str);
+
+    if (RSTRING_LENINT(str) < (int)sizeof(struct ibf_header)) {
+        rb_raise(rb_eRuntimeError, "broken binary format");
+    }
+
+#if USE_LAZY_LOAD
+    str = rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
+#endif
+
+    ibf_load_setup_cstr(load, loader_obj, RSTRING_PTR(str), RSTRING_LEN(str));
+    RB_OBJ_WRITE(loader_obj, &load->str, str);
+}
+
+static void
ibf_loader_mark(void *ptr)
{
struct ibf_load *load = (struct ibf_load *)ptr;
@@ -11336,6 +11524,20 @@ rb_iseq_ibf_load(VALUE str)
return iseq;
}
+/*
+ * Load the top-level iseq (index 0) from binary iseq data located at
+ * `cstr`, which is `size` bytes long.  The buffer is used in place and is
+ * not copied by this function.
+ *
+ * Raises RuntimeError ("broken binary format") when `cstr` is NULL or
+ * `size` is too small to contain struct ibf_header, so that the header is
+ * never read past the end of the buffer.
+ */
+const rb_iseq_t *
+rb_iseq_ibf_load_cstr(const char *cstr, size_t size)
+{
+    struct ibf_load *load;
+    rb_iseq_t *iseq;
+    VALUE loader_obj;
+
+    /* same minimum-size requirement as ibf_load_setup() applies to strings */
+    if (cstr == NULL || size < sizeof(struct ibf_header)) {
+        rb_raise(rb_eRuntimeError, "broken binary format");
+    }
+
+    loader_obj = TypedData_Make_Struct(0, struct ibf_load, &ibf_load_type, load);
+
+    ibf_load_setup_cstr(load, loader_obj, cstr, size);
+    iseq = ibf_load_iseq(load, 0);
+
+    /* keep the loader object alive until loading has finished */
+    RB_GC_GUARD(loader_obj);
+    return iseq;
+}
+
VALUE
rb_iseq_ibf_load_extra_data(VALUE str)
{