diff options
-rw-r--r-- | lib/ruby_vm/rjit/exit_compiler.rb | 19 | ||||
-rw-r--r-- | lib/ruby_vm/rjit/stats.rb | 83 | ||||
-rw-r--r-- | rjit.c | 27 | ||||
-rw-r--r-- | rjit.h | 4 | ||||
-rw-r--r-- | rjit.rb | 6 | ||||
-rw-r--r-- | rjit_c.c | 193 | ||||
-rw-r--r-- | rjit_c.rb | 9 | ||||
-rw-r--r-- | ruby.c | 2 | ||||
-rwxr-xr-x | tool/rjit/bindgen.rb | 1 |
9 files changed, 336 insertions, 8 deletions
diff --git a/lib/ruby_vm/rjit/exit_compiler.rb b/lib/ruby_vm/rjit/exit_compiler.rb index c082cc3660..b7beb22177 100644 --- a/lib/ruby_vm/rjit/exit_compiler.rb +++ b/lib/ruby_vm/rjit/exit_compiler.rb @@ -6,12 +6,12 @@ module RubyVM::RJIT # @param pc [Integer] # @param asm [RubyVM::RJIT::Assembler] def compile_entry_exit(pc, ctx, asm, cause:) - # Increment per-insn exit counter - incr_insn_exit(pc, asm) - # Fix pc/sp offsets for the interpreter save_pc_and_sp(pc, ctx, asm, reset_sp_offset: false) + # Increment per-insn exit counter + count_insn_exit(pc, asm) + # Restore callee-saved registers asm.comment("#{cause}: entry exit") asm.pop(SP) @@ -62,12 +62,12 @@ module RubyVM::RJIT # @param ctx [RubyVM::RJIT::Context] # @param asm [RubyVM::RJIT::Assembler] def compile_side_exit(pc, ctx, asm) - # Increment per-insn exit counter - incr_insn_exit(pc, asm) - # Fix pc/sp offsets for the interpreter save_pc_and_sp(pc, ctx.dup, asm) # dup to avoid sp_offset update + # Increment per-insn exit counter + count_insn_exit(pc, asm) + # Restore callee-saved registers asm.comment("exit to interpreter on #{pc_to_insn(pc).name}") asm.pop(SP) @@ -105,13 +105,18 @@ module RubyVM::RJIT # @param pc [Integer] # @param asm [RubyVM::RJIT::Assembler] - def incr_insn_exit(pc, asm) + def count_insn_exit(pc, asm) if C.rjit_opts.stats insn = Compiler.decode_insn(C.VALUE.new(pc).*) asm.comment("increment insn exit: #{insn.name}") asm.mov(:rax, (C.rjit_insn_exits + insn.bin).to_i) asm.add([:rax], 1) # TODO: lock end + if C.rjit_opts.trace_exits + asm.comment('rjit_record_exit_stack') + asm.mov(C_ARGS[0], pc) + asm.call(C.rjit_record_exit_stack) + end end # @param jit [RubyVM::RJIT::JITState] diff --git a/lib/ruby_vm/rjit/stats.rb b/lib/ruby_vm/rjit/stats.rb index 7cef634991..2fde44bc8e 100644 --- a/lib/ruby_vm/rjit/stats.rb +++ b/lib/ruby_vm/rjit/stats.rb @@ -30,6 +30,7 @@ module RubyVM::RJIT class << self private + # --rjit-stats at_exit def print_stats stats = runtime_stats $stderr.puts("***RJIT: 
Printing RJIT statistics on exit***") @@ -98,5 +99,87 @@ module RubyVM::RJIT with_commas = d_groups.map(&:join).join(',').reverse [with_commas, decimal].compact.join('.').rjust(pad, ' ') end + + # --rjit-trace-exits at_exit + def dump_trace_exits + filename = "#{Dir.pwd}/rjit_exit_locations.dump" + File.binwrite(filename, Marshal.dump(exit_traces)) + $stderr.puts("RJIT exit locations dumped to:\n#{filename}") + end + + # Convert rb_rjit_raw_samples and rb_rjit_line_samples into a StackProf format. + def exit_traces + results = C.rjit_exit_traces + raw_samples = results[:raw].dup + line_samples = results[:lines].dup + frames = results[:frames].dup + samples_count = 0 + + # Loop through the instructions and set the frame hash with the data. + # We use nonexistent.def for the file name, otherwise insns.def will be displayed + # and that information isn't useful in this context. + RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id| + frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil, lines: {} } + results[:frames][frame_id] = frame_hash + frames[frame_id] = frame_hash + end + + # Loop through the raw_samples and build the hashes for StackProf. + # The loop is based off an example in the StackProf documentation and therefore + # this functionality can only work with that library. 
+ # + # Raw Samples: + # [ length, frame1, frame2, frameN, ..., instruction, count + # + # Line Samples + # [ length, line_1, line_2, line_n, ..., dummy value, count + i = 0 + while i < raw_samples.length + stack_length = raw_samples[i] + 1 + i += 1 # consume the stack length + + prev_frame_id = nil + stack_length.times do |idx| + idx += i + frame_id = raw_samples[idx] + + if prev_frame_id + prev_frame = frames[prev_frame_id] + prev_frame[:edges][frame_id] ||= 0 + prev_frame[:edges][frame_id] += 1 + end + + frame_info = frames[frame_id] + frame_info[:total_samples] += 1 + + frame_info[:lines][line_samples[idx]] ||= [0, 0] + frame_info[:lines][line_samples[idx]][0] += 1 + + prev_frame_id = frame_id + end + + i += stack_length # consume the stack + + top_frame_id = prev_frame_id + top_frame_line = 1 + + sample_count = raw_samples[i] + + frames[top_frame_id][:samples] += sample_count + frames[top_frame_id][:lines] ||= {} + frames[top_frame_id][:lines][top_frame_line] ||= [0, 0] + frames[top_frame_id][:lines][top_frame_line][1] += sample_count + + samples_count += sample_count + i += 1 + end + + results[:samples] = samples_count + # Set missed_samples and gc_samples to 0 as their values + # don't matter to us in this context. + results[:missed_samples] = 0 + results[:gc_samples] = 0 + results + end end end @@ -67,7 +67,10 @@ struct rjit_options rb_rjit_opts; // true if RJIT is enabled. bool rb_rjit_enabled = false; +// true if --rjit-stats (used before rb_rjit_opts is set) bool rb_rjit_stats_enabled = false; +// true if --rjit-trace-exits (used before rb_rjit_opts is set) +bool rb_rjit_trace_exits_enabled = false; // true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS` // and `rb_rjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible. 
bool rb_rjit_call_p = false; @@ -93,6 +96,11 @@ static VALUE rb_cRJITCfpPtr = 0; // RubyVM::RJIT::Hooks static VALUE rb_mRJITHooks = 0; +// Frames for --rjit-trace-exits +VALUE rb_rjit_raw_samples = 0; +// Line numbers for --rjit-trace-exits +VALUE rb_rjit_line_samples = 0; + // A default threshold used to add iseq to JIT. #define DEFAULT_CALL_THRESHOLD 30 // Size of executable memory block in MiB. @@ -113,6 +121,9 @@ rb_rjit_setup_options(const char *s, struct rjit_options *rjit_opt) else if (opt_match_noarg(s, l, "stats")) { rjit_opt->stats = true; } + else if (opt_match_noarg(s, l, "trace-exits")) { + rjit_opt->trace_exits = true; + } else if (opt_match_arg(s, l, "call-threshold")) { rjit_opt->call_threshold = atoi(s + 1); } @@ -136,6 +147,7 @@ rb_rjit_setup_options(const char *s, struct rjit_options *rjit_opt) const struct ruby_opt_message rb_rjit_option_messages[] = { #if RJIT_STATS M("--rjit-stats", "", "Enable collecting RJIT statistics"), + M("--rjit-trace-exits", "", "Trace side exit locations"), #endif M("--rjit-exec-mem-size=num", "", "Size of executable memory block in MiB (default: " STRINGIZE(DEFAULT_EXEC_MEM_SIZE) ")"), M("--rjit-call-threshold=num", "", "Number of calls to trigger JIT (default: " STRINGIZE(DEFAULT_CALL_THRESHOLD) ")"), @@ -314,6 +326,8 @@ rb_rjit_mark(void) rb_gc_mark(rb_cRJITIseqPtr); rb_gc_mark(rb_cRJITCfpPtr); rb_gc_mark(rb_mRJITHooks); + rb_gc_mark(rb_rjit_raw_samples); + rb_gc_mark(rb_rjit_line_samples); RUBY_MARK_LEAVE("rjit"); } @@ -398,6 +412,10 @@ rb_rjit_init(const struct rjit_options *opts) rb_cRJITIseqPtr = rb_funcall(rb_mRJITC, rb_intern("rb_iseq_t"), 0); rb_cRJITCfpPtr = rb_funcall(rb_mRJITC, rb_intern("rb_control_frame_t"), 0); rb_mRJITHooks = rb_const_get(rb_mRJIT, rb_intern("Hooks")); + if (rb_rjit_opts.trace_exits) { + rb_rjit_raw_samples = rb_ary_new(); + rb_rjit_line_samples = rb_ary_new(); + } // Enable RJIT and stats from here rb_rjit_call_p = !rb_rjit_opts.pause; @@ -408,13 +426,20 @@ rb_rjit_init(const struct 
rjit_options *opts) // Primitive for rjit.rb // -// Same as `RubyVM::RJIT::C.enabled?`, but this is used before rjit_init. +// Same as `rb_rjit_opts.stats`, but this is used before rb_rjit_opts is set. static VALUE rjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self) { return RBOOL(rb_rjit_stats_enabled); } +// Same as `rb_rjit_opts.trace_exits`, but this is used before rb_rjit_opts is set. +static VALUE +rjit_trace_exits_enabled_p(rb_execution_context_t *ec, VALUE self) +{ + return RBOOL(rb_rjit_trace_exits_enabled); +} + // Disable anything that could impact stats. It ends up disabling JIT calls as well. static VALUE rjit_stop_stats(rb_execution_context_t *ec, VALUE self) @@ -32,6 +32,8 @@ struct rjit_options { unsigned int exec_mem_size; // Collect RJIT statistics bool stats; + // Trace side exit locations + bool trace_exits; // Enable disasm of all JIT code bool dump_disasm; // [experimental] Do not start RJIT until RJIT.resume is called. @@ -69,6 +71,7 @@ extern void rb_rjit_collect_vm_usage_insn(int insn); extern bool rb_rjit_enabled; extern bool rb_rjit_stats_enabled; +extern bool rb_rjit_trace_exits_enabled; # else // USE_RJIT @@ -88,6 +91,7 @@ static inline void rb_rjit_tracing_invalidate_all(rb_event_flag_t new_iseq_event #define rb_rjit_enabled false #define rb_rjit_call_p false #define rb_rjit_stats_enabled false +#define rb_rjit_trace_exits_enabled false #define rb_rjit_call_threshold() UINT_MAX @@ -18,6 +18,12 @@ module RubyVM::RJIT print_stats end end + if Primitive.rjit_trace_exits_enabled_p + at_exit do + Primitive.rjit_stop_stats + dump_trace_exits + end + end end if RubyVM::RJIT.enabled? @@ -11,6 +11,8 @@ #if USE_RJIT #include "rjit_c.h" +#include "include/ruby/assert.h" +#include "include/ruby/debug.h" #include "internal.h" #include "internal/compile.h" #include "internal/fixnum.h" @@ -206,6 +208,197 @@ rjit_get_proc_ptr(VALUE procv) return proc; } +// Use the same buffer size as Stackprof. 
+#define BUFF_LEN 2048 + +extern VALUE rb_rjit_raw_samples; +extern VALUE rb_rjit_line_samples; + +static void +rjit_record_exit_stack(const VALUE *exit_pc) +{ + // Let Primitive.rjit_stop_stats stop this + if (!rb_rjit_call_p) return; + + // Get the opcode from the encoded insn handler at this PC + int insn = rb_vm_insn_addr2opcode((void *)*exit_pc); + + // Create 2 array buffers to be used to collect frames and lines. + VALUE frames_buffer[BUFF_LEN] = { 0 }; + int lines_buffer[BUFF_LEN] = { 0 }; + + // Records call frame and line information for each method entry into two + // temporary buffers. Returns the number of times we added to the buffer (ie + // the length of the stack). + // + // Call frame info is stored in the frames_buffer, line number information + // in the lines_buffer. The first argument is the start point and the second + // argument is the buffer limit, set at 2048. + int stack_length = rb_profile_frames(0, BUFF_LEN, frames_buffer, lines_buffer); + int samples_length = stack_length + 3; // 3: length, insn, count + + // If rb_rjit_raw_samples already holds at least samples_length entries, + // we might have seen this stack trace previously. + int prev_stack_len_index = RARRAY_LEN(rb_rjit_raw_samples) - samples_length; + VALUE prev_stack_len_obj; + if (RARRAY_LEN(rb_rjit_raw_samples) >= samples_length && FIXNUM_P(prev_stack_len_obj = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index))) { + int prev_stack_len = NUM2INT(prev_stack_len_obj); + int idx = stack_length - 1; + int prev_frame_idx = 0; + bool seen_already = true; + + // If the previous stack length and current stack length are equal, + // loop and compare the current frame to the previous frame. If they are + // not equal, set seen_already to false and break out of the loop. 
+ if (prev_stack_len == stack_length) { + while (idx >= 0) { + VALUE current_frame = frames_buffer[idx]; + VALUE prev_frame = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index + prev_frame_idx + 1); + + // If the current frame and previous frame are not equal, set + // seen_already to false and break out of the loop. + if (current_frame != prev_frame) { + seen_already = false; + break; + } + + idx--; + prev_frame_idx++; + } + + // If every frame matched (the exit insn is not compared), increment the counter by 1. + if (seen_already) { + int prev_idx = RARRAY_LEN(rb_rjit_raw_samples) - 1; + int prev_count = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, prev_idx)); + int new_count = prev_count + 1; + + rb_ary_store(rb_rjit_raw_samples, prev_idx, INT2NUM(new_count)); + rb_ary_store(rb_rjit_line_samples, prev_idx, INT2NUM(new_count)); + return; + } + } + } + + rb_ary_push(rb_rjit_raw_samples, INT2NUM(stack_length)); + rb_ary_push(rb_rjit_line_samples, INT2NUM(stack_length)); + + int idx = stack_length - 1; + + while (idx >= 0) { + VALUE frame = frames_buffer[idx]; + int line = lines_buffer[idx]; + + rb_ary_push(rb_rjit_raw_samples, frame); + rb_ary_push(rb_rjit_line_samples, INT2NUM(line)); + + idx--; + } + + // Push the insn value onto the rb_rjit_raw_samples array. + rb_ary_push(rb_rjit_raw_samples, INT2NUM(insn)); + + // Push the entry paired with the insn onto the rb_rjit_line_samples array: + // the index of that array's current last element. + int line = RARRAY_LEN(rb_rjit_line_samples) - 1; + rb_ary_push(rb_rjit_line_samples, INT2NUM(line)); + + // Push number of times seen onto both arrays, which is 1 + // because it's the first time we've seen it. + rb_ary_push(rb_rjit_raw_samples, INT2NUM(1)); + rb_ary_push(rb_rjit_line_samples, INT2NUM(1)); +} + +// For a given raw_sample (frame), set the hash with the caller's +// name, file, and line number. Does nothing if the frame is already in the hash. 
+static void +rjit_add_frame(VALUE hash, VALUE frame) +{ + VALUE frame_id = SIZET2NUM(frame); + + if (RTEST(rb_hash_aref(hash, frame_id))) { + return; + } + else { + VALUE frame_info = rb_hash_new(); + // Full label for the frame + VALUE name = rb_profile_frame_full_label(frame); + // Absolute path of the frame from rb_iseq_realpath + VALUE file = rb_profile_frame_absolute_path(frame); + // Line number of the frame + VALUE line = rb_profile_frame_first_lineno(frame); + + // If absolute path isn't available use the rb_iseq_path + if (NIL_P(file)) { + file = rb_profile_frame_path(frame); + } + + rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name); + rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file); + rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0)); + rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0)); + rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new()); + rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new()); + + if (line != INT2FIX(0)) { + rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line); + } + + rb_hash_aset(hash, frame_id, frame_info); + } +} + +static VALUE +rjit_exit_traces(void) +{ + int samples_len = RARRAY_LEN(rb_rjit_raw_samples); + RUBY_ASSERT(samples_len == RARRAY_LEN(rb_rjit_line_samples)); + + VALUE result = rb_hash_new(); + VALUE raw_samples = rb_ary_new_capa(samples_len); + VALUE line_samples = rb_ary_new_capa(samples_len); + VALUE frames = rb_hash_new(); + int idx = 0; + + // While the index is less than samples_len, parse rb_rjit_raw_samples and + // rb_rjit_line_samples, then add casted values to raw_samples and line_samples array. 
+ while (idx < samples_len) { + int num = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, idx)); + int line_num = NUM2INT(RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + + rb_ary_push(raw_samples, SIZET2NUM(num)); + rb_ary_push(line_samples, INT2NUM(line_num)); + + // Loop over the num frames of this sample and add data to the + // frames hash. Also push the current value onto the raw_samples + // and line_samples array respectively. + for (int o = 0; o < num; o++) { + rjit_add_frame(frames, RARRAY_AREF(rb_rjit_raw_samples, idx)); + rb_ary_push(raw_samples, SIZET2NUM(RARRAY_AREF(rb_rjit_raw_samples, idx))); + rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + } + + // insn BIN and lineno + rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx)); + rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + + // Number of times seen + rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx)); + rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx)); + idx++; + } + + // Add the raw_samples, line_samples, and frames to the result + // hash. + rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples); + rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples); + rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames); + + return result; +} + // An offsetof implementation that works for unnamed struct and union. // Multiplying 8 for compatibility with libclang's offsetof. #define OFFSETOF(ptr, member) RB_SIZE2NUM(((char *)&ptr.member - (char*)&ptr) * 8) @@ -294,6 +294,10 @@ module RubyVM::RJIT # :nodoc: all } end + def rjit_exit_traces + Primitive.cexpr! 'rjit_exit_traces()' + end + # # Utilities: Not used by RJIT, but useful for debugging # @@ -585,6 +589,10 @@ module RubyVM::RJIT # :nodoc: all Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_optimized_call) } end + def C.rjit_record_exit_stack + Primitive.cexpr! 
%q{ SIZET2NUM((size_t)rjit_record_exit_stack) } + end + def C.rjit_str_neq_internal Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_str_neq_internal) } end @@ -1239,6 +1247,7 @@ module RubyVM::RJIT # :nodoc: all call_threshold: [CType::Immediate.parse("unsigned int"), Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), call_threshold)")], exec_mem_size: [CType::Immediate.parse("unsigned int"), Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), exec_mem_size)")], stats: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), stats)")], + trace_exits: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), trace_exits)")], dump_disasm: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), dump_disasm)")], pause: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), pause)")], ) @@ -1617,6 +1617,8 @@ ruby_opt_init(ruby_cmdline_options_t *opt) rb_rjit_enabled = true; if (opt->rjit.stats) rb_rjit_stats_enabled = true; + if (opt->rjit.trace_exits) + rb_rjit_trace_exits_enabled = true; #endif Init_ext(); /* load statically linked extensions before rubygems */ diff --git a/tool/rjit/bindgen.rb b/tool/rjit/bindgen.rb index 2255400c9b..ec943505d9 100755 --- a/tool/rjit/bindgen.rb +++ b/tool/rjit/bindgen.rb @@ -512,6 +512,7 @@ generator = BindingGenerator.new( rjit_full_cfunc_return rjit_optimized_call rjit_str_neq_internal + rjit_record_exit_stack ], types: %w[ CALL_DATA |