aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/ruby_vm/rjit/exit_compiler.rb19
-rw-r--r--lib/ruby_vm/rjit/stats.rb83
-rw-r--r--rjit.c27
-rw-r--r--rjit.h4
-rw-r--r--rjit.rb6
-rw-r--r--rjit_c.c193
-rw-r--r--rjit_c.rb9
-rw-r--r--ruby.c2
-rwxr-xr-xtool/rjit/bindgen.rb1
9 files changed, 336 insertions, 8 deletions
diff --git a/lib/ruby_vm/rjit/exit_compiler.rb b/lib/ruby_vm/rjit/exit_compiler.rb
index c082cc3660..b7beb22177 100644
--- a/lib/ruby_vm/rjit/exit_compiler.rb
+++ b/lib/ruby_vm/rjit/exit_compiler.rb
@@ -6,12 +6,12 @@ module RubyVM::RJIT
# @param pc [Integer]
# @param asm [RubyVM::RJIT::Assembler]
def compile_entry_exit(pc, ctx, asm, cause:)
- # Increment per-insn exit counter
- incr_insn_exit(pc, asm)
-
# Fix pc/sp offsets for the interpreter
save_pc_and_sp(pc, ctx, asm, reset_sp_offset: false)
+ # Increment per-insn exit counter
+ count_insn_exit(pc, asm)
+
# Restore callee-saved registers
asm.comment("#{cause}: entry exit")
asm.pop(SP)
@@ -62,12 +62,12 @@ module RubyVM::RJIT
# @param ctx [RubyVM::RJIT::Context]
# @param asm [RubyVM::RJIT::Assembler]
def compile_side_exit(pc, ctx, asm)
- # Increment per-insn exit counter
- incr_insn_exit(pc, asm)
-
# Fix pc/sp offsets for the interpreter
save_pc_and_sp(pc, ctx.dup, asm) # dup to avoid sp_offset update
+ # Increment per-insn exit counter
+ count_insn_exit(pc, asm)
+
# Restore callee-saved registers
asm.comment("exit to interpreter on #{pc_to_insn(pc).name}")
asm.pop(SP)
@@ -105,13 +105,18 @@ module RubyVM::RJIT
# @param pc [Integer]
# @param asm [RubyVM::RJIT::Assembler]
- def incr_insn_exit(pc, asm)
+ def count_insn_exit(pc, asm)
if C.rjit_opts.stats
insn = Compiler.decode_insn(C.VALUE.new(pc).*)
asm.comment("increment insn exit: #{insn.name}")
asm.mov(:rax, (C.rjit_insn_exits + insn.bin).to_i)
asm.add([:rax], 1) # TODO: lock
end
+ if C.rjit_opts.trace_exits # --rjit-trace-exits: also record the stack at this exit
+ asm.comment('rjit_record_exit_stack')
+ asm.mov(C_ARGS[0], pc) # pc becomes rjit_record_exit_stack's exit_pc argument
+ asm.call(C.rjit_record_exit_stack)
+ end
end
# @param jit [RubyVM::RJIT::JITState]
diff --git a/lib/ruby_vm/rjit/stats.rb b/lib/ruby_vm/rjit/stats.rb
index 7cef634991..2fde44bc8e 100644
--- a/lib/ruby_vm/rjit/stats.rb
+++ b/lib/ruby_vm/rjit/stats.rb
@@ -30,6 +30,7 @@ module RubyVM::RJIT
class << self
private
+ # --rjit-stats at_exit
def print_stats
stats = runtime_stats
$stderr.puts("***RJIT: Printing RJIT statistics on exit***")
@@ -98,5 +99,87 @@ module RubyVM::RJIT
with_commas = d_groups.map(&:join).join(',').reverse
[with_commas, decimal].compact.join('.').rjust(pad, ' ')
end
+
+ # --rjit-trace-exits at_exit: marshal the exit traces into a dump file in the current directory
+ def dump_trace_exits
+ filename = "#{Dir.pwd}/rjit_exit_locations.dump"
+ File.binwrite(filename, Marshal.dump(exit_traces))
+ $stderr.puts("RJIT exit locations dumped to:\n#{filename}")
+ end
+
+ # Convert rb_rjit_raw_samples and rb_rjit_line_samples into a StackProf format.
+ def exit_traces
+ results = C.rjit_exit_traces
+ raw_samples = results[:raw].dup
+ line_samples = results[:lines].dup
+ frames = results[:frames].dup
+ samples_count = 0
+
+ # Loop through the instructions and set the frame hash with the data.
+ # We use nonexistent.def for the file name, otherwise insns.def will be displayed
+ # and that information isn't useful in this context.
+ RubyVM::INSTRUCTION_NAMES.each_with_index do |name, frame_id|
+ frame_hash = { samples: 0, total_samples: 0, edges: {}, name: name, file: "nonexistent.def", line: nil, lines: {} }
+ results[:frames][frame_id] = frame_hash
+ frames[frame_id] = frame_hash
+ end
+
+ # Loop through the raw_samples and build the hashes for StackProf.
+ # The loop is based off an example in the StackProf documentation and therefore
+ # this functionality can only work with that library.
+ #
+ # Raw Samples:
+ # [ length, frame1, frame2, frameN, ..., instruction, count ]
+ #
+ # Line Samples
+ # [ length, line_1, line_2, line_n, ..., dummy value, count ]
+ i = 0
+ while i < raw_samples.length
+ stack_length = raw_samples[i] + 1 # +1 to also cover the instruction entry after the frames
+ i += 1 # consume the stack length
+
+ prev_frame_id = nil
+ stack_length.times do |idx|
+ idx += i
+ frame_id = raw_samples[idx]
+
+ if prev_frame_id
+ prev_frame = frames[prev_frame_id]
+ prev_frame[:edges][frame_id] ||= 0
+ prev_frame[:edges][frame_id] += 1
+ end
+
+ frame_info = frames[frame_id]
+ frame_info[:total_samples] += 1
+
+ frame_info[:lines][line_samples[idx]] ||= [0, 0]
+ frame_info[:lines][line_samples[idx]][0] += 1
+
+ prev_frame_id = frame_id
+ end
+
+ i += stack_length # consume the stack
+
+ top_frame_id = prev_frame_id
+ top_frame_line = 1
+
+ sample_count = raw_samples[i]
+
+ frames[top_frame_id][:samples] += sample_count
+ frames[top_frame_id][:lines] ||= {}
+ frames[top_frame_id][:lines][top_frame_line] ||= [0, 0]
+ frames[top_frame_id][:lines][top_frame_line][1] += sample_count
+
+ samples_count += sample_count
+ i += 1
+ end
+
+ results[:samples] = samples_count
+ # Set missed_samples and gc_samples to 0 as their values
+ # don't matter to us in this context.
+ results[:missed_samples] = 0
+ results[:gc_samples] = 0
+ results
+ end
end
end
diff --git a/rjit.c b/rjit.c
index a459167ff3..5d60627a1a 100644
--- a/rjit.c
+++ b/rjit.c
@@ -67,7 +67,10 @@ struct rjit_options rb_rjit_opts;
// true if RJIT is enabled.
bool rb_rjit_enabled = false;
+// true if --rjit-stats (used before rb_rjit_opts is set)
bool rb_rjit_stats_enabled = false;
+// true if --rjit-trace-exits (used before rb_rjit_opts is set)
+bool rb_rjit_trace_exits_enabled = false;
// true if JIT-ed code should be called. When `ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS`
// and `rb_rjit_call_p == false`, any JIT-ed code execution is cancelled as soon as possible.
bool rb_rjit_call_p = false;
@@ -93,6 +96,11 @@ static VALUE rb_cRJITCfpPtr = 0;
// RubyVM::RJIT::Hooks
static VALUE rb_mRJITHooks = 0;
+// Frames for --rjit-trace-exits
+VALUE rb_rjit_raw_samples = 0;
+// Line numbers for --rjit-trace-exits
+VALUE rb_rjit_line_samples = 0;
+
// A default threshold used to add iseq to JIT.
#define DEFAULT_CALL_THRESHOLD 30
// Size of executable memory block in MiB.
@@ -113,6 +121,9 @@ rb_rjit_setup_options(const char *s, struct rjit_options *rjit_opt)
else if (opt_match_noarg(s, l, "stats")) {
rjit_opt->stats = true;
}
+ else if (opt_match_noarg(s, l, "trace-exits")) {
+ rjit_opt->trace_exits = true;
+ }
else if (opt_match_arg(s, l, "call-threshold")) {
rjit_opt->call_threshold = atoi(s + 1);
}
@@ -136,6 +147,7 @@ rb_rjit_setup_options(const char *s, struct rjit_options *rjit_opt)
const struct ruby_opt_message rb_rjit_option_messages[] = {
#if RJIT_STATS
M("--rjit-stats", "", "Enable collecting RJIT statistics"),
+ M("--rjit-trace-exits", "", "Trace side exit locations"),
#endif
M("--rjit-exec-mem-size=num", "", "Size of executable memory block in MiB (default: " STRINGIZE(DEFAULT_EXEC_MEM_SIZE) ")"),
M("--rjit-call-threshold=num", "", "Number of calls to trigger JIT (default: " STRINGIZE(DEFAULT_CALL_THRESHOLD) ")"),
@@ -314,6 +326,8 @@ rb_rjit_mark(void)
rb_gc_mark(rb_cRJITIseqPtr);
rb_gc_mark(rb_cRJITCfpPtr);
rb_gc_mark(rb_mRJITHooks);
+ rb_gc_mark(rb_rjit_raw_samples);
+ rb_gc_mark(rb_rjit_line_samples);
RUBY_MARK_LEAVE("rjit");
}
@@ -398,6 +412,10 @@ rb_rjit_init(const struct rjit_options *opts)
rb_cRJITIseqPtr = rb_funcall(rb_mRJITC, rb_intern("rb_iseq_t"), 0);
rb_cRJITCfpPtr = rb_funcall(rb_mRJITC, rb_intern("rb_control_frame_t"), 0);
rb_mRJITHooks = rb_const_get(rb_mRJIT, rb_intern("Hooks"));
+ if (rb_rjit_opts.trace_exits) {
+ rb_rjit_raw_samples = rb_ary_new();
+ rb_rjit_line_samples = rb_ary_new();
+ }
// Enable RJIT and stats from here
rb_rjit_call_p = !rb_rjit_opts.pause;
@@ -408,13 +426,20 @@ rb_rjit_init(const struct rjit_options *opts)
// Primitive for rjit.rb
//
-// Same as `RubyVM::RJIT::C.enabled?`, but this is used before rjit_init.
+// Same as `rb_rjit_opts.stats`, but this is used before rb_rjit_opts is set.
static VALUE
rjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self)
{
return RBOOL(rb_rjit_stats_enabled);
}
+// Same as `rb_rjit_opts.trace_exits`, but this is used before rb_rjit_opts is set.
+static VALUE
+rjit_trace_exits_enabled_p(rb_execution_context_t *ec, VALUE self)
+{
+ return RBOOL(rb_rjit_trace_exits_enabled);
+}
+
// Disable anything that could impact stats. It ends up disabling JIT calls as well.
static VALUE
rjit_stop_stats(rb_execution_context_t *ec, VALUE self)
diff --git a/rjit.h b/rjit.h
index 90b98323bc..0b92b5293f 100644
--- a/rjit.h
+++ b/rjit.h
@@ -32,6 +32,8 @@ struct rjit_options {
unsigned int exec_mem_size;
// Collect RJIT statistics
bool stats;
+ // Trace side exit locations
+ bool trace_exits;
// Enable disasm of all JIT code
bool dump_disasm;
// [experimental] Do not start RJIT until RJIT.resume is called.
@@ -69,6 +71,7 @@ extern void rb_rjit_collect_vm_usage_insn(int insn);
extern bool rb_rjit_enabled;
extern bool rb_rjit_stats_enabled;
+extern bool rb_rjit_trace_exits_enabled;
# else // USE_RJIT
@@ -88,6 +91,7 @@ static inline void rb_rjit_tracing_invalidate_all(rb_event_flag_t new_iseq_event
#define rb_rjit_enabled false
#define rb_rjit_call_p false
#define rb_rjit_stats_enabled false
+#define rb_rjit_trace_exits_enabled false
#define rb_rjit_call_threshold() UINT_MAX
diff --git a/rjit.rb b/rjit.rb
index da75576746..ebad3529ef 100644
--- a/rjit.rb
+++ b/rjit.rb
@@ -18,6 +18,12 @@ module RubyVM::RJIT
print_stats
end
end
+ if Primitive.rjit_trace_exits_enabled_p
+ at_exit do
+ Primitive.rjit_stop_stats
+ dump_trace_exits
+ end
+ end
end
if RubyVM::RJIT.enabled?
diff --git a/rjit_c.c b/rjit_c.c
index 0df65d53d9..9afdd36b71 100644
--- a/rjit_c.c
+++ b/rjit_c.c
@@ -11,6 +11,8 @@
#if USE_RJIT
#include "rjit_c.h"
+#include "include/ruby/assert.h"
+#include "include/ruby/debug.h"
#include "internal.h"
#include "internal/compile.h"
#include "internal/fixnum.h"
@@ -206,6 +208,197 @@ rjit_get_proc_ptr(VALUE procv)
return proc;
}
+// Use the same buffer size as Stackprof.
+#define BUFF_LEN 2048
+
+extern VALUE rb_rjit_raw_samples;
+extern VALUE rb_rjit_line_samples;
+
+static void
+rjit_record_exit_stack(const VALUE *exit_pc)
+{
+ // Let Primitive.rjit_stop_stats stop this
+ if (!rb_rjit_call_p) return;
+
+ // Get the opcode from the encoded insn handler at this PC
+ int insn = rb_vm_insn_addr2opcode((void *)*exit_pc);
+
+ // Create 2 array buffers to be used to collect frames and lines.
+ VALUE frames_buffer[BUFF_LEN] = { 0 };
+ int lines_buffer[BUFF_LEN] = { 0 };
+
+ // Records call frame and line information for each method entry into two
+ // temporary buffers. Returns the number of times we added to the buffer (ie
+ // the length of the stack).
+ //
+ // Call frame info is stored in the frames_buffer, line number information
+ // in the lines_buffer. The first argument is the start point and the second
+ // argument is the buffer limit, set at 2048.
+ int stack_length = rb_profile_frames(0, BUFF_LEN, frames_buffer, lines_buffer);
+ int samples_length = stack_length + 3; // 3: length, insn, count
+
+ // If rb_rjit_raw_samples already holds at least samples_length entries,
+ // its most recent record may be this same stack trace.
+ int prev_stack_len_index = RARRAY_LEN(rb_rjit_raw_samples) - samples_length;
+ VALUE prev_stack_len_obj;
+ if (RARRAY_LEN(rb_rjit_raw_samples) >= samples_length && FIXNUM_P(prev_stack_len_obj = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index))) {
+ int prev_stack_len = NUM2INT(prev_stack_len_obj);
+ int idx = stack_length - 1;
+ int prev_frame_idx = 0;
+ bool seen_already = true;
+
+ // If the previous stack length and current stack length are equal,
+ // loop and compare the current frame to the previous frame. If they are
+ // not equal, set seen_already to false and break out of the loop.
+ if (prev_stack_len == stack_length) {
+ while (idx >= 0) {
+ VALUE current_frame = frames_buffer[idx];
+ VALUE prev_frame = RARRAY_AREF(rb_rjit_raw_samples, prev_stack_len_index + prev_frame_idx + 1);
+
+ // If the current frame and previous frame are not equal, set
+ // seen_already to false and break out of the loop.
+ if (current_frame != prev_frame) {
+ seen_already = false;
+ break;
+ }
+
+ idx--;
+ prev_frame_idx++;
+ }
+
+ // If we know we've seen this stack before, increment the counter by 1.
+ if (seen_already) {
+ int prev_idx = RARRAY_LEN(rb_rjit_raw_samples) - 1;
+ int prev_count = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, prev_idx));
+ int new_count = prev_count + 1;
+
+ rb_ary_store(rb_rjit_raw_samples, prev_idx, INT2NUM(new_count));
+ rb_ary_store(rb_rjit_line_samples, prev_idx, INT2NUM(new_count));
+ return;
+ }
+ }
+ }
+
+ rb_ary_push(rb_rjit_raw_samples, INT2NUM(stack_length));
+ rb_ary_push(rb_rjit_line_samples, INT2NUM(stack_length));
+
+ int idx = stack_length - 1;
+
+ while (idx >= 0) {
+ VALUE frame = frames_buffer[idx];
+ int line = lines_buffer[idx];
+
+ rb_ary_push(rb_rjit_raw_samples, frame);
+ rb_ary_push(rb_rjit_line_samples, INT2NUM(line));
+
+ idx--;
+ }
+
+ // Push the insn value onto the rb_rjit_raw_samples array.
+ rb_ary_push(rb_rjit_raw_samples, INT2NUM(insn));
+
+ // Push a matching entry onto rb_rjit_line_samples so both arrays stay the same
+ // length. The value pushed is the array's current last index, not a source line.
+ int line = RARRAY_LEN(rb_rjit_line_samples) - 1;
+ rb_ary_push(rb_rjit_line_samples, INT2NUM(line));
+
+ // Push the number of times seen onto both arrays, which is 1
+ // because it's the first time we've seen it.
+ rb_ary_push(rb_rjit_raw_samples, INT2NUM(1));
+ rb_ary_push(rb_rjit_line_samples, INT2NUM(1));
+}
+
+// For a given raw_sample (frame), store a hash with the frame's name, file, and
+// line number in `hash`, keyed by the frame's ID. Does nothing if already present.
+static void
+rjit_add_frame(VALUE hash, VALUE frame)
+{
+ VALUE frame_id = SIZET2NUM(frame);
+
+ if (RTEST(rb_hash_aref(hash, frame_id))) {
+ return;
+ }
+ else {
+ VALUE frame_info = rb_hash_new();
+ // Full label for the frame
+ VALUE name = rb_profile_frame_full_label(frame);
+ // Absolute path of the frame from rb_iseq_realpath
+ VALUE file = rb_profile_frame_absolute_path(frame);
+ // Line number of the frame
+ VALUE line = rb_profile_frame_first_lineno(frame);
+
+ // If absolute path isn't available use the rb_iseq_path
+ if (NIL_P(file)) {
+ file = rb_profile_frame_path(frame);
+ }
+
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name);
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file);
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0));
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0));
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new());
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new());
+
+ if (line != INT2FIX(0)) {
+ rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line);
+ }
+
+ rb_hash_aset(hash, frame_id, frame_info);
+ }
+}
+
+static VALUE
+rjit_exit_traces(void)
+{
+ int samples_len = RARRAY_LEN(rb_rjit_raw_samples);
+ RUBY_ASSERT(samples_len == RARRAY_LEN(rb_rjit_line_samples));
+
+ VALUE result = rb_hash_new();
+ VALUE raw_samples = rb_ary_new_capa(samples_len);
+ VALUE line_samples = rb_ary_new_capa(samples_len);
+ VALUE frames = rb_hash_new();
+ int idx = 0;
+
+ // While the index is less than samples_len, parse rb_rjit_raw_samples and
+ // rb_rjit_line_samples, then add casted values to raw_samples and line_samples array.
+ while (idx < samples_len) {
+ int num = NUM2INT(RARRAY_AREF(rb_rjit_raw_samples, idx));
+ int line_num = NUM2INT(RARRAY_AREF(rb_rjit_line_samples, idx));
+ idx++;
+
+ rb_ary_push(raw_samples, SIZET2NUM(num));
+ rb_ary_push(line_samples, INT2NUM(line_num));
+
+ // Loop over the `num` frames of this record and add each one to the
+ // frames hash. Also push the current value onto the raw_samples
+ // and line_samples array respectively.
+ for (int o = 0; o < num; o++) {
+ rjit_add_frame(frames, RARRAY_AREF(rb_rjit_raw_samples, idx));
+ rb_ary_push(raw_samples, SIZET2NUM(RARRAY_AREF(rb_rjit_raw_samples, idx)));
+ rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx));
+ idx++;
+ }
+
+ // insn BIN and lineno
+ rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx));
+ rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx));
+ idx++;
+
+ // Number of times seen
+ rb_ary_push(raw_samples, RARRAY_AREF(rb_rjit_raw_samples, idx));
+ rb_ary_push(line_samples, RARRAY_AREF(rb_rjit_line_samples, idx));
+ idx++;
+ }
+
+ // Add the raw_samples, line_samples, and frames to the result
+ // hash.
+ rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples);
+ rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples);
+ rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames);
+
+ return result;
+}
+
// An offsetof implementation that works for unnamed struct and union.
// Multiplying 8 for compatibility with libclang's offsetof.
#define OFFSETOF(ptr, member) RB_SIZE2NUM(((char *)&ptr.member - (char*)&ptr) * 8)
diff --git a/rjit_c.rb b/rjit_c.rb
index 9e1a277b47..cf9b8c3cc8 100644
--- a/rjit_c.rb
+++ b/rjit_c.rb
@@ -294,6 +294,10 @@ module RubyVM::RJIT # :nodoc: all
}
end
+ def rjit_exit_traces
+ Primitive.cexpr! 'rjit_exit_traces()'
+ end
+
#
# Utilities: Not used by RJIT, but useful for debugging
#
@@ -585,6 +589,10 @@ module RubyVM::RJIT # :nodoc: all
Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_optimized_call) }
end
+ def C.rjit_record_exit_stack
+ Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_record_exit_stack) }
+ end
+
def C.rjit_str_neq_internal
Primitive.cexpr! %q{ SIZET2NUM((size_t)rjit_str_neq_internal) }
end
@@ -1239,6 +1247,7 @@ module RubyVM::RJIT # :nodoc: all
call_threshold: [CType::Immediate.parse("unsigned int"), Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), call_threshold)")],
exec_mem_size: [CType::Immediate.parse("unsigned int"), Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), exec_mem_size)")],
stats: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), stats)")],
+ trace_exits: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), trace_exits)")],
dump_disasm: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), dump_disasm)")],
pause: [self._Bool, Primitive.cexpr!("OFFSETOF((*((struct rjit_options *)NULL)), pause)")],
)
diff --git a/ruby.c b/ruby.c
index 035175915d..f307a2b5a0 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1617,6 +1617,8 @@ ruby_opt_init(ruby_cmdline_options_t *opt)
rb_rjit_enabled = true;
if (opt->rjit.stats)
rb_rjit_stats_enabled = true;
+ if (opt->rjit.trace_exits)
+ rb_rjit_trace_exits_enabled = true;
#endif
Init_ext(); /* load statically linked extensions before rubygems */
diff --git a/tool/rjit/bindgen.rb b/tool/rjit/bindgen.rb
index 2255400c9b..ec943505d9 100755
--- a/tool/rjit/bindgen.rb
+++ b/tool/rjit/bindgen.rb
@@ -512,6 +512,7 @@ generator = BindingGenerator.new(
rjit_full_cfunc_return
rjit_optimized_call
rjit_str_neq_internal
+ rjit_record_exit_stack
],
types: %w[
CALL_DATA