about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Takashi Kokubun <takashikkbn@gmail.com> 2023-10-18 14:07:03 -0700
committer: GitHub <noreply@github.com> 2023-10-18 21:07:03 +0000
commit: f51b92fe231266a1a184b8329d43520599d2017b (patch)
tree: d5a1e218714a894e83b86981676f94f742114d76
parent: 58ee088bb36874d4c63ef55e0d3287bb07ead9d2 (diff)
download: ruby-f51b92fe231266a1a184b8329d43520599d2017b.tar.gz
YJIT: Add --yjit-perf (#8697)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
-rw-r--r-- doc/yjit/yjit.md 28
-rw-r--r-- yjit/src/asm/mod.rs 1
-rw-r--r-- yjit/src/backend/x86_64/mod.rs 17
-rw-r--r-- yjit/src/codegen.rs 64
-rw-r--r-- yjit/src/cruby.rs 1
-rw-r--r-- yjit/src/options.rs 21
-rw-r--r-- yjit/src/utils.rs 2
-rw-r--r-- yjit/src/yjit.rs 7
8 files changed, 133 insertions, 8 deletions
diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md
index b1a4d5f022..4c1984ca6b 100644
--- a/doc/yjit/yjit.md
+++ b/doc/yjit/yjit.md
@@ -173,6 +173,7 @@ compiled, lower values mean less code is compiled (default 200000)
- `--yjit-trace-exits`: produce a Marshal dump of backtraces from specific exits. Automatically enables `--yjit-stats`
- `--yjit-max-versions=N`: maximum number of versions to generate per basic block (default 4)
- `--yjit-greedy-versioning`: greedy versioning mode (disabled by default, may increase code size)
+- `--yjit-perf`: Enable frame pointers and perf profiling
Note that there is also an environment variable `RUBY_YJIT_ENABLE` which can be used to enable YJIT.
This can be useful for some deployment scripts where specifying an extra command-line option to Ruby is not practical.
@@ -428,3 +429,30 @@ While in your i386 shell, install Cargo and Homebrew, then hack away!
2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install
If you use Fish shell you can [read this link](https://tenderlovemaking.com/2022/01/07/homebrew-rosetta-and-ruby.html) for information on making the dev environment easier.
+
+## Profiling with Linux perf
+
+`--yjit-perf` allows you to profile JIT-ed methods along with other native functions using Linux perf.
+When you run Ruby with `perf record`, perf looks up `/tmp/perf-{pid}.map` to resolve symbols in JIT code,
+and this option lets YJIT write method symbols into that file as well as enabling frame pointers.
+
+Here's an example way to use this option with [Firefox Profiler](https://profiler.firefox.com)
+(See also: [Profiling with Linux perf](https://profiler.firefox.com/docs/#/./guide-perf-profiling)):
+
+```bash
+# Compile the interpreter with frame pointers enabled
+./configure --enable-yjit --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc cflags=-fno-omit-frame-pointer
+make -j && make install
+
+# [Optional] Allow running perf without sudo
+echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
+echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid
+
+# Profile Ruby with --yjit-perf
+cd ../yjit-bench
+perf record --call-graph fp -- ruby --yjit-perf -Iharness-perf benchmarks/liquid-render/benchmark.rb
+
+# View results on Firefox Profiler https://profiler.firefox.com.
+# Create /tmp/test.perf as below and upload it using "Load a profile from file".
+perf script --fields +pid > /tmp/test.perf
+```
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index 75478814c2..ea1095734a 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -323,7 +323,6 @@ impl CodeBlock {
}
/// Return the address ranges of a given address range that this CodeBlock can write.
- #[cfg(any(feature = "disasm", target_arch = "aarch64"))]
#[allow(dead_code)]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
let region_start = self.get_ptr(0).into_usize();
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index 7a67429488..fe5f821372 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -498,8 +498,21 @@ impl Assembler
cb.write_byte(0);
},
- Insn::FrameSetup => {},
- Insn::FrameTeardown => {},
+ // Set up RBP to work with frame pointer unwinding
+ // (e.g. with Linux `perf record --call-graph fp`)
+ Insn::FrameSetup => {
+ if get_option!(frame_pointer) {
+ push(cb, RBP);
+ mov(cb, RBP, RSP);
+ push(cb, RBP);
+ }
+ },
+ Insn::FrameTeardown => {
+ if get_option!(frame_pointer) {
+ pop(cb, RBP);
+ pop(cb, RBP);
+ }
+ },
Insn::Add { left, right, .. } => {
let opnd1 = emit_64bit_immediate(cb, right);
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index d396243f1c..1636a4d74b 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -21,6 +21,7 @@ use std::mem;
use std::os::raw::c_int;
use std::ptr;
use std::rc::Rc;
+use std::cell::RefCell;
use std::slice;
pub use crate::virtualmem::CodePtr;
@@ -97,6 +98,9 @@ pub struct JITState {
/// When true, the block is valid only when there is a total of one ractor running
pub block_assumes_single_ractor: bool,
+
+ /// Address range for Linux perf's [JIT interface](https://github.com/torvalds/linux/blob/master/tools/perf/Documentation/jit-interface.txt)
+ perf_map: Rc::<RefCell::<Vec<(CodePtr, Option<CodePtr>, String)>>>,
}
impl JITState {
@@ -118,6 +122,7 @@ impl JITState {
bop_assumptions: vec![],
stable_constant_names_assumption: None,
block_assumes_single_ractor: false,
+ perf_map: Rc::default(),
}
}
@@ -231,6 +236,40 @@ impl JITState {
pub fn queue_outgoing_branch(&mut self, branch: PendingBranchRef) {
self.pending_outgoing.push(branch)
}
+
+ /// Mark the start address of a symbol to be reported to perf: registers a `pos_marker` callback on `asm` that, when called with the start address, pushes `(start, None, symbol_name)` onto the shared perf map
+ fn perf_symbol_range_start(&self, asm: &mut Assembler, symbol_name: &str) {
+ let symbol_name = symbol_name.to_string();
+ let syms = self.perf_map.clone();
+ asm.pos_marker(move |start| syms.borrow_mut().push((start, None, symbol_name.clone())));
+ }
+
+ /// Mark the end address of a symbol to be reported to perf: registers a `pos_marker` callback on `asm` that, when called with the end address, stores it in the last perf map entry (asserting that entry has no end yet); an empty perf map is left untouched
+ fn perf_symbol_range_end(&self, asm: &mut Assembler) {
+ let syms = self.perf_map.clone();
+ asm.pos_marker(move |end| {
+ if let Some((_, ref mut end_store, _)) = syms.borrow_mut().last_mut() {
+ assert_eq!(None, *end_store);
+ *end_store = Some(end);
+ }
+ });
+ }
+
+ /// Flush addresses and symbols to /tmp/perf-{pid}.map, appending one "{start:x} {size:x} {name}" line per range; entries whose end address is still `None` are skipped, and failing to open or write the file panics via `unwrap()`
+ fn flush_perf_symbols(&self, cb: &CodeBlock) {
+ let path = format!("/tmp/perf-{}.map", std::process::id());
+ let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap();
+ for sym in self.perf_map.borrow().iter() {
+ if let (start, Some(end), name) = sym {
+ // The code may straddle two pages, so write one line for each range returned by `cb.writable_addrs`; every such part belongs to the same symbol.
+ for (inline_start, inline_end) in cb.writable_addrs(*start, *end) {
+ use std::io::Write;
+ let code_size = inline_end - inline_start;
+ writeln!(f, "{inline_start:x} {code_size:x} {name}").unwrap();
+ }
+ }
+ }
+ }
}
use crate::codegen::JCCKinds::*;
@@ -883,6 +922,19 @@ pub fn gen_single_block(
asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8());
}
+ // Mark the start of a method name symbol for --yjit-perf
+ if get_option!(perf_map) {
+ let comptime_recv_class = jit.peek_at_self().class_of();
+ let class_name = unsafe { cstr_to_rust_string(rb_class2name(comptime_recv_class)) };
+ match (class_name, unsafe { rb_iseq_label(iseq) }) {
+ (Some(class_name), iseq_label) if iseq_label != Qnil => {
+ let iseq_label = ruby_str_to_rust(iseq_label);
+ jit.perf_symbol_range_start(&mut asm, &format!("[JIT] {}#{}", class_name, iseq_label));
+ }
+ _ => {},
+ }
+ }
+
if asm.ctx.is_return_landing() {
// Continuation of the end of gen_leave().
// Reload REG_SP for the current frame and transfer the return value
@@ -1004,10 +1056,20 @@ pub fn gen_single_block(
asm.pad_inval_patch();
}
+ // Mark the end of a method name symbol for --yjit-perf
+ if get_option!(perf_map) {
+ jit.perf_symbol_range_end(&mut asm);
+ }
+
// Compile code into the code block
let gc_offsets = asm.compile(cb, Some(ocb));
let end_addr = cb.get_write_ptr();
+ // Flush perf symbols after asm.compile() writes addresses
+ if get_option!(perf_map) {
+ jit.flush_perf_symbols(cb);
+ }
+
// If code for the block doesn't fit, fail
if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
return Err(());
@@ -8681,8 +8743,6 @@ impl CodegenGlobals {
#[cfg(not(test))]
let (mut cb, mut ocb) = {
- use std::cell::RefCell;
-
let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size as u32) };
// Memory protection syscalls need page-aligned addresses, so check it here. Assuming
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
index 2f9f2bedf2..6896ae9fce 100644
--- a/yjit/src/cruby.rs
+++ b/yjit/src/cruby.rs
@@ -577,7 +577,6 @@ pub fn rust_str_to_sym(str: &str) -> VALUE {
}
/// Produce an owned Rust String from a C char pointer
-#[cfg(feature = "disasm")]
pub fn cstr_to_rust_string(c_char_ptr: *const c_char) -> Option<String> {
assert!(c_char_ptr != std::ptr::null());
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
index 1d8e711a81..e5e0552d7e 100644
--- a/yjit/src/options.rs
+++ b/yjit/src/options.rs
@@ -62,6 +62,12 @@ pub struct Options {
/// Verify context objects (debug mode only)
pub verify_ctx: bool,
+
+ /// Enable generating frame pointers (for x86. arm64 always does this)
+ pub frame_pointer: bool,
+
+ /// Enable writing /tmp/perf-{pid}.map for Linux perf
+ pub perf_map: bool,
}
// Initialize the options to default values
@@ -80,10 +86,12 @@ pub static mut OPTIONS: Options = Options {
dump_disasm: None,
verify_ctx: false,
dump_iseq_disasm: None,
+ frame_pointer: false,
+ perf_map: false,
};
/// YJIT option descriptions for `ruby --help`.
-static YJIT_OPTIONS: [(&str, &str); 8] = [
+static YJIT_OPTIONS: [(&str, &str); 9] = [
("--yjit-stats", "Enable collecting YJIT statistics"),
("--yjit-trace-exits", "Record Ruby source location when exiting from generated code"),
("--yjit-trace-exits-sample-rate", "Trace exit locations only every Nth occurrence"),
@@ -92,6 +100,7 @@ static YJIT_OPTIONS: [(&str, &str); 8] = [
("--yjit-cold-threshold=num", "Global call after which ISEQs not compiled (default: 200K)"),
("--yjit-max-versions=num", "Maximum number of versions per basic block (default: 4)"),
("--yjit-greedy-versioning", "Greedy versioning mode (default: disabled)"),
+ ("--yjit-perf", "Enable frame pointers and perf profiling"),
];
#[derive(Clone, PartialEq, Eq, Debug)]
@@ -191,6 +200,16 @@ pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
}
},
+ ("perf", _) => match opt_val {
+ "" => unsafe {
+ OPTIONS.frame_pointer = true;
+ OPTIONS.perf_map = true;
+ },
+ "fp" => unsafe { OPTIONS.frame_pointer = true },
+ "map" => unsafe { OPTIONS.perf_map = true },
+ _ => return None,
+ },
+
("dump-disasm", _) => match opt_val {
"" => unsafe { OPTIONS.dump_disasm = Some(DumpDisasm::Stdout) },
directory => {
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index 3a8f0ef590..a883e959a0 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -73,7 +73,7 @@ pub(crate) use offset_of;
// Convert a CRuby UTF-8-encoded RSTRING into a Rust string.
// This should work fine on ASCII strings and anything else
// that is considered legal UTF-8, including embedded nulls.
-fn ruby_str_to_rust(v: VALUE) -> String {
+pub fn ruby_str_to_rust(v: VALUE) -> String {
let str_ptr = unsafe { rb_RSTRING_PTR(v) } as *mut u8;
let str_len: usize = unsafe { rb_RSTRING_LEN(v) }.try_into().unwrap();
let str_slice: &[u8] = unsafe { slice::from_raw_parts(str_ptr, str_len) };
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
index 94e2d174e6..515fa75ce8 100644
--- a/yjit/src/yjit.rs
+++ b/yjit/src/yjit.rs
@@ -72,6 +72,13 @@ pub extern "C" fn rb_yjit_init_rust() {
println!("YJIT: rb_yjit_init_rust() panicked. Aborting.");
std::process::abort();
}
+
+ // Make sure --yjit-perf doesn't append symbols to an old file
+ if get_option!(perf_map) {
+ let perf_map = format!("/tmp/perf-{}.map", std::process::id());
+ let _ = std::fs::remove_file(&perf_map);
+ println!("YJIT perf map: {perf_map}");
+ }
}
/// At the moment, we abort in all cases we panic.