author    Alan Wu <XrXr@users.noreply.github.com>  2023-10-16 18:35:26 -0400
committer Alan Wu <XrXr@users.noreply.github.com>  2023-11-07 17:43:43 -0500
commit    a1c61f0ae5f5ecaa7d8289942b78e6b0c77118fe (patch)
tree      34ce4467e77196c9e6fc842f39b7bae0d8cd62c5
parent    aa6642de630cfc10063154d84e45a7bff30e9103 (diff)
download  ruby-a1c61f0ae5f5ecaa7d8289942b78e6b0c77118fe.tar.gz
YJIT: Use u32 for CodePtr to save 4 bytes each
We've long had a size restriction on the code memory region such that a u32 offset can refer to any location in it. This commit capitalizes on that restriction by shrinking `CodePtr` from 8 bytes to 4.

To derive a full raw pointer from a `CodePtr`, one needs a base pointer. Both `CodeBlock` and `VirtualMemory` can serve this purpose. The base pointer is readily available everywhere except in the case of the `jit_return` "branch". Generalize lea_label() to lea_jump_target() in the IR to delay deriving the `jit_return` address until `compile()`, when the base pointer is available.

On railsbench, this yields roughly a 1% reduction in `yjit_alloc_size` (58,397,765 to 57,742,248).
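In essence, the patch replaces a raw pointer with a 4-byte offset that is resolved against a base pointer on demand. Condensed sketch of the new representation (mirrors the virtualmem.rs hunk below; simplified, not the complete patch):

use std::ptr::NonNull;

/// 4-byte handle into the code region, stored as an offset from the region base.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
pub struct CodePtr(u32);

/// Anything that can supply the region's base pointer (both CodeBlock and
/// VirtualMemory implement this in the patch).
pub trait CodePtrBase {
    fn base_ptr(&self) -> NonNull<u8>;
}

impl CodePtr {
    /// Recover a (possibly dangling) raw pointer by adding the offset to the base.
    pub fn raw_ptr(self, base: &impl CodePtrBase) -> *const u8 {
        let CodePtr(offset) = self;
        base.base_ptr().as_ptr().wrapping_add(offset as usize)
    }

    /// Same as raw_ptr(), but as a plain address.
    pub fn raw_addr(self, base: &impl CodePtrBase) -> usize {
        self.raw_ptr(base) as usize
    }
}

Call sites that previously used ptr.raw_ptr() or ptr.into_usize() now pass the CodeBlock (or VirtualMemory) explicitly, e.g. ptr.raw_ptr(cb), which is why most hunks below simply thread a base argument through.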
-rw-r--r--  yjit/src/asm/mod.rs             39
-rw-r--r--  yjit/src/asm/x86_64/mod.rs       9
-rw-r--r--  yjit/src/asm/x86_64/tests.rs     8
-rw-r--r--  yjit/src/backend/arm64/mod.rs   44
-rw-r--r--  yjit/src/backend/ir.rs          23
-rw-r--r--  yjit/src/backend/tests.rs        6
-rw-r--r--  yjit/src/backend/x86_64/mod.rs  26
-rw-r--r--  yjit/src/codegen.rs              6
-rw-r--r--  yjit/src/core.rs                39
-rw-r--r--  yjit/src/disasm.rs              22
-rw-r--r--  yjit/src/invariants.rs           6
-rw-r--r--  yjit/src/utils.rs               16
-rw-r--r--  yjit/src/virtualmem.rs          90
-rw-r--r--  yjit/src/yjit.rs                 2
14 files changed, 175 insertions, 161 deletions
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
index ea1095734a..a89be1a2f8 100644
--- a/yjit/src/asm/mod.rs
+++ b/yjit/src/asm/mod.rs
@@ -325,10 +325,10 @@ impl CodeBlock {
/// Return the address ranges of a given address range that this CodeBlock can write.
#[allow(dead_code)]
pub fn writable_addrs(&self, start_ptr: CodePtr, end_ptr: CodePtr) -> Vec<(usize, usize)> {
- let region_start = self.get_ptr(0).into_usize();
- let region_end = self.get_ptr(self.get_mem_size()).into_usize();
- let mut start = start_ptr.into_usize();
- let end = std::cmp::min(end_ptr.into_usize(), region_end);
+ let region_start = self.get_ptr(0).raw_addr(self);
+ let region_end = self.get_ptr(self.get_mem_size()).raw_addr(self);
+ let mut start = start_ptr.raw_addr(self);
+ let end = std::cmp::min(end_ptr.raw_addr(self), region_end);
let freed_pages = self.freed_pages.as_ref().as_ref();
let mut addrs = vec![];
@@ -366,7 +366,7 @@ impl CodeBlock {
/// If not, this becomes an inline no-op.
#[cfg(feature = "disasm")]
pub fn add_comment(&mut self, comment: &str) {
- let cur_ptr = self.get_write_ptr().into_usize();
+ let cur_ptr = self.get_write_ptr().raw_addr(self);
// If there's no current list of comments for this line number, add one.
let this_line_comments = self.asm_comments.entry(cur_ptr).or_default();
@@ -388,7 +388,7 @@ impl CodeBlock {
#[allow(unused_variables)]
#[cfg(feature = "disasm")]
pub fn remove_comments(&mut self, start_addr: CodePtr, end_addr: CodePtr) {
- for addr in start_addr.into_usize()..end_addr.into_usize() {
+ for addr in start_addr.raw_addr(self)..end_addr.raw_addr(self) {
self.asm_comments.remove(&addr);
}
}
@@ -424,8 +424,8 @@ impl CodeBlock {
// Set the current write position from a pointer
pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
- let pos = code_ptr.into_usize() - self.mem_block.borrow().start_ptr().into_usize();
- self.set_pos(pos);
+ let pos = code_ptr.as_offset() - self.mem_block.borrow().start_ptr().as_offset();
+ self.set_pos(pos.try_into().unwrap());
}
/// Get a (possibly dangling) direct pointer into the executable memory block
@@ -435,19 +435,19 @@ impl CodeBlock {
/// Convert an address range to memory page indexes against a num_pages()-sized array.
pub fn addrs_to_pages(&self, start_addr: CodePtr, end_addr: CodePtr) -> Vec<usize> {
- let mem_start = self.mem_block.borrow().start_ptr().into_usize();
- let mem_end = self.mem_block.borrow().mapped_end_ptr().into_usize();
- assert!(mem_start <= start_addr.into_usize());
- assert!(start_addr.into_usize() <= end_addr.into_usize());
- assert!(end_addr.into_usize() <= mem_end);
+ let mem_start = self.mem_block.borrow().start_ptr().raw_addr(self);
+ let mem_end = self.mem_block.borrow().mapped_end_ptr().raw_addr(self);
+ assert!(mem_start <= start_addr.raw_addr(self));
+ assert!(start_addr.raw_addr(self) <= end_addr.raw_addr(self));
+ assert!(end_addr.raw_addr(self) <= mem_end);
// Ignore empty code ranges
if start_addr == end_addr {
return vec![];
}
- let start_page = (start_addr.into_usize() - mem_start) / self.page_size;
- let end_page = (end_addr.into_usize() - mem_start - 1) / self.page_size;
+ let start_page = (start_addr.raw_addr(self) - mem_start) / self.page_size;
+ let end_page = (end_addr.raw_addr(self) - mem_start - 1) / self.page_size;
(start_page..=end_page).collect() // TODO: consider returning an iterator
}
@@ -716,13 +716,20 @@ impl CodeBlock {
impl fmt::LowerHex for CodeBlock {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
for pos in 0..self.write_pos {
- let byte = unsafe { self.mem_block.borrow().start_ptr().raw_ptr().add(pos).read() };
+ let mem_block = &*self.mem_block.borrow();
+ let byte = unsafe { mem_block.start_ptr().raw_ptr(mem_block).add(pos).read() };
fmtr.write_fmt(format_args!("{:02x}", byte))?;
}
Ok(())
}
}
+impl crate::virtualmem::CodePtrBase for CodeBlock {
+ fn base_ptr(&self) -> std::ptr::NonNull<u8> {
+ self.mem_block.borrow().base_ptr()
+ }
+}
+
/// Wrapper struct so we can use the type system to distinguish
/// Between the inlined and outlined code blocks
pub struct OutlinedCb {
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
index 4a7c7e5a84..20ac13f09c 100644
--- a/yjit/src/asm/x86_64/mod.rs
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -362,11 +362,6 @@ pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
uimm_opnd(ptr as u64)
}
-pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd
-{
- uimm_opnd(code_ptr.raw_ptr() as u64)
-}
-
/// Write the REX byte
fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) {
// 0 1 0 0 w r x b
@@ -696,7 +691,7 @@ pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
let end_ptr = cb.get_ptr(cb.write_pos + 5);
// Compute the jump offset
- let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64();
+ let rel64: i64 = dst_ptr as i64 - end_ptr.raw_ptr(cb) as i64;
// If the offset fits in 32-bit
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
@@ -897,7 +892,7 @@ fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
let end_ptr = cb.get_ptr(cb.write_pos + 4);
// Compute the jump offset
- let rel64 = dst_ptr.into_i64() - end_ptr.into_i64();
+ let rel64 = dst_ptr.as_offset() - end_ptr.as_offset();
if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
// Write the relative 32-bit jump offset
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
index 8e9282e784..5ae983270f 100644
--- a/yjit/src/asm/x86_64/tests.rs
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -68,7 +68,7 @@ fn test_call_ptr() {
// calling a lower address
check_bytes("e8fbffffff", |cb| {
let ptr = cb.get_write_ptr();
- call_ptr(cb, RAX, ptr.raw_ptr());
+ call_ptr(cb, RAX, ptr.raw_ptr(cb));
});
}
@@ -442,15 +442,15 @@ fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> {
fn block_comments() {
let mut cb = super::CodeBlock::new_dummy(4096);
- let first_write_ptr = cb.get_write_ptr().into_usize();
+ let first_write_ptr = cb.get_write_ptr().raw_addr(&cb);
cb.add_comment("Beginning");
xor(&mut cb, EAX, EAX); // 2 bytes long
- let second_write_ptr = cb.get_write_ptr().into_usize();
+ let second_write_ptr = cb.get_write_ptr().raw_addr(&cb);
cb.add_comment("Two bytes in");
cb.add_comment("Still two bytes in");
cb.add_comment("Still two bytes in"); // Duplicate, should be ignored
test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long
- let third_write_ptr = cb.get_write_ptr().into_usize();
+ let third_write_ptr = cb.get_write_ptr().raw_addr(&cb);
cb.add_comment("Ten bytes in");
assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap());
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 553b8dd173..7b58e115c1 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -5,6 +5,7 @@ use crate::asm::arm64::*;
use crate::cruby::*;
use crate::backend::ir::*;
use crate::virtualmem::CodePtr;
+use crate::utils::*;
// Use the arm64 register type for this platform
pub type Reg = A64Reg;
@@ -97,13 +98,13 @@ fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
#[cfg(not(test))]
{
let end = cb.get_write_ptr();
- unsafe { rb_yjit_icache_invalidate(start.raw_ptr() as _, end.raw_ptr() as _) };
+ unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
}
}
fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
- let src_addr = cb.get_write_ptr().into_i64();
- let dst_addr = dst_ptr.into_i64();
+ let src_addr = cb.get_write_ptr().as_offset();
+ let dst_addr = dst_ptr.as_offset();
// If the offset is short enough, then we'll use the
// branch instruction. Otherwise, we'll move the
@@ -716,8 +717,8 @@ impl Assembler
fn emit_conditional_jump<const CONDITION: u8>(cb: &mut CodeBlock, target: Target) {
match target {
Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => {
- let dst_addr = dst_ptr.into_i64();
- let src_addr = cb.get_write_ptr().into_i64();
+ let dst_addr = dst_ptr.as_offset();
+ let src_addr = cb.get_write_ptr().as_offset();
let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) {
// If the jump offset fits into the conditional jump as
@@ -746,7 +747,7 @@ impl Assembler
} else {
// Otherwise, we need to load the address into a
// register and use the branch register instruction.
- let dst_addr = dst_ptr.into_u64();
+ let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64();
let load_insns: i32 = emit_load_size(dst_addr).into();
// We're going to write out the inverse condition so
@@ -1023,14 +1024,20 @@ impl Assembler
}
};
},
- Insn::LeaLabel { out, target, .. } => {
- let label_idx = target.unwrap_label_idx();
+ Insn::LeaJumpTarget { out, target, .. } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| {
+ adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
+ });
- cb.label_ref(label_idx, 4, |cb, end_addr, dst_addr| {
- adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4)));
- });
-
- mov(cb, out.into(), Self::SCRATCH0);
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+ emit_load_value(cb, out.into(), target_addr);
+ }
},
Insn::CPush(opnd) => {
emit_push(cb, opnd.into());
@@ -1065,7 +1072,7 @@ impl Assembler
},
Insn::CCall { fptr, .. } => {
// The offset to the call target in bytes
- let src_addr = cb.get_write_ptr().into_i64();
+ let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64;
let dst_addr = *fptr as i64;
// Use BL if the offset is short enough to encode as an immediate.
@@ -1317,8 +1324,7 @@ mod tests {
fn test_emit_je_fits_into_bcond() {
let (mut asm, mut cb) = setup_asm();
- let offset = 80;
- let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
+ let target: CodePtr = cb.get_write_ptr().add_bytes(80);
asm.je(Target::CodePtr(target));
asm.compile_with_num_regs(&mut cb, 0);
@@ -1329,7 +1335,7 @@ mod tests {
let (mut asm, mut cb) = setup_asm();
let offset = 1 << 21;
- let target: CodePtr = ((cb.get_write_ptr().into_u64() + offset) as *mut u8).into();
+ let target: CodePtr = cb.get_write_ptr().add_bytes(offset);
asm.je(Target::CodePtr(target));
asm.compile_with_num_regs(&mut cb, 0);
@@ -1340,7 +1346,7 @@ mod tests {
let (mut asm, mut cb) = setup_asm();
let label = asm.new_label("label");
- let opnd = asm.lea_label(label);
+ let opnd = asm.lea_jump_target(label);
asm.write_label(label);
asm.bake_string("Hello, world!");
@@ -1590,7 +1596,7 @@ mod tests {
assert!(gap > 0b1111111111111111111);
let instruction_at_starting_pos: [u8; 4] = unsafe {
- std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(), 4)
+ std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4)
}.try_into().unwrap();
assert_eq!(
0b000101 << 26_u32,
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index 960397ac7c..1b9ae944ee 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -447,9 +447,8 @@ pub enum Insn {
// Add a label into the IR at the point that this instruction is added.
Label(Target),
- // Load effective address relative to the current instruction pointer. It
- // accepts a single signed immediate operand.
- LeaLabel { target: Target, out: Opnd },
+ /// Get the code address of a jump target
+ LeaJumpTarget { target: Target, out: Opnd },
// Load effective address
Lea { opnd: Opnd, out: Opnd },
@@ -539,7 +538,7 @@ impl Insn {
Insn::Jo(target) |
Insn::Jz(target) |
Insn::Label(target) |
- Insn::LeaLabel { target, .. } => {
+ Insn::LeaJumpTarget { target, .. } => {
Some(target)
}
_ => None,
@@ -587,7 +586,7 @@ impl Insn {
Insn::JoMul(_) => "JoMul",
Insn::Jz(_) => "Jz",
Insn::Label(_) => "Label",
- Insn::LeaLabel { .. } => "LeaLabel",
+ Insn::LeaJumpTarget { .. } => "LeaJumpTarget",
Insn::Lea { .. } => "Lea",
Insn::LiveReg { .. } => "LiveReg",
Insn::Load { .. } => "Load",
@@ -626,7 +625,7 @@ impl Insn {
Insn::CSelNZ { out, .. } |
Insn::CSelZ { out, .. } |
Insn::Lea { out, .. } |
- Insn::LeaLabel { out, .. } |
+ Insn::LeaJumpTarget { out, .. } |
Insn::LiveReg { out, .. } |
Insn::Load { out, .. } |
Insn::LoadSExt { out, .. } |
@@ -659,7 +658,7 @@ impl Insn {
Insn::CSelNZ { out, .. } |
Insn::CSelZ { out, .. } |
Insn::Lea { out, .. } |
- Insn::LeaLabel { out, .. } |
+ Insn::LeaJumpTarget { out, .. } |
Insn::LiveReg { out, .. } |
Insn::Load { out, .. } |
Insn::LoadSExt { out, .. } |
@@ -688,7 +687,7 @@ impl Insn {
Insn::Jnz(target) |
Insn::Jo(target) |
Insn::Jz(target) |
- Insn::LeaLabel { target, .. } => Some(target),
+ Insn::LeaJumpTarget { target, .. } => Some(target),
_ => None
}
}
@@ -741,7 +740,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
Insn::JoMul(_) |
Insn::Jz(_) |
Insn::Label(_) |
- Insn::LeaLabel { .. } |
+ Insn::LeaJumpTarget { .. } |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
@@ -842,7 +841,7 @@ impl<'a> InsnOpndMutIterator<'a> {
Insn::JoMul(_) |
Insn::Jz(_) |
Insn::Label(_) |
- Insn::LeaLabel { .. } |
+ Insn::LeaJumpTarget { .. } |
Insn::PadInvalPatch |
Insn::PosMarker(_) => None,
Insn::CPopInto(opnd) |
@@ -1830,9 +1829,9 @@ impl Assembler {
}
#[must_use]
- pub fn lea_label(&mut self, target: Target) -> Opnd {
+ pub fn lea_jump_target(&mut self, target: Target) -> Opnd {
let out = self.next_opnd_out(Opnd::DEFAULT_NUM_BITS);
- self.push_insn(Insn::LeaLabel { target, out });
+ self.push_insn(Insn::LeaJumpTarget { target, out });
out
}
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index 774a02dc6b..ad46321ace 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -231,7 +231,7 @@ fn test_jcc_ptr()
{
let (mut asm, mut cb) = setup_asm();
- let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
+ let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
asm.test(
Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr()
{
let (mut asm, mut cb) = setup_asm();
- let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
+ let stub = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
asm.jmp(stub);
asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo()
{
let (mut asm, mut cb) = setup_asm();
- let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
+ let side_exit = Target::CodePtr(cb.get_write_ptr().add_bytes(4));
let arg1 = Opnd::mem(64, SP, 0);
let arg0 = Opnd::mem(64, SP, 8);
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index e87ededf69..7dd54e6c21 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -6,6 +6,7 @@ use crate::codegen::CodePtr;
use crate::cruby::*;
use crate::backend::ir::*;
use crate::options::*;
+use crate::utils::*;
// Use the x86 register type for this platform
pub type Reg = X86Reg;
@@ -586,16 +587,23 @@ impl Assembler
lea(cb, out.into(), opnd.into());
},
- // Load relative address
- Insn::LeaLabel { target, out } => {
- let label_idx = target.unwrap_label_idx();
+ // Load address of jump target
+ Insn::LeaJumpTarget { target, out } => {
+ if let Target::Label(label_idx) = target {
+ // Set output to the raw address of the label
+ cb.label_ref(*label_idx, 7, |cb, src_addr, dst_addr| {
+ let disp = dst_addr - src_addr;
+ lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
+ });
- cb.label_ref(label_idx, 7, |cb, src_addr, dst_addr| {
- let disp = dst_addr - src_addr;
- lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap()));
- });
-
- mov(cb, out.into(), Self::SCRATCH0);
+ mov(cb, out.into(), Self::SCRATCH0);
+ } else {
+ // Set output to the jump target's raw address
+ let target_code = target.unwrap_code_ptr();
+ let target_addr = target_code.raw_addr(cb).as_u64();
+ // Constant encoded length important for patching
+ movabs(cb, out.into(), target_addr);
+ }
},
// Push and pop to/from the C stack
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index eba1de713c..9d7cbb6947 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -770,8 +770,8 @@ pub fn gen_entry_prologue(
rb_yjit_set_exception_return as *mut u8,
vec![
CFP,
- Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr()),
- Opnd::const_ptr(CodegenGlobals::get_leave_exception_code().raw_ptr()),
+ Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)),
+ Opnd::const_ptr(CodegenGlobals::get_leave_exception_code().raw_ptr(cb)),
],
);
} else {
@@ -779,7 +779,7 @@ pub fn gen_entry_prologue(
// on the entry frame. See [jit_compile] for details.
asm.mov(
Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN),
- Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr()),
+ Opnd::const_ptr(CodegenGlobals::get_leave_exit_code().raw_ptr(cb)),
);
}
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index e58475a3aa..a091272470 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -564,7 +564,8 @@ impl BranchGenFn {
}
BranchGenFn::JITReturn => {
asm_comment!(asm, "update cfp->jit_return");
- asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), Opnd::const_ptr(target0.unwrap_code_ptr().raw_ptr()));
+ let raw_ptr = asm.lea_jump_target(target0);
+ asm.mov(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_JIT_RETURN), raw_ptr);
}
}
}
@@ -699,7 +700,7 @@ pub struct PendingBranch {
impl Branch {
// Compute the size of the branch code
fn code_size(&self) -> usize {
- (self.end_addr.get().raw_ptr() as usize) - (self.start_addr.raw_ptr() as usize)
+ (self.end_addr.get().as_offset() - self.start_addr.as_offset()) as usize
}
/// Get the address of one of the branch destination
@@ -1190,7 +1191,7 @@ pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
// Walk over references to objects in generated code.
for offset in block.gc_obj_offsets.iter() {
- let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_address = value_address as *const VALUE;
@@ -1248,7 +1249,7 @@ pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
for offset in block.gc_obj_offsets.iter() {
let offset_to_value = offset.as_usize();
let value_code_ptr = cb.get_ptr(offset_to_value);
- let value_ptr: *const u8 = value_code_ptr.raw_ptr();
+ let value_ptr: *const u8 = value_code_ptr.raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_ptr = value_ptr as *mut VALUE;
@@ -1466,7 +1467,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
// Run write barriers for all objects in generated code.
for offset in block.gc_obj_offsets.iter() {
- let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr(cb);
// Creating an unaligned pointer is well defined unlike in C.
let value_address: *const VALUE = value_address.cast();
@@ -1588,7 +1589,7 @@ impl Block {
// Compute the size of the block code
pub fn code_size(&self) -> usize {
- (self.end_addr.get().into_usize()) - (self.start_addr.into_usize())
+ (self.end_addr.get().as_offset() - self.start_addr.as_offset()).try_into().unwrap()
}
}
@@ -2197,7 +2198,7 @@ fn gen_block_series_body(
/// NOTE: this function assumes that the VM lock has been taken
/// If jit_exception is true, compile JIT code for handling exceptions.
/// See [jit_compile_exception] for details.
-pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<CodePtr> {
+pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<*const u8> {
// Compute the current instruction index based on the current PC
let cfp = unsafe { get_ec_cfp(ec) };
let insn_idx: u16 = unsafe {
@@ -2250,7 +2251,7 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<
incr_counter!(compiled_iseq_entry);
// Compilation successful and block not empty
- return code_ptr;
+ code_ptr.map(|ptr| ptr.raw_ptr(cb))
}
// Change the entry's jump target from an entry stub to a next entry
@@ -2300,7 +2301,7 @@ c_callable! {
// Trigger code GC (e.g. no space).
// This entry point will be recompiled later.
cb.code_gc(ocb);
- CodegenGlobals::get_stub_exit_code().raw_ptr()
+ CodegenGlobals::get_stub_exit_code().raw_ptr(cb)
});
cb.mark_all_executable();
@@ -2361,7 +2362,7 @@ fn entry_stub_hit_body(
}
// Let the stub jump to the block
- blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr())
+ blockref.map(|block| unsafe { block.as_ref() }.start_addr.raw_ptr(cb))
}
/// Generate a stub that calls entry_stub_hit
@@ -2507,6 +2508,9 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
_ => unreachable!("target_idx < 2 must always hold"),
};
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
let (target_blockid, target_ctx): (BlockId, Context) = unsafe {
// SAFETY: no mutation of the target's Cell. Just reading out data.
let target = branch.targets[target_idx].ref_unchecked().as_ref().unwrap();
@@ -2514,7 +2518,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// If this branch has already been patched, return the dst address
// Note: recursion can cause the same stub to be hit multiple times
if let BranchTarget::Block(_) = target.as_ref() {
- return target.get_address().unwrap().raw_ptr();
+ return target.get_address().unwrap().raw_ptr(cb);
}
(target.get_blockid(), target.get_ctx())
@@ -2547,15 +2551,12 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
// Bail if we're about to run out of native stack space.
// We've just reconstructed interpreter state.
if rb_ec_stack_check(ec as _) != 0 {
- return CodegenGlobals::get_stub_exit_code().raw_ptr();
+ return CodegenGlobals::get_stub_exit_code().raw_ptr(cb);
}
(cfp, original_interp_sp)
};
- let cb = CodegenGlobals::get_inline_cb();
- let ocb = CodegenGlobals::get_outlined_cb();
-
// Try to find an existing compiled version of this block
let mut block = find_block_version(target_blockid, &target_ctx);
let mut branch_modified = false;
@@ -2641,11 +2642,11 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
assert!(
new_branch_size <= branch_size_on_entry,
"branch stubs should never enlarge branches (start_addr: {:?}, old_size: {}, new_size: {})",
- branch.start_addr.raw_ptr(), branch_size_on_entry, new_branch_size,
+ branch.start_addr.raw_ptr(cb), branch_size_on_entry, new_branch_size,
);
// Return a pointer to the compiled block version
- dst_addr.raw_ptr()
+ dst_addr.raw_ptr(cb)
}
/// Generate a "stub", a piece of code that calls the compiler back when run.
@@ -3075,7 +3076,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
cb.get_write_ptr() <= block_end,
"invalidation wrote past end of block (code_size: {:?}, new_size: {})",
block.code_size(),
- cb.get_write_ptr().into_i64() - block_start.into_i64(),
+ cb.get_write_ptr().as_offset() - block_start.as_offset(),
);
cb.set_write_ptr(cur_pos);
cb.set_dropped_bytes(cur_dropped_bytes);
@@ -3139,7 +3140,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
if !target_next && branch.code_size() > old_branch_size {
panic!(
"invalidated branch grew in size (start_addr: {:?}, old_size: {}, new_size: {})",
- branch.start_addr.raw_ptr(), old_branch_size, branch.code_size()
+ branch.start_addr.raw_ptr(cb), old_branch_size, branch.code_size()
);
}
}
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs
index 0b160c7cae..7875276815 100644
--- a/yjit/src/disasm.rs
+++ b/yjit/src/disasm.rs
@@ -59,21 +59,7 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> St
let global_cb = crate::codegen::CodegenGlobals::get_inline_cb();
// Sort the blocks by increasing start addresses
- block_list.sort_by(|a, b| {
- use std::cmp::Ordering;
-
- // Get the start addresses for each block
- let addr_a = a.get_start_addr().raw_ptr();
- let addr_b = b.get_start_addr().raw_ptr();
-
- if addr_a < addr_b {
- Ordering::Less
- } else if addr_a == addr_b {
- Ordering::Equal
- } else {
- Ordering::Greater
- }
- });
+ block_list.sort_by_key(|block| block.get_start_addr().as_offset());
// Compute total code size in bytes for all blocks in the function
let mut total_code_size = 0;
@@ -116,7 +102,7 @@ pub fn disasm_iseq_insn_range(iseq: IseqPtr, start_idx: u16, end_idx: u16) -> St
// Compute the size of the gap between this block and the next
let next_block = block_list[block_idx + 1];
let next_start_addr = next_block.get_start_addr();
- let gap_size = next_start_addr.into_usize() - end_addr.into_usize();
+ let gap_size = next_start_addr.as_offset() - end_addr.as_offset();
// Log the size of the gap between the blocks if nonzero
if gap_size > 0 {
@@ -212,8 +198,8 @@ macro_rules! assert_disasm {
{
let disasm = disasm_addr_range(
&$cb,
- $cb.get_ptr(0).raw_ptr() as usize,
- $cb.get_write_ptr().raw_ptr() as usize,
+ $cb.get_ptr(0).raw_addr(&$cb),
+ $cb.get_write_ptr().raw_addr(&$cb),
);
assert_eq!(unindent(&disasm, false), unindent(&$disasm, true));
}
diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs
index 26c15b692e..b03956c771 100644
--- a/yjit/src/invariants.rs
+++ b/yjit/src/invariants.rs
@@ -521,10 +521,10 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
let old_pos = cb.get_write_pos();
let old_dropped_bytes = cb.has_dropped_bytes();
let mut patches = CodegenGlobals::take_global_inval_patches();
- patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr());
+ patches.sort_by_cached_key(|patch| patch.inline_patch_pos.raw_ptr(cb));
let mut last_patch_end = std::ptr::null();
for patch in &patches {
- assert!(last_patch_end <= patch.inline_patch_pos.raw_ptr(), "patches should not overlap");
+ assert!(last_patch_end <= patch.inline_patch_pos.raw_ptr(cb), "patches should not overlap");
let mut asm = crate::backend::ir::Assembler::new();
asm.jmp(patch.outlined_target_pos.as_side_exit());
@@ -532,7 +532,7 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
cb.set_write_ptr(patch.inline_patch_pos);
cb.set_dropped_bytes(false);
asm.compile(cb, None).expect("can rewrite existing code");
- last_patch_end = cb.get_write_ptr().raw_ptr();
+ last_patch_end = cb.get_write_ptr().raw_ptr(cb);
}
cb.set_pos(old_pos);
cb.set_dropped_bytes(old_dropped_bytes);
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
index 58415e279f..6bc66ee33e 100644
--- a/yjit/src/utils.rs
+++ b/yjit/src/utils.rs
@@ -51,6 +51,20 @@ impl IntoUsize for u8 {
}
}
+/// The [Into<u64>] Rust does not provide.
+/// Convert to u64 with assurance that the value is preserved.
+/// Currently, `usize::BITS == 64` holds for all platforms we support.
+pub(crate) trait IntoU64 {
+ fn as_u64(self) -> u64;
+}
+
+#[cfg(target_pointer_width = "64")]
+impl IntoU64 for usize {
+ fn as_u64(self) -> u64 {
+ self as u64
+ }
+}
+
/// Compute an offset in bytes of a given struct field
#[allow(unused)]
macro_rules! offset_of {
@@ -219,7 +233,7 @@ pub fn print_str(asm: &mut Assembler, str: &str) {
asm.bake_string(str);
asm.write_label(after_string);
- let opnd = asm.lea_label(string_data);
+ let opnd = asm.lea_jump_target(string_data);
asm.ccall(print_str_cfun as *const u8, vec![opnd, Opnd::UImm(str.len() as u64)]);
asm.cpop_all();
diff --git a/yjit/src/virtualmem.rs b/yjit/src/virtualmem.rs
index d6e5089dac..f3c0ceefff 100644
--- a/yjit/src/virtualmem.rs
+++ b/yjit/src/virtualmem.rs
@@ -57,14 +57,39 @@ pub trait Allocator {
fn mark_unused(&mut self, ptr: *const u8, size: u32) -> bool;
}
-/// Pointer into a [VirtualMemory].
-/// We may later change this to wrap an u32.
-/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
+/// Pointer into a [VirtualMemory] represented as an offset from the base.
+/// Note: there is no NULL constant for [CodePtr]. You should use `Option<CodePtr>` instead.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
#[repr(C, packed)]
-pub struct CodePtr(NonNull<u8>);
+pub struct CodePtr(u32);
impl CodePtr {
+ /// Advance the CodePtr. Can return a dangling pointer.
+ pub fn add_bytes(self, bytes: usize) -> Self {
+ let CodePtr(raw) = self;
+ let bytes: u32 = bytes.try_into().unwrap();
+ CodePtr(raw + bytes)
+ }
+
+ /// Note that the raw pointer might be dangling if there hasn't
+ /// been any writes to it through the [VirtualMemory] yet.
+ pub fn raw_ptr(self, base: &impl CodePtrBase) -> *const u8 {
+ let CodePtr(offset) = self;
+ return base.base_ptr().as_ptr().wrapping_add(offset.as_usize())
+ }
+
+ /// Get the address of the code pointer.
+ pub fn raw_addr(self, base: &impl CodePtrBase) -> usize {
+ self.raw_ptr(base) as usize
+ }
+
+ /// Get the offset component for the code pointer. Useful for finding the distance between two
+ /// code pointers that share the same [VirtualMem].
+ pub fn as_offset(self) -> i64 {
+ let CodePtr(offset) = self;
+ offset.into()
+ }
+
pub fn as_side_exit(self) -> Target {
Target::SideExitPtr(self)
}
@@ -98,7 +123,7 @@ impl<A: Allocator> VirtualMemory<A> {
/// Return the start of the region as a raw pointer. Note that it could be a dangling
/// pointer so be careful dereferencing it.
pub fn start_ptr(&self) -> CodePtr {
- CodePtr(self.region_start)
+ CodePtr(0)
}
pub fn mapped_end_ptr(&self) -> CodePtr {
@@ -128,7 +153,7 @@ impl<A: Allocator> VirtualMemory<A> {
/// Write a single byte. The first write to a page makes it readable.
pub fn write_byte(&mut self, write_ptr: CodePtr, byte: u8) -> Result<(), WriteError> {
let page_size = self.page_size_bytes;
- let raw: *mut u8 = write_ptr.raw_ptr() as *mut u8;
+ let raw: *mut u8 = write_ptr.raw_ptr(self) as *mut u8;
let page_addr = (raw as usize / page_size) * page_size;
if self.current_write_page == Some(page_addr) {
@@ -209,57 +234,30 @@ impl<A: Allocator> VirtualMemory<A> {
/// Free a range of bytes. start_ptr must be memory page-aligned.
pub fn free_bytes(&mut self, start_ptr: CodePtr, size: u32) {
- assert_eq!(start_ptr.into_usize() % self.page_size_bytes, 0);
+ assert_eq!(start_ptr.raw_ptr(self) as usize % self.page_size_bytes, 0);
// Bounds check the request. We should only free memory we manage.
- let mapped_region = self.start_ptr().raw_ptr()..self.mapped_end_ptr().raw_ptr();
- let virtual_region = self.start_ptr().raw_ptr()..self.virtual_end_ptr().raw_ptr();
- let last_byte_to_free = start_ptr.add_bytes(size.saturating_sub(1).as_usize()).raw_ptr();
- assert!(mapped_region.contains(&start_ptr.raw_ptr()));
+ let mapped_region = self.start_ptr().raw_ptr(self)..self.mapped_end_ptr().raw_ptr(self);
+ let virtual_region = self.start_ptr().raw_ptr(self)..self.virtual_end_ptr().raw_ptr(self);
+ let last_byte_to_free = start_ptr.add_bytes(size.saturating_sub(1).as_usize()).raw_ptr(self);
+ assert!(mapped_region.contains(&start_ptr.raw_ptr(self)));
// On platforms where code page size != memory page size (e.g. Linux), we often need
// to free code pages that contain unmapped memory pages. When it happens on the last
// code page, it's more appropriate to check the last byte against the virtual region.
assert!(virtual_region.contains(&last_byte_to_free));
- self.allocator.mark_unused(start_ptr.0.as_ptr(), size);
+ self.allocator.mark_unused(start_ptr.raw_ptr(self), size);
}
}
-impl CodePtr {
- /// Note that the raw pointer might be dangling if there hasn't
- /// been any writes to it through the [VirtualMemory] yet.
- pub fn raw_ptr(self) -> *const u8 {
- let CodePtr(ptr) = self;
- return ptr.as_ptr();
- }
-
- /// Advance the CodePtr. Can return a dangling pointer.
- pub fn add_bytes(self, bytes: usize) -> Self {
- let CodePtr(raw) = self;
- CodePtr(NonNull::new(raw.as_ptr().wrapping_add(bytes)).unwrap())
- }
-
- pub fn into_i64(self) -> i64 {
- let CodePtr(ptr) = self;
- ptr.as_ptr() as i64
- }
-
- #[cfg(target_arch = "aarch64")]
- pub fn into_u64(self) -> u64 {
- let CodePtr(ptr) = self;
- ptr.as_ptr() as u64
- }
-
- pub fn into_usize(self) -> usize {
- let CodePtr(ptr) = self;
- ptr.as_ptr() as usize
- }
+/// Something that could provide a base pointer to compute a raw pointer from a [CodePtr].
+pub trait CodePtrBase {
+ fn base_ptr(&self) -> NonNull<u8>;
}
-impl From<*mut u8> for CodePtr {
- fn from(value: *mut u8) -> Self {
- assert!(value as usize != 0);
- return CodePtr(NonNull::new(value).unwrap());
+impl<A: Allocator> CodePtrBase for VirtualMemory<A> {
+ fn base_ptr(&self) -> NonNull<u8> {
+ self.region_start
}
}
@@ -416,7 +414,7 @@ pub mod tests {
let one_past_end = virt.start_ptr().add_bytes(virt.virtual_region_size());
assert_eq!(Err(OutOfBounds), virt.write_byte(one_past_end, 0));
- let end_of_addr_space = CodePtr(NonNull::new(usize::MAX as _).unwrap());
+ let end_of_addr_space = CodePtr(u32::MAX);
assert_eq!(Err(OutOfBounds), virt.write_byte(end_of_addr_space, 0));
}
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
index 8431168763..d2ec5f0568 100644
--- a/yjit/src/yjit.rs
+++ b/yjit/src/yjit.rs
@@ -145,7 +145,7 @@ pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exc
let maybe_code_ptr = with_compile_time(|| { gen_entry_point(iseq, ec, jit_exception) });
match maybe_code_ptr {
- Some(ptr) => ptr.raw_ptr(),
+ Some(ptr) => ptr,
None => std::ptr::null(),
}
}