author     Takashi Kokubun <takashikkbn@gmail.com>    2022-10-17 10:45:59 -0700
committer  GitHub <noreply@github.com>                2022-10-17 10:45:59 -0700
commit     64c52c428285e7930aed62740cc9c54ee483178e (patch)
tree       818515b6cc1909e98cdcdca93f0a3ac3b2b8cd5a /yjit/src/backend
parent     e7c71c6c9271b0c29f210769159090e17128e740 (diff)
download   ruby-64c52c428285e7930aed62740cc9c54ee483178e.tar.gz
YJIT: Interleave inline and outlined code blocks (#6460)
Co-authored-by: Alan Wu <alansi.xingwu@shopify.com>
Co-authored-by: Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>
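Both backends' emit loops change in the same way in this commit: instead of `for insn in &self.insns`, they track an index, snapshot the write pointer and label state before each instruction, and, when the code block reports dropped bytes (the current page filled up), jump to the next page and retry the same instruction. The toy below is a minimal, self-contained sketch of that retry pattern; `PAGE_SIZE`, `Insn`, and this `CodeBlock` are simplified stand-ins for YJIT's real types (the real `next_page` also takes the old write pointer and a jump-emitting callback, `next_page(src_ptr, emit_jmp_ptr)` in the diff), and only the control flow mirrors the change.

```rust
// Toy model of the per-instruction retry loop from arm64_emit/x86_emit.
const PAGE_SIZE: usize = 16; // bytes per code page (illustrative)

#[derive(Clone, Copy, Debug)]
struct Insn(usize); // an "instruction" that encodes to N bytes

struct CodeBlock {
    pages: Vec<Vec<u8>>,
    dropped_bytes: bool, // set when a write would overflow the current page
}

impl CodeBlock {
    fn new() -> Self {
        CodeBlock { pages: vec![Vec::new()], dropped_bytes: false }
    }

    fn write_bytes(&mut self, bytes: &[u8]) {
        let page = self.pages.last_mut().unwrap();
        if page.len() + bytes.len() > PAGE_SIZE {
            // Like the real CodeBlock, remember that the write failed
            // instead of overflowing the page.
            self.dropped_bytes = true;
        } else {
            page.extend_from_slice(bytes);
        }
    }

    fn has_dropped_bytes(&self) -> bool {
        self.dropped_bytes
    }

    // Move emission to a fresh page and report success. The real next_page
    // also emits a jump from the old page to the new one.
    fn next_page(&mut self) -> bool {
        // Give up rather than loop forever if even a fresh page was too
        // small (the real next_page can fail when no usable page remains).
        if self.pages.last().unwrap().is_empty() {
            return false;
        }
        self.pages.push(Vec::new());
        self.dropped_bytes = false;
        true
    }
}

fn emit_all(cb: &mut CodeBlock, insns: &[Insn]) {
    let mut insn_idx = 0;
    while let Some(insn) = insns.get(insn_idx) {
        let had_dropped_bytes = cb.has_dropped_bytes();
        cb.write_bytes(&vec![0u8; insn.0]);

        // On failure, jump to the next page and retry the current insn,
        // mirroring the `while let Some(insn)` loops in the diff.
        if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page() {
            // (the real code also rolls back the label state here)
        } else {
            insn_idx += 1;
        }
    }
}

fn main() {
    let mut cb = CodeBlock::new();
    emit_all(&mut cb, &[Insn(10), Insn(10), Insn(4)]);
    // The second Insn did not fit on page 1, so it was retried on page 2.
    assert_eq!(cb.pages.len(), 2);
    println!("{:?}", cb.pages.iter().map(|p| p.len()).collect::<Vec<_>>());
}
```

The `had_dropped_bytes` snapshot guarantees at most one retry per instruction: if the block was already in the failed state before this instruction, emission moves on rather than looping.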
Diffstat (limited to 'yjit/src/backend')

-rw-r--r--  yjit/src/backend/arm64/mod.rs   | 103
-rw-r--r--  yjit/src/backend/ir.rs          |  17
-rw-r--r--  yjit/src/backend/tests.rs       |   6
-rw-r--r--  yjit/src/backend/x86_64/mod.rs  |  36

4 files changed, 103 insertions, 59 deletions
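A detail worth calling out in the arm64 diff below: `emit_jmp_ptr` always emits exactly `JMP_PTR_BYTES` (20 bytes, i.e. five 4-byte A64 instructions), padding the short `b` form with `nop`s so the site can later be patched to the long load-immediate-plus-`br` form in place. A small self-contained check of that invariant follows; the function names and the `load_insns` parameter are illustrative, not YJIT's API.

```rust
// Toy check of the fixed-width jump invariant from the arm64 diff below:
// whichever encoding is chosen, the emitted sequence is JMP_PTR_BYTES long.
const JMP_PTR_BYTES: usize = 20; // five 4-byte A64 instructions

// Hypothetical stand-in: number of instructions each branch form needs.
fn jmp_insn_count(offset_fits: bool, load_insns: usize) -> usize {
    if offset_fits {
        1 // a single `b` with a relative offset
    } else {
        load_insns + 1 // mov-immediate sequence (emit_load_value) + `br`
    }
}

fn emit_jmp(offset_fits: bool, load_insns: usize) -> usize {
    let num_insns = jmp_insn_count(offset_fits, load_insns);
    // Pad with nops so the total is always JMP_PTR_BYTES / 4 instructions,
    // keeping the region patchable between the two encodings.
    let nops = (JMP_PTR_BYTES / 4) - num_insns;
    (num_insns + nops) * 4
}

fn main() {
    assert_eq!(emit_jmp(true, 0), JMP_PTR_BYTES);  // short branch + 4 nops
    assert_eq!(emit_jmp(false, 4), JMP_PTR_BYTES); // 4 movs + br, no nops
    println!("both forms emit {} bytes", JMP_PTR_BYTES);
}
```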
diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs
index 0180737d4d..5df072ed38 100644
--- a/yjit/src/backend/arm64/mod.rs
+++ b/yjit/src/backend/arm64/mod.rs
@@ -4,7 +4,7 @@
 use crate::asm::{CodeBlock};
 use crate::asm::arm64::*;
-use crate::codegen::{JITState};
+use crate::codegen::{JITState, CodegenGlobals};
 use crate::cruby::*;
 use crate::backend::ir::*;
 use crate::virtualmem::CodePtr;
@@ -36,6 +36,9 @@
 pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG);
 pub const C_SP_REG: A64Opnd = X31;
 pub const C_SP_STEP: i32 = 16;
+// The number of bytes that are generated by emit_jmp_ptr
+pub const JMP_PTR_BYTES: usize = 20;
+
 /// Map Opnd to A64Opnd
 impl From<Opnd> for A64Opnd {
     fn from(opnd: Opnd) -> Self {
@@ -567,7 +570,7 @@ impl Assembler
         /// Emit the required instructions to load the given value into the
         /// given register. Our goal here is to use as few instructions as
         /// possible to get this value into the register.
-        fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> i32 {
+        fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
            let mut current = value;

            if current <= 0xffff {
@@ -680,6 +683,31 @@
            ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP));
        }

+        fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr) {
+            let src_addr = cb.get_write_ptr().into_i64();
+            let dst_addr = dst_ptr.into_i64();
+
+            // If the offset is short enough, then we'll use the
+            // branch instruction. Otherwise, we'll move the
+            // destination into a register and use the branch
+            // register instruction.
+            let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
+                b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
+                1
+            } else {
+                let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64);
+                br(cb, Assembler::SCRATCH0);
+                num_insns + 1
+            };
+
+            // Make sure it's always a consistent number of
+            // instructions in case it gets patched and has to
+            // use the other branch.
+            for _ in num_insns..(JMP_PTR_BYTES / 4) {
+                nop(cb);
+            }
+        }
+
        // dbg!(&self.insns);

        // List of GC offsets
@@ -687,7 +715,13 @@
        // For each instruction
        let start_write_pos = cb.get_write_pos();
-        for insn in &self.insns {
+        let mut insn_idx: usize = 0;
+        while let Some(insn) = self.insns.get(insn_idx) {
+            let src_ptr = cb.get_write_ptr();
+            let had_dropped_bytes = cb.has_dropped_bytes();
+            let old_label_state = cb.get_label_state();
+            let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
            match insn {
                Insn::Comment(text) => {
                    if cfg!(feature = "asm_comments") {
@@ -796,7 +830,7 @@
                            cb.write_bytes(&value.as_u64().to_le_bytes());

                            let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
-                            gc_offsets.push(ptr_offset);
+                            insn_gc_offsets.push(ptr_offset);
                        },
                        Opnd::None => {
                            unreachable!("Attempted to load from None operand");
@@ -904,28 +938,7 @@
                Insn::Jmp(target) => {
                    match target {
                        Target::CodePtr(dst_ptr) => {
-                            let src_addr = cb.get_write_ptr().into_i64();
-                            let dst_addr = dst_ptr.into_i64();
-
-                            // If the offset is short enough, then we'll use the
-                            // branch instruction. Otherwise, we'll move the
-                            // destination into a register and use the branch
-                            // register instruction.
-                            let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
-                                b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
-                                0
-                            } else {
-                                let num_insns = emit_load_value(cb, Self::SCRATCH0, dst_addr as u64);
-                                br(cb, Self::SCRATCH0);
-                                num_insns
-                            };
-
-                            // Make sure it's always a consistent number of
-                            // instructions in case it gets patched and has to
-                            // use the other branch.
-                            for _ in num_insns..4 {
-                                nop(cb);
-                            }
+                            emit_jmp_ptr(cb, *dst_ptr);
                        },
                        Target::Label(label_idx) => {
                            // Here we're going to save enough space for
@@ -997,13 +1010,21 @@
                    csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE);
                }
                Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
-                Insn::PadEntryExit => {
-                    let jmp_len = 5 * 4; // Op::Jmp may emit 5 instructions
-                    while (cb.get_write_pos() - start_write_pos) < jmp_len {
+                Insn::PadInvalPatch => {
+                    while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < JMP_PTR_BYTES {
                        nop(cb);
                    }
                }
            };
+
+            // On failure, jump to the next page and retry the current insn
+            if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr) {
+                // Reset cb states before retrying the current Insn
+                cb.set_label_state(old_label_state);
+            } else {
+                insn_idx += 1;
+                gc_offsets.append(&mut insn_gc_offsets);
+            }
        }

        gc_offsets
@@ -1020,21 +1041,23 @@
            assert!(label_idx == idx);
        }

-        let start_write_pos = cb.get_write_pos();
+        let start_ptr = cb.get_write_ptr();
        let gc_offsets = asm.arm64_emit(cb);

-        if !cb.has_dropped_bytes() {
+        if cb.has_dropped_bytes() {
+            cb.clear_labels();
+        } else {
            cb.link_labels();
-        }

-        // Invalidate icache for newly written out region so we don't run stale code.
-        #[cfg(not(test))]
-        {
-            let start = cb.get_ptr(start_write_pos).raw_ptr();
-            let write_ptr = cb.get_write_ptr().raw_ptr();
-            let codeblock_end = cb.get_ptr(cb.get_mem_size()).raw_ptr();
-            let end = std::cmp::min(write_ptr, codeblock_end);
-            unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+            // Invalidate icache for newly written out region so we don't run stale code.
+            // It should invalidate only the code ranges of the current cb because the code
+            // ranges of the other cb might have a memory region that is still PROT_NONE.
+            #[cfg(not(test))]
+            cb.without_page_end_reserve(|cb| {
+                for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
+                    unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
+                }
+            });
        }

        gc_offsets
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index ba7e372188..e11235aec9 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -5,6 +5,7 @@
 use std::cell::Cell;
 use std::fmt;
 use std::convert::From;
+use std::io::Write;
 use std::mem::take;
 use crate::cruby::{VALUE};
 use crate::virtualmem::{CodePtr};
@@ -433,9 +434,9 @@
     // binary OR operation.
     Or { left: Opnd, right: Opnd, out: Opnd },

-    /// Pad nop instructions to accomodate Op::Jmp in case the block is
-    /// invalidated.
-    PadEntryExit,
+    /// Pad nop instructions to accomodate Op::Jmp in case the block or the insn
+    /// is invalidated.
+    PadInvalPatch,

     // Mark a position in the generated code
     PosMarker(PosMarkerFn),
@@ -521,7 +522,7 @@
            Insn::Mov { .. } => "Mov",
            Insn::Not { .. } => "Not",
            Insn::Or { .. } => "Or",
-            Insn::PadEntryExit => "PadEntryExit",
+            Insn::PadInvalPatch => "PadEntryExit",
            Insn::PosMarker(_) => "PosMarker",
            Insn::RShift { .. } => "RShift",
            Insn::Store { .. } => "Store",
@@ -658,7 +659,7 @@ impl<'a> Iterator for InsnOpndIterator<'a> {
            Insn::Jz(_) |
            Insn::Label(_) |
            Insn::LeaLabel { .. } |
-            Insn::PadEntryExit |
+            Insn::PadInvalPatch |
            Insn::PosMarker(_) => None,
            Insn::CPopInto(opnd) |
            Insn::CPush(opnd) |
@@ -755,7 +756,7 @@ impl<'a> InsnOpndMutIterator<'a> {
            Insn::Jz(_) |
            Insn::Label(_) |
            Insn::LeaLabel { .. } |
-            Insn::PadEntryExit |
+            Insn::PadInvalPatch |
            Insn::PosMarker(_) => None,
            Insn::CPopInto(opnd) |
            Insn::CPush(opnd) |
@@ -1474,8 +1475,8 @@ impl Assembler {
        out
    }

-    pub fn pad_entry_exit(&mut self) {
-        self.push_insn(Insn::PadEntryExit);
+    pub fn pad_inval_patch(&mut self) {
+        self.push_insn(Insn::PadInvalPatch);
    }

    //pub fn pos_marker<F: FnMut(CodePtr)>(&mut self, marker_fn: F)
diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs
index 1bad8642a2..3098c7e3b0 100644
--- a/yjit/src/backend/tests.rs
+++ b/yjit/src/backend/tests.rs
@@ -231,7 +231,7 @@ fn test_jcc_ptr() {
    let (mut asm, mut cb) = setup_asm();

-    let side_exit = Target::CodePtr((5 as *mut u8).into());
+    let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
    let not_mask = asm.not(Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_MASK));
    asm.test(
        Opnd::mem(32, EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
@@ -248,7 +248,7 @@ fn test_jmp_ptr() {
    let (mut asm, mut cb) = setup_asm();

-    let stub = Target::CodePtr((5 as *mut u8).into());
+    let stub = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());
    asm.jmp(stub);

    asm.compile_with_num_regs(&mut cb, 0);
@@ -259,7 +259,7 @@ fn test_jo() {
    let (mut asm, mut cb) = setup_asm();

-    let side_exit = Target::CodePtr((5 as *mut u8).into());
+    let side_exit = Target::CodePtr(((cb.get_write_ptr().raw_ptr() as usize + 4) as *mut u8).into());

    let arg1 = Opnd::mem(64, SP, 0);
    let arg0 = Opnd::mem(64, SP, 8);
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index f6bd822727..c8aa1a0ed5 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -9,6 +9,7 @@
 use crate::asm::x86_64::*;
 use crate::codegen::{JITState};
 use crate::cruby::*;
 use crate::backend::ir::*;
+use crate::codegen::CodegenGlobals;

 // Use the x86 register type for this platform
 pub type Reg = X86Reg;
@@ -32,6 +33,9 @@ pub const _C_ARG_OPNDS: [Opnd; 6] = [
 pub const C_RET_REG: Reg = RAX_REG;
 pub const _C_RET_OPND: Opnd = Opnd::Reg(RAX_REG);

+// The number of bytes that are generated by jmp_ptr
+pub const JMP_PTR_BYTES: usize = 6;
+
 /// Map Opnd to X86Opnd
 impl From<Opnd> for X86Opnd {
     fn from(opnd: Opnd) -> Self {
@@ -375,7 +379,13 @@ impl Assembler
        // For each instruction
        let start_write_pos = cb.get_write_pos();
-        for insn in &self.insns {
+        let mut insns_idx: usize = 0;
+        while let Some(insn) = self.insns.get(insns_idx) {
+            let src_ptr = cb.get_write_ptr();
+            let had_dropped_bytes = cb.has_dropped_bytes();
+            let old_label_state = cb.get_label_state();
+            let mut insn_gc_offsets: Vec<u32> = Vec::new();
+
            match insn {
                Insn::Comment(text) => {
                    if cfg!(feature = "asm_comments") {
@@ -461,7 +471,7 @@
                    if !val.special_const_p() {
                        // The pointer immediate is encoded as the last part of the mov written out
                        let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
-                        gc_offsets.push(ptr_offset);
+                        insn_gc_offsets.push(ptr_offset);
                    }
                }
            },
@@ -651,11 +661,10 @@
                    emit_csel(cb, *truthy, *falsy, *out, cmovl);
                }
                Insn::LiveReg { .. } => (), // just a reg alloc signal, no code
-                Insn::PadEntryExit => {
-                    // We assume that our Op::Jmp usage that gets invalidated is <= 5
-                    let code_size: u32 = (cb.get_write_pos() - start_write_pos).try_into().unwrap();
-                    if code_size < 5 {
-                        nop(cb, 5 - code_size);
+                Insn::PadInvalPatch => {
+                    let code_size = cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()));
+                    if code_size < JMP_PTR_BYTES {
+                        nop(cb, (JMP_PTR_BYTES - code_size) as u32);
                    }
                }
@@ -666,6 +675,15 @@
                #[allow(unreachable_patterns)]
                _ => panic!("unsupported instruction passed to x86 backend: {:?}", insn)
            };
+
+            // On failure, jump to the next page and retry the current insn
+            if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, jmp_ptr) {
+                // Reset cb states before retrying the current Insn
+                cb.set_label_state(old_label_state);
+            } else {
+                insns_idx += 1;
+                gc_offsets.append(&mut insn_gc_offsets);
+            }
        }

        gc_offsets
@@ -684,7 +702,9 @@
        let gc_offsets = asm.x86_emit(cb);

-        if !cb.has_dropped_bytes() {
+        if cb.has_dropped_bytes() {
+            cb.clear_labels();
+        } else {
            cb.link_labels();
        }
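The renamed `PadInvalPatch` instruction sizes its padding the same way on both platforms: measure the code emitted since the later of the assembler's start position and the current page's start, and pad with `nop`s up to `JMP_PTR_BYTES`. Taking the `max` with the page start matters now that a block can span pages, since only bytes on the current page can be overwritten by an invalidation patch. A sketch of just that arithmetic, with names mirroring the diff and purely illustrative byte positions:

```rust
// Sketch of the PadInvalPatch sizing rule shared by both backends.
fn pad_bytes_needed(
    write_pos: usize,
    start_write_pos: usize,
    page_start_pos: usize,
    jmp_ptr_bytes: usize,
) -> usize {
    // Code emitted since the later of the block start and the page start.
    let code_size = write_pos.saturating_sub(std::cmp::max(start_write_pos, page_start_pos));
    // Pad the patchable region up to the size of a full jmp.
    jmp_ptr_bytes.saturating_sub(code_size)
}

fn main() {
    // x86_64: JMP_PTR_BYTES == 6. Only 2 bytes emitted since the region
    // began, so 4 bytes of nops keep a future jmp patch from clobbering
    // whatever follows.
    assert_eq!(pad_bytes_needed(102, 100, 64, 6), 4);
    // Region already longer than a jmp: no padding needed.
    assert_eq!(pad_bytes_needed(120, 100, 64, 6), 0);
    // Block started on a previous page: measure from the page start instead.
    assert_eq!(pad_bytes_needed(130, 100, 128, 6), 4);
    println!("padding rule holds");
}
```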
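Finally, the arm64 icache invalidation no longer flushes one contiguous `[start, end)` span. With inline and outlined code interleaved page by page, the bytes one `CodeBlock` just wrote may be split across non-adjacent pages, and pages belonging to the other block can still be `PROT_NONE`, so touching them would fault. The model below illustrates why the flush iterates sub-ranges; the ownership predicate and page math are hypothetical, not YJIT's actual `writable_addrs`.

```rust
// Illustrative model: split a written span into the sub-ranges that fall
// on pages owned by this code block (e.g. inline vs. outlined pages).
const PAGE_SIZE: usize = 64; // illustrative

fn writable_ranges(owned: impl Fn(usize) -> bool, start: usize, end: usize) -> Vec<(usize, usize)> {
    let mut out = Vec::new();
    let mut page = start / PAGE_SIZE;
    while page * PAGE_SIZE < end {
        if owned(page) {
            // Clip the page to the written span.
            let lo = std::cmp::max(start, page * PAGE_SIZE);
            let hi = std::cmp::min(end, (page + 1) * PAGE_SIZE);
            if lo < hi {
                out.push((lo, hi));
            }
        }
        page += 1;
    }
    out
}

fn main() {
    // Suppose this block owns the even pages; a write spanning three pages
    // must then be flushed as two separate ranges, skipping the odd page.
    let ranges = writable_ranges(|p| p % 2 == 0, 10, 180);
    assert_eq!(ranges, vec![(10, 64), (128, 180)]);
    println!("{:?}", ranges);
}
```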