author     Alan Wu <alanwu@ruby-lang.org>  2022-04-19 14:40:21 -0400
committer  Alan Wu <XrXr@users.noreply.github.com>  2022-04-27 11:00:22 -0400
commit     f90549cd38518231a6a74432fe1168c943a7cc18 (patch)
tree       c277bbfab47e230bd549bd5f607f60c3e812a714
parent     f553180a86b71830a1de49dd04874b3880c5c698 (diff)
download   ruby-f90549cd38518231a6a74432fe1168c943a7cc18.tar.gz
Rust YJIT
In December 2021, we opened an [issue] to solicit feedback regarding the porting
of the YJIT codebase from C99 to Rust. There were some reservations, but this
project was given the go-ahead by Ruby core developers and Matz. Since then, we
have successfully completed the port of YJIT to Rust.

The new Rust version of YJIT has reached parity with the C version, in that it
passes all the CRuby tests, is able to run all of the YJIT benchmarks, and
performs similarly to the C version (because it works the same way and largely
generates the same machine code). We've even incorporated some design
improvements, such as a more fine-grained constant invalidation mechanism,
which we expect will make a big difference in Ruby on Rails applications.

Because we want to be careful, YJIT is guarded behind a configure option:

```shell
./configure --enable-yjit     # Build YJIT in release mode
./configure --enable-yjit=dev # Build YJIT in dev/debug mode
```

By default, YJIT does not get compiled and cargo/rustc is not required. If YJIT
is built in dev mode, then `cargo` is used to fetch development dependencies,
but when building in release mode, `cargo` is not required, only `rustc`. At
the moment YJIT requires Rust 1.60.0 or newer.

The YJIT command-line options remain mostly unchanged, and more details about
the build process are documented in `doc/yjit/yjit.md`.

The CI tests have been updated and do not take any more resources than before.

The development history of the Rust port is available at the following commit
for interested parties:
https://github.com/Shopify/ruby/commit/1fd9573d8b4b65219f1c2407f30a0a60e537f8be

Our hope is that Rust YJIT will be compiled and included as a part of system
packages and compiled binaries of the Ruby 3.2 release. We do not anticipate
any major problems as Rust is well supported on every platform which YJIT
supports, but to make sure that this process works smoothly, we would like to
reach out to those who take care of building system packages before the 3.2
release is shipped and resolve any issues that may come up.

[issue]: https://bugs.ruby-lang.org/issues/18481

Co-authored-by: Maxime Chevalier-Boisvert <maximechevalierb@gmail.com>
Co-authored-by: Noah Gibbs <the.codefolio.guy@gmail.com>
Co-authored-by: Kevin Newton <kddnewton@gmail.com>
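As a quick way to verify the result of either configure option above, the built
interpreter can be asked whether YJIT was compiled in and is active. A minimal
sketch, assuming the `ruby` binary on `PATH` is the one produced by this build:

```shell
# Sketch: check that YJIT is present and enabled at runtime.
ruby --yjit -v                            # version banner should include +YJIT
ruby --yjit -e 'p RubyVM::YJIT.enabled?'  # => true when the JIT is running
```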
-rw-r--r--  .github/CODEOWNERS  1
-rw-r--r--  .github/workflows/yjit-ubuntu.yml  55
-rw-r--r--  .github/workflows/yjit_asm_tests.yml  38
-rw-r--r--  bootstraptest/test_yjit.rb  46
-rw-r--r--  bootstraptest/test_yjit_rust_port.rb  422
-rw-r--r--  common.mk  43
-rw-r--r--  configure.ac  42
-rw-r--r--  defs/gmake.mk  55
-rw-r--r--  doc/yjit/yjit.md  127
-rw-r--r--  ext/coverage/depend  1
-rw-r--r--  ext/objspace/depend  1
-rw-r--r--  internal/cmdlineopt.h  3
-rw-r--r--  iseq.c  12
-rw-r--r--  load.c  1
-rwxr-xr-x  misc/test_yjit_asm.sh  10
-rw-r--r--  misc/yjit_asm_tests.c  443
-rw-r--r--  ruby.c  53
-rw-r--r--  template/Makefile.in  20
-rw-r--r--  test/ruby/test_yjit.rb  71
-rw-r--r--  vm.c  7
-rw-r--r--  vm_core.h  13
-rw-r--r--  vm_insnhelper.c  9
-rw-r--r--  vm_method.c  4
-rw-r--r--  yjit.c  1016
-rw-r--r--  yjit.h  74
-rw-r--r--  yjit.rb  165
-rw-r--r--  yjit/.gitignore  2
-rw-r--r--  yjit/Cargo.lock  42
-rw-r--r--  yjit/Cargo.toml  39
-rw-r--r--  yjit/bindgen/Cargo.lock  345
-rw-r--r--  yjit/bindgen/Cargo.toml  9
-rw-r--r--  yjit/bindgen/src/main.rs  286
-rw-r--r--  yjit/src/asm/mod.rs  392
-rw-r--r--  yjit/src/asm/x86_64/mod.rs  1395
-rw-r--r--  yjit/src/asm/x86_64/tests.rs  447
-rw-r--r--  yjit/src/codegen.rs  6180
-rw-r--r--  yjit/src/core.rs  2071
-rw-r--r--  yjit/src/cruby.rs  919
-rw-r--r--  yjit/src/cruby_bindings.inc.rs  783
-rw-r--r--  yjit/src/disasm.rs  218
-rw-r--r--  yjit/src/invariants.rs  585
-rw-r--r--  yjit/src/lib.rs  17
-rw-r--r--  yjit/src/options.rs  121
-rw-r--r--  yjit/src/stats.rs  271
-rw-r--r--  yjit/src/utils.rs  205
-rw-r--r--  yjit/src/yjit.rs  98
-rw-r--r--  yjit_asm.c  1834
-rw-r--r--  yjit_asm.h  408
-rw-r--r--  yjit_codegen.c  5121
-rw-r--r--  yjit_codegen.h  23
-rw-r--r--  yjit_core.c  1366
-rw-r--r--  yjit_core.h  307
-rw-r--r--  yjit_iface.c  1311
-rw-r--r--  yjit_iface.h  38
-rw-r--r--  yjit_utils.c  109
55 files changed, 16145 insertions, 11529 deletions
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 15abc79af6..c8d7ec5e0d 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -4,6 +4,7 @@
# YJIT sources and tests
yjit* @maximecb @xrxr @tenderlove
+yjit/* @maximecb @xrxr @tenderlove
doc/yjit/* @maximecb @xrxr @tenderlove
bootstraptest/test_yjit* @maximecb @xrxr @tenderlove
test/ruby/test_yjit* @maximecb @xrxr @tenderlove
diff --git a/.github/workflows/yjit-ubuntu.yml b/.github/workflows/yjit-ubuntu.yml
index beec059e0c..5dac4375f2 100644
--- a/.github/workflows/yjit-ubuntu.yml
+++ b/.github/workflows/yjit-ubuntu.yml
@@ -16,33 +16,51 @@ concurrency:
cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }}
jobs:
+ cargo:
+ name: Rust cargo test
+ # GitHub Action's image seems to already contain a Rust 1.58.0.
+ runs-on: ubuntu-20.04
+ steps:
+ - uses: actions/checkout@v3
+ # For now we can't run cargo test --offline because it complains about the
+ # capstone dependency, even though the dependency is optional
+ #- run: cargo test --offline
+ - run: RUST_BACKTRACE=1 cargo test
+ working-directory: yjit
+ # Also compile and test with all features enabled
+ - run: RUST_BACKTRACE=1 cargo test --all-features
+ working-directory: yjit
+ # Check that we can build in release mode too
+ - run: cargo build --release
+ working-directory: yjit
make:
strategy:
+ fail-fast: false
matrix:
- test_task: ["check"] # "test-bundler-parallel",
- os:
- - ubuntu-20.04
-# - ubuntu-18.04
- yjit_opts: [
- "--yjit",
- "--yjit --yjit-call-threshold=1",
- ]
- configure: ["", "cppflags=-DRUBY_DEBUG"]
include:
+ - test_task: "check-yjit-bindings"
+ configure: "--with-gcc=clang-12 --enable-yjit=dev"
+
+ - test_task: "check"
+ configure: "--enable-yjit" # release build
+
+ - test_task: "check"
+ configure: "--enable-yjit=dev"
+
+ - test_task: "check"
+ configure: "--enable-yjit=dev"
+ yjit_opts: "--yjit-call-threshold=1"
+
- test_task: "test-all TESTS=--repeat-count=2"
- os: ubuntu-20.04
- configure: ""
- yjit_enable_env: RUBY_YJIT_ENABLE
+ configure: "--enable-yjit=dev"
+
- test_task: "test-bundled-gems"
- os: ubuntu-20.04
- configure: "cppflags=-DRUBY_DEBUG"
- yjit_enable_env: RUBY_YJIT_ENABLE
- fail-fast: false
+ configure: "--enable-yjit=dev"
env:
GITPULLOPTIONS: --no-tags origin ${{github.ref}}
RUN_OPTS: ${{ matrix.yjit_opts }}
RUBY_DEBUG: ci
- runs-on: ${{ matrix.os }}
+ runs-on: ubuntu-20.04
if: ${{ !startsWith(github.event.head_commit.message, '[DOC]') && !contains(github.event.pull_request.labels.*.name, 'Documentation') }}
steps:
- run: mkdir build
@@ -76,7 +94,7 @@ jobs:
- name: Run configure
run: ../src/configure -C --disable-install-doc ${{ matrix.configure }}
- run: make incs
- - run: make
+ - run: make -j
- run: make leaked-globals
if: ${{ matrix.test_task == 'check' }}
- run: make prepare-gems
@@ -87,7 +105,6 @@ jobs:
if: ${{ matrix.test_task == 'check' }}
- name: Enable YJIT through ENV
run: echo "RUBY_YJIT_ENABLE=1" >> $GITHUB_ENV
- if: ${{ matrix.yjit_enable_env }}
- run: make -s ${{ matrix.test_task }} RUN_OPTS="$RUN_OPTS"
timeout-minutes: 60
env:
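Outside of CI, the same three checks from the new `cargo` job above can be run
locally from a source checkout; a minimal sketch, assuming a sufficiently recent
Rust toolchain is installed:

```shell
# Sketch: mirror the new "Rust cargo test" CI job on a local checkout.
cd yjit                                      # the Rust crate added by this commit
RUST_BACKTRACE=1 cargo test                  # default feature set
RUST_BACKTRACE=1 cargo test --all-features   # also exercise optional features
cargo build --release                        # confirm the release profile builds
```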
diff --git a/.github/workflows/yjit_asm_tests.yml b/.github/workflows/yjit_asm_tests.yml
deleted file mode 100644
index a7e05066dd..0000000000
--- a/.github/workflows/yjit_asm_tests.yml
+++ /dev/null
@@ -1,38 +0,0 @@
-name: YJIT x86 assembler tests
-
-on:
- push:
- paths-ignore:
- - 'doc/**'
- - '**.md'
- - '**.rdoc'
- pull_request:
- paths-ignore:
- - 'doc/**'
- - '**.md'
- - '**.rdoc'
-
-concurrency:
- group: ${{ github.workflow }} / ${{ startsWith(github.event_name, 'pull') && github.ref_name || github.sha }}
- cancel-in-progress: ${{ startsWith(github.event_name, 'pull') }}
-
-jobs:
- test:
- runs-on: ubuntu-latest
- if: ${{ !startsWith(github.event.head_commit.message, '[DOC]') && !contains(github.event.pull_request.labels.*.name, 'Documentation') }}
- steps:
- - name: Install dependencies
- run: |
- set -x
- sudo apt-get update -q || :
- sudo apt-get install --no-install-recommends -q -y build-essential
- - name: git config
- run: |
- git config --global advice.detachedHead 0
- git config --global init.defaultBranch garbage
- - uses: actions/checkout@v3
- with:
- path: src
- - name: Run ASM tests
- run: ./misc/test_yjit_asm.sh
- working-directory: src
diff --git a/bootstraptest/test_yjit.rb b/bootstraptest/test_yjit.rb
index a84a9e035a..b0c17229c5 100644
--- a/bootstraptest/test_yjit.rb
+++ b/bootstraptest/test_yjit.rb
@@ -2057,7 +2057,6 @@ assert_equal '[:itself]', %q{
itself
end
-
tracing_ractor = Ractor.new do
# 1: start tracing
events = []
@@ -2806,3 +2805,48 @@ assert_equal '', %q{
foo
foo
}
+
+# Make sure we're correctly reading RStruct's as.ary union for embedded RStructs
+assert_equal '3,12', %q{
+ pt_struct = Struct.new(:x, :y)
+ p = pt_struct.new(3, 12)
+ def pt_inspect(pt)
+ "#{pt.x},#{pt.y}"
+ end
+
+ # Make sure pt_inspect is JITted
+ 10.times { pt_inspect(p) }
+
+ # Make sure it's returning '3,12' instead of e.g. '3,false'
+ pt_inspect(p)
+}
+
+# Regression test for deadlock between branch_stub_hit and ractor_receive_if
+assert_equal '10', %q{
+ r = Ractor.new Ractor.current do |main|
+ main << 1
+ main << 2
+ main << 3
+ main << 4
+ main << 5
+ main << 6
+ main << 7
+ main << 8
+ main << 9
+ main << 10
+ end
+
+ a = []
+ a << Ractor.receive_if{|msg| msg == 10}
+ a << Ractor.receive_if{|msg| msg == 9}
+ a << Ractor.receive_if{|msg| msg == 8}
+ a << Ractor.receive_if{|msg| msg == 7}
+ a << Ractor.receive_if{|msg| msg == 6}
+ a << Ractor.receive_if{|msg| msg == 5}
+ a << Ractor.receive_if{|msg| msg == 4}
+ a << Ractor.receive_if{|msg| msg == 3}
+ a << Ractor.receive_if{|msg| msg == 2}
+ a << Ractor.receive_if{|msg| msg == 1}
+
+ a.length
+}
diff --git a/bootstraptest/test_yjit_rust_port.rb b/bootstraptest/test_yjit_rust_port.rb
new file mode 100644
index 0000000000..e399e0e49e
--- /dev/null
+++ b/bootstraptest/test_yjit_rust_port.rb
@@ -0,0 +1,422 @@
+# Simple tests that we know we can pass
+# To keep track of what we got working during the Rust port
+# And avoid breaking/losing functionality
+#
+# Say "Thread" here to dodge WASM CI check. We use ractors here
+# which WASM doesn't support and it only greps for "Thread".
+
+# Test for opt_mod
+assert_equal '2', %q{
+ def mod(a, b)
+ a % b
+ end
+
+ mod(7, 5)
+ mod(7, 5)
+}
+
+# Test for opt_mult
+assert_equal '12', %q{
+ def mult(a, b)
+ a * b
+ end
+
+ mult(6, 2)
+ mult(6, 2)
+}
+
+# Test for opt_div
+assert_equal '3', %q{
+ def div(a, b)
+ a / b
+ end
+
+ div(6, 2)
+ div(6, 2)
+}
+
+assert_equal '5', %q{
+ def plus(a, b)
+ a + b
+ end
+
+ plus(3, 2)
+}
+
+assert_equal '1', %q{
+ def foo(a, b)
+ a - b
+ end
+
+ foo(3, 2)
+}
+
+assert_equal 'true', %q{
+ def foo(a, b)
+ a < b
+ end
+
+ foo(2, 3)
+}
+
+# Bitwise left shift
+assert_equal '4', %q{
+ def foo(a, b)
+ 1 << 2
+ end
+
+ foo(1, 2)
+}
+
+assert_equal '-7', %q{
+ def foo(a, b)
+ -7
+ end
+
+ foo(1, 2)
+}
+
+# Putstring
+assert_equal 'foo', %q{
+ def foo(a, b)
+ "foo"
+ end
+
+ foo(1, 2)
+}
+
+assert_equal '-6', %q{
+ def foo(a, b)
+ a + -7
+ end
+
+ foo(1, 2)
+}
+
+assert_equal 'true', %q{
+ def foo(a, b)
+ a == b
+ end
+
+ foo(3, 3)
+}
+
+assert_equal 'true', %q{
+ def foo(a, b)
+ a < b
+ end
+
+ foo(3, 5)
+}
+
+assert_equal '777', %q{
+ def foo(a)
+ if a
+ 777
+ else
+ 333
+ end
+ end
+
+ foo(true)
+}
+
+assert_equal '5', %q{
+ def foo(a, b)
+ while a < b
+ a += 1
+ end
+ a
+ end
+
+ foo(1, 5)
+}
+
+# opt_aref
+assert_equal '2', %q{
+ def foo(a, b)
+ a[b]
+ end
+
+ foo([0, 1, 2], 2)
+}
+
+# Simple function calls with 0, 1, 2 arguments
+assert_equal '-2', %q{
+ def bar()
+ -2
+ end
+
+ def foo(a, b)
+ bar()
+ end
+
+ foo(3, 2)
+}
+assert_equal '2', %q{
+ def bar(a)
+ a
+ end
+
+ def foo(a, b)
+ bar(b)
+ end
+
+ foo(3, 2)
+}
+assert_equal '1', %q{
+ def bar(a, b)
+ a - b
+ end
+
+ def foo(a, b)
+ bar(a, b)
+ end
+
+ foo(3, 2)
+}
+
+# Regression test for assembler bug
+assert_equal '1', %q{
+ def check_index(index)
+ if 0x40000000 < index
+ return -1
+ end
+ 1
+ end
+
+ check_index 2
+}
+
+# Setivar test
+assert_equal '2', %q{
+ class Klass
+ attr_accessor :a
+
+ def set()
+ @a = 2
+ end
+
+ def get()
+ @a
+ end
+ end
+
+ o = Klass.new
+ o.set()
+ o.a
+}
+
+# Regression for putobject bug
+assert_equal '1.5', %q{
+ def foo(x)
+ x
+ end
+
+ def bar
+ foo(1.5)
+ end
+
+ bar()
+}
+
+# Getivar with an extended ivar table
+assert_equal '3', %q{
+ class Foo
+ def initialize
+ @x1 = 1
+ @x2 = 1
+ @x3 = 1
+ @x4 = 3
+ end
+
+ def bar
+ @x4
+ end
+ end
+
+ f = Foo.new
+ f.bar
+}
+
+assert_equal 'true', %q{
+ x = [[false, true]]
+ for i, j in x
+ ;
+ end
+ j
+}
+
+# Regression for getivar
+assert_equal '[nil]', %q{
+ [TrueClass].each do |klass|
+ klass.class_eval("def foo = @foo")
+ end
+
+ [true].map do |instance|
+ instance.foo
+ end
+}
+
+# Regression for send
+assert_equal 'ok', %q{
+ def bar(baz: 2)
+ baz
+ end
+
+ def foo
+ bar(1, baz: 123)
+ end
+
+ begin
+ foo
+ foo
+ rescue ArgumentError => e
+ print "ok"
+ end
+}
+
+# Array access regression test
+assert_equal '[0, 1, 2, 3, 4, 5]', %q{
+ def expandarray_useless_splat
+ arr = [0, 1, 2, 3, 4, 5]
+ a, * = arr
+ end
+
+ expandarray_useless_splat
+}
+
+# Make sure we're correctly reading RStruct's as.ary union for embedded RStructs
+assert_equal '3,12', %q{
+ pt_struct = Struct.new(:x, :y)
+ p = pt_struct.new(3, 12)
+ def pt_inspect(pt)
+ "#{pt.x},#{pt.y}"
+ end
+
+ # Make sure pt_inspect is JITted
+ 10.times { pt_inspect(p) }
+
+ # Make sure it's returning '3,12' instead of e.g. '3,false'
+ pt_inspect(p)
+}
+
+assert_equal '2', %q{
+ def foo(s)
+ s.foo
+ end
+
+ S = Struct.new(:foo)
+ foo(S.new(1))
+ foo(S.new(2))
+}
+
+# Try to compile new method while OOM
+assert_equal 'ok', %q{
+ def foo
+ :ok
+ end
+
+ RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
+
+ foo
+}
+
+# test hitting a branch stub when out of memory
+assert_equal 'ok', %q{
+ def nimai(jita)
+ if jita
+ :ng
+ else
+ :ok
+ end
+ end
+
+ nimai(true)
+ nimai(true)
+
+ RubyVM::YJIT.simulate_oom! if defined?(RubyVM::YJIT)
+
+ nimai(false)
+}
+
+# Ractor.current returns a current ractor
+assert_equal 'Ractor', %q{
+ Ractor.current.class
+}
+
+# Ractor.new returns new Ractor
+assert_equal 'Ractor', %q{
+ Ractor.new{}.class
+}
+
+# Ractor.allocate is not supported
+assert_equal "[:ok, :ok]", %q{
+ rs = []
+ begin
+ Ractor.allocate
+ rescue => e
+ rs << :ok if e.message == 'allocator undefined for Ractor'
+ end
+
+ begin
+ Ractor.new{}.dup
+ rescue
+ rs << :ok if e.message == 'allocator undefined for Ractor'
+ end
+
+ rs
+}
+
+# A return value of a Ractor block will be a message from the Ractor.
+assert_equal 'ok', %q{
+ # join
+ r = Ractor.new do
+ 'ok'
+ end
+ r.take
+}
+
+# Passed arguments to Ractor.new will be a block parameter
+# The values are passed with Ractor-communication pass.
+assert_equal 'ok', %q{
+ # ping-pong with arg
+ r = Ractor.new 'ok' do |msg|
+ msg
+ end
+ r.take
+}
+
+# Pass multiple arguments to Ractor.new
+assert_equal 'ok', %q{
+ # ping-pong with two args
+ r = Ractor.new 'ping', 'pong' do |msg, msg2|
+ [msg, msg2]
+ end
+ 'ok' if r.take == ['ping', 'pong']
+}
+
+# Ractor#send passes an object with copy to a Ractor
+# and Ractor.receive in the Ractor block can receive the passed value.
+assert_equal 'ok', %q{
+ r = Ractor.new do
+ msg = Ractor.receive
+ end
+ r.send 'ok'
+ r.take
+}
+
+assert_equal '[1, 2, 3]', %q{
+ def foo(arr)
+ arr << 1
+ arr << 2
+ arr << 3
+ arr
+ end
+
+ def bar()
+ foo([])
+ end
+
+ bar()
+}
diff --git a/common.mk b/common.mk
index b4adb2729c..905bb929c9 100644
--- a/common.mk
+++ b/common.mk
@@ -153,7 +153,7 @@ COMMONOBJS = array.$(OBJEXT) \
vm_dump.$(OBJEXT) \
vm_sync.$(OBJEXT) \
vm_trace.$(OBJEXT) \
- yjit.$(OBJEXT) \
+ $(YJIT_OBJ) \
$(COROUTINE_OBJ) \
$(DTRACE_OBJ) \
$(BUILTIN_ENCOBJS) \
@@ -1974,7 +1974,6 @@ ast.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
ast.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
ast.$(OBJEXT): {$(VPATH)}builtin.h
ast.$(OBJEXT): {$(VPATH)}config.h
-ast.$(OBJEXT): {$(VPATH)}darray.h
ast.$(OBJEXT): {$(VPATH)}defines.h
ast.$(OBJEXT): {$(VPATH)}encoding.h
ast.$(OBJEXT): {$(VPATH)}id.h
@@ -2352,7 +2351,6 @@ builtin.$(OBJEXT): {$(VPATH)}builtin.c
builtin.$(OBJEXT): {$(VPATH)}builtin.h
builtin.$(OBJEXT): {$(VPATH)}builtin_binary.inc
builtin.$(OBJEXT): {$(VPATH)}config.h
-builtin.$(OBJEXT): {$(VPATH)}darray.h
builtin.$(OBJEXT): {$(VPATH)}defines.h
builtin.$(OBJEXT): {$(VPATH)}id.h
builtin.$(OBJEXT): {$(VPATH)}intern.h
@@ -2542,7 +2540,6 @@ class.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
class.$(OBJEXT): {$(VPATH)}class.c
class.$(OBJEXT): {$(VPATH)}config.h
class.$(OBJEXT): {$(VPATH)}constant.h
-class.$(OBJEXT): {$(VPATH)}darray.h
class.$(OBJEXT): {$(VPATH)}debug_counter.h
class.$(OBJEXT): {$(VPATH)}defines.h
class.$(OBJEXT): {$(VPATH)}encoding.h
@@ -2936,7 +2933,6 @@ compile.$(OBJEXT): {$(VPATH)}builtin.h
compile.$(OBJEXT): {$(VPATH)}compile.c
compile.$(OBJEXT): {$(VPATH)}config.h
compile.$(OBJEXT): {$(VPATH)}constant.h
-compile.$(OBJEXT): {$(VPATH)}darray.h
compile.$(OBJEXT): {$(VPATH)}debug_counter.h
compile.$(OBJEXT): {$(VPATH)}defines.h
compile.$(OBJEXT): {$(VPATH)}encindex.h
@@ -3327,7 +3323,6 @@ cont.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
cont.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
cont.$(OBJEXT): {$(VPATH)}config.h
cont.$(OBJEXT): {$(VPATH)}cont.c
-cont.$(OBJEXT): {$(VPATH)}darray.h
cont.$(OBJEXT): {$(VPATH)}debug_counter.h
cont.$(OBJEXT): {$(VPATH)}defines.h
cont.$(OBJEXT): {$(VPATH)}eval_intern.h
@@ -3519,7 +3514,6 @@ debug.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
debug.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
debug.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
debug.$(OBJEXT): {$(VPATH)}config.h
-debug.$(OBJEXT): {$(VPATH)}darray.h
debug.$(OBJEXT): {$(VPATH)}debug.c
debug.$(OBJEXT): {$(VPATH)}debug_counter.h
debug.$(OBJEXT): {$(VPATH)}defines.h
@@ -5980,7 +5974,6 @@ error.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
error.$(OBJEXT): {$(VPATH)}builtin.h
error.$(OBJEXT): {$(VPATH)}config.h
error.$(OBJEXT): {$(VPATH)}constant.h
-error.$(OBJEXT): {$(VPATH)}darray.h
error.$(OBJEXT): {$(VPATH)}defines.h
error.$(OBJEXT): {$(VPATH)}encoding.h
error.$(OBJEXT): {$(VPATH)}error.c
@@ -6190,7 +6183,6 @@ eval.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
eval.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
eval.$(OBJEXT): {$(VPATH)}config.h
eval.$(OBJEXT): {$(VPATH)}constant.h
-eval.$(OBJEXT): {$(VPATH)}darray.h
eval.$(OBJEXT): {$(VPATH)}debug_counter.h
eval.$(OBJEXT): {$(VPATH)}defines.h
eval.$(OBJEXT): {$(VPATH)}encoding.h
@@ -6636,7 +6628,6 @@ gc.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
gc.$(OBJEXT): {$(VPATH)}builtin.h
gc.$(OBJEXT): {$(VPATH)}config.h
gc.$(OBJEXT): {$(VPATH)}constant.h
-gc.$(OBJEXT): {$(VPATH)}darray.h
gc.$(OBJEXT): {$(VPATH)}debug.h
gc.$(OBJEXT): {$(VPATH)}debug_counter.h
gc.$(OBJEXT): {$(VPATH)}defines.h
@@ -6858,7 +6849,6 @@ goruby.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
goruby.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
goruby.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
goruby.$(OBJEXT): {$(VPATH)}config.h
-goruby.$(OBJEXT): {$(VPATH)}darray.h
goruby.$(OBJEXT): {$(VPATH)}defines.h
goruby.$(OBJEXT): {$(VPATH)}golf_prelude.c
goruby.$(OBJEXT): {$(VPATH)}golf_prelude.rb
@@ -7428,7 +7418,6 @@ io.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
io.$(OBJEXT): {$(VPATH)}builtin.h
io.$(OBJEXT): {$(VPATH)}config.h
io.$(OBJEXT): {$(VPATH)}constant.h
-io.$(OBJEXT): {$(VPATH)}darray.h
io.$(OBJEXT): {$(VPATH)}defines.h
io.$(OBJEXT): {$(VPATH)}dln.h
io.$(OBJEXT): {$(VPATH)}encindex.h
@@ -7826,7 +7815,6 @@ iseq.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
iseq.$(OBJEXT): {$(VPATH)}builtin.h
iseq.$(OBJEXT): {$(VPATH)}config.h
iseq.$(OBJEXT): {$(VPATH)}constant.h
-iseq.$(OBJEXT): {$(VPATH)}darray.h
iseq.$(OBJEXT): {$(VPATH)}debug_counter.h
iseq.$(OBJEXT): {$(VPATH)}defines.h
iseq.$(OBJEXT): {$(VPATH)}encoding.h
@@ -9270,7 +9258,6 @@ miniinit.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
miniinit.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
miniinit.$(OBJEXT): {$(VPATH)}builtin.h
miniinit.$(OBJEXT): {$(VPATH)}config.h
-miniinit.$(OBJEXT): {$(VPATH)}darray.h
miniinit.$(OBJEXT): {$(VPATH)}defines.h
miniinit.$(OBJEXT): {$(VPATH)}dir.rb
miniinit.$(OBJEXT): {$(VPATH)}encoding.h
@@ -9492,7 +9479,6 @@ mjit.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
mjit.$(OBJEXT): {$(VPATH)}builtin.h
mjit.$(OBJEXT): {$(VPATH)}config.h
mjit.$(OBJEXT): {$(VPATH)}constant.h
-mjit.$(OBJEXT): {$(VPATH)}darray.h
mjit.$(OBJEXT): {$(VPATH)}debug.h
mjit.$(OBJEXT): {$(VPATH)}debug_counter.h
mjit.$(OBJEXT): {$(VPATH)}defines.h
@@ -9712,7 +9698,6 @@ mjit_compile.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
mjit_compile.$(OBJEXT): {$(VPATH)}builtin.h
mjit_compile.$(OBJEXT): {$(VPATH)}config.h
mjit_compile.$(OBJEXT): {$(VPATH)}constant.h
-mjit_compile.$(OBJEXT): {$(VPATH)}darray.h
mjit_compile.$(OBJEXT): {$(VPATH)}debug_counter.h
mjit_compile.$(OBJEXT): {$(VPATH)}defines.h
mjit_compile.$(OBJEXT): {$(VPATH)}id.h
@@ -9909,7 +9894,6 @@ node.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
node.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
node.$(OBJEXT): {$(VPATH)}config.h
node.$(OBJEXT): {$(VPATH)}constant.h
-node.$(OBJEXT): {$(VPATH)}darray.h
node.$(OBJEXT): {$(VPATH)}defines.h
node.$(OBJEXT): {$(VPATH)}id.h
node.$(OBJEXT): {$(VPATH)}id_table.h
@@ -10896,7 +10880,6 @@ proc.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
proc.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
proc.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
proc.$(OBJEXT): {$(VPATH)}config.h
-proc.$(OBJEXT): {$(VPATH)}darray.h
proc.$(OBJEXT): {$(VPATH)}defines.h
proc.$(OBJEXT): {$(VPATH)}encoding.h
proc.$(OBJEXT): {$(VPATH)}eval_intern.h
@@ -11112,7 +11095,6 @@ process.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
process.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
process.$(OBJEXT): {$(VPATH)}config.h
process.$(OBJEXT): {$(VPATH)}constant.h
-process.$(OBJEXT): {$(VPATH)}darray.h
process.$(OBJEXT): {$(VPATH)}debug_counter.h
process.$(OBJEXT): {$(VPATH)}defines.h
process.$(OBJEXT): {$(VPATH)}dln.h
@@ -11329,7 +11311,6 @@ ractor.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
ractor.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
ractor.$(OBJEXT): {$(VPATH)}builtin.h
ractor.$(OBJEXT): {$(VPATH)}config.h
-ractor.$(OBJEXT): {$(VPATH)}darray.h
ractor.$(OBJEXT): {$(VPATH)}debug_counter.h
ractor.$(OBJEXT): {$(VPATH)}defines.h
ractor.$(OBJEXT): {$(VPATH)}encoding.h
@@ -13276,7 +13257,6 @@ ruby.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
ruby.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
ruby.$(OBJEXT): {$(VPATH)}config.h
ruby.$(OBJEXT): {$(VPATH)}constant.h
-ruby.$(OBJEXT): {$(VPATH)}darray.h
ruby.$(OBJEXT): {$(VPATH)}debug_counter.h
ruby.$(OBJEXT): {$(VPATH)}defines.h
ruby.$(OBJEXT): {$(VPATH)}dln.h
@@ -13480,7 +13460,6 @@ scheduler.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
scheduler.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
scheduler.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
scheduler.$(OBJEXT): {$(VPATH)}config.h
-scheduler.$(OBJEXT): {$(VPATH)}darray.h
scheduler.$(OBJEXT): {$(VPATH)}defines.h
scheduler.$(OBJEXT): {$(VPATH)}encoding.h
scheduler.$(OBJEXT): {$(VPATH)}fiber/scheduler.h
@@ -13841,7 +13820,6 @@ signal.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
signal.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
signal.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
signal.$(OBJEXT): {$(VPATH)}config.h
-signal.$(OBJEXT): {$(VPATH)}darray.h
signal.$(OBJEXT): {$(VPATH)}debug_counter.h
signal.$(OBJEXT): {$(VPATH)}defines.h
signal.$(OBJEXT): {$(VPATH)}encoding.h
@@ -14826,7 +14804,6 @@ struct.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
struct.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
struct.$(OBJEXT): {$(VPATH)}builtin.h
struct.$(OBJEXT): {$(VPATH)}config.h
-struct.$(OBJEXT): {$(VPATH)}darray.h
struct.$(OBJEXT): {$(VPATH)}defines.h
struct.$(OBJEXT): {$(VPATH)}encoding.h
struct.$(OBJEXT): {$(VPATH)}id.h
@@ -15230,7 +15207,6 @@ thread.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
thread.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
thread.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
thread.$(OBJEXT): {$(VPATH)}config.h
-thread.$(OBJEXT): {$(VPATH)}darray.h
thread.$(OBJEXT): {$(VPATH)}debug.h
thread.$(OBJEXT): {$(VPATH)}debug_counter.h
thread.$(OBJEXT): {$(VPATH)}defines.h
@@ -16176,7 +16152,6 @@ variable.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
variable.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
variable.$(OBJEXT): {$(VPATH)}config.h
variable.$(OBJEXT): {$(VPATH)}constant.h
-variable.$(OBJEXT): {$(VPATH)}darray.h
variable.$(OBJEXT): {$(VPATH)}debug_counter.h
variable.$(OBJEXT): {$(VPATH)}defines.h
variable.$(OBJEXT): {$(VPATH)}encoding.h
@@ -16383,7 +16358,6 @@ version.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
version.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
version.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
version.$(OBJEXT): {$(VPATH)}config.h
-version.$(OBJEXT): {$(VPATH)}darray.h
version.$(OBJEXT): {$(VPATH)}debug_counter.h
version.$(OBJEXT): {$(VPATH)}defines.h
version.$(OBJEXT): {$(VPATH)}id.h
@@ -16594,7 +16568,6 @@ vm.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
vm.$(OBJEXT): {$(VPATH)}builtin.h
vm.$(OBJEXT): {$(VPATH)}config.h
vm.$(OBJEXT): {$(VPATH)}constant.h
-vm.$(OBJEXT): {$(VPATH)}darray.h
vm.$(OBJEXT): {$(VPATH)}debug_counter.h
vm.$(OBJEXT): {$(VPATH)}defines.h
vm.$(OBJEXT): {$(VPATH)}defs/opt_operand.def
@@ -16821,7 +16794,6 @@ vm_backtrace.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
vm_backtrace.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
vm_backtrace.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
vm_backtrace.$(OBJEXT): {$(VPATH)}config.h
-vm_backtrace.$(OBJEXT): {$(VPATH)}darray.h
vm_backtrace.$(OBJEXT): {$(VPATH)}debug.h
vm_backtrace.$(OBJEXT): {$(VPATH)}defines.h
vm_backtrace.$(OBJEXT): {$(VPATH)}encoding.h
@@ -17021,7 +16993,6 @@ vm_dump.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
vm_dump.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
vm_dump.$(OBJEXT): {$(VPATH)}config.h
vm_dump.$(OBJEXT): {$(VPATH)}constant.h
-vm_dump.$(OBJEXT): {$(VPATH)}darray.h
vm_dump.$(OBJEXT): {$(VPATH)}defines.h
vm_dump.$(OBJEXT): {$(VPATH)}gc.h
vm_dump.$(OBJEXT): {$(VPATH)}id.h
@@ -17210,7 +17181,6 @@ vm_sync.$(OBJEXT): {$(VPATH)}backward/2/long_long.h
vm_sync.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
vm_sync.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
vm_sync.$(OBJEXT): {$(VPATH)}config.h
-vm_sync.$(OBJEXT): {$(VPATH)}darray.h
vm_sync.$(OBJEXT): {$(VPATH)}debug_counter.h
vm_sync.$(OBJEXT): {$(VPATH)}defines.h
vm_sync.$(OBJEXT): {$(VPATH)}gc.h
@@ -17403,7 +17373,6 @@ vm_trace.$(OBJEXT): {$(VPATH)}backward/2/stdalign.h
vm_trace.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
vm_trace.$(OBJEXT): {$(VPATH)}builtin.h
vm_trace.$(OBJEXT): {$(VPATH)}config.h
-vm_trace.$(OBJEXT): {$(VPATH)}darray.h
vm_trace.$(OBJEXT): {$(VPATH)}debug.h
vm_trace.$(OBJEXT): {$(VPATH)}debug_counter.h
vm_trace.$(OBJEXT): {$(VPATH)}defines.h
@@ -17616,7 +17585,6 @@ yjit.$(OBJEXT): {$(VPATH)}backward/2/stdarg.h
yjit.$(OBJEXT): {$(VPATH)}builtin.h
yjit.$(OBJEXT): {$(VPATH)}config.h
yjit.$(OBJEXT): {$(VPATH)}constant.h
-yjit.$(OBJEXT): {$(VPATH)}darray.h
yjit.$(OBJEXT): {$(VPATH)}debug_counter.h
yjit.$(OBJEXT): {$(VPATH)}defines.h
yjit.$(OBJEXT): {$(VPATH)}encoding.h
@@ -17801,13 +17769,4 @@ yjit.$(OBJEXT): {$(VPATH)}yjit.c
yjit.$(OBJEXT): {$(VPATH)}yjit.h
yjit.$(OBJEXT): {$(VPATH)}yjit.rb
yjit.$(OBJEXT): {$(VPATH)}yjit.rbinc
-yjit.$(OBJEXT): {$(VPATH)}yjit_asm.c
-yjit.$(OBJEXT): {$(VPATH)}yjit_asm.h
-yjit.$(OBJEXT): {$(VPATH)}yjit_codegen.c
-yjit.$(OBJEXT): {$(VPATH)}yjit_codegen.h
-yjit.$(OBJEXT): {$(VPATH)}yjit_core.c
-yjit.$(OBJEXT): {$(VPATH)}yjit_core.h
-yjit.$(OBJEXT): {$(VPATH)}yjit_iface.c
-yjit.$(OBJEXT): {$(VPATH)}yjit_iface.h
-yjit.$(OBJEXT): {$(VPATH)}yjit_utils.c
# AUTOGENERATED DEPENDENCIES END
diff --git a/configure.ac b/configure.ac
index cbc53231d8..2222b8aa8e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3749,6 +3749,47 @@ AS_IF([test x"$MJIT_SUPPORT" = "xyes"],
AC_SUBST(MJIT_SUPPORT)
+AC_ARG_ENABLE(yjit,
+ AS_HELP_STRING([--enable-yjit],
+ [enable experimental in-process JIT compiler that requires Rust build tools [default=no]]),
+ [YJIT_SUPPORT=$enableval], [YJIT_SUPPORT=no])
+
+CARGO=
+CARGO_BUILD_ARGS=
+YJIT_LIBS=
+AS_CASE(["${YJIT_SUPPORT}"],
+[yes|dev], [
+ AS_IF([test x"$enable_jit_support" = "xno"],
+ AC_MSG_ERROR([--disable-jit-support but --enable-yjit. YJIT requires JIT support])
+ )
+ AC_CHECK_TOOL(RUSTC, [rustc], [no])
+ AS_IF([test x"$RUSTC" = "xno"],
+ AC_MSG_ERROR([rustc is required. Installation instructions available at https://www.rust-lang.org/tools/install])
+ )
+ AS_IF([test x"$YJIT_SUPPORT" = "xyes"],
+ [rb_rust_target_subdir=release
+ CARGO_BUILD_ARGS='--release'],
+ [rb_rust_target_subdir=debug
+ CARGO_BUILD_ARGS='--features stats,disasm,asm_comments'
+ AC_CHECK_TOOL(CARGO, [cargo], [no])
+ AS_IF([test x"$CARGO" = "xno"],
+ AC_MSG_ERROR([cargo is required. Installation instructions available at https://www.rust-lang.org/tools/install])
+ )
+ AC_DEFINE(RUBY_DEBUG, 1)])
+ YJIT_LIBS="yjit/target/${rb_rust_target_subdir}/libyjit.a"
+ YJIT_OBJ='yjit.$(OBJEXT)'
+ AC_DEFINE(USE_YJIT, 1)
+], [AC_DEFINE(USE_YJIT, 0)])
+
+
+dnl These variables end up in ::RbConfig::CONFIG
+AC_SUBST(YJIT_SUPPORT)dnl what flavor of YJIT the Ruby build includes
+AC_SUBST(RUSTC)dnl Rust compiler command
+AC_SUBST(CARGO)dnl Cargo command for Rust builds
+AC_SUBST(CARGO_BUILD_ARGS)dnl for selecting Rust build profiles
+AC_SUBST(YJIT_LIBS)dnl for optionally building the Rust parts of YJIT
+AC_SUBST(YJIT_OBJ)dnl for optionally building the C parts of YJIT
+
AC_ARG_ENABLE(install-static-library,
AS_HELP_STRING([--disable-install-static-library], [do not install static ruby library]),
[INSTALL_STATIC_LIBRARY=$enableval
@@ -4401,6 +4442,7 @@ config_summary "warnflags" "$warnflags"
config_summary "strip command" "$STRIP"
config_summary "install doc" "$DOCTARGETS"
config_summary "JIT support" "$MJIT_SUPPORT"
+config_summary "YJIT support" "$YJIT_SUPPORT"
config_summary "man page type" "$MANTYPE"
config_summary "search path" "$search_path"
config_summary "static-linked-ext" ${EXTSTATIC:+"yes"}
diff --git a/defs/gmake.mk b/defs/gmake.mk
index 27e3e21cc4..93aa784fb4 100644
--- a/defs/gmake.mk
+++ b/defs/gmake.mk
@@ -357,6 +357,61 @@ ifneq ($(REVISION_IN_HEADER),$(REVISION_LATEST))
$(srcdir)/revision.h: $(REVISION_H)
endif
+# Show Cargo progress when doing `make V=1`
+CARGO_VERBOSE_0 = -q
+CARGO_VERBOSE_1 =
+CARGO_VERBOSE = $(CARGO_VERBOSE_$(V))
+
+# Select between different build profiles with macro substitution
+.PHONY: yjit-static-lib
+yjit-static-lib: yjit-static-lib-$(YJIT_SUPPORT)
+
+# YJIT_SUPPORT=yes when `configure` gets `--enable-yjit`
+yjit-static-lib-yes:
+ $(ECHO) 'building Rust YJIT (release mode)'
+ $(Q) $(RUSTC) \
+ --crate-name=yjit \
+ --crate-type=staticlib \
+ --edition=2021 \
+ -C opt-level=3 \
+ -C overflow-checks=on \
+ '--out-dir=$(CARGO_TARGET_DIR)/release/' \
+ $(top_srcdir)/yjit/src/lib.rs
+
+yjit-static-lib-no:
+ $(ECHO) 'Error: Tried to build YJIT without configuring it first. Check `make showconfig`?'
+ @false
+
+yjit-static-lib-dev:
+ $(ECHO) 'building Rust YJIT (dev mode)'
+ $(Q) cd $(top_srcdir)/yjit && \
+ CARGO_TARGET_DIR='$(CARGO_TARGET_DIR)' \
+ CARGO_TERM_PROGRESS_WHEN='never' \
+ $(CARGO) $(CARGO_VERBOSE) build $(CARGO_BUILD_ARGS)
+
+# This PHONY prerequisite makes it so that we always run cargo. When there are
+# no Rust changes on rebuild, Cargo does not touch the mtime of the static
+# library and GNU make avoids relinking. $(empty) seems to be important to
+# trigger rebuild each time in release mode.
+$(YJIT_LIBS): yjit-static-lib
+ $(empty)
+
+# Put this here instead of in common.mk to avoid breaking nmake builds
+# TODO: might need to move for BSD Make support
+miniruby$(EXEEXT): $(YJIT_LIBS)
+
+# Generate Rust bindings. See source for details.
+# Needs `./configure --enable-yjit=dev` and Clang.
+ifneq ($(strip $(CARGO)),) # if configure found Cargo
+.PHONY: yjit-bindgen
+yjit-bindgen: yjit.$(OBJEXT)
+ YJIT_SRC_ROOT_PATH='$(top_srcdir)' $(CARGO) run --manifest-path '$(top_srcdir)/yjit/bindgen/Cargo.toml' -- $(CFLAGS) $(XCFLAGS) $(CPPFLAGS)
+
+# For CI, check whether YJIT's FFI bindings are up-to-date.
+check-yjit-bindings: yjit-bindgen
+ git -C "$(top_srcdir)" diff --exit-code yjit/src/cruby_bindings.inc.rs
+endif
+
# Query on the generated rdoc
#
# $ make rdoc:Integer#+
diff --git a/doc/yjit/yjit.md b/doc/yjit/yjit.md
index 3c731247a3..a09ff8439d 100644
--- a/doc/yjit/yjit.md
+++ b/doc/yjit/yjit.md
@@ -51,39 +51,55 @@ Because there is no GC for generated code yet, your software could run out of ex
## Installation
-Current YJIT versions are installed by default with CRuby. Make sure to specify the "--yjit" command line option to enable it at runtime.
+### Requirements
-Experimental YJIT patches that have not yet been merged with CRuby can be found in ruby-build:
+You will need to install:
+- A C compiler such as GCC or Clang
+- GNU Make and Autoconf
+- The Rust compiler `rustc` and Cargo (if you want to build in dev/debug mode)
-```
-ruby-build yjit-dev ~/.rubies/ruby-yjit-dev
-```
+To install the Rust build toolchain, we suggest following the [recommended installation method][rust-install]. Rust also provides first class [support][editor-tools] for many source code editors.
+
+[rust-install]: https://www.rust-lang.org/tools/install
+[editor-tools]: https://www.rust-lang.org/tools
-They can also be found in the Shopify/yjit repository, which is cloned and build like CRuby.
+### Building YJIT
-Start by cloning the `Shopify/yjit` repository:
+Start by cloning the `ruby/ruby` repository:
```
-git clone https://github.com/Shopify/yjit
+git clone https://github.com/ruby/ruby yjit
cd yjit
```
-The YJIT `ruby` binary can be built with either GCC or Clang. For development, we recommend enabling debug symbols so that assertions are enabled as this makes debugging easier. Enabling debug mode will also make it possible for you to disassemble code generated by YJIT. However, this causes a performance hit. For maximum performance, compile with GCC, without the `-DRUBY_DEBUG` or `-DYJIT_STATS` build options. More detailed build instructions are provided in the [Ruby README](https://github.com/ruby/ruby#how-to-compile-and-install).
-To support disassembly of the generated code, `libcapstone` is also required (`brew install capstone` on MacOS, `sudo apt-get install -y libcapstone-dev` on Ubuntu/Debian and `sudo dnf -y install capstone-devel` on Fedora).
+The YJIT `ruby` binary can be built with either GCC or Clang. It can be built either in dev (debug) mode or in release mode. For maximum performance, compile YJIT in release mode with GCC. More detailed build instructions are provided in the [Ruby README](https://github.com/ruby/ruby#how-to-compile-and-install). To support disassembly of the generated code, `libcapstone` is also required (`brew install capstone` on MacOS, `sudo apt-get install -y libcapstone-dev` on Ubuntu/Debian and `sudo dnf -y install capstone-devel` on Fedora).
```
-# Configure with debugging/stats options for development, build and install
+# Configure in release mode for maximum performance, build and install
./autogen.sh
-./configure cppflags="-DRUBY_DEBUG -DYJIT_STATS" --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc --disable--install-rdoc
-make -j16 install
+./configure --enable-yjit --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc --disable--install-rdoc
+make -j install
+```
+
+or
+
+```
+# Configure in dev (debug) mode for development, build and install
+./autogen.sh
+./configure --enable-yjit=dev --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc --disable--install-rdoc
+make -j install
```
On macOS, you may need to specify where to find openssl, libyaml and gdbm:
```
-# Configure with debugging/stats options for development, build and install
-./configure cppflags="-DRUBY_DEBUG -DYJIT_STATS" --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc --disable--install-rdoc --with-opt-dir=$(brew --prefix openssl):$(brew --prefix readline):$(brew --prefix libyaml):$(brew --prefix gdbm)
-make -j16 install
+# Install dependencies
+brew install openssl readline libyaml
+
+# Configure in dev (debug) mode for development, build and install
+./autogen.sh
+./configure --enable-yjit=dev --prefix=$HOME/.rubies/ruby-yjit --disable-install-doc --disable--install-rdoc --with-opt-dir=$(brew --prefix openssl):$(brew --prefix readline):$(brew --prefix libyaml)
+make -j install
```
Typically configure will choose default C compiler. To specify the C compiler, use
@@ -100,7 +116,7 @@ You can test that YJIT works correctly by running:
make btest
# Complete set of tests
-make -j16 test-all
+make -j test-all
```
## Usage
@@ -128,10 +144,10 @@ The machine code generated for a given method can be printed by adding `puts Rub
YJIT supports all command-line options supported by upstream CRuby, but also adds a few YJIT-specific options:
-- `--disable-yjit`: turn off YJIT (enabled by default)
-- `--yjit-stats`: produce statistics after the execution of a program (must compile with `cppflags=-DRUBY_DEBUG` to use this)
-- `--yjit-exec-mem-size=N`: size of the executable memory block to allocate, in MiB (default 256 MiB)
+- `--yjit`: enable YJIT (disabled by default)
- `--yjit-call-threshold=N`: number of calls after which YJIT begins to compile a function (default 2)
+- `--yjit-exec-mem-size=N`: size of the executable memory block to allocate, in MiB (default 256 MiB)
+- `--yjit-stats`: produce statistics after the execution of a program (must compile with `cppflags=-DRUBY_DEBUG` to use this)
- `--yjit-max-versions=N`: maximum number of versions to generate per basic block (default 4)
- `--yjit-greedy-versioning`: greedy versioning mode (disabled by default, may increase code size)
@@ -215,12 +231,16 @@ you can contribute things we will want to merge into YJIT.
### Source Code Organization
The YJIT source code is divided between:
-- `yjit_asm.c`: x86 in-memory assembler we use to generate machine code
-- `yjit_codegen.c`: logic for translating Ruby bytecode to machine code
-- `yjit_core.c`: basic block versioning logic, core structure of YJIT
-- `yjit_iface.c`: code YJIT uses to interface with the rest of CRuby
+- `yjit.c`: code YJIT uses to interface with the rest of CRuby
- `yjit.h`: C definitions YJIT exposes to the rest of the CRuby
- `yjit.rb`: `YJIT` Ruby module that is exposed to Ruby
+- `yjit/src/asm/*`: in-memory assembler we use to generate machine code
+- `yjit/src/codegen.rs`: logic for translating Ruby bytecode to machine code
+- `yjit/src/core.rb`: basic block versioning logic, core structure of YJIT
+- `yjit/src/stats.rs`: gathering of run-time statistics
+- `yjit/src/options.rs`: handling of command-line options
+- `yjit/bindgen/src/main.rs`: C bindings exposed to the Rust codebase through bindgen
+- `yjit/src/cruby.rs`: C bindings manually exposed to the Rust codebase
- `misc/test_yjit_asm.sh`: script to compile and run the in-memory assembler tests
- `misc/yjit_asm_tests.c`: tests for the in-memory assembler
@@ -229,6 +249,20 @@ The core of CRuby's interpreter logic is found in:
- `vm_insnshelper.c`: logic used by Ruby's bytecode instructions
- `vm_exec.c`: Ruby interpreter loop
+### Generating C bindings with bindgen
+
+In order to expose C functions to the Rust codebase, you will need to generate C bindings:
+
+```sh
+CC=clang ./configure --enable-yjit=dev
+make -j yjit-bindgen
+```
+
+This uses the bindgen tools to generate/update `yjit/src/cruby_bindings.inc.rs` based on the
+bindings listed in `yjit/bindgen/src/main.rs`. Avoid manually editing this file
+as it could be automatically regenerated at a later time. If you need to manually add C bindings,
+add them to `yjit/cruby.rs` instead.
+
### Coding & Debugging Protips
There are 3 test suites:
@@ -240,7 +274,7 @@ There are 3 test suites:
The tests can be run in parallel like this:
```
-make -j16 test-all RUN_OPTS="--yjit-call-threshold=1"
+make -j test-all RUN_OPTS="--yjit-call-threshold=1"
```
Or single-threaded like this, to more easily identify which specific test is failing:
@@ -273,3 +307,46 @@ You can use the Intel syntax for disassembly in LLDB, keeping it consistent with
```
echo "settings set target.x86-disassembly-flavor intel" >> ~/.lldbinit
```
+
+## Running YJIT on M1
+
+It is possible to run YJIT on an Apple M1 via Rosetta. You can find basic
+instructions below, but there are a few caveats listed further down.
+
+First, install Rosetta:
+
+```
+$ softwareupdate --install-rosetta
+```
+
+Now any command can be run with Rosetta via the `arch` command line tool.
+
+Then you can start your shell in an x86 environment:
+
+```
+$ arch -x86_64 zsh
+```
+
+You can double check your current architecture via the `arch` command:
+
+```
+$ arch -x86_64 zsh
+$ arch
+i386
+```
+
+You may need to set the default target for `rustc` to x86-64, e.g.
+
+```
+$ rustup default stable-x86_64-apple-darwin
+```
+
+While in your i386 shell, install Cargo and Homebrew, then hack away!
+
+### M1 Caveats
+
+1. You must install a version of Homebrew for each architecture
+2. Cargo will install in $HOME/.cargo by default, and I don't know a good way to change architectures after install
+3. `dev` won't work if you have i386 Homebrew installed on an M1
+
+If you use Fish shell you can [read this link](https://tenderlovemaking.com/2022/01/07/homebrew-rosetta-and-ruby.html) for information on making the dev environment easier.
diff --git a/ext/coverage/depend b/ext/coverage/depend
index 7c6aa6c2d8..57d368d3f5 100644
--- a/ext/coverage/depend
+++ b/ext/coverage/depend
@@ -165,7 +165,6 @@ coverage.o: $(top_srcdir)/ccan/check_type/check_type.h
coverage.o: $(top_srcdir)/ccan/container_of/container_of.h
coverage.o: $(top_srcdir)/ccan/list/list.h
coverage.o: $(top_srcdir)/ccan/str/str.h
-coverage.o: $(top_srcdir)/darray.h
coverage.o: $(top_srcdir)/gc.h
coverage.o: $(top_srcdir)/internal.h
coverage.o: $(top_srcdir)/internal/array.h
diff --git a/ext/objspace/depend b/ext/objspace/depend
index 377b803d2a..c4da8031cc 100644
--- a/ext/objspace/depend
+++ b/ext/objspace/depend
@@ -533,7 +533,6 @@ objspace_dump.o: $(top_srcdir)/ccan/check_type/check_type.h
objspace_dump.o: $(top_srcdir)/ccan/container_of/container_of.h
objspace_dump.o: $(top_srcdir)/ccan/list/list.h
objspace_dump.o: $(top_srcdir)/ccan/str/str.h
-objspace_dump.o: $(top_srcdir)/darray.h
objspace_dump.o: $(top_srcdir)/gc.h
objspace_dump.o: $(top_srcdir)/internal.h
objspace_dump.o: $(top_srcdir)/internal/array.h
diff --git a/internal/cmdlineopt.h b/internal/cmdlineopt.h
index adc76fc2e4..71568a8745 100644
--- a/internal/cmdlineopt.h
+++ b/internal/cmdlineopt.h
@@ -26,9 +26,6 @@ typedef struct ruby_cmdline_options {
#if USE_MJIT
struct mjit_options mjit;
#endif
-#if YJIT_SUPPORTED_P
- struct rb_yjit_options yjit;
-#endif
int sflag, xflag;
unsigned int warning: 1;
diff --git a/iseq.c b/iseq.c
index 129d6d3b2c..42cf9a2fa6 100644
--- a/iseq.c
+++ b/iseq.c
@@ -175,7 +175,9 @@ rb_iseq_free(const rb_iseq_t *iseq)
iseq_clear_ic_references(iseq);
struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
mjit_free_iseq(iseq); /* Notify MJIT */
- rb_yjit_iseq_free(body);
+#if YJIT_BUILD
+ rb_yjit_iseq_free(body->yjit_payload);
+#endif
ruby_xfree((void *)body->iseq_encoded);
ruby_xfree((void *)body->insns_info.body);
if (body->insns_info.positions) ruby_xfree((void *)body->insns_info.positions);
@@ -439,7 +441,9 @@ rb_iseq_update_references(rb_iseq_t *iseq)
#if USE_MJIT
mjit_update_references(iseq);
#endif
- rb_yjit_iseq_update_references(body);
+#if YJIT_BUILD
+ rb_yjit_iseq_update_references(body->yjit_payload);
+#endif
}
}
@@ -527,7 +531,9 @@ rb_iseq_mark(const rb_iseq_t *iseq)
#if USE_MJIT
mjit_mark_cc_entries(body);
#endif
- rb_yjit_iseq_mark(body);
+#if YJIT_BUILD
+ rb_yjit_iseq_mark(body->yjit_payload);
+#endif
}
if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) {
diff --git a/load.c b/load.c
index c29c32f2dc..496188d850 100644
--- a/load.c
+++ b/load.c
@@ -14,6 +14,7 @@
#include "internal/variable.h"
#include "iseq.h"
#include "probes.h"
+#include "darray.h"
#include "ruby/encoding.h"
#include "ruby/util.h"
diff --git a/misc/test_yjit_asm.sh b/misc/test_yjit_asm.sh
deleted file mode 100755
index e09d83f0fb..0000000000
--- a/misc/test_yjit_asm.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-set -e
-set -x
-
-clang -std=gnu99 -Wall -Werror -Wno-error=unused-function -Wshorten-64-to-32 -I "${0%/*/*}" "${0%/*}/yjit_asm_tests.c" -o asm_test
-
-./asm_test
-
-rm asm_test
diff --git a/misc/yjit_asm_tests.c b/misc/yjit_asm_tests.c
deleted file mode 100644
index ccf8822bbe..0000000000
--- a/misc/yjit_asm_tests.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// For MAP_ANONYMOUS on GNU/Linux
-#define _GNU_SOURCE 1
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-// This test executable doesn't compile with the rest of Ruby
-// so we need to define a rb_bug().
-_Noreturn
-static void rb_bug(const char *message, ...)
-{
- va_list args;
- va_start(args, message);
- vfprintf(stderr, message, args);
- va_end(args);
- abort();
-}
-
-#include "yjit_asm.c"
-
-// Print the bytes in a code block
-void print_bytes(codeblock_t* cb)
-{
- for (uint32_t i = 0; i < cb->write_pos; ++i)
- {
- printf("%02X", (int)*cb_get_ptr(cb, i));
- }
-
- printf("\n");
-}
-
-// Check that the code block contains the given sequence of bytes
-void check_bytes(codeblock_t* cb, const char* bytes)
-{
- printf("checking encoding: %s\n", bytes);
-
- size_t len = strlen(bytes);
- assert (len % 2 == 0);
- size_t num_bytes = len / 2;
-
- if (cb->write_pos != num_bytes)
- {
- fprintf(stderr, "incorrect encoding length, expected %ld, got %d\n",
- num_bytes,
- cb->write_pos
- );
- printf("%s\n", bytes);
- print_bytes(cb);
- exit(-1);
- }
-
- for (uint32_t i = 0; i < num_bytes; ++i)
- {
- char byte_str[] = {0, 0, 0, 0};
- strncpy(byte_str, bytes + (2 * i), 2);
- char* endptr;
- long int byte = strtol(byte_str, &endptr, 16);
-
- uint8_t cb_byte = *cb_get_ptr(cb, i);
-
- if (cb_byte != byte)
- {
- fprintf(stderr, "incorrect encoding at position %d, expected %02X, got %02X\n",
- i,
- (int)byte,
- (int)cb_byte
- );
- printf("%s\n", bytes);
- print_bytes(cb);
- exit(-1);
- }
- }
-}
-
-void run_assembler_tests(void)
-{
- printf("Running assembler tests\n");
-
- codeblock_t cb_obj;
- codeblock_t* cb = &cb_obj;
- uint8_t* mem_block = alloc_exec_mem(4096);
- cb_init(cb, mem_block, 4096);
-
- // add
- cb_set_pos(cb, 0); add(cb, CL, imm_opnd(3)); check_bytes(cb, "80C103");
- cb_set_pos(cb, 0); add(cb, CL, BL); check_bytes(cb, "00D9");
- cb_set_pos(cb, 0); add(cb, CL, SPL); check_bytes(cb, "4000E1");
- cb_set_pos(cb, 0); add(cb, CX, BX); check_bytes(cb, "6601D9");
- cb_set_pos(cb, 0); add(cb, RAX, RBX); check_bytes(cb, "4801D8");
- cb_set_pos(cb, 0); add(cb, ECX, EDX); check_bytes(cb, "01D1");
- cb_set_pos(cb, 0); add(cb, RDX, R14); check_bytes(cb, "4C01F2");
- cb_set_pos(cb, 0); add(cb, mem_opnd(64, RAX, 0), RDX); check_bytes(cb, "480110");
- cb_set_pos(cb, 0); add(cb, RDX, mem_opnd(64, RAX, 0)); check_bytes(cb, "480310");
- cb_set_pos(cb, 0); add(cb, RDX, mem_opnd(64, RAX, 8)); check_bytes(cb, "48035008");
- cb_set_pos(cb, 0); add(cb, RDX, mem_opnd(64, RAX, 255)); check_bytes(cb, "480390FF000000");
- cb_set_pos(cb, 0); add(cb, mem_opnd(64, RAX, 127), imm_opnd(255)); check_bytes(cb, "4881407FFF000000");
- cb_set_pos(cb, 0); add(cb, mem_opnd(32, RAX, 0), EDX); check_bytes(cb, "0110");
- cb_set_pos(cb, 0); add(cb, RSP, imm_opnd(8)); check_bytes(cb, "4883C408");
- cb_set_pos(cb, 0); add(cb, ECX, imm_opnd(8)); check_bytes(cb, "83C108");
- cb_set_pos(cb, 0); add(cb, ECX, imm_opnd(255)); check_bytes(cb, "81C1FF000000");
-
- // and
- cb_set_pos(cb, 0); and(cb, EBP, R12D); check_bytes(cb, "4421E5");
- cb_set_pos(cb, 0); and(cb, mem_opnd(64, RAX, 0), imm_opnd(0x08)); check_bytes(cb, "48832008");
-
- // call
- {
- cb_set_pos(cb, 0);
- uint32_t fn_label = cb_new_label(cb, "foo");
- call_label(cb, fn_label);
- cb_link_labels(cb);
- check_bytes(cb, "E8FBFFFFFF");
- }
- cb_set_pos(cb, 0); call(cb, RAX); check_bytes(cb, "FFD0");
- cb_set_pos(cb, 0); call(cb, mem_opnd(64, RSP, 8)); check_bytes(cb, "FF542408");
-
- // cmovcc
- cb_set_pos(cb, 0); cmovg(cb, ESI, EDI); check_bytes(cb, "0F4FF7");
- cb_set_pos(cb, 0); cmovg(cb, ESI, mem_opnd(32, RBP, 12)); check_bytes(cb, "0F4F750C");
- cb_set_pos(cb, 0); cmovl(cb, EAX, ECX); check_bytes(cb, "0F4CC1");
- cb_set_pos(cb, 0); cmovl(cb, RBX, RBP); check_bytes(cb, "480F4CDD");
- cb_set_pos(cb, 0); cmovle(cb, ESI, mem_opnd(32, RSP, 4)); check_bytes(cb, "0F4E742404");
-
- // cmp
- cb_set_pos(cb, 0); cmp(cb, CL, DL); check_bytes(cb, "38D1");
- cb_set_pos(cb, 0); cmp(cb, ECX, EDI); check_bytes(cb, "39F9");
- cb_set_pos(cb, 0); cmp(cb, RDX, mem_opnd(64, R12, 0)); check_bytes(cb, "493B1424");
- cb_set_pos(cb, 0); cmp(cb, RAX, imm_opnd(2)); check_bytes(cb, "4883F802");
-
- // cqo
- cb_set_pos(cb, 0); cqo(cb); check_bytes(cb, "4899");
-
- // div
- /*
- test(
- delegate void (CodeBlock cb) { cb.div(X86Opnd(EDX)); },
- "F7F2"
- );
- test(
- delegate void (CodeBlock cb) { cb.div(X86Opnd(32, RSP, -12)); },
- "F77424F4"
- );
- */
-
- // jcc to label
- {
- cb_set_pos(cb, 0);
- uint32_t loop_label = cb_new_label(cb, "loop");
- jge_label(cb, loop_label);
- cb_link_labels(cb);
- check_bytes(cb, "0F8DFAFFFFFF");
- }
- {
- cb_set_pos(cb, 0);
- uint32_t loop_label = cb_new_label(cb, "loop");
- jo_label(cb, loop_label);
- cb_link_labels(cb);
- check_bytes(cb, "0F80FAFFFFFF");
- }
-
- // jmp to label
- {
- cb_set_pos(cb, 0);
- uint32_t loop_label = cb_new_label(cb, "loop");
- jmp_label(cb, loop_label);
- cb_link_labels(cb);
- check_bytes(cb, "E9FBFFFFFF");
- }
-
- // jmp with RM operand
- cb_set_pos(cb, 0); jmp_rm(cb, R12); check_bytes(cb, "41FFE4");
-
- // lea
- cb_set_pos(cb, 0); lea(cb, RDX, mem_opnd(64, RCX, 8)); check_bytes(cb, "488D5108");
- cb_set_pos(cb, 0); lea(cb, RAX, mem_opnd(8, RIP, 0)); check_bytes(cb, "488D0500000000");
- cb_set_pos(cb, 0); lea(cb, RAX, mem_opnd(8, RIP, 5)); check_bytes(cb, "488D0505000000");
- cb_set_pos(cb, 0); lea(cb, RDI, mem_opnd(8, RIP, 5)); check_bytes(cb, "488D3D05000000");
-
- // mov
- cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(7)); check_bytes(cb, "B807000000");
- cb_set_pos(cb, 0); mov(cb, EAX, imm_opnd(-3)); check_bytes(cb, "B8FDFFFFFF");
- cb_set_pos(cb, 0); mov(cb, R15, imm_opnd(3)); check_bytes(cb, "41BF03000000");
- cb_set_pos(cb, 0); mov(cb, EAX, EBX); check_bytes(cb, "89D8");
- cb_set_pos(cb, 0); mov(cb, EAX, ECX); check_bytes(cb, "89C8");
- cb_set_pos(cb, 0); mov(cb, EDX, mem_opnd(32, RBX, 128)); check_bytes(cb, "8B9380000000");
-
- // Test `mov rax, 3` => `mov eax, 3` optimization
- cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x34)); check_bytes(cb, "41B834000000");
- cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(0x80000000)); check_bytes(cb, "49B80000008000000000");
- cb_set_pos(cb, 0); mov(cb, R8, imm_opnd(-1)); check_bytes(cb, "49B8FFFFFFFFFFFFFFFF");
-
- cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x34)); check_bytes(cb, "B834000000");
- cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(0x80000000)); check_bytes(cb, "48B80000008000000000");
- cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-52)); check_bytes(cb, "48B8CCFFFFFFFFFFFFFF");
- cb_set_pos(cb, 0); mov(cb, RAX, imm_opnd(-1)); check_bytes(cb, "48B8FFFFFFFFFFFFFFFF");
- /*
- test(
- delegate void (CodeBlock cb) { cb.mov(X86Opnd(AL), X86Opnd(8, RCX, 0, 1, RDX)); },
- "8A0411"
- );
- */
- cb_set_pos(cb, 0); mov(cb, CL, R9B); check_bytes(cb, "4488C9");
- cb_set_pos(cb, 0); mov(cb, RBX, RAX); check_bytes(cb, "4889C3");
- cb_set_pos(cb, 0); mov(cb, RDI, RBX); check_bytes(cb, "4889DF");
- cb_set_pos(cb, 0); mov(cb, SIL, imm_opnd(11)); check_bytes(cb, "40B60B");
- cb_set_pos(cb, 0); mov(cb, mem_opnd(8, RSP, 0), imm_opnd(-3)); check_bytes(cb, "C60424FD");
- cb_set_pos(cb, 0); mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1)); check_bytes(cb, "48C7470801000000");
-
- // movsx
- cb_set_pos(cb, 0); movsx(cb, AX, AL); check_bytes(cb, "660FBEC0");
- cb_set_pos(cb, 0); movsx(cb, EDX, AL); check_bytes(cb, "0FBED0");
- cb_set_pos(cb, 0); movsx(cb, RAX, BL); check_bytes(cb, "480FBEC3");
- cb_set_pos(cb, 0); movsx(cb, ECX, AX); check_bytes(cb, "0FBFC8");
- cb_set_pos(cb, 0); movsx(cb, R11, CL); check_bytes(cb, "4C0FBED9");
- cb_set_pos(cb, 0); movsx(cb, R10, mem_opnd(32, RSP, 12)); check_bytes(cb, "4C6354240C");
- cb_set_pos(cb, 0); movsx(cb, RAX, mem_opnd(8, RSP, 0)); check_bytes(cb, "480FBE0424");
-
- // neg
- cb_set_pos(cb, 0); neg(cb, RAX); check_bytes(cb, "48F7D8");
-
- // nop
- cb_set_pos(cb, 0); nop(cb, 1); check_bytes(cb, "90");
-
- // not
- cb_set_pos(cb, 0); not(cb, AX); check_bytes(cb, "66F7D0");
- cb_set_pos(cb, 0); not(cb, EAX); check_bytes(cb, "F7D0");
- cb_set_pos(cb, 0); not(cb, mem_opnd(64, R12, 0)); check_bytes(cb, "49F71424");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSP, 301)); check_bytes(cb, "F794242D010000");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSP, 0)); check_bytes(cb, "F71424");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSP, 3)); check_bytes(cb, "F7542403");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RBP, 0)); check_bytes(cb, "F75500");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RBP, 13)); check_bytes(cb, "F7550D");
- cb_set_pos(cb, 0); not(cb, RAX); check_bytes(cb, "48F7D0");
- cb_set_pos(cb, 0); not(cb, R11); check_bytes(cb, "49F7D3");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RAX, 0)); check_bytes(cb, "F710");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RSI, 0)); check_bytes(cb, "F716");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDI, 0)); check_bytes(cb, "F717");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, 55)); check_bytes(cb, "F75237");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, 1337)); check_bytes(cb, "F79239050000");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, -55)); check_bytes(cb, "F752C9");
- cb_set_pos(cb, 0); not(cb, mem_opnd(32, RDX, -555)); check_bytes(cb, "F792D5FDFFFF");
- /*
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RAX, 0, 1, RBX)); },
- "F71418"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RAX, 0, 1, R12)); },
- "42F71420"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 0, 1, R12)); },
- "43F71427"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 5, 1, R12)); },
- "43F7542705"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 5, 8, R12)); },
- "43F754E705"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R15, 5, 8, R13)); },
- "43F754EF05"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R12, 5, 4, R9)); },
- "43F7548C05"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, R12, 301, 4, R9)); },
- "43F7948C2D010000"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RAX, 5, 4, RDX)); },
- "F7549005"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(64, RAX, 0, 2, RDX)); },
- "48F71450"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RSP, 0, 1, RBX)); },
- "F7141C"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RSP, 3, 1, RBX)); },
- "F7541C03"
- );
- test(
- delegate void (CodeBlock cb) { cb.not(X86Opnd(32, RBP, 13, 1, RDX)); },
- "F754150D"
- );
- */
-
- // or
- cb_set_pos(cb, 0); or(cb, EDX, ESI); check_bytes(cb, "09F2");
-
- // pop
- cb_set_pos(cb, 0); pop(cb, RAX); check_bytes(cb, "58");
- cb_set_pos(cb, 0); pop(cb, RBX); check_bytes(cb, "5B");
- cb_set_pos(cb, 0); pop(cb, RSP); check_bytes(cb, "5C");
- cb_set_pos(cb, 0); pop(cb, RBP); check_bytes(cb, "5D");
- cb_set_pos(cb, 0); pop(cb, R12); check_bytes(cb, "415C");
- cb_set_pos(cb, 0); pop(cb, mem_opnd(64, RAX, 0)); check_bytes(cb, "8F00");
- cb_set_pos(cb, 0); pop(cb, mem_opnd(64, R8, 0)); check_bytes(cb, "418F00");
- cb_set_pos(cb, 0); pop(cb, mem_opnd(64, R8, 3)); check_bytes(cb, "418F4003");
- cb_set_pos(cb, 0); pop(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)); check_bytes(cb, "8F44C803");
- cb_set_pos(cb, 0); pop(cb, mem_opnd_sib(64, R8, RCX, 8, 3)); check_bytes(cb, "418F44C803");
-
- // push
- cb_set_pos(cb, 0); push(cb, RAX); check_bytes(cb, "50");
- cb_set_pos(cb, 0); push(cb, RBX); check_bytes(cb, "53");
- cb_set_pos(cb, 0); push(cb, R12); check_bytes(cb, "4154");
- cb_set_pos(cb, 0); push(cb, mem_opnd(64, RAX, 0)); check_bytes(cb, "FF30");
- cb_set_pos(cb, 0); push(cb, mem_opnd(64, R8, 0)); check_bytes(cb, "41FF30");
- cb_set_pos(cb, 0); push(cb, mem_opnd(64, R8, 3)); check_bytes(cb, "41FF7003");
- cb_set_pos(cb, 0); push(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)); check_bytes(cb, "FF74C803");
- cb_set_pos(cb, 0); push(cb, mem_opnd_sib(64, R8, RCX, 8, 3)); check_bytes(cb, "41FF74C803");
-
- // ret
- cb_set_pos(cb, 0); ret(cb); check_bytes(cb, "C3");
-
- // sal
- cb_set_pos(cb, 0); sal(cb, CX, imm_opnd(1)); check_bytes(cb, "66D1E1");
- cb_set_pos(cb, 0); sal(cb, ECX, imm_opnd(1)); check_bytes(cb, "D1E1");
- cb_set_pos(cb, 0); sal(cb, EBP, imm_opnd(5)); check_bytes(cb, "C1E505");
- cb_set_pos(cb, 0); sal(cb, mem_opnd(32, RSP, 68), imm_opnd(1)); check_bytes(cb, "D1642444");
-
- // sar
- cb_set_pos(cb, 0); sar(cb, EDX, imm_opnd(1)); check_bytes(cb, "D1FA");
-
- // shr
- cb_set_pos(cb, 0); shr(cb, R14, imm_opnd(7)); check_bytes(cb, "49C1EE07");
-
- /*
- // sqrtsd
- test(
- delegate void (CodeBlock cb) { cb.sqrtsd(X86Opnd(XMM2), X86Opnd(XMM6)); },
- "F20F51D6"
- );
- */
-
- // sub
- cb_set_pos(cb, 0); sub(cb, EAX, imm_opnd(1)); check_bytes(cb, "83E801");
- cb_set_pos(cb, 0); sub(cb, RAX, imm_opnd(2)); check_bytes(cb, "4883E802");
-
- // test
- cb_set_pos(cb, 0); test(cb, AL, AL); check_bytes(cb, "84C0");
- cb_set_pos(cb, 0); test(cb, AX, AX); check_bytes(cb, "6685C0");
- cb_set_pos(cb, 0); test(cb, CL, imm_opnd(8)); check_bytes(cb, "F6C108");
- cb_set_pos(cb, 0); test(cb, DL, imm_opnd(7)); check_bytes(cb, "F6C207");
- cb_set_pos(cb, 0); test(cb, RCX, imm_opnd(8)); check_bytes(cb, "F6C108");
- cb_set_pos(cb, 0); test(cb, mem_opnd(8, RDX, 8), imm_opnd(8)); check_bytes(cb, "F6420808");
- cb_set_pos(cb, 0); test(cb, mem_opnd(8, RDX, 8), imm_opnd(255)); check_bytes(cb, "F64208FF");
- cb_set_pos(cb, 0); test(cb, DX, imm_opnd(0xFFFF)); check_bytes(cb, "66F7C2FFFF");
- cb_set_pos(cb, 0); test(cb, mem_opnd(16, RDX, 8), imm_opnd(0xFFFF)); check_bytes(cb, "66F74208FFFF");
- cb_set_pos(cb, 0); test(cb, mem_opnd(8, RSI, 0), imm_opnd(1)); check_bytes(cb, "F60601");
- cb_set_pos(cb, 0); test(cb, mem_opnd(8, RSI, 16), imm_opnd(1)); check_bytes(cb, "F6461001");
- cb_set_pos(cb, 0); test(cb, mem_opnd(8, RSI, -16), imm_opnd(1)); check_bytes(cb, "F646F001");
- cb_set_pos(cb, 0); test(cb, mem_opnd(32, RSI, 64), EAX); check_bytes(cb, "854640");
- cb_set_pos(cb, 0); test(cb, mem_opnd(64, RDI, 42), RAX); check_bytes(cb, "4885472A");
- cb_set_pos(cb, 0); test(cb, RAX, RAX); check_bytes(cb, "4885C0");
- cb_set_pos(cb, 0); test(cb, RAX, RSI); check_bytes(cb, "4885F0");
- cb_set_pos(cb, 0); test(cb, mem_opnd(64, RSI, 64), imm_opnd(~0x08)); check_bytes(cb, "48F74640F7FFFFFF");
-
- // xchg
- cb_set_pos(cb, 0); xchg(cb, RAX, RCX); check_bytes(cb, "4891");
- cb_set_pos(cb, 0); xchg(cb, RAX, R13); check_bytes(cb, "4995");
- cb_set_pos(cb, 0); xchg(cb, RCX, RBX); check_bytes(cb, "4887D9");
- cb_set_pos(cb, 0); xchg(cb, R9, R15); check_bytes(cb, "4D87F9");
-
- // xor
- cb_set_pos(cb, 0); xor(cb, EAX, EAX); check_bytes(cb, "31C0");
-
- printf("Assembler tests done\n");
-}
-
-void assert_equal(int expected, int actual)
-{
- if (expected != actual) {
- fprintf(stderr, "expected %d, got %d\n", expected, actual);
- exit(-1);
- }
-}
-
-void run_runtime_tests(void)
-{
- printf("Running runtime tests\n");
-
- codeblock_t codeblock;
- codeblock_t* cb = &codeblock;
-
- uint8_t* mem_block = alloc_exec_mem(4096);
- cb_init(cb, mem_block, 4096);
-
- int (*function)(void);
- function = (int (*)(void))mem_block;
-
- #define TEST(BODY) cb_set_pos(cb, 0); BODY ret(cb); cb_mark_all_executable(cb); assert_equal(7, function());
-
- // add
- TEST({ mov(cb, RAX, imm_opnd(0)); add(cb, RAX, imm_opnd(7)); })
- TEST({ mov(cb, RAX, imm_opnd(0)); mov(cb, RCX, imm_opnd(7)); add(cb, RAX, RCX); })
-
- // and
- TEST({ mov(cb, RAX, imm_opnd(31)); and(cb, RAX, imm_opnd(7)); })
- TEST({ mov(cb, RAX, imm_opnd(31)); mov(cb, RCX, imm_opnd(7)); and(cb, RAX, RCX); })
-
- // or
- TEST({ mov(cb, RAX, imm_opnd(3)); or(cb, RAX, imm_opnd(4)); })
- TEST({ mov(cb, RAX, imm_opnd(3)); mov(cb, RCX, imm_opnd(4)); or(cb, RAX, RCX); })
-
- // push/pop
- TEST({ mov(cb, RCX, imm_opnd(7)); push(cb, RCX); pop(cb, RAX); })
-
- // shr
- TEST({ mov(cb, RAX, imm_opnd(31)); shr(cb, RAX, imm_opnd(2)); })
-
- // sub
- TEST({ mov(cb, RAX, imm_opnd(12)); sub(cb, RAX, imm_opnd(5)); })
- TEST({ mov(cb, RAX, imm_opnd(12)); mov(cb, RCX, imm_opnd(5)); sub(cb, RAX, RCX); })
-
- // xor
- TEST({ mov(cb, RAX, imm_opnd(13)); xor(cb, RAX, imm_opnd(10)); })
- TEST({ mov(cb, RAX, imm_opnd(13)); mov(cb, RCX, imm_opnd(10)); xor(cb, RAX, RCX); })
-
- #undef TEST
-
- printf("Runtime tests done\n");
-}
-
-int main(int argc, char** argv)
-{
- run_assembler_tests();
- run_runtime_tests();
-
- return 0;
-}
diff --git a/ruby.c b/ruby.c
index ade434cff3..9e38572751 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1028,36 +1028,23 @@ set_option_encoding_once(const char *type, VALUE *name, const char *e, long elen
#define yjit_opt_match_arg(s, l, name) \
opt_match(s, l, name) && (*(s) && *(s+1) ? 1 : (rb_raise(rb_eRuntimeError, "--yjit-" name " needs an argument"), 0))
-#if YJIT_SUPPORTED_P
-static void
-setup_yjit_options(const char *s, struct rb_yjit_options *yjit_opt)
+#if YJIT_BUILD
+static bool
+setup_yjit_options(const char *s)
{
- const size_t l = strlen(s);
- if (l == 0) {
- return;
- }
- else if (yjit_opt_match_arg(s, l, "exec-mem-size")) {
- yjit_opt->exec_mem_size = atoi(s + 1);
- }
- else if (yjit_opt_match_arg(s, l, "call-threshold")) {
- yjit_opt->call_threshold = atoi(s + 1);
- }
- else if (yjit_opt_match_arg(s, l, "max-versions")) {
- yjit_opt->max_versions = atoi(s + 1);
- }
- else if (yjit_opt_match_noarg(s, l, "greedy-versioning")) {
- yjit_opt->greedy_versioning = true;
- }
- else if (yjit_opt_match_noarg(s, l, "no-type-prop")) {
- yjit_opt->no_type_prop = true;
- }
- else if (yjit_opt_match_noarg(s, l, "stats")) {
- yjit_opt->gen_stats = true;
- }
- else {
- rb_raise(rb_eRuntimeError,
- "invalid yjit option `%s' (--help will show valid yjit options)", s);
+ // The option parsing is done in yjit/src/options.rs
+ bool rb_yjit_parse_option(const char* s);
+ bool success = rb_yjit_parse_option(s);
+
+ if (success) {
+ return true;
}
+
+ rb_raise(
+ rb_eRuntimeError,
+ "invalid YJIT option `%s' (--help will show valid yjit options)",
+ s
+ );
}
#endif
@@ -1446,11 +1433,11 @@ proc_options(long argc, char **argv, ruby_cmdline_options_t *opt, int envopt)
#endif
}
else if (is_option_with_optarg("yjit", '-', true, false, false)) {
-#if YJIT_SUPPORTED_P
+#if YJIT_BUILD
FEATURE_SET(opt->features, FEATURE_BIT(yjit));
- setup_yjit_options(s, &opt->yjit);
+ setup_yjit_options(s);
#else
- rb_warn("Ruby was built without JIT support");
+ rb_warn("Ruby was built without YJIT support");
#endif
}
else if (strcmp("yydebug", s) == 0) {
@@ -1835,8 +1822,8 @@ process_options(int argc, char **argv, ruby_cmdline_options_t *opt)
exit(1);
}
#endif
-#if YJIT_SUPPORTED_P
- rb_yjit_init(&opt->yjit);
+#if YJIT_BUILD
+ rb_yjit_init();
#endif
}
if (opt->dump & (DUMP_BIT(version) | DUMP_BIT(version_v))) {
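As a rough illustration of the new split, here is a minimal sketch of what the Rust-side entry point declared above (`bool rb_yjit_parse_option(const char *s)`) could look like. Only the `extern "C"` signature comes from this diff; the body, the option name, and the error handling are assumptions and not the actual contents of yjit/src/options.rs.

```rust
use std::ffi::CStr;
use std::os::raw::c_char;

// Sketch only: parse one `--yjit-...` sub-option handed over from ruby.c.
// Returning false makes the C caller raise RuntimeError.
#[no_mangle]
pub extern "C" fn rb_yjit_parse_option(str_ptr: *const c_char) -> bool {
    // The C side passes a NUL-terminated string.
    let c_str = unsafe { CStr::from_ptr(str_ptr) };
    let opt = match c_str.to_str() {
        Ok(s) => s,
        Err(_) => return false, // not valid UTF-8
    };

    // An empty string corresponds to a plain `--yjit` with no sub-option.
    if opt.is_empty() {
        return true;
    }

    // Hypothetical handling of a single option, for illustration only.
    if let Some(val) = opt.trim_start_matches('-').strip_prefix("call-threshold=") {
        return val.parse::<u64>().is_ok();
    }

    false
}
```

The important part is the `#[no_mangle] extern "C"` shape: it lets the C code above call straight into the Rust static library without any extra glue.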
diff --git a/template/Makefile.in b/template/Makefile.in
index 37cdaa7993..fee62c8d30 100644
--- a/template/Makefile.in
+++ b/template/Makefile.in
@@ -27,6 +27,8 @@ CC_WRAPPER = @XCC_WRAPPER@
CC = @CC@
CPP = @CPP@
LD = @LD@
+RUSTC = @RUSTC@
+CARGO = @CARGO@
YACC = bison
PURIFY =
AUTOCONF = autoconf
@@ -106,6 +108,11 @@ MJIT_MIN_HEADER_NAME = rb_mjit_min_header-$(RUBY_PROGRAM_VERSION).h
MJIT_MIN_HEADER = $(MJIT_HEADER_BUILD_DIR)/$(MJIT_MIN_HEADER_NAME)
MJIT_HEADER_BUILD_DIR = $(EXTOUT)/include/$(arch)
MJIT_TABS=@MJIT_TABS@
+YJIT_SUPPORT=@YJIT_SUPPORT@
+YJIT_LIBS=@YJIT_LIBS@
+YJIT_OBJ=@YJIT_OBJ@
+CARGO_TARGET_DIR=@abs_top_builddir@/yjit/target
+CARGO_BUILD_ARGS=@CARGO_BUILD_ARGS@
LDFLAGS = @STATIC@ $(CFLAGS) @LDFLAGS@
EXE_LDFLAGS = $(LDFLAGS)
EXTLDFLAGS = @EXTLDFLAGS@
@@ -120,7 +127,7 @@ XDLDFLAGS = @DLDFLAGS@
DLDFLAGS = @LIBRUBY_DLDFLAGS@ $(XLDFLAGS) $(ARCH_FLAG)
SOLIBS = @SOLIBS@
ENABLE_DEBUG_ENV = @ENABLE_DEBUG_ENV@
-MAINLIBS = @MAINLIBS@
+MAINLIBS = @YJIT_LIBS@ @MAINLIBS@
ARCHMINIOBJS = @MINIOBJS@
DLNOBJ = @DLNOBJ@
ENCOBJS = @ENCOBJS@
@@ -291,11 +298,22 @@ PRE_LIBRUBY_UPDATE = [ -n "$(LIBRUBY_SO_UPDATE)" ] || $(gnumake:yes=exec) $(RM)
# We must `rm' the library each time this rule is invoked because "updating" a
# MAB library on Apple/NeXT (see --enable-fat-binary in configure) is not
# supported.
+#
+# In YJIT builds, merge libyjit.a with libruby_static.a
$(LIBRUBY_A):
@$(RM) $@
@-[ -z "$(EXTSTATIC)" ] || $(PRE_LIBRUBY_UPDATE)
$(ECHO) linking static-library $@
$(Q) $(AR) $(ARFLAGS) $@ $(LIBRUBY_A_OBJS) $(INITOBJS)
+ $(Q) if [ -f '$(YJIT_LIBS)' ]; then \
+ set -eu && \
+ echo 'merging $(YJIT_LIBS) into $@' && \
+ $(RMALL) '$(CARGO_TARGET_DIR)/libyjit/' && \
+ $(MAKEDIRS) '$(CARGO_TARGET_DIR)/libyjit/' && \
+ $(CP) '$(YJIT_LIBS)' '$(CARGO_TARGET_DIR)/libyjit/' && \
+ (cd '$(CARGO_TARGET_DIR)/libyjit/' && $(AR) -x libyjit.a) && \
+ $(AR) $(ARFLAGS) $@ $$(find '$(CARGO_TARGET_DIR)/libyjit/' -name '*.o') ; \
+ fi
@-$(RANLIB) $@ 2> /dev/null || true
verify-static-library: $(LIBRUBY_A)
diff --git a/test/ruby/test_yjit.rb b/test/ruby/test_yjit.rb
index 88f8e42813..9ad59bc962 100644
--- a/test/ruby/test_yjit.rb
+++ b/test/ruby/test_yjit.rb
@@ -13,6 +13,7 @@ class TestYJIT < Test::Unit::TestCase
assert_includes(RUBY_DESCRIPTION, '+YJIT')
end
+ # Check that YJIT is in the version string
def test_yjit_in_version
[
%w(--version --yjit),
@@ -42,9 +43,8 @@ class TestYJIT < Test::Unit::TestCase
def test_command_line_switches
assert_in_out_err('--yjit-', '', [], /invalid option --yjit-/)
assert_in_out_err('--yjithello', '', [], /invalid option --yjithello/)
- assert_in_out_err('--yjit-call-threshold', '', [], /--yjit-call-threshold needs an argument/)
- assert_in_out_err('--yjit-call-threshold=', '', [], /--yjit-call-threshold needs an argument/)
- assert_in_out_err('--yjit-greedy-versioning=1', '', [], /warning: argument to --yjit-greedy-versioning is ignored/)
+ #assert_in_out_err('--yjit-call-threshold', '', [], /--yjit-call-threshold needs an argument/)
+ #assert_in_out_err('--yjit-call-threshold=', '', [], /--yjit-call-threshold needs an argument/)
end
def test_yjit_stats_and_v_no_error
@@ -356,7 +356,7 @@ class TestYJIT < Test::Unit::TestCase
end
def test_compile_opt_getinlinecache
- assert_compiles(<<~RUBY, insns: %i[opt_getinlinecache], result: 123, min_calls: 2)
+ assert_compiles(<<~RUBY, insns: %i[opt_getinlinecache], result: 123, call_threshold: 2)
def get_foo
FOO
end
@@ -369,7 +369,7 @@ class TestYJIT < Test::Unit::TestCase
end
def test_opt_getinlinecache_slowpath
- assert_compiles(<<~RUBY, exits: { opt_getinlinecache: 1 }, result: [42, 42, 1, 1], min_calls: 2)
+ assert_compiles(<<~RUBY, exits: { opt_getinlinecache: 1 }, result: [42, 42, 1, 1], call_threshold: 2)
class A
FOO = 42
class << self
@@ -397,7 +397,7 @@ class TestYJIT < Test::Unit::TestCase
end
def test_string_interpolation
- assert_compiles(<<~'RUBY', insns: %i[objtostring anytostring concatstrings], result: "foobar", min_calls: 2)
+ assert_compiles(<<~'RUBY', insns: %i[objtostring anytostring concatstrings], result: "foobar", call_threshold: 2)
def make_str(foo, bar)
"#{foo}#{bar}"
end
@@ -489,7 +489,7 @@ class TestYJIT < Test::Unit::TestCase
# Tests calling a variadic cfunc with many args
def test_build_large_struct
- assert_compiles(<<~RUBY, insns: %i[opt_send_without_block], min_calls: 2)
+ assert_compiles(<<~RUBY, insns: %i[opt_send_without_block], call_threshold: 2)
::Foo = Struct.new(:a, :b, :c, :d, :e, :f, :g, :h)
def build_foo
@@ -530,8 +530,8 @@ class TestYJIT < Test::Unit::TestCase
assert_no_exits('{}.merge(foo: 123, bar: 456, baz: 789)')
end
+ # regression test simplified from URI::Generic#hostname=
def test_ctx_different_mappings
- # regression test simplified from URI::Generic#hostname=
assert_compiles(<<~'RUBY', frozen_string_literal: true)
def foo(v)
!(v&.start_with?('[')) && v&.index(':')
@@ -572,7 +572,7 @@ class TestYJIT < Test::Unit::TestCase
end
ANY = Object.new
- def assert_compiles(test_script, insns: [], min_calls: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil)
+ def assert_compiles(test_script, insns: [], call_threshold: 1, stdout: nil, exits: {}, result: ANY, frozen_string_literal: nil)
reset_stats = <<~RUBY
RubyVM::YJIT.runtime_stats
RubyVM::YJIT.reset_stats!
@@ -581,29 +581,17 @@ class TestYJIT < Test::Unit::TestCase
write_results = <<~RUBY
stats = RubyVM::YJIT.runtime_stats
- def collect_blocks(blocks)
- blocks.sort_by(&:address).map { |b| [b.iseq_start_index, b.iseq_end_index] }
- end
-
- def collect_iseqs(iseq)
- iseq_array = iseq.to_a
- insns = iseq_array.last.grep(Array)
- blocks = RubyVM::YJIT.blocks_for(iseq)
- h = {
- name: iseq_array[5],
- insns: insns,
- blocks: collect_blocks(blocks),
- }
- arr = [h]
- iseq.each_child { |c| arr.concat collect_iseqs(c) }
- arr
+ def collect_insns(iseq)
+ insns = RubyVM::YJIT.insns_compiled(iseq)
+ iseq.each_child { |c| insns.concat collect_insns(c) }
+ insns
end
iseq = RubyVM::InstructionSequence.of(_test_proc)
IO.open(3).write Marshal.dump({
result: #{result == ANY ? "nil" : "result"},
stats: stats,
- iseqs: collect_iseqs(iseq),
+ insns: collect_insns(iseq),
disasm: iseq.disasm
})
RUBY
@@ -618,7 +606,7 @@ class TestYJIT < Test::Unit::TestCase
#{write_results}
RUBY
- status, out, err, stats = eval_with_jit(script, min_calls: min_calls)
+ status, out, err, stats = eval_with_jit(script, call_threshold: call_threshold)
assert status.success?, "exited with status #{status.to_i}, stderr:\n#{err}"
@@ -629,10 +617,11 @@ class TestYJIT < Test::Unit::TestCase
end
runtime_stats = stats[:stats]
- iseqs = stats[:iseqs]
+ insns_compiled = stats[:insns]
disasm = stats[:disasm]
- # Only available when RUBY_DEBUG enabled
+ # Check that exit counts are as expected
+ # Full stats are only available when RUBY_DEBUG enabled
if runtime_stats[:all_stats]
recorded_exits = runtime_stats.select { |k, v| k.to_s.start_with?("exit_") }
recorded_exits = recorded_exits.reject { |k, v| v == 0 }
@@ -647,38 +636,24 @@ class TestYJIT < Test::Unit::TestCase
# Only available when RUBY_DEBUG enabled
if runtime_stats[:all_stats]
missed_insns = insns.dup
- all_compiled_blocks = {}
- iseqs.each do |iseq|
- compiled_blocks = iseq[:blocks].map { |from, to| (from...to) }
- all_compiled_blocks[iseq[:name]] = compiled_blocks
- compiled_insns = iseq[:insns]
- next_idx = 0
- compiled_insns.map! do |insn|
- # TODO: not sure this is accurate for determining insn size
- idx = next_idx
- next_idx += insn.length
- [idx, *insn]
- end
-
- compiled_insns.each do |idx, op, *arguments|
- next unless missed_insns.include?(op)
- next unless compiled_blocks.any? { |block| block === idx }
+ insns_compiled.each do |op|
+ if missed_insns.include?(op)
# This instruction was compiled
missed_insns.delete(op)
end
end
unless missed_insns.empty?
- flunk "Expected to compile instructions #{missed_insns.join(", ")} but didn't.\nCompiled ranges: #{all_compiled_blocks.inspect}\niseq:\n#{disasm}"
+ flunk "Expected to compile instructions #{missed_insns.join(", ")} but didn't.\niseq:\n#{disasm}"
end
end
end
- def eval_with_jit(script, min_calls: 1, timeout: 1000)
+ def eval_with_jit(script, call_threshold: 1, timeout: 1000)
args = [
"--disable-gems",
- "--yjit-call-threshold=#{min_calls}",
+ "--yjit-call-threshold=#{call_threshold}",
"--yjit-stats"
]
args << "-e" << script
diff --git a/vm.c b/vm.c
index 8a1e01b2a3..8b172b5a39 100644
--- a/vm.c
+++ b/vm.c
@@ -1890,7 +1890,7 @@ rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass)
if (st_lookup(vm_opt_method_def_table, (st_data_t)me->def, &bop)) {
int flag = vm_redefinition_check_flag(klass);
if (flag != 0) {
- rb_yjit_bop_redefined(klass, me, (enum ruby_basic_operators)bop);
+ rb_yjit_bop_redefined(flag, (enum ruby_basic_operators)bop);
ruby_vm_redefined_flag[bop] |= flag;
}
}
@@ -3971,6 +3971,11 @@ Init_vm_objects(void)
vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 10000);
}
+/* Stub for builtin function when not building YJIT units */
+#if !YJIT_BUILD
+void Init_builtin_yjit(void) {}
+#endif
+
/* top self */
static VALUE
diff --git a/vm_core.h b/vm_core.h
index 9a5269c6bc..196b564383 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -85,7 +85,6 @@
#include "ruby/st.h"
#include "ruby_atomic.h"
#include "vm_opts.h"
-#include "darray.h"
#include "ruby/thread_native.h"
#include THREAD_IMPL_H
@@ -322,10 +321,6 @@ pathobj_realpath(VALUE pathobj)
/* Forward declarations */
struct rb_mjit_unit;
-// List of YJIT block versions
-typedef rb_darray(struct yjit_block_version *) rb_yjit_block_array_t;
-typedef rb_darray(rb_yjit_block_array_t) rb_yjit_block_array_array_t;
-
struct rb_iseq_constant_body {
enum iseq_type {
ISEQ_TYPE_TOP,
@@ -470,7 +465,11 @@ struct rb_iseq_constant_body {
struct rb_mjit_unit *jit_unit;
#endif
- rb_yjit_block_array_array_t yjit_blocks; // empty, or has a size equal to iseq_size
+#if USE_YJIT
+ // YJIT stores some data on each iseq.
+ // Note: Cannot use YJIT_BUILD here since yjit.h includes this header.
+ void *yjit_payload;
+#endif
};
/* T_IMEMO/iseq */
@@ -1206,7 +1205,7 @@ typedef rb_control_frame_t *
#define GC_GUARDED_PTR_REF(p) VM_TAGGED_PTR_REF((p), 0x03)
#define GC_GUARDED_PTR_P(p) (((VALUE)(p)) & 0x01)
-enum {
+enum vm_frame_env_flags {
/* Frame/Environment flag bits:
* MMMM MMMM MMMM MMMM ____ _FFF FFFF EEEX (LSB)
*
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 87aaeefa7e..2eb9fd8630 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -4926,8 +4926,13 @@ vm_opt_newarray_min(rb_execution_context_t *ec, rb_num_t num, const VALUE *ptr)
#define IMEMO_CONST_CACHE_SHAREABLE IMEMO_FL_USER0
-// For each getconstant, associate the ID that corresponds to the first operand
-// to that instruction with the inline cache.
+// This is the iterator used by vm_ic_compile for rb_iseq_each. It is used as a
+// callback for each instruction within the ISEQ, and is meant to return a
+// boolean indicating whether or not to keep iterating.
+//
+// This is used to walk through the ISEQ and find all getconstant instructions
+// between the starting opt_getinlinecache and the ending opt_setinlinecache,
+// associating the inline cache with the constant name components on the VM.
static bool
vm_ic_compile_i(VALUE *code, VALUE insn, size_t index, void *ic)
{
diff --git a/vm_method.c b/vm_method.c
index 6194267077..a10f90e7a3 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -123,7 +123,7 @@ vm_cme_invalidate(rb_callable_method_entry_t *cme)
METHOD_ENTRY_INVALIDATED_SET(cme);
RB_DEBUG_COUNTER_INC(cc_cme_invalidate);
- rb_yjit_cme_invalidate((VALUE)cme);
+ rb_yjit_cme_invalidate(cme);
}
static int
@@ -148,7 +148,7 @@ rb_clear_constant_cache_for_id(ID id)
ruby_vm_constant_cache_invalidations += ics->num_entries;
}
- rb_yjit_constant_state_changed();
+ rb_yjit_constant_state_changed(id);
}
static void
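Since the hook above now receives the constant name (`ID`), a finer-grained invalidation strategy becomes possible on the Rust side. The sketch below only illustrates the idea; the `Id`/`BlockRef` types and the registry are invented here and are not YJIT's actual data structures (the C-visible signature `rb_yjit_constant_state_changed(ID id)` appears in the yjit.h changes later in this diff).

```rust
// Illustrative only: the real invalidation code lives in the Rust sources,
// which this diff does not show.
use std::collections::{HashMap, HashSet};

type Id = u64;         // stand-in for CRuby's ID
type BlockRef = usize; // stand-in for a compiled block handle

#[derive(Default)]
struct ConstantDeps {
    // Blocks that assumed the value of a constant with a given name component.
    by_id: HashMap<Id, HashSet<BlockRef>>,
}

impl ConstantDeps {
    fn record(&mut self, id: Id, block: BlockRef) {
        self.by_id.entry(id).or_default().insert(block);
    }

    // Called for rb_yjit_constant_state_changed(id): only blocks that depend
    // on this particular name are invalidated, instead of all blocks.
    fn invalidate_for(&mut self, id: Id) -> Vec<BlockRef> {
        self.by_id
            .remove(&id)
            .map(|blocks| blocks.into_iter().collect())
            .unwrap_or_default()
    }
}
```

Keying the dependency table by name means redefining one constant no longer has to throw away every block that made a constant-related assumption.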
diff --git a/yjit.c b/yjit.c
index 39ade5f1e2..7ec3b0566c 100644
--- a/yjit.c
+++ b/yjit.c
@@ -1,7 +1,19 @@
-// YJIT combined compilation unit. This setup allows spreading functions
-// across different files without having to worry about putting things
-// in headers and prefixing function names.
+// This part of YJIT helps interfacing with the rest of CRuby and with the OS.
+// Sometimes our FFI binding generation tool gives undesirable outputs when it
+// sees C features that Rust doesn't support well. We mitigate that by binding
+// functions which have simple parameter types. The boilerplate C functions for
+// that purpose are in this file.
+// Similarly, we wrap OS facilities we need in simple functions to help with
+// FFI and to avoid the need to use external crates.io Rust libraries.
+
#include "internal.h"
+#include "internal/sanitizers.h"
+#include "internal/string.h"
+#include "internal/hash.h"
+#include "internal/variable.h"
+#include "internal/compile.h"
+#include "internal/class.h"
+#include "gc.h"
#include "vm_core.h"
#include "vm_callinfo.h"
#include "builtin.h"
@@ -9,178 +21,848 @@
#include "insns_info.inc"
#include "vm_sync.h"
#include "yjit.h"
-
-#ifndef YJIT_CHECK_MODE
-# define YJIT_CHECK_MODE 0
+#include "vm_insnhelper.h"
+#include "probes.h"
+#include "probes_helper.h"
+#include "iseq.h"
+
+// For mmap(), sysconf()
+#ifndef _WIN32
+#include <unistd.h>
+#include <sys/mman.h>
#endif
-// >= 1: print when output code invalidation happens
-// >= 2: dump list of instructions when regions compile
-#ifndef YJIT_DUMP_MODE
-# define YJIT_DUMP_MODE 0
+#include <errno.h>
+
+// We need size_t to have a known size to simplify code generation and FFI.
+// TODO(alan): check this in configure.ac to fail fast on 32 bit platforms.
+STATIC_ASSERT(64b_size_t, SIZE_MAX == UINT64_MAX);
+// I don't know any C implementation that has uint64_t and puts padding bits
+// into size_t but the standard seems to allow it.
+STATIC_ASSERT(size_t_no_padding_bits, sizeof(size_t) == sizeof(uint64_t));
+
+// NOTE: We can trust that uint8_t has no "padding bits" since the C spec
+// guarantees it. Wording about padding bits is more explicit in C11 compared
+// to C99. See C11 7.20.1.1p2. All this is to say we have _some_ standards backing
+// for using a Rust `*mut u8` to represent a C `uint8_t *`.
+//
+// If we don't want to trust that we can interpret the C standard correctly, we
+// could outsource that work to the Rust standard library by sticking to fundamental
+// types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on
+// the Rust side.
+//
+// What's up with the long prefix? The "rb_" part is to appease `make leaked-globals`
+// which runs on upstream CI. The rationale for the check is unclear to Alan as
+// we build with `-fvisibility=hidden` so only explicitly marked functions end
+// up as public symbols in libruby.so. Perhaps the check is for the static
+// libruby and/or general namespacing hygiene? Alan admits his bias towards ELF
+// platforms and newer compilers.
+//
+// The "_yjit_" part is for trying to be informative. We might want different
+// suffixes for symbols meant for Rust and symbols meant for broader CRuby.
+
+void
+rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
+{
+ if (mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE)) {
+ rb_bug("Couldn't make JIT page region (%p, %lu bytes) writeable, errno: %s\n",
+ mem_block, (unsigned long)mem_size, strerror(errno));
+ }
+}
+
+void
+rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
+{
+ if (mprotect(mem_block, mem_size, PROT_READ | PROT_EXEC)) {
+ rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s\n",
+ mem_block, (unsigned long)mem_size, strerror(errno));
+ }
+}
+
+uint32_t
+rb_yjit_get_page_size(void)
+{
+#if defined(_SC_PAGESIZE)
+ long page_size = sysconf(_SC_PAGESIZE);
+ if (page_size <= 0) rb_bug("yjit: failed to get page size");
+
+ // 1 GiB limit. x86 CPUs with PDPE1GB can do this and anything larger is unexpected.
+    // Though our design sort of assumes we have fine-grained control over memory
+    // protection, which requires small page sizes.
+ if (page_size > 0x40000000l) rb_bug("yjit page size too large");
+
+ return (uint32_t)page_size;
+#else
+#error "YJIT supports POSIX only for now"
#endif
+}
-// USE_MJIT comes from configure options
-#define JIT_ENABLED USE_MJIT
+#if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
+// Align a pointer to a multiple of the given number of bytes
+static uint8_t *
+align_ptr(uint8_t *ptr, uint32_t multiple)
+{
+ // Compute the pointer modulo the given alignment boundary
+ uint32_t rem = ((uint32_t)(uintptr_t)ptr) % multiple;
-// Check if we need to include YJIT in the build
-#if JIT_ENABLED && YJIT_SUPPORTED_P
+ // If the pointer is already aligned, stop
+ if (rem == 0)
+ return ptr;
-#include "yjit_asm.c"
+ // Pad the pointer by the necessary amount to align it
+ uint32_t pad = multiple - rem;
-// Code block into which we write machine code
-static codeblock_t block;
-static codeblock_t *cb = NULL;
-
-// Code block into which we write out-of-line machine code
-static codeblock_t outline_block;
-static codeblock_t *ocb = NULL;
+ return ptr + pad;
+}
+#endif
-#if YJIT_STATS
-// Comments for generated code
-struct yjit_comment {
- uint32_t offset;
- const char *comment;
+// Allocate a block of executable memory
+uint8_t *
+rb_yjit_alloc_exec_mem(uint32_t mem_size)
+{
+#ifndef _WIN32
+ uint8_t *mem_block;
+
+ // On Linux
+ #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
+ // Align the requested address to page size
+ uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE);
+ uint8_t *req_addr = align_ptr((uint8_t*)&rb_yjit_alloc_exec_mem, page_size);
+
+ do {
+ // Try to map a chunk of memory as executable
+ mem_block = (uint8_t*)mmap(
+ (void*)req_addr,
+ mem_size,
+ PROT_READ | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
+ -1,
+ 0
+ );
+
+ // If we succeeded, stop
+ if (mem_block != MAP_FAILED) {
+ break;
+ }
+
+ // +4MB
+ req_addr += 4 * 1024 * 1024;
+ } while (req_addr < (uint8_t*)&rb_yjit_alloc_exec_mem + INT32_MAX);
+
+ // On MacOS and other platforms
+ #else
+ // Try to map a chunk of memory as executable
+ mem_block = (uint8_t*)mmap(
+ (void*)rb_yjit_alloc_exec_mem,
+ mem_size,
+ PROT_READ | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1,
+ 0
+ );
+ #endif
+
+ // Fallback
+ if (mem_block == MAP_FAILED) {
+ // Try again without the address hint (e.g., valgrind)
+ mem_block = (uint8_t*)mmap(
+ NULL,
+ mem_size,
+ PROT_READ | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ -1,
+ 0
+ );
+ }
+
+ // Check that the memory mapping was successful
+ if (mem_block == MAP_FAILED) {
+ perror("mmap call failed");
+ exit(-1);
+ }
+
+ // Fill the executable memory with PUSH DS (0x1E) so that
+ // executing uninitialized memory will fault with #UD in
+ // 64-bit mode.
+ rb_yjit_mark_writable(mem_block, mem_size);
+ memset(mem_block, 0x1E, mem_size);
+ rb_yjit_mark_executable(mem_block, mem_size);
+
+ return mem_block;
+#else
+ // Windows not supported for now
+ return NULL;
+#endif
+}
+
+// Is anyone listening for :c_call and :c_return events currently?
+bool
+rb_c_method_tracing_currently_enabled(rb_execution_context_t *ec)
+{
+ rb_event_flag_t tracing_events;
+ if (rb_multi_ractor_p()) {
+ tracing_events = ruby_vm_event_enabled_global_flags;
+ }
+ else {
+ // At the time of writing, events are never removed from
+        // ruby_vm_event_enabled_global_flags, so always checking against it would
+        // mean we keep refusing to compile even after tracing is disabled.
+ tracing_events = rb_ec_ractor_hooks(ec)->events;
+ }
+
+ return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
+}
+
+// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
+// like the interpreter. When tracing for c_return is enabled, we patch the code after
+// the C method return to call into this to fire the event.
+void
+rb_full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
+{
+ rb_control_frame_t *cfp = ec->cfp;
+ RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
+ const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
+
+ RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
+ RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
+
+ // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
+
+ // Pop the C func's frame and fire the c_return TracePoint event
+ // Note that this is the same order as vm_call_cfunc_with_frame().
+ rb_vm_pop_frame(ec);
+ EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
+ // Note, this deviates from the interpreter in that users need to enable
+ // a c_return TracePoint for this DTrace hook to work. A reasonable change
+ // since the Ruby return event works this way as well.
+ RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
+
+ // Push return value into the caller's stack. We know that it's a frame that
+ // uses cfp->sp because we are patching a call done with gen_send_cfunc().
+ ec->cfp->sp[0] = return_value;
+ ec->cfp->sp++;
+}
+
+unsigned int
+rb_iseq_encoded_size(const rb_iseq_t *iseq)
+{
+ return iseq->body->iseq_size;
+}
+
+// TODO(alan): consider using an opaque pointer for the payload rather than a void pointer
+void *
+rb_iseq_get_yjit_payload(const rb_iseq_t *iseq)
+{
+ RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
+ if (iseq->body) {
+ return iseq->body->yjit_payload;
+ }
+ else {
+ // Body is NULL when constructing the iseq.
+ return NULL;
+ }
+}
+
+void
+rb_iseq_set_yjit_payload(const rb_iseq_t *iseq, void *payload)
+{
+ RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
+ RUBY_ASSERT_ALWAYS(iseq->body);
+ RUBY_ASSERT_ALWAYS(NULL == iseq->body->yjit_payload);
+ iseq->body->yjit_payload = payload;
+}
+
+void
+rb_iseq_reset_jit_func(const rb_iseq_t *iseq)
+{
+ RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
+ iseq->body->jit_func = NULL;
+}
+
+// Get the PC for a given index in an iseq
+VALUE *
+rb_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx)
+{
+ RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
+ RUBY_ASSERT_ALWAYS(insn_idx < iseq->body->iseq_size);
+ VALUE *encoded = iseq->body->iseq_encoded;
+ VALUE *pc = &encoded[insn_idx];
+ return pc;
+}
+
+// Get the opcode given a program counter. Can return trace opcode variants.
+int
+rb_iseq_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc)
+{
+ // YJIT should only use iseqs after AST to bytecode compilation
+ RUBY_ASSERT_ALWAYS(FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED));
+
+ const VALUE at_pc = *pc;
+ return rb_vm_insn_addr2opcode((const void *)at_pc);
+}
+
+// used by jit_rb_str_bytesize in codegen.rs
+VALUE
+rb_str_bytesize(VALUE str)
+{
+ return LONG2NUM(RSTRING_LEN(str));
+}
+
+// This is defined only as a named struct inside rb_iseq_constant_body.
+// By giving it a separate typedef, we make it nameable by rust-bindgen.
+// Bindgen's temp/anon name isn't guaranteed stable.
+typedef struct rb_iseq_param_keyword rb_seq_param_keyword_struct;
+
+const char *
+rb_insn_name(VALUE insn)
+{
+ return insn_name(insn);
+}
+
+// Query the instruction length in bytes for YARV opcode insn
+int
+rb_insn_len(VALUE insn)
+{
+ return insn_len(insn);
+}
+
+unsigned int
+rb_vm_ci_argc(const struct rb_callinfo *ci)
+{
+ return vm_ci_argc(ci);
+}
+
+ID
+rb_vm_ci_mid(const struct rb_callinfo *ci)
+{
+ return vm_ci_mid(ci);
+}
+
+unsigned int
+rb_vm_ci_flag(const struct rb_callinfo *ci)
+{
+ return vm_ci_flag(ci);
+}
+
+const struct rb_callinfo_kwarg *
+rb_vm_ci_kwarg(const struct rb_callinfo *ci)
+{
+ return vm_ci_kwarg(ci);
+}
+
+int
+rb_get_cikw_keyword_len(const struct rb_callinfo_kwarg *cikw)
+{
+ return cikw->keyword_len;
+}
+
+VALUE
+rb_get_cikw_keywords_idx(const struct rb_callinfo_kwarg *cikw, int idx)
+{
+ return cikw->keywords[idx];
+}
+
+rb_method_visibility_t
+rb_METHOD_ENTRY_VISI(rb_callable_method_entry_t *me)
+{
+ return METHOD_ENTRY_VISI(me);
+}
+
+rb_method_type_t
+rb_get_cme_def_type(rb_callable_method_entry_t *cme)
+{
+ return cme->def->type;
+}
+
+ID
+rb_get_cme_def_body_attr_id(rb_callable_method_entry_t *cme)
+{
+ return cme->def->body.attr.id;
+}
+
+enum method_optimized_type
+rb_get_cme_def_body_optimized_type(rb_callable_method_entry_t *cme)
+{
+ return cme->def->body.optimized.type;
+}
+
+unsigned int
+rb_get_cme_def_body_optimized_index(rb_callable_method_entry_t *cme)
+{
+ return cme->def->body.optimized.index;
+}
+
+rb_method_cfunc_t *
+rb_get_cme_def_body_cfunc(rb_callable_method_entry_t *cme)
+{
+ return UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
+}
+
+uintptr_t
+rb_get_def_method_serial(rb_method_definition_t *def)
+{
+ return def->method_serial;
+}
+
+ID
+rb_get_def_original_id(rb_method_definition_t *def)
+{
+ return def->original_id;
+}
+
+int
+rb_get_mct_argc(rb_method_cfunc_t *mct)
+{
+ return mct->argc;
+}
+
+void *
+rb_get_mct_func(rb_method_cfunc_t *mct)
+{
+ return (void*)mct->func; // this field is defined as type VALUE (*func)(ANYARGS)
+}
+
+const rb_iseq_t *
+rb_get_def_iseq_ptr(rb_method_definition_t *def)
+{
+ return def_iseq_ptr(def);
+}
+
+rb_iseq_t *
+rb_get_iseq_body_local_iseq(rb_iseq_t *iseq)
+{
+ return iseq->body->local_iseq;
+}
+
+unsigned int
+rb_get_iseq_body_local_table_size(rb_iseq_t *iseq)
+{
+ return iseq->body->local_table_size;
+}
+
+VALUE *
+rb_get_iseq_body_iseq_encoded(rb_iseq_t *iseq)
+{
+ return iseq->body->iseq_encoded;
+}
+
+bool
+rb_get_iseq_body_builtin_inline_p(rb_iseq_t *iseq)
+{
+ return iseq->body->builtin_inline_p;
+}
+
+unsigned
+rb_get_iseq_body_stack_max(rb_iseq_t *iseq)
+{
+ return iseq->body->stack_max;
+}
+
+bool
+rb_get_iseq_flags_has_opt(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.has_opt;
+}
+
+bool
+rb_get_iseq_flags_has_kw(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.has_kw;
+}
+
+bool
+rb_get_iseq_flags_has_post(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.has_post;
+}
+
+bool
+rb_get_iseq_flags_has_kwrest(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.has_kwrest;
+}
+
+bool
+rb_get_iseq_flags_has_rest(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.has_rest;
+}
+
+bool
+rb_get_iseq_flags_has_block(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.has_block;
+}
+
+bool
+rb_get_iseq_flags_has_accepts_no_kwarg(rb_iseq_t *iseq)
+{
+ return iseq->body->param.flags.accepts_no_kwarg;
+}
+
+const rb_seq_param_keyword_struct *
+rb_get_iseq_body_param_keyword(rb_iseq_t *iseq)
+{
+ return iseq->body->param.keyword;
+}
+
+unsigned
+rb_get_iseq_body_param_size(rb_iseq_t *iseq)
+{
+ return iseq->body->param.size;
+}
+
+int
+rb_get_iseq_body_param_lead_num(rb_iseq_t *iseq)
+{
+ return iseq->body->param.lead_num;
+}
+
+int
+rb_get_iseq_body_param_opt_num(rb_iseq_t *iseq)
+{
+ return iseq->body->param.opt_num;
+}
+
+const VALUE *
+rb_get_iseq_body_param_opt_table(rb_iseq_t *iseq)
+{
+ return iseq->body->param.opt_table;
+}
+
+// Returns true if the iseq is a leaf and can be replaced by a single C call.
+bool
+rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
+{
+ unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
+ unsigned int leave_len = insn_len(BIN(leave));
+
+ return (iseq->body->iseq_size == (invokebuiltin_len + leave_len) &&
+ rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
+ rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
+ iseq->body->builtin_inline_p
+ );
+}
+
+// Return an rb_builtin_function if the iseq contains only that leaf builtin function.
+const struct rb_builtin_function *
+rb_leaf_builtin_function(const rb_iseq_t *iseq)
+{
+ if (!rb_leaf_invokebuiltin_iseq_p(iseq))
+ return NULL;
+ return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
+}
+
+struct rb_control_frame_struct *
+rb_get_ec_cfp(rb_execution_context_t *ec)
+{
+ return ec->cfp;
+}
+
+VALUE *
+rb_get_cfp_pc(struct rb_control_frame_struct *cfp)
+{
+ return (VALUE*)cfp->pc;
+}
+
+VALUE *
+rb_get_cfp_sp(struct rb_control_frame_struct *cfp)
+{
+ return cfp->sp;
+}
+
+void
+rb_set_cfp_pc(struct rb_control_frame_struct *cfp, const VALUE *pc)
+{
+ cfp->pc = pc;
+}
+
+void
+rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp)
+{
+ cfp->sp = sp;
+}
+
+rb_iseq_t *
+rb_cfp_get_iseq(struct rb_control_frame_struct *cfp)
+{
+ // TODO(alan) could assert frame type here to make sure that it's a ruby frame with an iseq.
+ return (rb_iseq_t*)cfp->iseq;
+}
+
+VALUE
+rb_get_cfp_self(struct rb_control_frame_struct *cfp)
+{
+ return cfp->self;
+}
+
+VALUE *
+rb_get_cfp_ep(struct rb_control_frame_struct *cfp)
+{
+ return (VALUE*)cfp->ep;
+}
+
+VALUE
+rb_yarv_class_of(VALUE obj)
+{
+ return rb_class_of(obj);
+}
+
+// YJIT needs this function to never allocate and never raise
+VALUE
+rb_yarv_str_eql_internal(VALUE str1, VALUE str2)
+{
+ // We wrap this since it's static inline
+ return rb_str_eql_internal(str1, str2);
+}
+
+// YJIT needs this function to never allocate and never raise
+VALUE
+rb_yarv_ary_entry_internal(VALUE ary, long offset)
+{
+ return rb_ary_entry_internal(ary, offset);
+}
+
+// Print the Ruby source location of some ISEQ for debugging purposes
+void
+rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx)
+{
+ char *ptr;
+ long len;
+ VALUE path = rb_iseq_path(iseq);
+ RSTRING_GETMEM(path, ptr, len);
+ fprintf(stderr, "%s %.*s:%u\n", __func__, (int)len, ptr, rb_iseq_line_no(iseq, insn_idx));
+}
+
+// The FL_TEST() macro
+VALUE
+rb_FL_TEST(VALUE obj, VALUE flags)
+{
+ return RB_FL_TEST(obj, flags);
+}
+
+// The FL_TEST_RAW() macro, normally an internal implementation detail
+VALUE
+rb_FL_TEST_RAW(VALUE obj, VALUE flags)
+{
+ return FL_TEST_RAW(obj, flags);
+}
+
+// The RB_TYPE_P macro
+bool
+rb_RB_TYPE_P(VALUE obj, enum ruby_value_type t)
+{
+ return RB_TYPE_P(obj, t);
+}
+
+long
+rb_RSTRUCT_LEN(VALUE st)
+{
+ return RSTRUCT_LEN(st);
+}
+
+// There are RSTRUCT_SETs in ruby/internal/core/rstruct.h and internal/struct.h
+// with different types (int vs long) for k. Here we use the one from ruby/internal/core/rstruct.h,
+// which takes an int.
+void
+rb_RSTRUCT_SET(VALUE st, int k, VALUE v)
+{
+ RSTRUCT_SET(st, k, v);
+}
+
+const struct rb_callinfo *
+rb_get_call_data_ci(struct rb_call_data *cd)
+{
+ return cd->ci;
+}
+
+bool
+rb_BASIC_OP_UNREDEFINED_P(enum ruby_basic_operators bop, uint32_t klass)
+{
+ return BASIC_OP_UNREDEFINED_P(bop, klass);
+}
+
+VALUE
+rb_RCLASS_ORIGIN(VALUE c)
+{
+ return RCLASS_ORIGIN(c);
+}
+
+bool
+rb_yjit_multi_ractor_p(void)
+{
+ return rb_multi_ractor_p();
+}
+
+// For debug builds
+void
+rb_assert_iseq_handle(VALUE handle)
+{
+ RUBY_ASSERT_ALWAYS(rb_objspace_markable_object_p(handle));
+ RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(handle, imemo_iseq));
+}
+
+int
+rb_IMEMO_TYPE_P(VALUE imemo, enum imemo_type imemo_type)
+{
+ return IMEMO_TYPE_P(imemo, imemo_type);
+}
+
+void
+rb_assert_cme_handle(VALUE handle)
+{
+ RUBY_ASSERT_ALWAYS(rb_objspace_markable_object_p(handle));
+ RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(handle, imemo_ment));
+}
+
+typedef void (*iseq_callback)(const rb_iseq_t *);
+
+// Heap-walking callback for rb_yjit_for_each_iseq().
+static int
+for_each_iseq_i(void *vstart, void *vend, size_t stride, void *data)
+{
+ const iseq_callback callback = (iseq_callback)data;
+ VALUE v = (VALUE)vstart;
+ for (; v != (VALUE)vend; v += stride) {
+ void *ptr = asan_poisoned_object_p(v);
+ asan_unpoison_object(v, false);
+
+ if (rb_obj_is_iseq(v)) {
+ rb_iseq_t *iseq = (rb_iseq_t *)v;
+ callback(iseq);
+ }
+
+ asan_poison_object_if(ptr, v);
+ }
+ return 0;
+}
+
+// Iterate through the whole GC heap and invoke a callback for each iseq.
+// Used for global code invalidation.
+void
+rb_yjit_for_each_iseq(iseq_callback callback)
+{
+ rb_objspace_each_objects(for_each_iseq_i, (void *)callback);
+}
+
+// For running write barriers from Rust. Required when we add a new edge in the
+// object graph from `old` to `young`.
+void
+rb_yjit_obj_written(VALUE old, VALUE young, const char *file, int line)
+{
+ rb_obj_written(old, Qundef, young, file, line);
+}
+
+// Acquire the VM lock and then signal all other Ruby threads (ractors) to
+// contend for the VM lock, putting them to sleep. YJIT uses this to evict
+// threads running inside generated code so that, among other things, it can
+// safely change memory protection of regions housing generated code.
+void
+rb_yjit_vm_lock_then_barrier(unsigned int *recursive_lock_level, const char *file, int line)
+{
+ rb_vm_lock_enter(recursive_lock_level, file, line);
+ rb_vm_barrier();
+}
+
+// Release the VM lock. The lock level must point to the same integer used to
+// acquire the lock.
+void
+rb_yjit_vm_unlock(unsigned int *recursive_lock_level, const char *file, int line)
+{
+ rb_vm_lock_leave(recursive_lock_level, file, line);
+}
+
+// Pointer to a YJIT entry point (machine code generated by YJIT)
+typedef VALUE (*yjit_func_t)(rb_execution_context_t *, rb_control_frame_t *);
+
+bool
+rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec)
+{
+ bool success = true;
+ RB_VM_LOCK_ENTER();
+ rb_vm_barrier();
+
+ // Compile a block version starting at the first instruction
+ uint8_t *rb_yjit_iseq_gen_entry_point(const rb_iseq_t *iseq, rb_execution_context_t *ec); // defined in Rust
+ uint8_t *code_ptr = rb_yjit_iseq_gen_entry_point(iseq, ec);
+
+ if (code_ptr) {
+ iseq->body->jit_func = (yjit_func_t)code_ptr;
+ }
+ else {
+ iseq->body->jit_func = 0;
+ success = false;
+ }
+
+ RB_VM_LOCK_LEAVE();
+ return success;
+}
+
+// GC root for interacting with the GC
+struct yjit_root_struct {
+ bool unused; // empty structs are not legal in C99
};
-typedef rb_darray(struct yjit_comment) yjit_comment_array_t;
-static yjit_comment_array_t yjit_code_comments;
-
-// Counters for generated code
-#define YJIT_DECLARE_COUNTERS(...) struct rb_yjit_runtime_counters { \
- int64_t __VA_ARGS__; \
-}; \
-static char yjit_counter_names[] = #__VA_ARGS__;
-
-YJIT_DECLARE_COUNTERS(
- exec_instruction,
-
- send_keywords,
- send_kw_splat,
- send_args_splat,
- send_block_arg,
- send_ivar_set_method,
- send_zsuper_method,
- send_undef_method,
- send_optimized_method,
- send_optimized_method_send,
- send_optimized_method_call,
- send_optimized_method_block_call,
- send_missing_method,
- send_bmethod,
- send_refined_method,
- send_cfunc_argc_mismatch,
- send_cfunc_toomany_args,
- send_cfunc_tracing,
- send_cfunc_kwargs,
- send_attrset_kwargs,
- send_iseq_tailcall,
- send_iseq_arity_error,
- send_iseq_only_keywords,
- send_iseq_kwargs_req_and_opt_missing,
- send_iseq_kwargs_mismatch,
- send_iseq_complex_callee,
- send_not_implemented_method,
- send_getter_arity,
- send_se_cf_overflow,
- send_se_protected_check_failed,
-
- traced_cfunc_return,
-
- invokesuper_me_changed,
- invokesuper_block,
-
- leave_se_interrupt,
- leave_interp_return,
- leave_start_pc_non_zero,
-
- getivar_se_self_not_heap,
- getivar_idx_out_of_range,
- getivar_megamorphic,
-
- setivar_se_self_not_heap,
- setivar_idx_out_of_range,
- setivar_val_heapobject,
- setivar_name_not_mapped,
- setivar_not_object,
- setivar_frozen,
-
- oaref_argc_not_one,
- oaref_arg_not_fixnum,
-
- opt_getinlinecache_miss,
-
- binding_allocations,
- binding_set,
-
- vm_insns_count,
- compiled_iseq_count,
- compiled_block_count,
- compilation_failure,
-
- exit_from_branch_stub,
-
- invalidation_count,
- invalidate_method_lookup,
- invalidate_bop_redefined,
- invalidate_ractor_spawn,
- invalidate_constant_state_bump,
- invalidate_constant_ic_fill,
-
- constant_state_bumps,
-
- expandarray_splat,
- expandarray_postarg,
- expandarray_not_array,
- expandarray_rhs_too_small,
-
- gbpp_block_param_modified,
- gbpp_block_handler_not_iseq,
-
- // Member with known name for iterating over counters
- last_member
-)
-
-static struct rb_yjit_runtime_counters yjit_runtime_counters = { 0 };
-#undef YJIT_DECLARE_COUNTERS
-
-#endif // YJIT_STATS
-
-// The number of bytes counting from the beginning of the inline code block
-// that should not be changed. After patching for global invalidation, no one
-// should make changes to the invalidated code region anymore. This is used to
-// break out of invalidation race when there are multiple ractors.
-static uint32_t yjit_codepage_frozen_bytes = 0;
-
-#include "yjit_utils.c"
-#include "yjit_core.c"
-#include "yjit_iface.c"
-#include "yjit_codegen.c"
+static void
+yjit_root_free(void *ptr)
+{
+ // Do nothing. The root lives as long as the process.
+}
+
+static size_t
+yjit_root_memsize(const void *ptr)
+{
+ // Count off-gc-heap allocation size of the dependency table
+ return 0; // TODO: more accurate accounting
+}
+
+// GC callback during compaction
+static void
+yjit_root_update_references(void *ptr)
+{
+ // Do nothing since we use rb_gc_mark(), which pins.
+}
+
+void rb_yjit_root_mark(void *ptr); // in Rust
+
+// Custom type for interacting with the GC
+// TODO: make this write barrier protected
+static const rb_data_type_t yjit_root_type = {
+ "yjit_root",
+ {rb_yjit_root_mark, yjit_root_free, yjit_root_memsize, yjit_root_update_references},
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+};
-#else
-// !JIT_ENABLED || !YJIT_SUPPORTED_P
-// In these builds, YJIT could never be turned on. Provide dummy
-// implementations for YJIT functions exposed to the rest of the code base.
-// See yjit.h.
-
-void Init_builtin_yjit(void) {}
-bool rb_yjit_enabled_p(void) { return false; }
-unsigned rb_yjit_call_threshold(void) { return UINT_MAX; }
-void rb_yjit_invalidate_all_method_lookup_assumptions(void) {};
-void rb_yjit_method_lookup_change(VALUE klass, ID mid) {};
-void rb_yjit_cme_invalidate(VALUE cme) {}
-void rb_yjit_collect_vm_usage_insn(int insn) {}
-void rb_yjit_collect_binding_alloc(void) {}
-void rb_yjit_collect_binding_set(void) {}
-bool rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec) { return false; }
-void rb_yjit_init(struct rb_yjit_options *options) {}
-void rb_yjit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_operators bop) {}
-void rb_yjit_constant_state_changed(void) {}
-void rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body) {}
-void rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body) {}
-void rb_yjit_iseq_free(const struct rb_iseq_constant_body *body) {}
-void rb_yjit_before_ractor_spawn(void) {}
-void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic) {}
-void rb_yjit_tracing_invalidate_all(void) {}
-
-#endif // if JIT_ENABLED && YJIT_SUPPORTED_P
+// For dealing with refinements
+void
+rb_yjit_invalidate_all_method_lookup_assumptions(void)
+{
+ // It looks like Module#using actually doesn't need to invalidate all the
+ // method caches, so we do nothing here for now.
+}
+
+// Primitives used by yjit.rb
+VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
+VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self);
+VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
+VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq);
+VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq);
+VALUE rb_yjit_simulate_oom_bang(rb_execution_context_t *ec, VALUE self);
+VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self);
+
+// Preprocessed yjit.rb generated during build
+#include "yjit.rbinc"
+
+// Can raise RuntimeError
+void
+rb_yjit_init(void)
+{
+ // Call the Rust initialization code
+ void rb_yjit_init_rust(void);
+ rb_yjit_init_rust();
+
+    // Initialize the GC hooks. Do this second as some of this code depends on Rust initialization.
+ struct yjit_root_struct *root;
+ VALUE yjit_root = TypedData_Make_Struct(0, struct yjit_root_struct, &yjit_root_type, root);
+ rb_gc_register_mark_object(yjit_root);
+}
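For a sense of how the Rust side consumes these boilerplate helpers, here is a hedged sketch. In the real build the `extern` declarations are generated by the bindgen tooling under yjit/bindgen/ (added later in this commit); only the two C signatures are taken from the code above, and the wrapper function is illustrative.

```rust
use std::os::raw::c_void;

// Declarations mirroring the C helpers in yjit.c; normally emitted by bindgen.
extern "C" {
    fn rb_yjit_mark_writable(mem_block: *mut c_void, mem_size: u32);
    fn rb_yjit_mark_executable(mem_block: *mut c_void, mem_size: u32);
}

/// Temporarily make a region of generated code writable, run `write`, then
/// flip it back to executable. Illustrative; not YJIT's actual code block API.
fn with_writable_code(page: *mut u8, size: u32, write: impl FnOnce()) {
    unsafe { rb_yjit_mark_writable(page.cast(), size) };
    write();
    unsafe { rb_yjit_mark_executable(page.cast(), size) };
}
```

Keeping the C surface down to plain pointers and fixed-width integers is what lets bindgen produce clean `*mut u8`/`u32` signatures without hand-written conversion code.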
diff --git a/yjit.h b/yjit.h
index cc1e23d327..a1e3efb330 100644
--- a/yjit.h
+++ b/yjit.h
@@ -22,52 +22,58 @@
# define YJIT_SUPPORTED_P 0
#endif
-struct rb_yjit_options {
- // Enable compilation with YJIT
- bool yjit_enabled;
-
- // Size of the executable memory block to allocate in MiB
- unsigned exec_mem_size;
-
- // Number of method calls after which to start generating code
- // Threshold==1 means compile on first execution
- unsigned call_threshold;
-
- // Generate versions greedily until the limit is hit
- bool greedy_versioning;
-
- // Disable the propagation of type information
- bool no_type_prop;
-
- // Maximum number of versions per block
- // 1 means always create generic versions
- unsigned max_versions;
-
- // Capture and print out stats
- bool gen_stats;
+// Is the output binary going to include YJIT?
+#if USE_MJIT && USE_YJIT && YJIT_SUPPORTED_P
+# define YJIT_BUILD 1
+#else
+# define YJIT_BUILD 0
+#endif
- // Run backend tests
- bool test_backend;
-};
+#if YJIT_BUILD
+// Expose these as declarations since we are building YJIT.
bool rb_yjit_enabled_p(void);
unsigned rb_yjit_call_threshold(void);
-
void rb_yjit_invalidate_all_method_lookup_assumptions(void);
void rb_yjit_method_lookup_change(VALUE klass, ID mid);
-void rb_yjit_cme_invalidate(VALUE cme);
+void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme);
void rb_yjit_collect_vm_usage_insn(int insn);
void rb_yjit_collect_binding_alloc(void);
void rb_yjit_collect_binding_set(void);
bool rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec);
-void rb_yjit_init(struct rb_yjit_options *options);
-void rb_yjit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_operators bop);
-void rb_yjit_constant_state_changed(void);
-void rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body);
-void rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body);
-void rb_yjit_iseq_free(const struct rb_iseq_constant_body *body);
+void rb_yjit_init(void);
+void rb_yjit_bop_redefined(int redefined_flag, enum ruby_basic_operators bop);
+void rb_yjit_constant_state_changed(ID id);
+void rb_yjit_iseq_mark(void *payload);
+void rb_yjit_iseq_update_references(void *payload);
+void rb_yjit_iseq_free(void *payload);
void rb_yjit_before_ractor_spawn(void);
void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic);
void rb_yjit_tracing_invalidate_all(void);
+#else
+// !YJIT_BUILD
+// In these builds, YJIT could never be turned on. Provide dummy implementations.
+
+static inline bool rb_yjit_enabled_p(void) { return false; }
+static inline unsigned rb_yjit_call_threshold(void) { return UINT_MAX; }
+static inline void rb_yjit_invalidate_all_method_lookup_assumptions(void) {}
+static inline void rb_yjit_method_lookup_change(VALUE klass, ID mid) {}
+static inline void rb_yjit_cme_invalidate(rb_callable_method_entry_t *cme) {}
+static inline void rb_yjit_collect_vm_usage_insn(int insn) {}
+static inline void rb_yjit_collect_binding_alloc(void) {}
+static inline void rb_yjit_collect_binding_set(void) {}
+static inline bool rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec) { return false; }
+static inline void rb_yjit_init(void) {}
+static inline void rb_yjit_bop_redefined(int redefined_flag, enum ruby_basic_operators bop) {}
+static inline void rb_yjit_constant_state_changed(ID id) {}
+static inline void rb_yjit_iseq_mark(void *payload) {}
+static inline void rb_yjit_iseq_update_references(void *payload) {}
+static inline void rb_yjit_iseq_free(void *payload) {}
+static inline void rb_yjit_before_ractor_spawn(void) {}
+static inline void rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic) {}
+static inline void rb_yjit_tracing_invalidate_all(void) {}
+
+#endif // #if YJIT_BUILD
+
#endif // #ifndef YJIT_H
diff --git a/yjit.rb b/yjit.rb
index 09ed3faaf3..7c5311d79a 100644
--- a/yjit.rb
+++ b/yjit.rb
@@ -9,155 +9,58 @@
# for which CRuby is built. There is also no API stability guarantee as to in
# what situations this module is defined.
module RubyVM::YJIT
- if defined?(Disasm)
- def self.disasm(iseq, tty: $stdout && $stdout.tty?)
- iseq = RubyVM::InstructionSequence.of(iseq)
-
- blocks = blocks_for(iseq)
- return if blocks.empty?
-
- str = String.new
- str << iseq.disasm
- str << "\n"
-
- # Sort the blocks by increasing addresses
- sorted_blocks = blocks.sort_by(&:address)
-
- highlight = ->(str) {
- if tty
- "\x1b[1m#{str}\x1b[0m"
- else
- str
- end
- }
-
- cs = Disasm.new
- sorted_blocks.each_with_index do |block, i|
- str << "== BLOCK #{i+1}/#{blocks.length}: #{block.code.length} BYTES, ISEQ RANGE [#{block.iseq_start_index},#{block.iseq_end_index}) ".ljust(80, "=")
- str << "\n"
-
- comments = comments_for(block.address, block.address + block.code.length)
- comment_idx = 0
- cs.disasm(block.code, block.address).each do |i|
- while (comment = comments[comment_idx]) && comment.address <= i.address
- str << " ; #{highlight.call(comment.comment)}\n"
- comment_idx += 1
- end
-
- str << sprintf(
- " %<address>08x: %<instruction>s\t%<details>s\n",
- address: i.address,
- instruction: i.mnemonic,
- details: i.op_str
- )
- end
- end
-
- block_sizes = blocks.map { |block| block.code.length }
- total_bytes = block_sizes.sum
- str << "\n"
- str << "Total code size: #{total_bytes} bytes"
- str << "\n"
-
- str
- end
-
- def self.comments_for(start_address, end_address)
- Primitive.comments_for(start_address, end_address)
- end
-
- def self.graphviz_for(iseq)
- iseq = RubyVM::InstructionSequence.of(iseq)
- cs = Disasm.new
-
- highlight = ->(comment) { "<b>#{comment}</b>" }
- linebreak = "<br align=\"left\"/>\n"
-
- buff = +''
- blocks = blocks_for(iseq).sort_by(&:id)
- buff << "digraph g {\n"
-
- # Write the iseq info as a legend
- buff << " legend [shape=record fontsize=\"30\" fillcolor=\"lightgrey\" style=\"filled\"];\n"
- buff << " legend [label=\"{ Instruction Disassembly For: | {#{iseq.base_label}@#{iseq.absolute_path}:#{iseq.first_lineno}}}\"];\n"
-
- # Subgraph contains disassembly
- buff << " subgraph disasm {\n"
- buff << " node [shape=record fontname=\"courier\"];\n"
- buff << " edge [fontname=\"courier\" penwidth=3];\n"
- blocks.each do |block|
- disasm = disasm_block(cs, block, highlight)
-
- # convert newlines to breaks that graphviz understands
- disasm.gsub!(/\n/, linebreak)
-
- # strip leading whitespace
- disasm.gsub!(/^\s+/, '')
-
- buff << "b#{block.id} [label=<#{disasm}>];\n"
- buff << block.outgoing_ids.map { |id|
- next_block = blocks.bsearch { |nb| id <=> nb.id }
- if next_block.address == (block.address + block.code.length)
- "b#{block.id} -> b#{id}[label=\"Fall\"];"
- else
- "b#{block.id} -> b#{id}[label=\"Jump\" style=dashed];"
- end
- }.join("\n")
- buff << "\n"
- end
- buff << " }"
- buff << "}"
- buff
- end
+ # Check if YJIT is enabled
+ def self.enabled?
+ Primitive.cexpr! 'RBOOL(rb_yjit_enabled_p())'
+ end
- def self.disasm_block(cs, block, highlight)
- comments = comments_for(block.address, block.address + block.code.length)
- comment_idx = 0
- str = +''
- cs.disasm(block.code, block.address).each do |i|
- while (comment = comments[comment_idx]) && comment.address <= i.address
- str << " ; #{highlight.call(comment.comment)}\n"
- comment_idx += 1
- end
+ def self.stats_enabled?
+ Primitive.rb_yjit_stats_enabled_p
+ end
- str << sprintf(
- " %<address>08x: %<instruction>s\t%<details>s\n",
- address: i.address,
- instruction: i.mnemonic,
- details: i.op_str
- )
- end
- str
- end
+ # Discard statistics collected for --yjit-stats.
+ def self.reset_stats!
+ Primitive.rb_yjit_reset_stats_bang
end
# Return a hash for statistics generated for the --yjit-stats command line option.
# Return nil when option is not passed or unavailable.
def self.runtime_stats
- # defined in yjit_iface.c
- Primitive.get_yjit_stats
+ Primitive.rb_yjit_get_stats
end
- # Discard statistics collected for --yjit-stats.
- def self.reset_stats!
- # defined in yjit_iface.c
- Primitive.reset_stats_bang
+ # Produce disassembly for an iseq
+ def self.disasm(iseq)
+ # If a method or proc is passed in, get its iseq
+ iseq = RubyVM::InstructionSequence.of(iseq)
+
+ if self.enabled?
+ # Produce the disassembly string
+ # Include the YARV iseq disasm in the string for additional context
+ iseq.disasm + "\n" + Primitive.rb_yjit_disasm_iseq(iseq)
+ else
+ iseq.disasm
+ end
end
- def self.stats_enabled?
- Primitive.yjit_stats_enabled_p
- end
+ # Produce a list of instructions compiled by YJIT for an iseq
+ def self.insns_compiled(iseq)
+ # If a method or proc is passed in, get its iseq
+ iseq = RubyVM::InstructionSequence.of(iseq)
- def self.enabled?
- Primitive.cexpr! 'RBOOL(rb_yjit_enabled_p())'
+ if self.enabled?
+ Primitive.rb_yjit_insns_compiled(iseq)
+ else
+ Qnil
+ end
end
def self.simulate_oom!
- Primitive.simulate_oom_bang
+ Primitive.rb_yjit_simulate_oom_bang
end
# Avoid calling a method here to not interfere with compilation tests
- if Primitive.yjit_stats_enabled_p
+ if Primitive.rb_yjit_stats_enabled_p
at_exit { _print_stats }
end
diff --git a/yjit/.gitignore b/yjit/.gitignore
new file mode 100644
index 0000000000..e208d6dc85
--- /dev/null
+++ b/yjit/.gitignore
@@ -0,0 +1,2 @@
+# Build output
+target/
diff --git a/yjit/Cargo.lock b/yjit/Cargo.lock
new file mode 100644
index 0000000000..e9a59cb771
--- /dev/null
+++ b/yjit/Cargo.lock
@@ -0,0 +1,42 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "capstone"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66b5d1f14c3539b6ff22fcb602fea5f1c4416148c8b7965a2e74860aa169b7b5"
+dependencies = [
+ "capstone-sys",
+ "libc",
+]
+
+[[package]]
+name = "capstone-sys"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df653a22d0ad34b0d91cc92a6289d96e44aac1c9a96250a094c9aeec4a91084f"
+dependencies = [
+ "cc",
+ "libc",
+]
+
+[[package]]
+name = "cc"
+version = "1.0.73"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
+
+[[package]]
+name = "libc"
+version = "0.2.124"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
+
+[[package]]
+name = "yjit"
+version = "0.1.0"
+dependencies = [
+ "capstone",
+]
diff --git a/yjit/Cargo.toml b/yjit/Cargo.toml
new file mode 100644
index 0000000000..8969f897ad
--- /dev/null
+++ b/yjit/Cargo.toml
@@ -0,0 +1,39 @@
+# NOTE: please avoid adding dependencies to external crates as these can
+# make building and packaging YJIT more challenging.
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[package]
+name = "yjit"
+version = "0.1.0" # YJIT version
+edition = "2021" # Rust 2021 edition to compile with
+rust-version = "1.60.0" # Minimally supported rust version
+publish = false # Don't publish to crates.io
+
+[lib]
+crate-type = ["staticlib"]
+
+[dependencies]
+# No required dependencies, to simplify the build process. TODO: link to the
+# yet-to-be-written rationale. Optional, for development and testing purposes.
+capstone = { version = "0.10.0", optional = true }
+
+[features]
+# NOTE: Development builds select a set of these via configure.ac
+# For debugging, `make V=1` shows exact cargo invocation.
+disasm = ["capstone"]
+stats = []
+asm_comments = []
+
+[profile.dev]
+opt-level = 0
+debug = true
+debug-assertions = true
+overflow-checks = true
+
+[profile.release]
+# NOTE: --enable-yjit builds use `rustc` without going through Cargo. You
+# might want to update the `rustc` invocation if you change this profile.
+opt-level = 3
+# The extra robustness that comes from checking for arithmetic overflow is
+# worth the performance cost for the compiler.
+overflow-checks = true
diff --git a/yjit/bindgen/Cargo.lock b/yjit/bindgen/Cargo.lock
new file mode 100644
index 0000000000..cf23cbf783
--- /dev/null
+++ b/yjit/bindgen/Cargo.lock
@@ -0,0 +1,345 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "ansi_term"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.59.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "clap",
+ "env_logger",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "peeking_take_while",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "which",
+]
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "clang-sys"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cc00842eed744b858222c4c9faf7243aafc6d33f92f96935263ef4d8a41ce21"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "clap"
+version = "2.34.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags",
+ "strsim",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+]
+
+[[package]]
+name = "either"
+version = "1.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
+
+[[package]]
+name = "env_logger"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
+[[package]]
+name = "libc"
+version = "0.2.124"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
+
+[[package]]
+name = "libloading"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "efbc0f03f9a775e9f6aed295c6a1ba2253c5757a9e03d55c6caa46a681abcddd"
+dependencies = [
+ "cfg-if",
+ "winapi",
+]
+
+[[package]]
+name = "log"
+version = "0.4.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "memchr"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "peeking_take_while"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1"
+dependencies = [
+ "unicode-xid",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "regex"
+version = "1.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "shlex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
+
+[[package]]
+name = "strsim"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
+[[package]]
+name = "termcolor"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "unicode-width"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
+
+[[package]]
+name = "vec_map"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
+
+[[package]]
+name = "which"
+version = "4.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c4fb54e6113b6a8772ee41c3404fb0301ac79604489467e0a9ce1f3e97c24ae"
+dependencies = [
+ "either",
+ "lazy_static",
+ "libc",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "yjit-bindgen"
+version = "0.1.0"
+dependencies = [
+ "bindgen",
+]
diff --git a/yjit/bindgen/Cargo.toml b/yjit/bindgen/Cargo.toml
new file mode 100644
index 0000000000..18a8fbc463
--- /dev/null
+++ b/yjit/bindgen/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "yjit-bindgen"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bindgen = "0.59.2"
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
new file mode 100644
index 0000000000..c7766a2992
--- /dev/null
+++ b/yjit/bindgen/src/main.rs
@@ -0,0 +1,286 @@
+//! See https://docs.rs/bindgen/0.59.2/bindgen/struct.Builder.html
+//! This is the binding generation tool that the YJIT cruby module talks about.
+//! More docs will come once we have more experience with this; for now, check
+//! the output to make sure it looks reasonable and allowlist things you want
+//! to use in Rust.
+extern crate bindgen;
+
+use std::env;
+use std::path::PathBuf;
+
+const SRC_ROOT_ENV: &str = "YJIT_SRC_ROOT_PATH";
+
+fn main() {
+ // Path to the repo is a required input so that `configure` can be run
+ // in a directory away from the code.
+ let src_root = env::var(SRC_ROOT_ENV).expect(
+ format!(
+ r#"The "{}" env var must be a path to the root of the Ruby repo"#,
+ SRC_ROOT_ENV
+ )
+ .as_ref(),
+ );
+ let src_root = PathBuf::from(src_root);
+
+ assert!(
+ src_root.is_dir(),
+ "{} must be set to a path to a directory",
+ SRC_ROOT_ENV
+ );
+
+ // Remove this flag so rust-bindgen generates bindings
+ // for internal functions that are not public in libruby
+ let filtered_clang_args = env::args().filter(|arg| arg != "-fvisibility=hidden");
+
+ let bindings = bindgen::builder()
+ .clang_args(filtered_clang_args)
+ .header("internal.h")
+ .header("internal/re.h")
+ .header("include/ruby/ruby.h")
+ .header("vm_core.h")
+ .header("vm_callinfo.h")
+
+ // Our C file for glue code
+ .header(src_root.join("yjit.c").to_str().unwrap())
+
+ // Don't want to copy over C comments
+ .generate_comments(false)
+
+ // Don't want layout tests as they are platform dependent
+ .layout_tests(false)
+
+ // Block for stability since output is different on Darwin and Linux
+ .blocklist_type("size_t")
+ .blocklist_type("fpos_t")
+
+ // Prune these types since they are system dependent and we don't use them
+ .blocklist_type("__.*")
+
+ // From include/ruby/internal/intern/string.h
+ .allowlist_function("rb_utf8_str_new")
+
+ // This struct is public to Ruby C extensions
+ // From include/ruby/internal/core/rbasic.h
+ .allowlist_type("RBasic")
+
+ // From internal.h
+ // This function prints info about a value and is useful for debugging
+ .allowlist_function("rb_obj_info_dump")
+
+ // From ruby/internal/intern/object.h
+ .allowlist_function("rb_obj_is_kind_of")
+
+ // From include/hash.h
+ .allowlist_function("rb_hash_new")
+
+ // From internal/hash.h
+ .allowlist_function("rb_hash_new_with_size")
+ .allowlist_function("rb_hash_resurrect")
+
+ // From include/ruby/internal/intern/hash.h
+ .allowlist_function("rb_hash_aset")
+ .allowlist_function("rb_hash_aref")
+ .allowlist_function("rb_hash_bulk_insert")
+
+ // From include/ruby/internal/intern/array.h
+ .allowlist_function("rb_ary_new_capa")
+ .allowlist_function("rb_ary_store")
+ .allowlist_function("rb_ary_resurrect")
+ .allowlist_function("rb_ary_clear")
+
+ // From internal/array.h
+ .allowlist_function("rb_ec_ary_new_from_values")
+ .allowlist_function("rb_ary_tmp_new_from_values")
+
+ // From include/ruby/internal/intern/class.h
+ .allowlist_function("rb_singleton_class")
+
+ // From include/ruby/internal/core/rclass.h
+ .allowlist_function("rb_class_get_superclass")
+
+ // From include/ruby/internal/intern/gc.h
+ .allowlist_function("rb_gc_mark")
+ .allowlist_function("rb_gc_mark_movable")
+ .allowlist_function("rb_gc_location")
+
+ // VALUE variables for Ruby class objects
+ // From include/ruby/internal/globals.h
+ .allowlist_var("rb_cBasicObject")
+ .allowlist_var("rb_cModule")
+ .allowlist_var("rb_cNilClass")
+ .allowlist_var("rb_cTrueClass")
+ .allowlist_var("rb_cFalseClass")
+ .allowlist_var("rb_cInteger")
+ .allowlist_var("rb_cSymbol")
+ .allowlist_var("rb_cFloat")
+ .allowlist_var("rb_cString")
+ .allowlist_var("rb_cThread")
+ .allowlist_var("rb_cArray")
+ .allowlist_var("rb_cHash")
+
+ // From ruby/internal/globals.h
+ .allowlist_var("rb_mKernel")
+
+ // From vm_callinfo.h
+ .allowlist_type("VM_CALL.*") // This doesn't work, possibly due to the odd structure of the #defines
+ .allowlist_type("vm_call_flag_bits") // So instead we include the other enum and do the bit-shift ourselves.
+ .allowlist_type("rb_call_data")
+ .blocklist_type("rb_callcache.*") // Not used yet - opaque to make it easy to import rb_call_data
+ .opaque_type("rb_callcache.*")
+ .blocklist_type("rb_callinfo_kwarg") // Contains a VALUE[] array of undefined size, so we don't import
+ .opaque_type("rb_callinfo_kwarg")
+ .allowlist_type("rb_callinfo")
+
+ // From vm_insnhelper.h
+ .allowlist_var("VM_ENV_DATA_INDEX_ME_CREF")
+ .allowlist_var("rb_block_param_proxy")
+
+ // From include/ruby/internal/intern/range.h
+ .allowlist_function("rb_range_new")
+
+ // From include/ruby/internal/symbol.h
+ .allowlist_function("rb_intern")
+ .allowlist_function("rb_id2sym")
+ .allowlist_function("rb_sym2id")
+ .allowlist_function("rb_str_intern")
+
+ // From internal/string.h
+ .allowlist_function("rb_ec_str_resurrect")
+ .allowlist_function("rb_str_concat_literals")
+ .allowlist_function("rb_obj_as_string_result")
+
+ // From include/ruby/internal/intern/parse.h
+ .allowlist_function("rb_backref_get")
+
+ // From include/ruby/internal/intern/re.h
+ .allowlist_function("rb_reg_last_match")
+ .allowlist_function("rb_reg_match_pre")
+ .allowlist_function("rb_reg_match_post")
+ .allowlist_function("rb_reg_match_last")
+ .allowlist_function("rb_reg_nth_match")
+
+ // From internal/re.h
+ .allowlist_function("rb_reg_new_ary")
+
+ // `ruby_value_type` is a C enum and this stops it from
+ // prefixing all the members with the name of the type
+ .prepend_enum_name(false)
+ .translate_enum_integer_types(true) // so we get fixed width Rust types for members
+ // From include/ruby/internal/value_type.h
+ .allowlist_type("ruby_value_type") // really old C extension API
+
+ // Autogenerated into id.h
+ .allowlist_type("ruby_method_ids")
+
+ // From method.h
+ .allowlist_type("rb_method_visibility_t")
+ .allowlist_type("rb_method_type_t")
+ .allowlist_type("method_optimized_type")
+ .allowlist_type("rb_callable_method_entry_t")
+ .allowlist_type("rb_callable_method_entry_struct")
+ .allowlist_function("rb_method_entry_at")
+ .allowlist_type("rb_method_entry_t")
+ .blocklist_type("rb_method_cfunc_t")
+ .blocklist_type("rb_method_definition_.*") // Large struct with a bitfield and union of many types - don't import (yet?)
+ .opaque_type("rb_method_definition_.*")
+
+ // From vm_core.h
+ .allowlist_var("rb_mRubyVMFrozenCore")
+ .allowlist_var("VM_BLOCK_HANDLER_NONE")
+ .allowlist_type("vm_frame_env_flags")
+ .allowlist_type("rb_seq_param_keyword_struct")
+ .allowlist_type("ruby_basic_operators")
+ .allowlist_var(".*_REDEFINED_OP_FLAG")
+ .allowlist_type("rb_num_t")
+ .allowlist_function("rb_callable_method_entry")
+ .allowlist_function("rb_vm_frame_method_entry")
+ .allowlist_type("IVC") // pointer to iseq_inline_iv_cache_entry
+ .allowlist_type("IC") // pointer to iseq_inline_constant_cache
+ .allowlist_type("iseq_inline_constant_cache_entry")
+ .blocklist_type("rb_cref_t") // don't need this directly, opaqued to allow IC import
+ .opaque_type("rb_cref_t")
+ .allowlist_type("iseq_inline_iv_cache_entry")
+ .allowlist_type("ICVARC") // pointer to iseq_inline_cvar_cache_entry
+ .allowlist_type("iseq_inline_cvar_cache_entry")
+ .blocklist_type("rb_execution_context_.*") // Large struct with various-type fields and an ifdef, so we don't import
+ .opaque_type("rb_execution_context_.*")
+ .blocklist_type("rb_control_frame_struct")
+ .opaque_type("rb_control_frame_struct")
+
+ // From yjit.c
+ .allowlist_function("rb_iseq_(get|set)_yjit_payload")
+ .allowlist_function("rb_iseq_pc_at_idx")
+ .allowlist_function("rb_iseq_opcode_at_pc")
+ .allowlist_function("rb_yjit_mark_writable")
+ .allowlist_function("rb_yjit_mark_executable")
+ .allowlist_function("rb_yjit_get_page_size")
+ .allowlist_function("rb_leaf_invokebuiltin_iseq_p")
+ .allowlist_function("rb_leaf_builtin_function")
+ .allowlist_function("rb_set_cfp_(pc|sp)")
+ .allowlist_function("rb_cfp_get_iseq")
+ .allowlist_function("rb_yjit_multi_ractor_p")
+ .allowlist_function("rb_c_method_tracing_currently_enabled")
+ .allowlist_function("rb_full_cfunc_return")
+ .allowlist_function("rb_yjit_vm_lock_then_barrier")
+ .allowlist_function("rb_yjit_vm_unlock")
+ .allowlist_function("rb_assert_(iseq|cme)_handle")
+ .allowlist_function("rb_IMEMO_TYPE_P")
+ .allowlist_function("rb_iseq_reset_jit_func")
+ .allowlist_function("rb_yjit_dump_iseq_loc")
+ .allowlist_function("rb_yjit_for_each_iseq")
+ .allowlist_function("rb_yjit_obj_written")
+
+ // from vm_sync.h
+ .allowlist_function("rb_vm_barrier")
+
+ // Not sure why it's picking these up, but don't import them.
+ .blocklist_type("FILE")
+ .blocklist_type("_IO_.*")
+
+ // From internal/compile.h
+ .allowlist_function("rb_vm_insn_decode")
+
+ // From iseq.h
+ .allowlist_function("rb_vm_insn_addr2opcode")
+ .allowlist_function("rb_iseqw_to_iseq")
+ .allowlist_function("rb_iseq_each")
+
+ // From builtin.h
+ .allowlist_type("rb_builtin_function.*")
+
+ // From internal/variable.h
+ .allowlist_function("rb_gvar_(get|set)")
+ .allowlist_function("rb_obj_ensure_iv_index_mapping")
+
+ // From include/ruby/internal/intern/variable.h
+ .allowlist_function("rb_attr_get")
+ .allowlist_function("rb_ivar_get")
+
+ // From include/ruby/internal/intern/vm.h
+ .allowlist_function("rb_get_alloc_func")
+
+ // From gc.h and internal/gc.h
+ .allowlist_function("rb_class_allocate_instance")
+ .allowlist_function("rb_obj_info")
+
+ // We define VALUE manually, don't import it
+ .blocklist_type("VALUE")
+
+ // From iseq.h
+ .opaque_type("rb_iseq_t")
+ .blocklist_type("rb_iseq_t")
+
+ // Finish the builder and generate the bindings.
+ .generate()
+ // Unwrap the Result and panic on failure.
+ .expect("Unable to generate bindings");
+
+ let mut out_path: PathBuf = src_root;
+ out_path.push("yjit");
+ out_path.push("src");
+ out_path.push("cruby_bindings.inc.rs");
+
+ bindings
+ .write_to_file(out_path)
+ .expect("Couldn't write bindings!");
+}
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs
new file mode 100644
index 0000000000..0d61cd654a
--- /dev/null
+++ b/yjit/src/asm/mod.rs
@@ -0,0 +1,392 @@
+use std::collections::BTreeMap;
+use std::mem;
+
+// Lots of manual vertical alignment in there that rustfmt doesn't handle well.
+#[rustfmt::skip]
+pub mod x86_64;
+
+/// Pointer to a piece of machine code
+/// We may later change this to wrap a u32
+/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead.
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)]
+#[repr(C)]
+pub struct CodePtr(*const u8);
+
+impl CodePtr {
+ pub fn raw_ptr(&self) -> *const u8 {
+ let CodePtr(ptr) = *self;
+ return ptr;
+ }
+
+ fn into_i64(&self) -> i64 {
+ let CodePtr(ptr) = self;
+ *ptr as i64
+ }
+
+ fn into_usize(&self) -> usize {
+ let CodePtr(ptr) = self;
+ *ptr as usize
+ }
+}
+
+impl From<*mut u8> for CodePtr {
+ fn from(value: *mut u8) -> Self {
+ assert!(value as usize != 0);
+ return CodePtr(value);
+ }
+}
+
+/// Compute an offset in bytes of a given struct field
+macro_rules! offset_of {
+ ($struct_type:ty, $field_name:tt) => {{
+ // Null pointer to our struct type
+ let foo = (0 as *const $struct_type);
+
+ unsafe {
+ let ptr_field = (&(*foo).$field_name as *const _ as usize);
+ let ptr_base = (foo as usize);
+ ptr_field - ptr_base
+ }
+ }};
+}
+pub(crate) use offset_of;
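// A minimal sketch (illustrative only, not from the patch) of how the offset_of!
// macro above can be exercised. The `Example` struct is hypothetical; with
// #[repr(C)], `b` starts 8 bytes in, right after the u64 field `a`.
#[cfg(test)]
mod offset_of_sketch {
    #[repr(C)]
    struct Example {
        a: u64,
        b: u64,
    }

    #[test]
    fn computes_field_offsets_in_bytes() {
        // offset_of! is textually in scope because it is defined above.
        assert_eq!(offset_of!(Example, a), 0);
        assert_eq!(offset_of!(Example, b), 8);
    }
}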
+
+//
+// TODO: need a field_size_of macro, to compute the size of a struct field in bytes
+//
+
+// 1 is not aligned so this won't match any pages
+const ALIGNED_WRITE_POSITION_NONE: usize = 1;
+
+/// Reference to an ASM label
+struct LabelRef {
+ // Position in the code block where the label reference exists
+ pos: usize,
+
+ // Label which this refers to
+ label_idx: usize,
+}
+
+/// Block of memory into which instructions can be assembled
+pub struct CodeBlock {
+ // Block of non-executable memory used for dummy code blocks
+ // This memory is owned by this block and lives as long as the block
+ dummy_block: Vec<u8>,
+
+ // Pointer to memory we are writing into
+ mem_block: *mut u8,
+
+ // Memory block size
+ mem_size: usize,
+
+ // Current writing position
+ write_pos: usize,
+
+ // Table of registered label addresses
+ label_addrs: Vec<usize>,
+
+ // Table of registered label names
+ label_names: Vec<String>,
+
+ // References to labels
+ label_refs: Vec<LabelRef>,
+
+ // Comments for assembly instructions, if that feature is enabled
+ asm_comments: BTreeMap<usize, Vec<String>>,
+
+ // Keep track of the current aligned write position.
+ // Used for changing protection when writing to the JIT buffer
+ current_aligned_write_pos: usize,
+
+ // Memory protection works at page granularity and this is the
+ // size of each page. Used to implement W^X.
+ page_size: usize,
+
+ // Set if the CodeBlock is unable to output some instructions,
+ // for example, when there is not enough space or when a jump
+ // target is too far away.
+ dropped_bytes: bool,
+}
+
+impl CodeBlock {
+ pub fn new_dummy(mem_size: usize) -> Self {
+ // Allocate some non-executable memory
+ let mut dummy_block = vec![0; mem_size];
+ let mem_ptr = dummy_block.as_mut_ptr();
+
+ Self {
+ dummy_block: dummy_block,
+ mem_block: mem_ptr,
+ mem_size: mem_size,
+ write_pos: 0,
+ label_addrs: Vec::new(),
+ label_names: Vec::new(),
+ label_refs: Vec::new(),
+ asm_comments: BTreeMap::new(),
+ current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
+ page_size: 4096,
+ dropped_bytes: false,
+ }
+ }
+
+ pub fn new(mem_block: *mut u8, mem_size: usize, page_size: usize) -> Self {
+ Self {
+ dummy_block: vec![0; 0],
+ mem_block: mem_block,
+ mem_size: mem_size,
+ write_pos: 0,
+ label_addrs: Vec::new(),
+ label_names: Vec::new(),
+ label_refs: Vec::new(),
+ asm_comments: BTreeMap::new(),
+ current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE,
+ page_size,
+ dropped_bytes: false,
+ }
+ }
+
+ // Check if this code block has sufficient remaining capacity
+ pub fn has_capacity(&self, num_bytes: usize) -> bool {
+ self.write_pos + num_bytes < self.mem_size
+ }
+
+ /// Add an assembly comment if the feature is on.
+ /// If not, this becomes an inline no-op.
+ #[inline]
+ pub fn add_comment(&mut self, comment: &str) {
+ if cfg!(feature = "asm_comments") {
+ let cur_ptr = self.get_write_ptr().into_usize();
+ let this_line_comments = self.asm_comments.get(&cur_ptr);
+
+ // If there's no current list of comments for this line number, add one.
+ if this_line_comments.is_none() {
+ let new_comments = Vec::new();
+ self.asm_comments.insert(cur_ptr, new_comments);
+ }
+ let this_line_comments = self.asm_comments.get_mut(&cur_ptr).unwrap();
+
+ // Unless this comment is the same as the last one at this same line, add it.
+ let string_comment = String::from(comment);
+ if this_line_comments.last() != Some(&string_comment) {
+ this_line_comments.push(string_comment);
+ }
+ }
+ }
+
+ pub fn comments_at(&self, pos: usize) -> Option<&Vec<String>> {
+ self.asm_comments.get(&pos)
+ }
+
+ pub fn get_mem_size(&self) -> usize {
+ self.mem_size
+ }
+
+ pub fn get_write_pos(&self) -> usize {
+ self.write_pos
+ }
+
+ // Set the current write position
+ pub fn set_pos(&mut self, pos: usize) {
+ // Assert here because, while CodeBlock functions do bounds checking, there is
+ // nothing stopping users from taking out an out-of-bounds pointer and
+ // doing bad accesses with it.
+ assert!(pos < self.mem_size);
+ self.write_pos = pos;
+ }
+
+ // Align the current write position to a multiple of the given number of bytes
+ pub fn align_pos(&mut self, multiple: u32) {
+ // Compute the aligned position that is lower than or equal to the current one
+ // Do everything with usize
+ let multiple: usize = multiple.try_into().unwrap();
+ let pos = self.get_write_ptr().raw_ptr() as usize;
+ let remainder = pos % multiple;
+ let prev_aligned = pos - remainder;
+
+ if prev_aligned == pos {
+ // Already aligned so do nothing
+ } else {
+ // Align by advancing
+ let pad = multiple - remainder;
+ self.set_pos(self.get_write_pos() + pad);
+ }
+ }
+
+ // Set the current write position from a pointer
+ pub fn set_write_ptr(&mut self, code_ptr: CodePtr) {
+ let pos = (code_ptr.raw_ptr() as usize) - (self.mem_block as usize);
+ self.set_pos(pos);
+ }
+
+ // Get a direct pointer into the executable memory block
+ pub fn get_ptr(&self, offset: usize) -> CodePtr {
+ unsafe {
+ let ptr = self.mem_block.offset(offset as isize);
+ CodePtr(ptr)
+ }
+ }
+
+ // Get a direct pointer to the current write position
+ pub fn get_write_ptr(&mut self) -> CodePtr {
+ self.get_ptr(self.write_pos)
+ }
+
+ // Write a single byte at the current position
+ pub fn write_byte(&mut self, byte: u8) {
+ if self.write_pos < self.mem_size {
+ self.mark_position_writable(self.write_pos);
+ unsafe { self.mem_block.add(self.write_pos).write(byte) };
+ self.write_pos += 1;
+ } else {
+ self.dropped_bytes = true;
+ }
+ }
+
+ // Read a single byte at the given position
+ pub fn read_byte(&self, pos: usize) -> u8 {
+ assert!(pos < self.mem_size);
+ unsafe { self.mem_block.add(pos).read() }
+ }
+
+ // Write multiple bytes starting from the current position
+ pub fn write_bytes(&mut self, bytes: &[u8]) {
+ for byte in bytes {
+ self.write_byte(*byte);
+ }
+ }
+
+ // Write a signed integer over a given number of bits at the current position
+ pub fn write_int(&mut self, val: u64, num_bits: u32) {
+ assert!(num_bits > 0);
+ assert!(num_bits % 8 == 0);
+
+ // Switch on the number of bits
+ match num_bits {
+ 8 => self.write_byte(val as u8),
+ 16 => self.write_bytes(&[(val & 0xff) as u8, ((val >> 8) & 0xff) as u8]),
+ 32 => self.write_bytes(&[
+ (val & 0xff) as u8,
+ ((val >> 8) & 0xff) as u8,
+ ((val >> 16) & 0xff) as u8,
+ ((val >> 24) & 0xff) as u8,
+ ]),
+ _ => {
+ let mut cur = val;
+
+ // Write out the bytes
+ for _byte in 0..(num_bits / 8) {
+ self.write_byte((cur & 0xff) as u8);
+ cur >>= 8;
+ }
+ }
+ }
+ }
+
+ /// Check if bytes have been dropped (unwritten because of insufficient space)
+ pub fn has_dropped_bytes(&self) -> bool {
+ self.dropped_bytes
+ }
+
+ /// Allocate a new label with a given name
+ pub fn new_label(&mut self, name: String) -> usize {
+ // This label doesn't have an address yet
+ self.label_addrs.push(0);
+ self.label_names.push(name);
+
+ return self.label_addrs.len() - 1;
+ }
+
+ /// Write a label at the current address
+ pub fn write_label(&mut self, label_idx: usize) {
+ // TODO: make sure that label_idx is valid
+ // TODO: add an assert here
+
+ self.label_addrs[label_idx] = self.write_pos;
+ }
+
+ // Add a label reference at the current write position
+ pub fn label_ref(&mut self, label_idx: usize) {
+ // TODO: make sure that label_idx is valid
+ // TODO: add an assert here
+
+ // Keep track of the reference
+ self.label_refs.push(LabelRef {
+ pos: self.write_pos,
+ label_idx,
+ });
+ }
+
+ // Link internal label references
+ pub fn link_labels(&mut self) {
+ let orig_pos = self.write_pos;
+
+ // For each label reference
+ for label_ref in mem::take(&mut self.label_refs) {
+ let ref_pos = label_ref.pos;
+ let label_idx = label_ref.label_idx;
+ assert!(ref_pos < self.mem_size);
+
+ let label_addr = self.label_addrs[label_idx];
+ assert!(label_addr < self.mem_size);
+
+ // Compute the offset from the reference's end to the label
+ let offset = (label_addr as i64) - ((ref_pos + 4) as i64);
+
+ self.set_pos(ref_pos);
+ self.write_int(offset as u64, 32);
+ }
+
+ self.write_pos = orig_pos;
+
+ // Clear the label positions and references
+ self.label_addrs.clear();
+ self.label_names.clear();
+ assert!(self.label_refs.is_empty());
+ }
+
+ pub fn mark_position_writable(&mut self, write_pos: usize) {
+ let page_size = self.page_size;
+ let aligned_position = (write_pos / page_size) * page_size;
+
+ if self.current_aligned_write_pos != aligned_position {
+ self.current_aligned_write_pos = aligned_position;
+
+ #[cfg(not(test))]
+ unsafe {
+ use core::ffi::c_void;
+ let page_ptr = self.get_ptr(aligned_position).raw_ptr() as *mut c_void;
+ crate::cruby::rb_yjit_mark_writable(page_ptr, page_size.try_into().unwrap());
+ }
+ }
+ }
+
+ pub fn mark_all_executable(&mut self) {
+ self.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
+
+ #[cfg(not(test))]
+ unsafe {
+ use core::ffi::c_void;
+ // NOTE(alan): Right now we allocate one big chunk and give the top half to the outlined code block
+ // The start of the top half of the region isn't necessarily a page boundary...
+ let cb_start = self.get_ptr(0).raw_ptr() as *mut c_void;
+ crate::cruby::rb_yjit_mark_executable(cb_start, self.mem_size.try_into().unwrap());
+ }
+ }
+}
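// A minimal sketch (illustrative only, not from the patch) of the byte order
// write_int produces: integers are emitted little-endian, one byte at a time.
// The dummy CodeBlock avoids touching executable memory, so this runs under
// `cargo test`.
#[cfg(test)]
mod write_int_sketch {
    use super::CodeBlock;

    #[test]
    fn write_int_emits_little_endian_bytes() {
        let mut cb = CodeBlock::new_dummy(16);
        cb.write_int(0x1122_3344, 32);
        assert_eq!(
            [cb.read_byte(0), cb.read_byte(1), cb.read_byte(2), cb.read_byte(3)],
            [0x44, 0x33, 0x22, 0x11]
        );
    }
}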
+
+/// Wrapper struct so we can use the type system to distinguish
+/// between the inlined and outlined code blocks
+pub struct OutlinedCb {
+ // This must remain private
+ cb: CodeBlock,
+}
+
+impl OutlinedCb {
+ pub fn wrap(cb: CodeBlock) -> Self {
+ OutlinedCb { cb: cb }
+ }
+
+ pub fn unwrap(&mut self) -> &mut CodeBlock {
+ &mut self.cb
+ }
+}
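// A minimal sketch (illustrative only, not from the patch) of the label
// machinery above: a label reference reserves a 32-bit field, and
// link_labels() later patches it with the distance from the end of that
// field to the label's address.
#[cfg(test)]
mod label_link_sketch {
    use super::CodeBlock;

    #[test]
    fn link_labels_patches_end_relative_offsets() {
        let mut cb = CodeBlock::new_dummy(64);
        let label = cb.new_label("target".to_string());

        cb.write_byte(0x90);           // some instruction byte at position 0
        cb.label_ref(label);           // reference recorded at position 1
        cb.write_int(0, 32);           // 4-byte placeholder, positions 1..=4
        cb.write_bytes(&[0x90, 0x90]); // two more bytes, positions 5..=6
        cb.write_label(label);         // label address is position 7
        cb.link_labels();

        // Offset = label address (7) - end of the placeholder (1 + 4) = 2.
        assert_eq!(cb.read_byte(1), 2);
    }
}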
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
new file mode 100644
index 0000000000..c748ec1154
--- /dev/null
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -0,0 +1,1395 @@
+use std::io::{Result, Write};
+use std::mem;
+use crate::asm::*;
+
+// Import the assembler tests module
+mod tests;
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86Imm
+{
+ // Size in bits
+ num_bits: u8,
+
+ // The value of the immediate
+ value: i64
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86UImm
+{
+ // Size in bits
+ num_bits: u8,
+
+ // The value of the immediate
+ value: u64
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum RegType
+{
+ GP,
+ //FP,
+ //XMM,
+ IP,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86Reg
+{
+ // Size in bits
+ num_bits: u8,
+
+ // Register type
+ reg_type: RegType,
+
+ // Register index number
+ reg_no: u8,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct X86Mem
+{
+ // Size in bits
+ num_bits: u8,
+
+ /// Base register number
+ base_reg_no: u8,
+
+ /// Index register number
+ idx_reg_no: Option<u8>,
+
+ /// SIB scale exponent value (power of two, two bits)
+ scale_exp: u8,
+
+ /// Constant displacement from the base, not scaled
+ disp: i32,
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum X86Opnd
+{
+ // Dummy operand
+ None,
+
+ // Immediate value
+ Imm(X86Imm),
+
+ // Unsigned immediate
+ UImm(X86UImm),
+
+ // General-purpose register
+ Reg(X86Reg),
+
+ // Memory location
+ Mem(X86Mem),
+
+ // IP-relative memory location
+ IPRel(i32)
+}
+
+impl X86Opnd {
+ fn rex_needed(&self) -> bool {
+ match self {
+ X86Opnd::None => false,
+ X86Opnd::Imm(_) => false,
+ X86Opnd::UImm(_) => false,
+ X86Opnd::Reg(reg) => reg.reg_no > 7 || reg.num_bits == 8 && reg.reg_no >= 4,
+ X86Opnd::Mem(mem) => (mem.base_reg_no > 7 || (mem.idx_reg_no.unwrap_or(0) > 7)),
+ X86Opnd::IPRel(_) => false
+ }
+ }
+
+ // Check if an SIB byte is needed to encode this operand
+ fn sib_needed(&self) -> bool {
+ match self {
+ X86Opnd::Mem(mem) => {
+ mem.idx_reg_no.is_some() ||
+ mem.base_reg_no == RSP_REG_NO ||
+ mem.base_reg_no == R12_REG_NO
+ },
+ _ => false
+ }
+ }
+
+ fn disp_size(&self) -> u32 {
+ match self {
+ X86Opnd::IPRel(_) => 32,
+ X86Opnd::Mem(mem) => {
+ if mem.disp != 0 {
+ // Compute the required displacement size
+ let num_bits = sig_imm_size(mem.disp.into());
+ if num_bits > 32 {
+ panic!("displacement does not fit in 32 bits");
+ }
+
+ // x86 can only encode 8-bit and 32-bit displacements
+ if num_bits == 16 { 32 } else { 8 }
+ } else if mem.base_reg_no == RBP_REG_NO || mem.base_reg_no == R13_REG_NO {
+ // If EBP, RBP, or R13 is used as the base, a displacement must be encoded
+ 8
+ } else {
+ 0
+ }
+ },
+ _ => 0
+ }
+ }
+
+ pub fn num_bits(&self) -> u8 {
+ match self {
+ X86Opnd::Reg(reg) => reg.num_bits,
+ X86Opnd::Imm(imm) => imm.num_bits,
+ X86Opnd::UImm(uimm) => uimm.num_bits,
+ X86Opnd::Mem(mem) => mem.num_bits,
+ _ => unreachable!()
+ }
+ }
+}
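// A minimal sketch (illustrative only, not from the patch) of the encoding
// queries above, using the register and memory constructors defined later in
// this file.
#[cfg(test)]
mod operand_encoding_sketch {
    use super::*;

    #[test]
    fn special_case_registers_need_extra_encoding_bytes() {
        // Registers r8..r15 need the REX.B extension bit.
        assert!(R8.rex_needed());
        // A base of RSP reuses the r/m slot for the SIB escape, so SIB is required.
        assert!(mem_opnd(64, RSP, 0).sib_needed());
        // A base of RBP with mod=00 would mean RIP-relative addressing, so an
        // explicit zero displacement (8 bits) has to be encoded instead.
        assert_eq!(mem_opnd(64, RBP, 0).disp_size(), 8);
    }
}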
+
+// Instruction pointer
+pub const RIP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::IP, reg_no: 5 });
+
+// 64-bit GP registers
+const RAX_REG_NO: u8 = 0;
+const RSP_REG_NO: u8 = 4;
+const RBP_REG_NO: u8 = 5;
+const R12_REG_NO: u8 = 12;
+const R13_REG_NO: u8 = 13;
+
+pub const RAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RAX_REG_NO });
+pub const RCX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 1 });
+pub const RDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 2 });
+pub const RBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 3 });
+pub const RSP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RSP_REG_NO });
+pub const RBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: RBP_REG_NO });
+pub const RSI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 6 });
+pub const RDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 7 });
+pub const R8: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 8 });
+pub const R9: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 9 });
+pub const R10: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 10 });
+pub const R11: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 11 });
+pub const R12: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R12_REG_NO });
+pub const R13: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: R13_REG_NO });
+pub const R14: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 14 });
+pub const R15: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 64, reg_type: RegType::GP, reg_no: 15 });
+
+// 32-bit GP registers
+pub const EAX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 0 });
+pub const ECX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 1 });
+pub const EDX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 2 });
+pub const EBX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 3 });
+pub const ESP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 4 });
+pub const EBP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 5 });
+pub const ESI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 6 });
+pub const EDI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 7 });
+pub const R8D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 8 });
+pub const R9D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 9 });
+pub const R10D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 10 });
+pub const R11D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 11 });
+pub const R12D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 12 });
+pub const R13D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 13 });
+pub const R14D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 14 });
+pub const R15D: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 32, reg_type: RegType::GP, reg_no: 15 });
+
+// 16-bit GP registers
+pub const AX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 0 });
+pub const CX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 1 });
+pub const DX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 2 });
+pub const BX: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 3 });
+pub const SP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 4 });
+pub const BP: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 5 });
+pub const SI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 6 });
+pub const DI: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 7 });
+pub const R8W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 8 });
+pub const R9W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 9 });
+pub const R10W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 10 });
+pub const R11W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 11 });
+pub const R12W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 12 });
+pub const R13W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 13 });
+pub const R14W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 14 });
+pub const R15W: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 16, reg_type: RegType::GP, reg_no: 15 });
+
+// 8-bit GP registers
+pub const AL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 0 });
+pub const CL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 1 });
+pub const DL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 2 });
+pub const BL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 3 });
+pub const SPL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 4 });
+pub const BPL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 5 });
+pub const SIL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 6 });
+pub const DIL: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 7 });
+pub const R8B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 8 });
+pub const R9B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 9 });
+pub const R10B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 10 });
+pub const R11B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 11 });
+pub const R12B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 12 });
+pub const R13B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 13 });
+pub const R14B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 14 });
+pub const R15B: X86Opnd = X86Opnd::Reg(X86Reg { num_bits: 8, reg_type: RegType::GP, reg_no: 15 });
+
+// C argument registers
+pub const C_ARG_REGS: [X86Opnd; 6] = [RDI, RSI, RDX, RCX, R8, R9];
+
+//===========================================================================
+
+/// Compute the number of bits needed to encode a signed value
+pub fn sig_imm_size(imm: i64) -> u8
+{
+ // Compute the smallest size this immediate fits in
+ if imm >= i8::MIN.into() && imm <= i8::MAX.into() {
+ return 8;
+ }
+ if imm >= i16::MIN.into() && imm <= i16::MAX.into() {
+ return 16;
+ }
+ if imm >= i32::MIN.into() && imm <= i32::MAX.into() {
+ return 32;
+ }
+
+ return 64;
+}
+
+/// Compute the number of bits needed to encode an unsigned value
+pub fn unsig_imm_size(imm: u64) -> u8
+{
+ // Compute the smallest size this immediate fits in
+ if imm <= u8::MAX.into() {
+ return 8;
+ }
+ else if imm <= u16::MAX.into() {
+ return 16;
+ }
+ else if imm <= u32::MAX.into() {
+ return 32;
+ }
+
+ return 64;
+}
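// A minimal sketch (illustrative only, not from the patch) of how immediates
// are sized: the smallest of 8/16/32/64 bits that can represent the value is
// chosen.
#[cfg(test)]
mod imm_size_sketch {
    use super::{sig_imm_size, unsig_imm_size};

    #[test]
    fn immediates_round_up_to_the_next_encodable_width() {
        assert_eq!(sig_imm_size(127), 8);    // fits in i8
        assert_eq!(sig_imm_size(128), 16);   // needs i16
        assert_eq!(sig_imm_size(-129), 16);
        assert_eq!(unsig_imm_size(255), 8);  // fits in u8
        assert_eq!(unsig_imm_size(256), 16);
        assert_eq!(unsig_imm_size(u64::MAX), 64);
    }
}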
+
+/// Shorthand for memory operand with base register and displacement
+pub fn mem_opnd(num_bits: u8, base_reg: X86Opnd, disp: i32) -> X86Opnd
+{
+ let base_reg = match base_reg {
+ X86Opnd::Reg(reg) => reg,
+ _ => unreachable!()
+ };
+
+ if base_reg.reg_type == RegType::IP {
+ X86Opnd::IPRel(disp)
+ } else {
+ X86Opnd::Mem(
+ X86Mem {
+ num_bits: num_bits,
+ base_reg_no: base_reg.reg_no,
+ idx_reg_no: None,
+ scale_exp: 0,
+ disp: disp,
+ }
+ )
+ }
+}
+
+/// Memory operand with SIB (Scale Index Base) indexing
+pub fn mem_opnd_sib(num_bits: u8, base_opnd: X86Opnd, index_opnd: X86Opnd, scale: i32, disp: i32) -> X86Opnd {
+ if let (X86Opnd::Reg(base_reg), X86Opnd::Reg(index_reg)) = (base_opnd, index_opnd) {
+ let scale_exp: u8;
+
+ match scale {
+ 8 => { scale_exp = 3; },
+ 4 => { scale_exp = 2; },
+ 2 => { scale_exp = 1; },
+ 1 => { scale_exp = 0; },
+ _ => unreachable!()
+ };
+
+ X86Opnd::Mem(X86Mem {
+ num_bits,
+ base_reg_no: base_reg.reg_no,
+ idx_reg_no: Some(index_reg.reg_no),
+ scale_exp,
+ disp
+ })
+ } else {
+ unreachable!()
+ }
+}
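// A minimal sketch (illustrative only, not from the patch): the SIB scale is
// stored as a two-bit power-of-two exponent, so a scale of 8 becomes exponent 3.
#[cfg(test)]
mod sib_operand_sketch {
    use super::*;

    #[test]
    fn sib_scale_is_stored_as_a_power_of_two_exponent() {
        // qword ptr [rax + rcx * 8 + 16]
        match mem_opnd_sib(64, RAX, RCX, 8, 16) {
            X86Opnd::Mem(mem) => {
                assert_eq!(mem.scale_exp, 3);        // 2^3 == 8
                assert_eq!(mem.idx_reg_no, Some(1)); // RCX
                assert_eq!(mem.disp, 16);
            }
            _ => unreachable!(),
        }
    }
}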
+
+/*
+// Struct member operand
+#define member_opnd(base_reg, struct_type, member_name) mem_opnd( \
+ 8 * sizeof(((struct_type*)0)->member_name), \
+ base_reg, \
+ offsetof(struct_type, member_name) \
+)
+
+// Struct member operand with an array index
+#define member_opnd_idx(base_reg, struct_type, member_name, idx) mem_opnd( \
+ 8 * sizeof(((struct_type*)0)->member_name[0]), \
+ base_reg, \
+ (offsetof(struct_type, member_name) + \
+ sizeof(((struct_type*)0)->member_name[0]) * idx) \
+)
+*/
+
+/*
+// TODO: this should be a method, X86Opnd.resize() or X86Opnd.subreg()
+static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits)
+{
+ assert (num_bits % 8 == 0);
+ x86opnd_t sub = opnd;
+ sub.num_bits = num_bits;
+ return sub;
+}
+*/
+
+pub fn imm_opnd(value: i64) -> X86Opnd
+{
+ X86Opnd::Imm(X86Imm { num_bits: sig_imm_size(value), value })
+}
+
+pub fn uimm_opnd(value: u64) -> X86Opnd
+{
+ X86Opnd::UImm(X86UImm { num_bits: unsig_imm_size(value), value })
+}
+
+pub fn const_ptr_opnd(ptr: *const u8) -> X86Opnd
+{
+ uimm_opnd(ptr as u64)
+}
+
+pub fn code_ptr_opnd(code_ptr: CodePtr) -> X86Opnd
+{
+ uimm_opnd(code_ptr.raw_ptr() as u64)
+}
+
+/// Write the REX byte
+fn write_rex(cb: &mut CodeBlock, w_flag: bool, reg_no: u8, idx_reg_no: u8, rm_reg_no: u8) {
+ // 0 1 0 0 w r x b
+ // w - 64-bit operand size flag
+ // r - MODRM.reg extension
+ // x - SIB.index extension
+ // b - MODRM.rm or SIB.base extension
+ let w: u8 = if w_flag { 1 } else { 0 };
+ let r: u8 = if (reg_no & 8) > 0 { 1 } else { 0 };
+ let x: u8 = if (idx_reg_no & 8) > 0 { 1 } else { 0 };
+ let b: u8 = if (rm_reg_no & 8) > 0 { 1 } else { 0 };
+
+ // Encode and write the REX byte
+ cb.write_byte(0x40 + (w << 3) + (r << 2) + (x << 1) + (b));
+}
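// A minimal sketch (illustrative only, not from the patch) of the REX layout
// described above: 0100WRXB, so a 64-bit operation (W=1) that uses R8 as the
// r/m register (B=1) yields the byte 0x49.
#[cfg(test)]
mod rex_sketch {
    use super::*;

    #[test]
    fn rex_packs_w_r_x_b_into_the_low_nibble() {
        let mut cb = CodeBlock::new_dummy(8);
        write_rex(&mut cb, true, 0, 0, 8);
        assert_eq!(cb.read_byte(0), 0x49); // 0x40 | W<<3 | B
    }
}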
+
+/// Write an opcode byte with an embedded register operand
+fn write_opcode(cb: &mut CodeBlock, opcode: u8, reg: X86Reg) {
+ let op_byte: u8 = opcode | (reg.reg_no & 7);
+ cb.write_byte(op_byte);
+}
+
+/// Encode an RM instruction
+fn write_rm(cb: &mut CodeBlock, sz_pref: bool, rex_w: bool, r_opnd: X86Opnd, rm_opnd: X86Opnd, op_ext: u8, bytes: &[u8]) {
+ let op_len = bytes.len();
+ assert!(op_len > 0 && op_len <= 3);
+ assert!(matches!(r_opnd, X86Opnd::Reg(_) | X86Opnd::None), "Can only encode an RM instruction with a register or a none");
+
+ // Flag to indicate the REX prefix is needed
+ let need_rex = rex_w || r_opnd.rex_needed() || rm_opnd.rex_needed();
+
+ // Flag to indicate SIB byte is needed
+ let need_sib = r_opnd.sib_needed() || rm_opnd.sib_needed();
+
+ // Add the operand-size prefix, if needed
+ if sz_pref {
+ cb.write_byte(0x66);
+ }
+
+ // Add the REX prefix, if needed
+ if need_rex {
+ // 0 1 0 0 w r x b
+ // w - 64-bit operand size flag
+ // r - MODRM.reg extension
+ // x - SIB.index extension
+ // b - MODRM.rm or SIB.base extension
+
+ let w = if rex_w { 1 } else { 0 };
+ let r = match r_opnd {
+ X86Opnd::None => 0,
+ X86Opnd::Reg(reg) => if (reg.reg_no & 8) > 0 { 1 } else { 0 },
+ _ => unreachable!()
+ };
+
+ let x = match (need_sib, rm_opnd) {
+ (true, X86Opnd::Mem(mem)) => if (mem.idx_reg_no.unwrap_or(0) & 8) > 0 { 1 } else { 0 },
+ _ => 0
+ };
+
+ let b = match rm_opnd {
+ X86Opnd::Reg(reg) => if (reg.reg_no & 8) > 0 { 1 } else { 0 },
+ X86Opnd::Mem(mem) => if (mem.base_reg_no & 8) > 0 { 1 } else { 0 },
+ _ => 0
+ };
+
+ // Encode and write the REX byte
+ let rex_byte: u8 = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b);
+ cb.write_byte(rex_byte);
+ }
+
+ // Write the opcode bytes to the code block
+ for byte in bytes {
+ cb.write_byte(*byte)
+ }
+
+ // MODRM.mod (2 bits)
+ // MODRM.reg (3 bits)
+ // MODRM.rm (3 bits)
+
+ assert!(
+ !(op_ext != 0xff && !matches!(r_opnd, X86Opnd::None)),
+ "opcode extension and register operand present"
+ );
+
+ // Encode the mod field
+ let rm_mod = match rm_opnd {
+ X86Opnd::Reg(_) => 3,
+ X86Opnd::IPRel(_) => 0,
+ X86Opnd::Mem(_mem) => {
+ match rm_opnd.disp_size() {
+ 0 => 0,
+ 8 => 1,
+ 32 => 2,
+ _ => unreachable!()
+ }
+ },
+ _ => unreachable!()
+ };
+
+ // Encode the reg field
+ let reg: u8;
+ if op_ext != 0xff {
+ reg = op_ext;
+ } else {
+ reg = match r_opnd {
+ X86Opnd::Reg(reg) => reg.reg_no & 7,
+ _ => 0
+ };
+ }
+
+ // Encode the rm field
+ let rm = match rm_opnd {
+ X86Opnd::Reg(reg) => reg.reg_no & 7,
+ X86Opnd::Mem(mem) => if need_sib { 4 } else { mem.base_reg_no & 7 },
+ X86Opnd::IPRel(_) => 0b101,
+ _ => unreachable!()
+ };
+
+ // Encode and write the ModR/M byte
+ let rm_byte: u8 = (rm_mod << 6) + (reg << 3) + (rm);
+ cb.write_byte(rm_byte);
+
+ // Add the SIB byte, if needed
+ if need_sib {
+ // SIB.scale (2 bits)
+ // SIB.index (3 bits)
+ // SIB.base (3 bits)
+
+ match rm_opnd {
+ X86Opnd::Mem(mem) => {
+ // Encode the scale value
+ let scale = mem.scale_exp;
+
+ // Encode the index value
+ let index = mem.idx_reg_no.map(|no| no & 7).unwrap_or(4);
+
+ // Encode the base register
+ let base = mem.base_reg_no & 7;
+
+ // Encode and write the SIB byte
+ let sib_byte: u8 = (scale << 6) + (index << 3) + (base);
+ cb.write_byte(sib_byte);
+ },
+ _ => panic!("Expected mem operand")
+ }
+ }
+
+ // Add the displacement
+ match rm_opnd {
+ X86Opnd::Mem(mem) => {
+ let disp_size = rm_opnd.disp_size();
+ if disp_size > 0 {
+ cb.write_int(mem.disp as u64, disp_size);
+ }
+ },
+ X86Opnd::IPRel(rel) => {
+ cb.write_int(rel as u64, 32);
+ },
+ _ => ()
+ };
+}
+
+// Encode a mul-like single-operand RM instruction
+fn write_rm_unary(cb: &mut CodeBlock, op_mem_reg_8: u8, op_mem_reg_pref: u8, op_ext: u8, opnd: X86Opnd) {
+ assert!(matches!(opnd, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ let opnd_size = opnd.num_bits();
+ assert!(opnd_size == 8 || opnd_size == 16 || opnd_size == 32 || opnd_size == 64);
+
+ if opnd_size == 8 {
+ write_rm(cb, false, false, X86Opnd::None, opnd, op_ext, &[op_mem_reg_8]);
+ } else {
+ let sz_pref = opnd_size == 16;
+ let rex_w = opnd_size == 64;
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd, op_ext, &[op_mem_reg_pref]);
+ }
+}
+
+// Encode an add-like RM instruction with multiple possible encodings
+fn write_rm_multi(cb: &mut CodeBlock, op_mem_reg8: u8, op_mem_reg_pref: u8, op_reg_mem8: u8, op_reg_mem_pref: u8, op_mem_imm8: u8, op_mem_imm_sml: u8, op_mem_imm_lrg: u8, op_ext_imm: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
+ assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ // Check the size of opnd0
+ let opnd_size = opnd0.num_bits();
+ assert!(opnd_size == 8 || opnd_size == 16 || opnd_size == 32 || opnd_size == 64);
+
+ // Check the size of opnd1
+ match opnd1 {
+ X86Opnd::Reg(reg) => assert!(reg.num_bits == opnd_size),
+ X86Opnd::Mem(mem) => assert!(mem.num_bits == opnd_size),
+ X86Opnd::Imm(imm) => assert!(imm.num_bits <= opnd_size),
+ X86Opnd::UImm(uimm) => assert!(uimm.num_bits <= opnd_size),
+ _ => ()
+ };
+
+ let sz_pref = opnd_size == 16;
+ let rex_w = opnd_size == 64;
+
+ match (opnd0, opnd1) {
+ // R/M + Reg
+ (X86Opnd::Mem(_), X86Opnd::Reg(_)) | (X86Opnd::Reg(_), X86Opnd::Reg(_)) => {
+ if opnd_size == 8 {
+ write_rm(cb, false, false, opnd1, opnd0, 0xff, &[op_mem_reg8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, opnd1, opnd0, 0xff, &[op_mem_reg_pref]);
+ }
+ },
+ // Reg + R/M/IPRel
+ (X86Opnd::Reg(_), X86Opnd::Mem(_) | X86Opnd::IPRel(_)) => {
+ if opnd_size == 8 {
+ write_rm(cb, false, false, opnd0, opnd1, 0xff, &[op_reg_mem8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, opnd0, opnd1, 0xff, &[op_reg_mem_pref]);
+ }
+ },
+ // R/M + Imm
+ (_, X86Opnd::Imm(imm)) => {
+ if imm.num_bits <= 8 {
+ // 8-bit immediate
+
+ if opnd_size == 8 {
+ write_rm(cb, false, false, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_sml]);
+ }
+
+ cb.write_int(imm.value as u64, 8);
+ } else if imm.num_bits <= 32 {
+ // 32-bit immediate
+
+ assert!(imm.num_bits <= opnd_size);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]);
+ cb.write_int(imm.value as u64, if opnd_size > 32 { 32 } else { opnd_size.into() });
+ } else {
+ panic!("immediate value too large");
+ }
+ },
+ // R/M + UImm
+ (_, X86Opnd::UImm(uimm)) => {
+ let num_bits = sig_imm_size(uimm.value.try_into().unwrap());
+
+ if num_bits <= 8 {
+ // 8-bit immediate
+
+ if opnd_size == 8 {
+ write_rm(cb, false, false, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm8]);
+ } else {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_sml]);
+ }
+
+ cb.write_int(uimm.value, 8);
+ } else if num_bits <= 32 {
+ // 32-bit immediate
+
+ assert!(num_bits <= opnd_size);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext_imm, &[op_mem_imm_lrg]);
+ cb.write_int(uimm.value, if opnd_size > 32 { 32 } else { opnd_size.into() });
+ } else {
+ panic!("immediate value too large");
+ }
+ },
+ _ => unreachable!()
+ };
+}
+
+// LOCK - lock prefix for atomic shared memory operations
+pub fn write_lock_prefix(cb: &mut CodeBlock) {
+ cb.write_byte(0xf0);
+}
+
+/// add - Integer addition
+pub fn add(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x00, // opMemReg8
+ 0x01, // opMemRegPref
+ 0x02, // opRegMem8
+ 0x03, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x00, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
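// A minimal sketch (illustrative only, not from the patch) of the bytes the
// helpers above produce for a 64-bit register-register add: a REX.W prefix,
// the 0x01 opcode, then a ModR/M byte with mod=11, reg=rcx, rm=rax.
#[cfg(test)]
mod add_encoding_sketch {
    use super::*;

    #[test]
    fn add_rax_rcx_encodes_as_rex_w_opcode_modrm() {
        let mut cb = CodeBlock::new_dummy(16);
        add(&mut cb, RAX, RCX); // add rax, rcx
        assert_eq!(
            [cb.read_byte(0), cb.read_byte(1), cb.read_byte(2)],
            [0x48, 0x01, 0xc8]
        );
    }
}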
+
+/// and - Bitwise AND
+pub fn and(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x20, // opMemReg8
+ 0x21, // opMemRegPref
+ 0x22, // opRegMem8
+ 0x23, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x04, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// call - Call to a pointer with a 32-bit displacement offset
+pub fn call_rel32(cb: &mut CodeBlock, rel32: i32) {
+ // Write the opcode
+ cb.write_byte(0xe8);
+
+ // Write the relative 32-bit jump offset
+ cb.write_bytes(&rel32.to_le_bytes());
+}
+
+/// call - Call a pointer, encode with a 32-bit offset if possible
+pub fn call_ptr(cb: &mut CodeBlock, scratch_opnd: X86Opnd, dst_ptr: *const u8) {
+ if let X86Opnd::Reg(_scratch_reg) = scratch_opnd {
+ // Pointer to the end of this call instruction
+ let end_ptr = cb.get_ptr(cb.write_pos + 5);
+
+ // Compute the jump offset
+ let rel64: i64 = dst_ptr as i64 - end_ptr.into_i64();
+
+ // If the offset fits in 32-bit
+ if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
+ call_rel32(cb, rel64.try_into().unwrap());
+ return;
+ }
+
+ // Move the pointer into the scratch register and call
+ mov(cb, scratch_opnd, const_ptr_opnd(dst_ptr));
+ call(cb, scratch_opnd);
+ } else {
+ unreachable!();
+ }
+}
+
+/// call - Call to label with 32-bit offset
+pub fn call_label(cb: &mut CodeBlock, label_idx: usize) {
+ // Write the opcode
+ cb.write_byte(0xE8);
+
+ // Add a reference to the label
+ cb.label_ref(label_idx);
+
+ // Relative 32-bit offset to be patched
+ cb.write_int(0, 32);
+}
+
+/// call - Indirect call with an R/M operand
+pub fn call(cb: &mut CodeBlock, opnd: X86Opnd) {
+ write_rm(cb, false, false, X86Opnd::None, opnd, 2, &[0xff]);
+}
+
+/// Encode a conditional move instruction
+fn write_cmov(cb: &mut CodeBlock, opcode1: u8, dst: X86Opnd, src: X86Opnd) {
+ if let X86Opnd::Reg(reg) = dst {
+ match src {
+ X86Opnd::Reg(_) => (),
+ X86Opnd::Mem(_) => (),
+ _ => unreachable!()
+ };
+
+ assert!(reg.num_bits >= 16);
+ let sz_pref = reg.num_bits == 16;
+ let rex_w = reg.num_bits == 64;
+
+ write_rm(cb, sz_pref, rex_w, dst, src, 0xff, &[0x0f, opcode1]);
+ } else {
+ unreachable!()
+ }
+}
+
+// cmovcc - Conditional move
+pub fn cmova(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x47, dst, src); }
+pub fn cmovae(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); }
+pub fn cmovb(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); }
+pub fn cmovbe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x46, dst, src); }
+pub fn cmovc(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); }
+pub fn cmove(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x44, dst, src); }
+pub fn cmovg(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4f, dst, src); }
+pub fn cmovge(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4d, dst, src); }
+pub fn cmovl(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4c, dst, src); }
+pub fn cmovle(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4e, dst, src); }
+pub fn cmovna(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x46, dst, src); }
+pub fn cmovnae(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x42, dst, src); }
+pub fn cmovnb(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); }
+pub fn cmovnbe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x47, dst, src); }
+pub fn cmovnc(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x43, dst, src); }
+pub fn cmovne(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x45, dst, src); }
+pub fn cmovng(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4e, dst, src); }
+pub fn cmovnge(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4c, dst, src); }
+pub fn cmovnl(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4d, dst, src); }
+pub fn cmovnle(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4f, dst, src); }
+pub fn cmovno(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x41, dst, src); }
+pub fn cmovnp(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4b, dst, src); }
+pub fn cmovns(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x49, dst, src); }
+pub fn cmovnz(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x45, dst, src); }
+pub fn cmovo(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x40, dst, src); }
+pub fn cmovp(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4a, dst, src); }
+pub fn cmovpe(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4a, dst, src); }
+pub fn cmovpo(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x4b, dst, src); }
+pub fn cmovs(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x48, dst, src); }
+pub fn cmovz(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) { write_cmov(cb, 0x44, dst, src); }
+
+/// cmp - Compare and set flags
+pub fn cmp(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x38, // opMemReg8
+ 0x39, // opMemRegPref
+ 0x3A, // opRegMem8
+ 0x3B, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x07, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// cdq - Convert doubleword to quadword
+pub fn cdq(cb: &mut CodeBlock) {
+ cb.write_byte(0x99);
+}
+
+/// cqo - Convert quadword to octaword
+pub fn cqo(cb: &mut CodeBlock) {
+ cb.write_bytes(&[0x48, 0x99]);
+}
+
+/// Interrupt 3 - trap to debugger
+pub fn int3(cb: &mut CodeBlock) {
+ cb.write_byte(0xcc);
+}
+
+/// Encode a relative jump to a label (unconditional or conditional)
+/// Note: this always encodes a 32-bit offset
+fn write_jcc(cb: &mut CodeBlock, op0: u8, op1: u8, label_idx: usize) {
+ // Write the opcode
+ if op0 != 0xff {
+ cb.write_byte(op0);
+ }
+
+ cb.write_byte(op1);
+
+ // Add a reference to the label
+ cb.label_ref(label_idx);
+
+ // Relative 32-bit offset to be patched
+    cb.write_int(0, 32);
+}
+
+/// jcc - relative jumps to a label
+pub fn ja_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); }
+pub fn jae_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
+pub fn jb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
+pub fn jbe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); }
+pub fn jc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
+pub fn je_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); }
+pub fn jg_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); }
+pub fn jge_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); }
+pub fn jl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); }
+pub fn jle_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); }
+pub fn jna_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x86, label_idx); }
+pub fn jnae_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x82, label_idx); }
+pub fn jnb_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
+pub fn jnbe_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x87, label_idx); }
+pub fn jnc_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x83, label_idx); }
+pub fn jne_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); }
+pub fn jng_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8E, label_idx); }
+pub fn jnge_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8C, label_idx); }
+pub fn jnl_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8D, label_idx); }
+pub fn jnle_label(cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8F, label_idx); }
+pub fn jno_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x81, label_idx); }
+pub fn jnp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8b, label_idx); }
+pub fn jns_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x89, label_idx); }
+pub fn jnz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x85, label_idx); }
+pub fn jo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x80, label_idx); }
+pub fn jp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); }
+pub fn jpe_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8A, label_idx); }
+pub fn jpo_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x8B, label_idx); }
+pub fn js_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x88, label_idx); }
+pub fn jz_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0x0F, 0x84, label_idx); }
+pub fn jmp_label (cb: &mut CodeBlock, label_idx: usize) { write_jcc(cb, 0xFF, 0xE9, label_idx); }
+
+/// Encode a relative jump to a pointer with a 32-bit offset (unconditional or conditional)
+fn write_jcc_ptr(cb: &mut CodeBlock, op0: u8, op1: u8, dst_ptr: CodePtr) {
+ // Write the opcode
+ if op0 != 0xFF {
+ cb.write_byte(op0);
+ }
+
+ cb.write_byte(op1);
+
+ // Pointer to the end of this jump instruction
+ let end_ptr = cb.get_ptr(cb.write_pos + 4);
+
+ // Compute the jump offset
+ let rel64 = (dst_ptr.0 as i64) - (end_ptr.0 as i64);
+
+ if rel64 >= i32::MIN.into() && rel64 <= i32::MAX.into() {
+ // Write the relative 32-bit jump offset
+ cb.write_int(rel64 as u64, 32);
+ }
+ else {
+ // Offset doesn't fit in 4 bytes. Report error.
+ cb.dropped_bytes = true;
+ }
+}
+
+/// jcc - relative jumps to a pointer (32-bit offset)
+pub fn ja_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x87, ptr); }
+pub fn jae_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); }
+pub fn jb_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); }
+pub fn jbe_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x86, ptr); }
+pub fn jc_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); }
+pub fn je_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x84, ptr); }
+pub fn jg_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8F, ptr); }
+pub fn jge_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8D, ptr); }
+pub fn jl_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8C, ptr); }
+pub fn jle_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8E, ptr); }
+pub fn jna_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x86, ptr); }
+pub fn jnae_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x82, ptr); }
+pub fn jnb_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); }
+pub fn jnbe_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x87, ptr); }
+pub fn jnc_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x83, ptr); }
+pub fn jne_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x85, ptr); }
+pub fn jng_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8E, ptr); }
+pub fn jnge_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8C, ptr); }
+pub fn jnl_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8D, ptr); }
+pub fn jnle_ptr(cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8F, ptr); }
+pub fn jno_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x81, ptr); }
+pub fn jnp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8b, ptr); }
+pub fn jns_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x89, ptr); }
+pub fn jnz_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x85, ptr); }
+pub fn jo_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x80, ptr); }
+pub fn jp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8A, ptr); }
+pub fn jpe_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8A, ptr); }
+pub fn jpo_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x8B, ptr); }
+pub fn js_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x88, ptr); }
+pub fn jz_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0x0F, 0x84, ptr); }
+pub fn jmp_ptr (cb: &mut CodeBlock, ptr: CodePtr) { write_jcc_ptr(cb, 0xFF, 0xE9, ptr); }
+
+/// jmp - Indirect jump near to an R/M operand.
+pub fn jmp_rm(cb: &mut CodeBlock, opnd: X86Opnd) {
+ write_rm(cb, false, false, X86Opnd::None, opnd, 4, &[0xff]);
+}
+
+/// jmp - Jump with relative 32-bit offset
+pub fn jmp32(cb: &mut CodeBlock, offset: i32) {
+ cb.write_byte(0xE9);
+ cb.write_int(offset as u64, 32);
+}
+
+/// lea - Load Effective Address
+pub fn lea(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
+ if let X86Opnd::Reg(reg) = dst {
+ assert!(reg.num_bits == 64);
+ write_rm(cb, false, true, dst, src, 0xff, &[0x8d]);
+ } else {
+ unreachable!();
+ }
+}
+
+/// mov - Data move operation
+pub fn mov(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
+ match (dst, src) {
+ // R + Imm
+ (X86Opnd::Reg(reg), X86Opnd::Imm(imm)) => {
+ assert!(imm.num_bits <= reg.num_bits);
+
+            // If the source immediate can be zero-extended to 64 bits, we can
+            // use the 32-bit operand-size version of the instruction.
+ // For example, we can turn mov(rax, 0x34) into the equivalent
+ // mov(eax, 0x34).
+ if (reg.num_bits == 64) && (imm.value > 0) && (imm.num_bits <= 32) {
+ if dst.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0xB8, reg);
+ cb.write_int(imm.value as u64, 32);
+ } else {
+ if reg.num_bits == 16 {
+ cb.write_byte(0x66);
+ }
+
+ if dst.rex_needed() || reg.num_bits == 64 {
+ write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
+ }
+
+ write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
+ cb.write_int(imm.value as u64, reg.num_bits.into());
+ }
+ },
+ // R + UImm
+ (X86Opnd::Reg(reg), X86Opnd::UImm(uimm)) => {
+ assert!(uimm.num_bits <= reg.num_bits);
+
+            // If the source immediate can be zero-extended to 64 bits, we can
+            // use the 32-bit operand-size version of the instruction.
+ // For example, we can turn mov(rax, 0x34) into the equivalent
+ // mov(eax, 0x34).
+ if (reg.num_bits == 64) && (uimm.value <= u32::MAX.into()) {
+ if dst.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0xB8, reg);
+ cb.write_int(uimm.value, 32);
+ } else {
+ if reg.num_bits == 16 {
+ cb.write_byte(0x66);
+ }
+
+ if dst.rex_needed() || reg.num_bits == 64 {
+ write_rex(cb, reg.num_bits == 64, 0, 0, reg.reg_no);
+ }
+
+ write_opcode(cb, if reg.num_bits == 8 { 0xb0 } else { 0xb8 }, reg);
+ cb.write_int(uimm.value, reg.num_bits.into());
+ }
+ },
+ // M + Imm
+ (X86Opnd::Mem(mem), X86Opnd::Imm(imm)) => {
+ assert!(imm.num_bits <= mem.num_bits);
+
+ if mem.num_bits == 8 {
+ write_rm(cb, false, false, X86Opnd::None, dst, 0xff, &[0xc6]);
+ } else {
+ write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, 0, &[0xc7]);
+ }
+
+            let output_num_bits: u32 = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
+ assert!(sig_imm_size(imm.value) <= (output_num_bits as u8));
+ cb.write_int(imm.value as u64, output_num_bits);
+ },
+ // M + UImm
+ (X86Opnd::Mem(mem), X86Opnd::UImm(uimm)) => {
+ assert!(uimm.num_bits <= mem.num_bits);
+
+ if mem.num_bits == 8 {
+ write_rm(cb, false, false, X86Opnd::None, dst, 0xff, &[0xc6]);
+ }
+ else {
+ write_rm(cb, mem.num_bits == 16, mem.num_bits == 64, X86Opnd::None, dst, 0, &[0xc7]);
+ }
+
+ let output_num_bits = if mem.num_bits > 32 { 32 } else { mem.num_bits.into() };
+ assert!(sig_imm_size(uimm.value as i64) <= (output_num_bits as u8));
+ cb.write_int(uimm.value, output_num_bits);
+ },
+ // * + Imm/UImm
+ (_, X86Opnd::Imm(_) | X86Opnd::UImm(_)) => unreachable!(),
+ // * + *
+ (_, _) => {
+ write_rm_multi(
+ cb,
+ 0x88, // opMemReg8
+ 0x89, // opMemRegPref
+ 0x8A, // opRegMem8
+ 0x8B, // opRegMemPref
+ 0xC6, // opMemImm8
+ 0xFF, // opMemImmSml (not available)
+ 0xFF, // opMemImmLrg
+ 0xFF, // opExtImm
+ dst,
+ src
+ );
+ }
+ };
+}
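
The immediate-shortening path above relies on the fact that writing the 32-bit half of a register zero-extends into the full 64-bit register, so it is only taken for values that survive a 32-bit round trip. A small standalone check of that reasoning (illustrative, not part of the patch):

```rust
fn main() {
    // A small non-negative immediate round-trips through 32 bits, so
    // mov(rax, 0x34) can be emitted as the shorter mov(eax, 0x34).
    let small: i64 = 0x34;
    assert_eq!(small as u32 as u64, small as u64);

    // A negative immediate does not: zero extension drops the sign bits,
    // so the full 64-bit encoding is required.
    let negative: i64 = -1;
    assert_ne!(negative as u32 as u64, negative as u64);
}
```
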
+
+/// movsx - Move with sign extension (signed integers)
+pub fn movsx(cb: &mut CodeBlock, dst: X86Opnd, src: X86Opnd) {
+ if let X86Opnd::Reg(_dst_reg) = dst {
+ assert!(matches!(src, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ let src_num_bits = src.num_bits();
+ let dst_num_bits = dst.num_bits();
+ assert!(src_num_bits < dst_num_bits);
+
+ match src_num_bits {
+ 8 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, 0xff, &[0x0f, 0xbe]),
+ 16 => write_rm(cb, dst_num_bits == 16, dst_num_bits == 64, dst, src, 0xff, &[0x0f, 0xbf]),
+ 32 => write_rm(cb, false, true, dst, src, 0xff, &[0x63]),
+ _ => unreachable!()
+ };
+ } else {
+ unreachable!();
+ }
+}
+
+/*
+/// movzx - Move with zero extension (unsigned values)
+void movzx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
+{
+ cb.writeASM("movzx", dst, src);
+
+ uint32_t dstSize;
+ if (dst.isReg)
+ dstSize = dst.reg.size;
+ else
+ assert (false, "movzx dst must be a register");
+
+ uint32_t srcSize;
+ if (src.isReg)
+ srcSize = src.reg.size;
+ else if (src.isMem)
+ srcSize = src.mem.size;
+ else
+ assert (false);
+
+ assert (
+ srcSize < dstSize,
+ "movzx: srcSize >= dstSize"
+ );
+
+ if (srcSize is 8)
+ {
+ cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB6)(dstSize is 16, dstSize is 64, dst, src);
+ }
+ else if (srcSize is 16)
+ {
+ cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB7)(dstSize is 16, dstSize is 64, dst, src);
+ }
+ else
+ {
+ assert (false, "invalid src operand size for movxz");
+ }
+}
+*/
+
+/// nop - Noop, one or multiple bytes long
+pub fn nop(cb: &mut CodeBlock, length: u32) {
+ match length {
+ 0 => {},
+ 1 => cb.write_byte(0x90),
+ 2 => cb.write_bytes(&[0x66, 0x90]),
+ 3 => cb.write_bytes(&[0x0f, 0x1f, 0x00]),
+ 4 => cb.write_bytes(&[0x0f, 0x1f, 0x40, 0x00]),
+ 5 => cb.write_bytes(&[0x0f, 0x1f, 0x44, 0x00, 0x00]),
+ 6 => cb.write_bytes(&[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00]),
+ 7 => cb.write_bytes(&[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00]),
+ 8 => cb.write_bytes(&[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]),
+ 9 => cb.write_bytes(&[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00]),
+ _ => {
+ let mut written: u32 = 0;
+ while written + 9 <= length {
+ nop(cb, 9);
+ written += 9;
+ }
+ nop(cb, length - written);
+ }
+ };
+}
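
Padding longer than nine bytes is emitted as a run of nine-byte NOPs followed by one shorter NOP. A standalone sketch of that splitting rule (not part of the patch), matching the 12-byte case covered later in test_nop():

```rust
/// Split a padding length into NOP sizes of at most nine bytes each,
/// the same way nop() recurses for lengths greater than nine.
fn nop_chunks(length: u32) -> Vec<u32> {
    let mut chunks = Vec::new();
    let mut written = 0;
    while written + 9 <= length {
        chunks.push(9);
        written += 9;
    }
    if length > written {
        chunks.push(length - written);
    }
    chunks
}

fn main() {
    assert_eq!(nop_chunks(12), vec![9, 3]); // one 9-byte NOP + one 3-byte NOP
    assert_eq!(nop_chunks(9), vec![9]);
    assert_eq!(nop_chunks(4), vec![4]);
}
```
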
+
+/// not - Bitwise NOT
+pub fn not(cb: &mut CodeBlock, opnd: X86Opnd) {
+ write_rm_unary(
+ cb,
+ 0xf6, // opMemReg8
+ 0xf7, // opMemRegPref
+ 0x02, // opExt
+ opnd
+ );
+}
+
+/// or - Bitwise OR
+pub fn or(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x08, // opMemReg8
+ 0x09, // opMemRegPref
+ 0x0A, // opRegMem8
+ 0x0B, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x01, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+/// pop - Pop a register off the stack
+pub fn pop(cb: &mut CodeBlock, opnd: X86Opnd) {
+ match opnd {
+ X86Opnd::Reg(reg) => {
+ assert!(reg.num_bits == 64);
+
+ if opnd.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0x58, reg);
+ },
+ X86Opnd::Mem(mem) => {
+ assert!(mem.num_bits == 64);
+
+ write_rm(cb, false, false, X86Opnd::None, opnd, 0, &[0x8f]);
+ },
+ _ => unreachable!()
+ };
+}
+
+/// popfq - Pop the flags register (64-bit)
+pub fn popfq(cb: &mut CodeBlock) {
+ // REX.W + 0x9D
+ cb.write_bytes(&[0x48, 0x9d]);
+}
+
+/// push - Push an operand on the stack
+pub fn push(cb: &mut CodeBlock, opnd: X86Opnd) {
+ match opnd {
+ X86Opnd::Reg(reg) => {
+ if opnd.rex_needed() {
+ write_rex(cb, false, 0, 0, reg.reg_no);
+ }
+ write_opcode(cb, 0x50, reg);
+ },
+ X86Opnd::Mem(_mem) => {
+ write_rm(cb, false, false, X86Opnd::None, opnd, 6, &[0xff]);
+ },
+ _ => unreachable!()
+ }
+}
+
+/// pushfq - Push the flags register (64-bit)
+pub fn pushfq(cb: &mut CodeBlock) {
+ cb.write_byte(0x9C);
+}
+
+/// ret - Return from call, popping only the return address
+pub fn ret(cb: &mut CodeBlock) {
+ cb.write_byte(0xC3);
+}
+
+/// Encode a shift instruction with a single r/m operand and an immediate shift count
+fn write_shift(cb: &mut CodeBlock, op_mem_one_pref: u8, _op_mem_cl_pref: u8, op_mem_imm_pref: u8, op_ext: u8, opnd0: X86Opnd, opnd1: X86Opnd) {
+ assert!(matches!(opnd0, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+
+ // Check the size of opnd0
+ let opnd_size = opnd0.num_bits();
+ assert!(opnd_size == 16 || opnd_size == 32 || opnd_size == 64);
+
+ let sz_pref = opnd_size == 16;
+ let rex_w = opnd_size == 64;
+
+ if let X86Opnd::UImm(imm) = opnd1 {
+ if imm.value == 1 {
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_one_pref]);
+ } else {
+ assert!(imm.num_bits <= 8);
+ write_rm(cb, sz_pref, rex_w, X86Opnd::None, opnd0, op_ext, &[op_mem_imm_pref]);
+ cb.write_byte(imm.value as u8);
+ }
+ } else {
+ unreachable!();
+ }
+}
+
+/// sal - Shift arithmetic left
+pub fn sal(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x04,
+ opnd0,
+ opnd1
+ );
+}
+
+/// sar - Shift arithmetic right (signed)
+pub fn sar(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x07,
+ opnd0,
+ opnd1
+ );
+}
+
+/// shl - Shift logical left
+pub fn shl(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x04,
+ opnd0,
+ opnd1
+ );
+}
+
+/// shr - Shift logical right (unsigned)
+pub fn shr(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_shift(
+ cb,
+ 0xD1, // opMemOnePref,
+ 0xD3, // opMemClPref,
+ 0xC1, // opMemImmPref,
+ 0x05,
+ opnd0,
+ opnd1
+ );
+}
+
+/// sub - Integer subtraction
+pub fn sub(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x28, // opMemReg8
+ 0x29, // opMemRegPref
+ 0x2A, // opRegMem8
+ 0x2B, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x05, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
+
+fn resize_opnd(opnd: X86Opnd, num_bits: u8) -> X86Opnd {
+ match opnd {
+ X86Opnd::Reg(reg) => {
+ let mut cloned = reg.clone();
+ cloned.num_bits = num_bits;
+ X86Opnd::Reg(cloned)
+ },
+ X86Opnd::Mem(mem) => {
+ let mut cloned = mem.clone();
+ cloned.num_bits = num_bits;
+ X86Opnd::Mem(cloned)
+ },
+ _ => unreachable!()
+ }
+}
+
+/// test - Logical Compare
+pub fn test(cb: &mut CodeBlock, rm_opnd: X86Opnd, test_opnd: X86Opnd) {
+ assert!(matches!(rm_opnd, X86Opnd::Reg(_) | X86Opnd::Mem(_)));
+ let rm_num_bits = rm_opnd.num_bits();
+
+ match test_opnd {
+ X86Opnd::UImm(uimm) => {
+ assert!(uimm.num_bits <= 32);
+ assert!(uimm.num_bits <= rm_num_bits);
+
+ // Use the smallest operand size possible
+ assert!(rm_num_bits % 8 == 0);
+ let rm_resized = resize_opnd(rm_opnd, uimm.num_bits);
+
+ if uimm.num_bits == 8 {
+ write_rm(cb, false, false, X86Opnd::None, rm_resized, 0x00, &[0xf6]);
+ cb.write_int(uimm.value, uimm.num_bits.into());
+ } else {
+ write_rm(cb, uimm.num_bits == 16, false, X86Opnd::None, rm_resized, 0x00, &[0xf7]);
+ cb.write_int(uimm.value, uimm.num_bits.into());
+ }
+ },
+ X86Opnd::Imm(imm) => {
+ // This mode only applies to 64-bit R/M operands with 32-bit signed immediates
+ assert!(imm.num_bits <= 32);
+ assert!(rm_num_bits == 64);
+
+ write_rm(cb, false, true, X86Opnd::None, rm_opnd, 0x00, &[0xf7]);
+ cb.write_int(imm.value as u64, 32);
+ },
+ X86Opnd::Reg(reg) => {
+ assert!(reg.num_bits == rm_num_bits);
+
+ if rm_num_bits == 8 {
+ write_rm(cb, false, false, test_opnd, rm_opnd, 0xff, &[0x84]);
+ } else {
+ write_rm(cb, rm_num_bits == 16, rm_num_bits == 64, test_opnd, rm_opnd, 0xff, &[0x85]);
+ }
+ },
+ _ => unreachable!()
+ };
+}
+
+/// Undefined opcode
+pub fn ud2(cb: &mut CodeBlock) {
+ cb.write_bytes(&[0x0f, 0x0b]);
+}
+
+/// xchg - Exchange Register/Memory with Register
+pub fn xchg(cb: &mut CodeBlock, rm_opnd: X86Opnd, r_opnd: X86Opnd) {
+ if let (X86Opnd::Reg(rm_reg), X86Opnd::Reg(r_reg)) = (rm_opnd, r_opnd) {
+ assert!(rm_reg.num_bits == 64);
+ assert!(r_reg.num_bits == 64);
+
+ // If we're exchanging with RAX
+ if rm_reg.reg_no == RAX_REG_NO {
+ // Write the REX byte
+ write_rex(cb, true, 0, 0, r_reg.reg_no);
+
+ // Write the opcode and register number
+ cb.write_byte(0x90 + (r_reg.reg_no & 7));
+ } else {
+ write_rm(cb, false, true, r_opnd, rm_opnd, 0xff, &[0x87]);
+ }
+ } else {
+ unreachable!();
+ }
+}
+
+/// xor - Bitwise exclusive OR
+pub fn xor(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) {
+ write_rm_multi(
+ cb,
+ 0x30, // opMemReg8
+ 0x31, // opMemRegPref
+ 0x32, // opRegMem8
+ 0x33, // opRegMemPref
+ 0x80, // opMemImm8
+ 0x83, // opMemImmSml
+ 0x81, // opMemImmLrg
+ 0x06, // opExtImm
+ opnd0,
+ opnd1
+ );
+}
diff --git a/yjit/src/asm/x86_64/tests.rs b/yjit/src/asm/x86_64/tests.rs
new file mode 100644
index 0000000000..bb36468a34
--- /dev/null
+++ b/yjit/src/asm/x86_64/tests.rs
@@ -0,0 +1,447 @@
+#![cfg(test)]
+
+use crate::asm::x86_64::*;
+use std::fmt;
+
+/// Produce hex string output from the bytes in a code block
+impl<'a> fmt::LowerHex for super::CodeBlock {
+ fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
+ for pos in 0..self.write_pos {
+ let byte = self.read_byte(pos);
+ fmtr.write_fmt(format_args!("{:02x}", byte))?;
+ }
+ Ok(())
+ }
+}
+
+/// Check that the bytes for an instruction sequence match a hex string
+fn check_bytes<R>(bytes: &str, run: R) where R: FnOnce(&mut super::CodeBlock) {
+ let mut cb = super::CodeBlock::new_dummy(4096);
+ run(&mut cb);
+ assert_eq!(format!("{:x}", cb), bytes);
+}
+
+#[test]
+fn test_add() {
+ check_bytes("80c103", |cb| add(cb, CL, imm_opnd(3)));
+ check_bytes("00d9", |cb| add(cb, CL, BL));
+ check_bytes("4000e1", |cb| add(cb, CL, SPL));
+ check_bytes("6601d9", |cb| add(cb, CX, BX));
+ check_bytes("4801d8", |cb| add(cb, RAX, RBX));
+ check_bytes("01d1", |cb| add(cb, ECX, EDX));
+ check_bytes("4c01f2", |cb| add(cb, RDX, R14));
+ check_bytes("480110", |cb| add(cb, mem_opnd(64, RAX, 0), RDX));
+ check_bytes("480310", |cb| add(cb, RDX, mem_opnd(64, RAX, 0)));
+ check_bytes("48035008", |cb| add(cb, RDX, mem_opnd(64, RAX, 8)));
+ check_bytes("480390ff000000", |cb| add(cb, RDX, mem_opnd(64, RAX, 255)));
+ check_bytes("4881407fff000000", |cb| add(cb, mem_opnd(64, RAX, 127), imm_opnd(255)));
+ check_bytes("0110", |cb| add(cb, mem_opnd(32, RAX, 0), EDX));
+ check_bytes("4883c408", |cb| add(cb, RSP, imm_opnd(8)));
+ check_bytes("83c108", |cb| add(cb, ECX, imm_opnd(8)));
+ check_bytes("81c1ff000000", |cb| add(cb, ECX, imm_opnd(255)));
+}
+
+#[test]
+fn test_add_unsigned() {
+ // ADD r/m8, imm8
+ check_bytes("4180c001", |cb| add(cb, R8B, uimm_opnd(1)));
+ check_bytes("4180c07f", |cb| add(cb, R8B, imm_opnd(i8::MAX.try_into().unwrap())));
+
+ // ADD r/m16, imm16
+ check_bytes("664183c001", |cb| add(cb, R8W, uimm_opnd(1)));
+ check_bytes("664181c0ff7f", |cb| add(cb, R8W, uimm_opnd(i16::MAX.try_into().unwrap())));
+
+ // ADD r/m32, imm32
+ check_bytes("4183c001", |cb| add(cb, R8D, uimm_opnd(1)));
+ check_bytes("4181c0ffffff7f", |cb| add(cb, R8D, uimm_opnd(i32::MAX.try_into().unwrap())));
+
+ // ADD r/m64, imm32
+ check_bytes("4983c001", |cb| add(cb, R8, uimm_opnd(1)));
+ check_bytes("4981c0ffffff7f", |cb| add(cb, R8, uimm_opnd(i32::MAX.try_into().unwrap())));
+}
+
+#[test]
+fn test_and() {
+ check_bytes("4421e5", |cb| and(cb, EBP, R12D));
+ check_bytes("48832008", |cb| and(cb, mem_opnd(64, RAX, 0), imm_opnd(0x08)));
+}
+
+#[test]
+fn test_call_label() {
+ check_bytes("e8fbffffff", |cb| {
+ let label_idx = cb.new_label("fn".to_owned());
+ call_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_call_ptr() {
+ // calling a lower address
+ check_bytes("e8fbffffff", |cb| {
+ let ptr = cb.get_write_ptr();
+ call_ptr(cb, RAX, ptr.raw_ptr());
+ });
+}
+
+#[test]
+fn test_call_reg() {
+ check_bytes("ffd0", |cb| call(cb, RAX));
+}
+
+#[test]
+fn test_call_mem() {
+ check_bytes("ff542408", |cb| call(cb, mem_opnd(64, RSP, 8)));
+}
+
+#[test]
+fn test_cmovcc() {
+ check_bytes("0f4ff7", |cb| cmovg(cb, ESI, EDI));
+ check_bytes("0f4f750c", |cb| cmovg(cb, ESI, mem_opnd(32, RBP, 12)));
+ check_bytes("0f4cc1", |cb| cmovl(cb, EAX, ECX));
+ check_bytes("480f4cdd", |cb| cmovl(cb, RBX, RBP));
+ check_bytes("0f4e742404", |cb| cmovle(cb, ESI, mem_opnd(32, RSP, 4)));
+}
+
+#[test]
+fn test_cmp() {
+ check_bytes("38d1", |cb| cmp(cb, CL, DL));
+ check_bytes("39f9", |cb| cmp(cb, ECX, EDI));
+ check_bytes("493b1424", |cb| cmp(cb, RDX, mem_opnd(64, R12, 0)));
+ check_bytes("4883f802", |cb| cmp(cb, RAX, imm_opnd(2)));
+}
+
+#[test]
+fn test_cqo() {
+ check_bytes("4899", |cb| cqo(cb));
+}
+
+#[test]
+fn test_jge_label() {
+ check_bytes("0f8dfaffffff", |cb| {
+ let label_idx = cb.new_label("loop".to_owned());
+ jge_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_jmp_label() {
+ // Forward jump
+ check_bytes("e900000000", |cb| {
+ let label_idx = cb.new_label("next".to_owned());
+ jmp_label(cb, label_idx);
+ cb.write_label(label_idx);
+ cb.link_labels();
+ });
+
+ // Backwards jump
+ check_bytes("e9fbffffff", |cb| {
+ let label_idx = cb.new_label("loop".to_owned());
+ cb.write_label(label_idx);
+ jmp_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_jmp_rm() {
+ check_bytes("41ffe4", |cb| jmp_rm(cb, R12));
+}
+
+#[test]
+fn test_jo_label() {
+ check_bytes("0f80faffffff", |cb| {
+ let label_idx = cb.new_label("loop".to_owned());
+ jo_label(cb, label_idx);
+ cb.link_labels();
+ });
+}
+
+#[test]
+fn test_lea() {
+ check_bytes("488d5108", |cb| lea(cb, RDX, mem_opnd(64, RCX, 8)));
+ check_bytes("488d0500000000", |cb| lea(cb, RAX, mem_opnd(8, RIP, 0)));
+ check_bytes("488d0505000000", |cb| lea(cb, RAX, mem_opnd(8, RIP, 5)));
+ check_bytes("488d3d05000000", |cb| lea(cb, RDI, mem_opnd(8, RIP, 5)));
+}
+
+#[test]
+fn test_mov() {
+ check_bytes("b807000000", |cb| mov(cb, EAX, imm_opnd(7)));
+ check_bytes("b8fdffffff", |cb| mov(cb, EAX, imm_opnd(-3)));
+ check_bytes("41bf03000000", |cb| mov(cb, R15, imm_opnd(3)));
+ check_bytes("89d8", |cb| mov(cb, EAX, EBX));
+ check_bytes("89c8", |cb| mov(cb, EAX, ECX));
+ check_bytes("8b9380000000", |cb| mov(cb, EDX, mem_opnd(32, RBX, 128)));
+ check_bytes("488b442404", |cb| mov(cb, RAX, mem_opnd(64, RSP, 4)));
+
+ // Test `mov rax, 3` => `mov eax, 3` optimization
+ check_bytes("41b834000000", |cb| mov(cb, R8, imm_opnd(0x34)));
+ check_bytes("49b80000008000000000", |cb| mov(cb, R8, imm_opnd(0x80000000)));
+ check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, imm_opnd(-1)));
+
+ check_bytes("b834000000", |cb| mov(cb, RAX, imm_opnd(0x34)));
+ check_bytes("48b8020000000000c0ff", |cb| mov(cb, RAX, imm_opnd(-18014398509481982)));
+ check_bytes("48b80000008000000000", |cb| mov(cb, RAX, imm_opnd(0x80000000)));
+ check_bytes("48b8ccffffffffffffff", |cb| mov(cb, RAX, imm_opnd(-52))); // yasm thinks this could use a dword immediate instead of qword
+ check_bytes("48b8ffffffffffffffff", |cb| mov(cb, RAX, imm_opnd(-1))); // yasm thinks this could use a dword immediate instead of qword
+ check_bytes("4488c9", |cb| mov(cb, CL, R9B));
+ check_bytes("4889c3", |cb| mov(cb, RBX, RAX));
+ check_bytes("4889df", |cb| mov(cb, RDI, RBX));
+ check_bytes("40b60b", |cb| mov(cb, SIL, imm_opnd(11)));
+
+ check_bytes("c60424fd", |cb| mov(cb, mem_opnd(8, RSP, 0), imm_opnd(-3)));
+ check_bytes("48c7470801000000", |cb| mov(cb, mem_opnd(64, RDI, 8), imm_opnd(1)));
+ //check_bytes("67c7400411000000", |cb| mov(cb, mem_opnd(32, EAX, 4), imm_opnd(0x34))); // We don't distinguish between EAX and RAX here - that's probably fine?
+ check_bytes("c7400411000000", |cb| mov(cb, mem_opnd(32, RAX, 4), imm_opnd(17)));
+ check_bytes("41895814", |cb| mov(cb, mem_opnd(32, R8, 20), EBX));
+ check_bytes("4d8913", |cb| mov(cb, mem_opnd(64, R11, 0), R10));
+ check_bytes("48c742f8f4ffffff", |cb| mov(cb, mem_opnd(64, RDX, -8), imm_opnd(-12)));
+}
+
+#[test]
+fn test_mov_unsigned() {
+ // MOV AL, imm8
+ check_bytes("b001", |cb| mov(cb, AL, uimm_opnd(1)));
+ check_bytes("b0ff", |cb| mov(cb, AL, uimm_opnd(u8::MAX.into())));
+
+ // MOV AX, imm16
+ check_bytes("66b80100", |cb| mov(cb, AX, uimm_opnd(1)));
+ check_bytes("66b8ffff", |cb| mov(cb, AX, uimm_opnd(u16::MAX.into())));
+
+ // MOV EAX, imm32
+ check_bytes("b801000000", |cb| mov(cb, EAX, uimm_opnd(1)));
+ check_bytes("b8ffffffff", |cb| mov(cb, EAX, uimm_opnd(u32::MAX.into())));
+ check_bytes("41b800000000", |cb| mov(cb, R8, uimm_opnd(0)));
+ check_bytes("41b8ffffffff", |cb| mov(cb, R8, uimm_opnd(0xFF_FF_FF_FF)));
+
+ // MOV RAX, imm64, will move down into EAX since it fits into 32 bits
+ check_bytes("b801000000", |cb| mov(cb, RAX, uimm_opnd(1)));
+ check_bytes("b8ffffffff", |cb| mov(cb, RAX, uimm_opnd(u32::MAX.into())));
+
+ // MOV RAX, imm64, will not move down into EAX since it does not fit into 32 bits
+ check_bytes("48b80000000001000000", |cb| mov(cb, RAX, uimm_opnd(u32::MAX as u64 + 1)));
+ check_bytes("48b8ffffffffffffffff", |cb| mov(cb, RAX, uimm_opnd(u64::MAX.into())));
+ check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, uimm_opnd(u64::MAX)));
+
+ // MOV r8, imm8
+ check_bytes("41b001", |cb| mov(cb, R8B, uimm_opnd(1)));
+ check_bytes("41b0ff", |cb| mov(cb, R8B, uimm_opnd(u8::MAX.into())));
+
+ // MOV r16, imm16
+ check_bytes("6641b80100", |cb| mov(cb, R8W, uimm_opnd(1)));
+ check_bytes("6641b8ffff", |cb| mov(cb, R8W, uimm_opnd(u16::MAX.into())));
+
+ // MOV r32, imm32
+ check_bytes("41b801000000", |cb| mov(cb, R8D, uimm_opnd(1)));
+ check_bytes("41b8ffffffff", |cb| mov(cb, R8D, uimm_opnd(u32::MAX.into())));
+
+ // MOV r64, imm64, will move down into 32 bit since it fits into 32 bits
+ check_bytes("41b801000000", |cb| mov(cb, R8, uimm_opnd(1)));
+
+ // MOV r64, imm64, will not move down into 32 bit since it does not fit into 32 bits
+ check_bytes("49b8ffffffffffffffff", |cb| mov(cb, R8, uimm_opnd(u64::MAX)));
+}
+
+#[test]
+fn test_mov_iprel() {
+ check_bytes("8b0500000000", |cb| mov(cb, EAX, mem_opnd(32, RIP, 0)));
+ check_bytes("8b0505000000", |cb| mov(cb, EAX, mem_opnd(32, RIP, 5)));
+
+ check_bytes("488b0500000000", |cb| mov(cb, RAX, mem_opnd(64, RIP, 0)));
+ check_bytes("488b0505000000", |cb| mov(cb, RAX, mem_opnd(64, RIP, 5)));
+ check_bytes("488b3d05000000", |cb| mov(cb, RDI, mem_opnd(64, RIP, 5)));
+}
+
+#[test]
+fn test_movsx() {
+ check_bytes("660fbec0", |cb| movsx(cb, AX, AL));
+ check_bytes("0fbed0", |cb| movsx(cb, EDX, AL));
+ check_bytes("480fbec3", |cb| movsx(cb, RAX, BL));
+ check_bytes("0fbfc8", |cb| movsx(cb, ECX, AX));
+ check_bytes("4c0fbed9", |cb| movsx(cb, R11, CL));
+ check_bytes("4c6354240c", |cb| movsx(cb, R10, mem_opnd(32, RSP, 12)));
+ check_bytes("480fbe0424", |cb| movsx(cb, RAX, mem_opnd(8, RSP, 0)));
+ check_bytes("490fbf5504", |cb| movsx(cb, RDX, mem_opnd(16, R13, 4)));
+}
+
+#[test]
+fn test_nop() {
+ check_bytes("90", |cb| nop(cb, 1));
+ check_bytes("6690", |cb| nop(cb, 2));
+ check_bytes("0f1f00", |cb| nop(cb, 3));
+ check_bytes("0f1f4000", |cb| nop(cb, 4));
+ check_bytes("0f1f440000", |cb| nop(cb, 5));
+ check_bytes("660f1f440000", |cb| nop(cb, 6));
+ check_bytes("0f1f8000000000", |cb| nop(cb, 7));
+ check_bytes("0f1f840000000000", |cb| nop(cb, 8));
+ check_bytes("660f1f840000000000", |cb| nop(cb, 9));
+ check_bytes("660f1f84000000000090", |cb| nop(cb, 10));
+ check_bytes("660f1f8400000000006690", |cb| nop(cb, 11));
+ check_bytes("660f1f8400000000000f1f00", |cb| nop(cb, 12));
+}
+
+#[test]
+fn test_not() {
+ check_bytes("66f7d0", |cb| not(cb, AX));
+ check_bytes("f7d0", |cb| not(cb, EAX));
+ check_bytes("49f71424", |cb| not(cb, mem_opnd(64, R12, 0)));
+ check_bytes("f794242d010000", |cb| not(cb, mem_opnd(32, RSP, 301)));
+ check_bytes("f71424", |cb| not(cb, mem_opnd(32, RSP, 0)));
+ check_bytes("f7542403", |cb| not(cb, mem_opnd(32, RSP, 3)));
+ check_bytes("f75500", |cb| not(cb, mem_opnd(32, RBP, 0)));
+ check_bytes("f7550d", |cb| not(cb, mem_opnd(32, RBP, 13)));
+ check_bytes("48f7d0", |cb| not(cb, RAX));
+ check_bytes("49f7d3", |cb| not(cb, R11));
+ check_bytes("f710", |cb| not(cb, mem_opnd(32, RAX, 0)));
+ check_bytes("f716", |cb| not(cb, mem_opnd(32, RSI, 0)));
+ check_bytes("f717", |cb| not(cb, mem_opnd(32, RDI, 0)));
+ check_bytes("f75237", |cb| not(cb, mem_opnd(32, RDX, 55)));
+ check_bytes("f79239050000", |cb| not(cb, mem_opnd(32, RDX, 1337)));
+ check_bytes("f752c9", |cb| not(cb, mem_opnd(32, RDX, -55)));
+ check_bytes("f792d5fdffff", |cb| not(cb, mem_opnd(32, RDX, -555)));
+}
+
+#[test]
+fn test_or() {
+ check_bytes("09f2", |cb| or(cb, EDX, ESI));
+}
+
+#[test]
+fn test_pop() {
+ check_bytes("58", |cb| pop(cb, RAX));
+ check_bytes("5b", |cb| pop(cb, RBX));
+ check_bytes("5c", |cb| pop(cb, RSP));
+ check_bytes("5d", |cb| pop(cb, RBP));
+ check_bytes("415c", |cb| pop(cb, R12));
+ check_bytes("8f00", |cb| pop(cb, mem_opnd(64, RAX, 0)));
+ check_bytes("418f00", |cb| pop(cb, mem_opnd(64, R8, 0)));
+ check_bytes("418f4003", |cb| pop(cb, mem_opnd(64, R8, 3)));
+ check_bytes("8f44c803", |cb| pop(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)));
+ check_bytes("418f44c803", |cb| pop(cb, mem_opnd_sib(64, R8, RCX, 8, 3)));
+}
+
+#[test]
+fn test_push() {
+ check_bytes("50", |cb| push(cb, RAX));
+ check_bytes("53", |cb| push(cb, RBX));
+ check_bytes("4154", |cb| push(cb, R12));
+ check_bytes("ff30", |cb| push(cb, mem_opnd(64, RAX, 0)));
+ check_bytes("41ff30", |cb| push(cb, mem_opnd(64, R8, 0)));
+ check_bytes("41ff7003", |cb| push(cb, mem_opnd(64, R8, 3)));
+ check_bytes("ff74c803", |cb| push(cb, mem_opnd_sib(64, RAX, RCX, 8, 3)));
+ check_bytes("41ff74c803", |cb| push(cb, mem_opnd_sib(64, R8, RCX, 8, 3)));
+}
+
+#[test]
+fn test_ret() {
+ check_bytes("c3", |cb| ret(cb));
+}
+
+#[test]
+fn test_sal() {
+ check_bytes("66d1e1", |cb| sal(cb, CX, uimm_opnd(1)));
+ check_bytes("d1e1", |cb| sal(cb, ECX, uimm_opnd(1)));
+ check_bytes("c1e505", |cb| sal(cb, EBP, uimm_opnd(5)));
+ check_bytes("d1642444", |cb| sal(cb, mem_opnd(32, RSP, 68), uimm_opnd(1)));
+}
+
+#[test]
+fn test_sar() {
+ check_bytes("d1fa", |cb| sar(cb, EDX, uimm_opnd(1)));
+}
+
+#[test]
+fn test_shr() {
+ check_bytes("49c1ee07", |cb| shr(cb, R14, uimm_opnd(7)));
+}
+
+#[test]
+fn test_sub() {
+ check_bytes("83e801", |cb| sub(cb, EAX, imm_opnd(1)));
+ check_bytes("4883e802", |cb| sub(cb, RAX, imm_opnd(2)));
+}
+
+#[test]
+fn test_test() {
+ check_bytes("84c0", |cb| test(cb, AL, AL));
+ check_bytes("6685c0", |cb| test(cb, AX, AX));
+ check_bytes("f6c108", |cb| test(cb, CL, uimm_opnd(8)));
+ check_bytes("f6c207", |cb| test(cb, DL, uimm_opnd(7)));
+ check_bytes("f6c108", |cb| test(cb, RCX, uimm_opnd(8)));
+ check_bytes("f6420808", |cb| test(cb, mem_opnd(8, RDX, 8), uimm_opnd(8)));
+ check_bytes("f64208ff", |cb| test(cb, mem_opnd(8, RDX, 8), uimm_opnd(255)));
+ check_bytes("66f7c2ffff", |cb| test(cb, DX, uimm_opnd(0xffff)));
+ check_bytes("66f74208ffff", |cb| test(cb, mem_opnd(16, RDX, 8), uimm_opnd(0xffff)));
+ check_bytes("f60601", |cb| test(cb, mem_opnd(8, RSI, 0), uimm_opnd(1)));
+ check_bytes("f6461001", |cb| test(cb, mem_opnd(8, RSI, 16), uimm_opnd(1)));
+ check_bytes("f646f001", |cb| test(cb, mem_opnd(8, RSI, -16), uimm_opnd(1)));
+ check_bytes("854640", |cb| test(cb, mem_opnd(32, RSI, 64), EAX));
+ check_bytes("4885472a", |cb| test(cb, mem_opnd(64, RDI, 42), RAX));
+ check_bytes("4885c0", |cb| test(cb, RAX, RAX));
+ check_bytes("4885f0", |cb| test(cb, RAX, RSI));
+ check_bytes("48f74640f7ffffff", |cb| test(cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)));
+ check_bytes("48f7464008000000", |cb| test(cb, mem_opnd(64, RSI, 64), imm_opnd(0x08)));
+ check_bytes("48f7c108000000", |cb| test(cb, RCX, imm_opnd(0x08)));
+ //check_bytes("48a9f7ffff0f", |cb| test(cb, RAX, imm_opnd(0x0FFFFFF7)));
+}
+
+#[test]
+fn test_xchg() {
+ check_bytes("4891", |cb| xchg(cb, RAX, RCX));
+ check_bytes("4995", |cb| xchg(cb, RAX, R13));
+ check_bytes("4887d9", |cb| xchg(cb, RCX, RBX));
+ check_bytes("4d87f9", |cb| xchg(cb, R9, R15));
+}
+
+#[test]
+fn test_xor() {
+ check_bytes("31c0", |cb| xor(cb, EAX, EAX));
+}
+
+#[test]
+#[cfg(feature = "disasm")]
+fn basic_capstone_usage() -> std::result::Result<(), capstone::Error> {
+ // Test drive Capstone with simple input
+ extern crate capstone;
+ use capstone::prelude::*;
+ let cs = Capstone::new()
+ .x86()
+ .mode(arch::x86::ArchMode::Mode64)
+ .syntax(arch::x86::ArchSyntax::Intel)
+ .build()?;
+
+ let insns = cs.disasm_all(&[0xCC], 0x1000)?;
+
+ match insns.as_ref() {
+ [insn] => {
+ assert_eq!(Some("int3"), insn.mnemonic());
+ Ok(())
+ }
+ _ => Err(capstone::Error::CustomError(
+ "expected to disassemble to int3",
+ )),
+ }
+}
+
+#[test]
+#[cfg(feature = "asm_comments")]
+fn block_comments() {
+ let mut cb = super::CodeBlock::new_dummy(4096);
+
+ let first_write_ptr = cb.get_write_ptr().into_usize();
+ cb.add_comment("Beginning");
+ xor(&mut cb, EAX, EAX); // 2 bytes long
+ let second_write_ptr = cb.get_write_ptr().into_usize();
+ cb.add_comment("Two bytes in");
+ cb.add_comment("Still two bytes in");
+ cb.add_comment("Still two bytes in"); // Duplicate, should be ignored
+ test(&mut cb, mem_opnd(64, RSI, 64), imm_opnd(!0x08)); // 8 bytes long
+ let third_write_ptr = cb.get_write_ptr().into_usize();
+ cb.add_comment("Ten bytes in");
+
+ assert_eq!(&vec!( "Beginning".to_string() ), cb.comments_at(first_write_ptr).unwrap());
+ assert_eq!(&vec!( "Two bytes in".to_string(), "Still two bytes in".to_string() ), cb.comments_at(second_write_ptr).unwrap());
+ assert_eq!(&vec!( "Ten bytes in".to_string() ), cb.comments_at(third_write_ptr).unwrap());
+}
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
new file mode 100644
index 0000000000..5c5c221c76
--- /dev/null
+++ b/yjit/src/codegen.rs
@@ -0,0 +1,6180 @@
+use crate::asm::x86_64::*;
+use crate::asm::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::invariants::*;
+use crate::options::*;
+use crate::stats::*;
+use crate::utils::*;
+use CodegenStatus::*;
+use InsnOpnd::*;
+
+use std::cell::RefMut;
+use std::cmp;
+use std::collections::HashMap;
+use std::ffi::CStr;
+use std::mem::{self, size_of};
+use std::os::raw::c_uint;
+use std::ptr;
+use std::slice;
+
+// Callee-saved registers
+pub const REG_CFP: X86Opnd = R13;
+pub const REG_EC: X86Opnd = R12;
+pub const REG_SP: X86Opnd = RBX;
+
+// Scratch registers used by YJIT
+pub const REG0: X86Opnd = RAX;
+pub const REG0_32: X86Opnd = EAX;
+pub const REG0_8: X86Opnd = AL;
+pub const REG1: X86Opnd = RCX;
+pub const REG1_32: X86Opnd = ECX;
+
+/// Status returned by code generation functions
+#[derive(PartialEq, Debug)]
+enum CodegenStatus {
+ EndBlock,
+ KeepCompiling,
+ CantCompile,
+}
+
+/// Code generation function signature
+type InsnGenFn = fn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus;
+
+/// Code generation state
+/// This struct only lives while code is being generated
+pub struct JITState {
+ // Block version being compiled
+ block: BlockRef,
+
+ // Instruction sequence this is associated with
+ iseq: IseqPtr,
+
+ // Index of the current instruction being compiled
+ insn_idx: u32,
+
+ // Opcode for the instruction being compiled
+ opcode: usize,
+
+ // PC of the instruction being compiled
+ pc: *mut VALUE,
+
+ // Side exit to the instruction being compiled. See :side-exit:.
+ side_exit_for_pc: Option<CodePtr>,
+
+ // Execution context when compilation started
+ // This allows us to peek at run-time values
+ ec: Option<EcPtr>,
+
+ // Whether we need to record the code address at
+ // the end of this bytecode instruction for global invalidation
+ record_boundary_patch_point: bool,
+}
+
+impl JITState {
+ pub fn new(blockref: &BlockRef) -> Self {
+ JITState {
+ block: blockref.clone(),
+ iseq: ptr::null(), // TODO: initialize this from the blockid
+ insn_idx: 0,
+ opcode: 0,
+ pc: ptr::null_mut::<VALUE>(),
+ side_exit_for_pc: None,
+ ec: None,
+ record_boundary_patch_point: false,
+ }
+ }
+
+ pub fn get_block(&self) -> BlockRef {
+ self.block.clone()
+ }
+
+ pub fn get_insn_idx(&self) -> u32 {
+ self.insn_idx
+ }
+
+ pub fn get_iseq(self: &JITState) -> IseqPtr {
+ self.iseq
+ }
+
+ pub fn get_opcode(self: &JITState) -> usize {
+ self.opcode
+ }
+
+ pub fn set_opcode(self: &mut JITState, opcode: usize) {
+ self.opcode = opcode;
+ }
+
+ pub fn add_gc_object_offset(self: &mut JITState, ptr_offset: u32) {
+ let mut gc_obj_vec: RefMut<_> = self.block.borrow_mut();
+ gc_obj_vec.add_gc_object_offset(ptr_offset);
+ }
+
+ pub fn get_pc(self: &JITState) -> *mut VALUE {
+ self.pc
+ }
+
+ pub fn set_pc(self: &mut JITState, pc: *mut VALUE) {
+ self.pc = pc;
+ }
+}
+
+use crate::codegen::JCCKinds::*;
+
+#[allow(non_camel_case_types)]
+pub enum JCCKinds {
+ JCC_JNE,
+ JCC_JNZ,
+ JCC_JZ,
+ JCC_JE,
+ JCC_JBE,
+ JCC_JNA,
+}
+
+pub fn jit_get_arg(jit: &JITState, arg_idx: isize) -> VALUE {
+    // insn_len requires the non-test config
+ #[cfg(not(test))]
+ assert!(insn_len(jit.get_opcode()) > (arg_idx + 1).try_into().unwrap());
+ unsafe { *(jit.pc.offset(arg_idx + 1)) }
+}
+
+// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
+pub fn jit_mov_gc_ptr(jit: &mut JITState, cb: &mut CodeBlock, reg: X86Opnd, ptr: VALUE) {
+ assert!(matches!(reg, X86Opnd::Reg(_)));
+ assert!(reg.num_bits() == 64);
+
+ // Load the pointer constant into the specified register
+ mov(cb, reg, const_ptr_opnd(ptr.as_ptr()));
+
+ // The pointer immediate is encoded as the last part of the mov written out
+ let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32);
+
+ if !ptr.special_const_p() {
+ jit.add_gc_object_offset(ptr_offset);
+ }
+}
+
+// Get the index of the next instruction
+fn jit_next_insn_idx(jit: &JITState) -> u32 {
+ jit.insn_idx + insn_len(jit.get_opcode())
+}
+
+// Check if we are compiling the instruction at the stub PC
+// Meaning we are compiling the instruction that is next to execute
+fn jit_at_current_insn(jit: &JITState) -> bool {
+ let ec_pc: *mut VALUE = unsafe { get_cfp_pc(get_ec_cfp(jit.ec.unwrap())) };
+ ec_pc == jit.pc
+}
+
+// Peek at the nth topmost value on the Ruby stack.
+// Returns the topmost value when n == 0.
+fn jit_peek_at_stack(jit: &JITState, ctx: &Context, n: isize) -> VALUE {
+ assert!(jit_at_current_insn(jit));
+ assert!(n < ctx.get_stack_size() as isize);
+
+ // Note: this does not account for ctx->sp_offset because
+ // this is only available when hitting a stub, and while
+ // hitting a stub, cfp->sp needs to be up to date in case
+ // codegen functions trigger GC. See :stub-sp-flush:.
+ return unsafe {
+ let sp: *mut VALUE = get_cfp_sp(get_ec_cfp(jit.ec.unwrap()));
+
+ *(sp.offset(-1 - n))
+ };
+}
+
+fn jit_peek_at_self(jit: &JITState) -> VALUE {
+ unsafe { get_cfp_self(get_ec_cfp(jit.ec.unwrap())) }
+}
+
+fn jit_peek_at_local(jit: &JITState, n: i32) -> VALUE {
+ assert!(jit_at_current_insn(jit));
+
+ let local_table_size: isize = unsafe { get_iseq_body_local_table_size(jit.iseq) }
+ .try_into()
+ .unwrap();
+ assert!(n < local_table_size.try_into().unwrap());
+
+ unsafe {
+ let ep = get_cfp_ep(get_ec_cfp(jit.ec.unwrap()));
+ let n_isize: isize = n.try_into().unwrap();
+ let offs: isize = -(VM_ENV_DATA_SIZE as isize) - local_table_size + n_isize + 1;
+ *ep.offset(offs)
+ }
+}
+
+// Add a comment at the current position in the code block
+fn add_comment(cb: &mut CodeBlock, comment_str: &str) {
+ if cfg!(feature = "asm_comments") {
+ cb.add_comment(comment_str);
+ }
+}
+
+/// Increment a profiling counter with counter_name
+#[cfg(not(feature = "stats"))]
+macro_rules! gen_counter_incr {
+ ($cb:tt, $counter_name:ident) => {};
+}
+#[cfg(feature = "stats")]
+macro_rules! gen_counter_incr {
+ ($cb:tt, $counter_name:ident) => {
+ if (get_option!(gen_stats)) {
+ // Get a pointer to the counter variable
+ let ptr = ptr_to_counter!($counter_name);
+
+ // Use REG1 because there might be return value in REG0
+ mov($cb, REG1, const_ptr_opnd(ptr as *const u8));
+ write_lock_prefix($cb); // for ractors.
+ add($cb, mem_opnd(64, REG1, 0), imm_opnd(1));
+ }
+ };
+}
+
+/// Increment a counter then take an existing side exit
+#[cfg(not(feature = "stats"))]
+macro_rules! counted_exit {
+ ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {{
+ let _ = $ocb;
+ $existing_side_exit
+ }};
+}
+#[cfg(feature = "stats")]
+macro_rules! counted_exit {
+ ($ocb:tt, $existing_side_exit:tt, $counter_name:ident) => {
+ // The counter is only incremented when stats are enabled
+ if (!get_option!(gen_stats)) {
+ $existing_side_exit
+ } else {
+ let ocb = $ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+
+ // Increment the counter
+ gen_counter_incr!(ocb, $counter_name);
+
+ // Jump to the existing side exit
+ jmp_ptr(ocb, $existing_side_exit);
+
+ // Pointer to the side-exit code
+ code_ptr
+ }
+ };
+}
+
+// Save the incremented PC on the CFP
+// This is necessary when callees can raise or allocate
+fn jit_save_pc(jit: &JITState, cb: &mut CodeBlock, scratch_reg: X86Opnd) {
+ let pc: *mut VALUE = jit.get_pc();
+ let ptr: *mut VALUE = unsafe {
+ let cur_insn_len = insn_len(jit.get_opcode()) as isize;
+ pc.offset(cur_insn_len)
+ };
+ mov(cb, scratch_reg, const_ptr_opnd(ptr as *const u8));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), scratch_reg);
+}
+
+/// Save the current SP on the CFP
+/// This realigns the interpreter SP with the JIT SP
+/// Note: this will change the current value of REG_SP,
+/// which could invalidate memory operands
+fn gen_save_sp(cb: &mut CodeBlock, ctx: &mut Context) {
+ if ctx.get_sp_offset() != 0 {
+ let stack_pointer = ctx.sp_opnd(0);
+ lea(cb, REG_SP, stack_pointer);
+ let cfp_sp_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP);
+ mov(cb, cfp_sp_opnd, REG_SP);
+ ctx.set_sp_offset(0);
+ }
+}
+
+/// jit_save_pc() + gen_save_sp(). Should be used before calling a routine that
+/// could:
+/// - Perform GC allocation
+/// - Take the VM lock through RB_VM_LOCK_ENTER()
+/// - Perform Ruby method call
+fn jit_prepare_routine_call(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ scratch_reg: X86Opnd,
+) {
+ jit.record_boundary_patch_point = true;
+ jit_save_pc(jit, cb, scratch_reg);
+ gen_save_sp(cb, ctx);
+
+ // In case the routine calls Ruby methods, it can set local variables
+ // through Kernel#binding and other means.
+ ctx.clear_local_types();
+}
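
For illustration, a hypothetical codegen helper showing the intended ordering when calling into C code that may allocate, raise, or re-enter Ruby; `rb_example_routine` is a placeholder symbol, not a function defined in this patch or in CRuby:

```rust
extern "C" {
    // Placeholder for any C routine that can trigger GC or run Ruby code.
    fn rb_example_routine(ec: EcPtr) -> VALUE;
}

fn gen_example_call(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock) {
    // Flush PC and SP so the callee sees a consistent frame if it
    // allocates, raises, or calls back into Ruby.
    jit_prepare_routine_call(jit, ctx, cb, REG0);

    // Pass the execution context and call out; the return value lands in RAX.
    mov(cb, C_ARG_REGS[0], REG_EC);
    call_ptr(cb, REG0, rb_example_routine as *const u8);
}
```
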
+
+/// Record the current codeblock write position for later rewriting into a jump
+/// to the outlined block. Used to implement global code invalidation.
+fn record_global_inval_patch(cb: &mut CodeBlock, outline_block_target_pos: CodePtr) {
+ CodegenGlobals::push_global_inval_patch(cb.get_write_ptr(), outline_block_target_pos);
+}
+
+/// Verify the ctx's types and mappings against the compile-time stack, self,
+/// and locals.
+fn verify_ctx(jit: &JITState, ctx: &Context) {
+ fn obj_info_str<'a>(val: VALUE) -> &'a str {
+ unsafe { CStr::from_ptr(rb_obj_info(val)).to_str().unwrap() }
+ }
+
+ // Only able to check types when at current insn
+ assert!(jit_at_current_insn(jit));
+
+ let self_val = jit_peek_at_self(jit);
+ let self_val_type = Type::from(self_val);
+
+ // Verify self operand type
+ if self_val_type.diff(ctx.get_opnd_type(SelfOpnd)) == usize::MAX {
+ panic!(
+ "verify_ctx: ctx self type ({:?}) incompatible with actual value of self {}",
+ ctx.get_opnd_type(SelfOpnd),
+ obj_info_str(self_val)
+ );
+ }
+
+ // Verify stack operand types
+ let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u16);
+ for i in 0..top_idx {
+ let (learned_mapping, learned_type) = ctx.get_opnd_mapping(StackOpnd(i));
+ let stack_val = jit_peek_at_stack(jit, ctx, i as isize);
+ let val_type = Type::from(stack_val);
+
+ match learned_mapping {
+ TempMapping::MapToSelf => {
+ if self_val != stack_val {
+ panic!(
+ "verify_ctx: stack value was mapped to self, but values did not match!\n stack: {}\n self: {}",
+ obj_info_str(stack_val),
+ obj_info_str(self_val)
+ );
+ }
+ }
+ TempMapping::MapToLocal(local_idx) => {
+ let local_val = jit_peek_at_local(jit, local_idx.into());
+ if local_val != stack_val {
+ panic!(
+ "verify_ctx: stack value was mapped to local, but values did not match\n stack: {}\n local {}: {}",
+ obj_info_str(stack_val),
+ local_idx,
+ obj_info_str(local_val)
+ );
+ }
+ }
+ TempMapping::MapToStack => {}
+ }
+
+ // If the actual type differs from the learned type
+ if val_type.diff(learned_type) == usize::MAX {
+ panic!(
+ "verify_ctx: ctx type ({:?}) incompatible with actual value on stack: {}",
+ learned_type,
+ obj_info_str(stack_val)
+ );
+ }
+ }
+
+ // Verify local variable types
+ let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
+ let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES);
+ for i in 0..top_idx {
+ let learned_type = ctx.get_local_type(i);
+ let local_val = jit_peek_at_local(jit, i as i32);
+ let local_type = Type::from(local_val);
+
+ if local_type.diff(learned_type) == usize::MAX {
+ panic!(
+ "verify_ctx: ctx type ({:?}) incompatible with actual value of local: {} (type {:?})",
+ learned_type,
+ obj_info_str(local_val),
+ local_type
+ );
+ }
+ }
+}
+
+/// Generate an exit to return to the interpreter
+fn gen_exit(exit_pc: *mut VALUE, ctx: &Context, cb: &mut CodeBlock) -> CodePtr {
+ let code_ptr = cb.get_write_ptr();
+
+ add_comment(cb, "exit to interpreter");
+
+    // Generate the code to exit to the interpreter
+ // Write the adjusted SP back into the CFP
+ if ctx.get_sp_offset() != 0 {
+ let stack_pointer = ctx.sp_opnd(0);
+ lea(cb, REG_SP, stack_pointer);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG_SP);
+ }
+
+ // Update CFP->PC
+ mov(cb, RAX, const_ptr_opnd(exit_pc as *const u8));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC), RAX);
+
+ // Accumulate stats about interpreter exits
+ #[cfg(feature = "stats")]
+ if get_option!(gen_stats) {
+ mov(cb, RDI, const_ptr_opnd(exit_pc as *const u8));
+ call_ptr(cb, RSI, rb_yjit_count_side_exit_op as *const u8);
+ }
+
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, uimm_opnd(Qundef.into()));
+ ret(cb);
+
+ return code_ptr;
+}
+
+// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
+// to the interpreter when it cannot service a stub by generating new code.
+// Before coming here, branch_stub_hit() takes care of fully reconstructing
+// interpreter state.
+fn gen_code_for_exit_from_stub(ocb: &mut OutlinedCb) -> CodePtr {
+ let ocb = ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+
+ gen_counter_incr!(ocb, exit_from_branch_stub);
+
+ pop(ocb, REG_SP);
+ pop(ocb, REG_EC);
+ pop(ocb, REG_CFP);
+
+ mov(ocb, RAX, uimm_opnd(Qundef.into()));
+ ret(ocb);
+
+ return code_ptr;
+}
+
+// :side-exit:
+// Get an exit for the current instruction in the outlined block. The code
+// for each instruction often begins with several guards before proceeding
+// to do work. When guards fail, an option we have is to exit to the
+// interpreter at an instruction boundary. The piece of code that takes
+// care of reconstructing interpreter state and exiting out of generated
+// code is called the side exit.
+//
+// No guards change the logic for reconstructing interpreter state at the
+// moment, so there is one unique side exit for each context. Note that
+// it's incorrect to jump to the side exit after any ctx stack push/pop operations
+// since they change the logic required for reconstructing interpreter state.
+fn get_side_exit(jit: &mut JITState, ocb: &mut OutlinedCb, ctx: &Context) -> CodePtr {
+ match jit.side_exit_for_pc {
+ None => {
+ let exit_code = gen_exit(jit.pc, ctx, ocb.unwrap());
+ jit.side_exit_for_pc = Some(exit_code);
+ exit_code
+ }
+ Some(code_ptr) => code_ptr,
+ }
+}
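
As a concrete illustration of the pattern described above (a hypothetical guard, not one of the gen_* functions in this patch): fetch the shared side exit once, emit the runtime check, and branch to the exit on failure, always at an instruction boundary:

```rust
fn gen_example_guard(
    jit: &mut JITState,
    ctx: &mut Context,
    cb: &mut CodeBlock,
    ocb: &mut OutlinedCb,
) -> CodegenStatus {
    // One shared side exit per (pc, ctx); cached in jit.side_exit_for_pc.
    let side_exit = get_side_exit(jit, ocb, ctx);

    // Hypothetical runtime check: leave generated code if REG0 is zero.
    test(cb, REG0, REG0);
    jz_ptr(cb, side_exit);

    KeepCompiling
}
```
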
+
+// Ensure that there is an exit for the start of the block being compiled.
+// Block invalidation uses this exit.
+pub fn jit_ensure_block_entry_exit(jit: &mut JITState, ocb: &mut OutlinedCb) {
+ let blockref = jit.block.clone();
+ let mut block = blockref.borrow_mut();
+ let block_ctx = block.get_ctx();
+ let blockid = block.get_blockid();
+
+ if block.entry_exit.is_some() {
+ return;
+ }
+
+ if jit.insn_idx == blockid.idx {
+ // We are compiling the first instruction in the block.
+ // Generate the exit with the cache in jitstate.
+ block.entry_exit = Some(get_side_exit(jit, ocb, &block_ctx));
+ } else {
+ let pc = unsafe { rb_iseq_pc_at_idx(blockid.iseq, blockid.idx) };
+ block.entry_exit = Some(gen_exit(pc, &block_ctx, ocb.unwrap()));
+ }
+}
+
+// Generate a runtime guard that ensures the PC is at the expected
+// instruction index in the iseq, otherwise takes a side-exit.
+// This is to handle the situation of optional parameters.
+// When a function with optional parameters is called, the entry
+// PC for the method isn't necessarily 0.
+fn gen_pc_guard(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) {
+ //RUBY_ASSERT(cb != NULL);
+
+ let pc_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_PC);
+ let expected_pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ let expected_pc_opnd = const_ptr_opnd(expected_pc as *const u8);
+ mov(cb, REG0, pc_opnd);
+ mov(cb, REG1, expected_pc_opnd);
+ cmp(cb, REG0, REG1);
+
+ let pc_match = cb.new_label("pc_match".to_string());
+ je_label(cb, pc_match);
+
+ // We're not starting at the first PC, so we need to exit.
+ gen_counter_incr!(cb, leave_start_pc_non_zero);
+
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, imm_opnd(Qundef.into()));
+ ret(cb);
+
+ // PC should match the expected insn_idx
+ cb.write_label(pc_match);
+ cb.link_labels();
+}
+
+// Landing code for when c_return tracing is enabled. See full_cfunc_return().
+fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> CodePtr {
+ let cb = ocb.unwrap();
+ let code_ptr = cb.get_write_ptr();
+
+    // This chunk of code expects REG_EC to be filled properly and
+ // RAX to contain the return value of the C method.
+
+ // Call full_cfunc_return()
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], RAX);
+ call_ptr(cb, REG0, rb_full_cfunc_return as *const u8);
+
+ // Count the exit
+ gen_counter_incr!(cb, traced_cfunc_return);
+
+ // Return to the interpreter
+ pop(cb, REG_SP);
+ pop(cb, REG_EC);
+ pop(cb, REG_CFP);
+
+ mov(cb, RAX, uimm_opnd(Qundef.into()));
+ ret(cb);
+
+ return code_ptr;
+}
+
+/// Generate a continuation for leave that exits to the interpreter at REG_CFP->pc.
+/// This is used by gen_leave() and gen_entry_prologue()
+fn gen_leave_exit(ocb: &mut OutlinedCb) -> CodePtr {
+ let ocb = ocb.unwrap();
+ let code_ptr = ocb.get_write_ptr();
+
+ // Note, gen_leave() fully reconstructs interpreter state and leaves the
+ // return value in RAX before coming here.
+
+ // Every exit to the interpreter should be counted
+ gen_counter_incr!(ocb, leave_interp_return);
+
+ pop(ocb, REG_SP);
+ pop(ocb, REG_EC);
+ pop(ocb, REG_CFP);
+
+ ret(ocb);
+
+ return code_ptr;
+}
+
+/// Compile an interpreter entry block to be inserted into an iseq
+/// Returns None if compilation fails.
+pub fn gen_entry_prologue(cb: &mut CodeBlock, iseq: IseqPtr, insn_idx: u32) -> Option<CodePtr> {
+ const MAX_PROLOGUE_SIZE: usize = 1024;
+
+ // Check if we have enough executable memory
+ if !cb.has_capacity(MAX_PROLOGUE_SIZE) {
+ return None;
+ }
+
+ let old_write_pos = cb.get_write_pos();
+
+ // Align the current write position to cache line boundaries
+ cb.align_pos(64);
+
+ let code_ptr = cb.get_write_ptr();
+ add_comment(cb, "yjit entry");
+
+ push(cb, REG_CFP);
+ push(cb, REG_EC);
+ push(cb, REG_SP);
+
+ // We are passed EC and CFP
+ mov(cb, REG_EC, C_ARG_REGS[0]);
+ mov(cb, REG_CFP, C_ARG_REGS[1]);
+
+ // Load the current SP from the CFP into REG_SP
+ mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP));
+
+ // Setup cfp->jit_return
+ mov(
+ cb,
+ REG0,
+ code_ptr_opnd(CodegenGlobals::get_leave_exit_code()),
+ );
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0);
+
+ // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
+ // the case of optional parameters, the interpreter can set the pc to a
+ // different location depending on the optional parameters. If an iseq
+ // has optional parameters, we'll add a runtime check that the PC we've
+ // compiled for is the same PC that the interpreter wants us to run with.
+ // If they don't match, then we'll take a side exit.
+ if unsafe { get_iseq_flags_has_opt(iseq) } {
+ gen_pc_guard(cb, iseq, insn_idx);
+ }
+
+ // Verify MAX_PROLOGUE_SIZE
+ assert!(cb.get_write_pos() - old_write_pos <= MAX_PROLOGUE_SIZE);
+
+ return Some(code_ptr);
+}
+
+// Generate code to check for interrupts and take a side-exit.
+// Warning: this function clobbers REG0
+fn gen_check_ints(cb: &mut CodeBlock, side_exit: CodePtr) {
+ // Check for interrupts
+ // see RUBY_VM_CHECK_INTS(ec) macro
+ add_comment(cb, "RUBY_VM_CHECK_INTS(ec)");
+ mov(
+ cb,
+ REG0_32,
+ mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_MASK),
+ );
+ not(cb, REG0_32);
+ test(
+ cb,
+ mem_opnd(32, REG_EC, RUBY_OFFSET_EC_INTERRUPT_FLAG),
+ REG0_32,
+ );
+ jnz_ptr(cb, side_exit);
+}
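+
+// Worked example for the interrupt check above (illustration only): with
+// interrupt_flag = 0b0100 and interrupt_mask = 0b0100 the pending interrupt
+// is currently blocked, so flag & !mask == 0 and we stay in generated code;
+// with interrupt_mask = 0 the same flag bit survives the NOT/TEST pair and
+// jnz takes the side exit so the interpreter can service it. This mirrors
+// RUBY_VM_INTERRUPTED_ANY(), which checks (flag & ~mask) != 0.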
+
+// Generate a stubbed unconditional jump to the next bytecode instruction.
+// Blocks that are part of a guard chain can use this to share the same successor.
+fn jump_to_next_insn(
+ jit: &mut JITState,
+ current_context: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) {
+ // Reset the depth since in current usages we only ever jump to
+ // chain_depth > 0 from the same instruction.
+ let mut reset_depth = current_context.clone();
+ reset_depth.reset_chain_depth();
+
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jit_next_insn_idx(jit),
+ };
+
+ // We are at the end of the current instruction. Record the boundary.
+ if jit.record_boundary_patch_point {
+ let next_insn = unsafe { jit.pc.offset(insn_len(jit.opcode).try_into().unwrap()) };
+ let exit_pos = gen_exit(next_insn, &reset_depth, ocb.unwrap());
+ record_global_inval_patch(cb, exit_pos);
+ jit.record_boundary_patch_point = false;
+ }
+
+ // Generate the jump instruction
+ gen_direct_jump(jit, &reset_depth, jump_block, cb);
+}
+
+// Compile a sequence of bytecode instructions for a given basic block version.
+// Part of gen_block_version().
+// Note: this function will mutate its context while generating code,
+// but the input start_ctx argument should remain immutable.
+pub fn gen_single_block(
+ blockid: BlockId,
+ start_ctx: &Context,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> Result<BlockRef, ()> {
+ // Limit the number of specialized versions for this block
+ let mut ctx = limit_block_versions(blockid, start_ctx);
+
+ verify_blockid(blockid);
+ assert!(!(blockid.idx == 0 && ctx.get_stack_size() > 0));
+
+ // Instruction sequence to compile
+ let iseq = blockid.iseq;
+ let iseq_size = unsafe { get_iseq_encoded_size(iseq) };
+ let mut insn_idx: c_uint = blockid.idx;
+ let starting_insn_idx = insn_idx;
+
+ // Allocate the new block
+ let blockref = Block::new(blockid, &ctx);
+
+ // Initialize a JIT state object
+ let mut jit = JITState::new(&blockref);
+ jit.iseq = blockid.iseq;
+ jit.ec = Some(ec);
+
+ // Mark the start position of the block
+ blockref.borrow_mut().set_start_addr(cb.get_write_ptr());
+
+ // For each instruction to compile
+ // NOTE: could rewrite this loop with a std::iter::Iterator
+ while insn_idx < iseq_size {
+ // Get the current pc and opcode
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
+ let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
+ .try_into()
+ .unwrap();
+
+ // opt_getinlinecache wants to be in a block all on its own. Cut the block short
+ // if we run into it. See gen_opt_getinlinecache() for details.
+ if opcode == OP_OPT_GETINLINECACHE && insn_idx > starting_insn_idx {
+ jump_to_next_insn(&mut jit, &ctx, cb, ocb);
+ break;
+ }
+
+ // Set the current instruction
+ jit.insn_idx = insn_idx;
+ jit.opcode = opcode;
+ jit.pc = pc;
+ jit.side_exit_for_pc = None;
+
+ // If previous instruction requested to record the boundary
+ if jit.record_boundary_patch_point {
+ // Generate an exit to this instruction and record it
+ let exit_pos = gen_exit(jit.pc, &ctx, ocb.unwrap());
+ record_global_inval_patch(cb, exit_pos);
+ jit.record_boundary_patch_point = false;
+ }
+
+ // In debug mode, verify our existing assumption
+ #[cfg(debug_assertions)]
+ if get_option!(verify_ctx) && jit_at_current_insn(&jit) {
+ verify_ctx(&jit, &ctx);
+ }
+
+ // Lookup the codegen function for this instruction
+ let mut status = CantCompile;
+ if let Some(gen_fn) = get_gen_fn(VALUE(opcode)) {
+ // :count-placement:
+ // Count bytecode instructions that execute in generated code.
+ // Note that the increment happens even when the output takes a side exit.
+ gen_counter_incr!(cb, exec_instruction);
+
+ // Add a comment for the name of the YARV instruction
+ add_comment(cb, &insn_name(opcode));
+
+ // If requested, dump instructions for debugging
+ if get_option!(dump_insns) {
+ println!("compiling {}", insn_name(opcode));
+ print_str(cb, &format!("executing {}", insn_name(opcode)));
+ }
+
+ // Call the code generation function
+ status = gen_fn(&mut jit, &mut ctx, cb, ocb);
+ }
+
+ // If we can't compile this instruction
+ // exit to the interpreter and stop compiling
+ if status == CantCompile {
+ let mut block = jit.block.borrow_mut();
+
+ // TODO: if the codegen function makes changes to ctx and then returns CantCompile,
+ // the exit this generates would be wrong. We could save a copy of the entry context
+ // and assert that ctx is the same here.
+ let exit = gen_exit(jit.pc, &ctx, cb);
+
+ // If this is the first instruction in the block, then we can use
+ // the exit for block->entry_exit.
+ if insn_idx == block.get_blockid().idx {
+ block.entry_exit = Some(exit);
+ }
+
+ break;
+ }
+
+ // For now, reset the chain depth after each instruction as only the
+ // first instruction in the block can concern itself with the depth.
+ ctx.reset_chain_depth();
+
+ // Move to the next instruction to compile
+ insn_idx += insn_len(opcode);
+
+ // If the instruction terminates this block
+ if status == EndBlock {
+ break;
+ }
+ }
+
+ // Finish filling out the block
+ {
+ let mut block = jit.block.borrow_mut();
+
+ // Mark the end position of the block
+ block.set_end_addr(cb.get_write_ptr());
+
+ // Store the index of the last instruction in the block
+ block.set_end_idx(insn_idx);
+ }
+
+ // We currently can't handle cases where the request is for a block that
+ // doesn't go to the next instruction.
+ //assert!(!jit.record_boundary_patch_point);
+
+ // If code for the block doesn't fit, fail
+ if cb.has_dropped_bytes() || ocb.unwrap().has_dropped_bytes() {
+ return Err(());
+ }
+
+ // TODO: we may want a feature for this called dump_insns? Can leave commented for now
+ /*
+ if (YJIT_DUMP_MODE >= 2) {
+ // Dump list of compiled instructions
+ fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
+ for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
+ int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
+ fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
+ idx += insn_len(opcode);
+ }
+ }
+ */
+
+ // Block compiled successfully
+ Ok(blockref)
+}
+
+fn gen_nop(
+ _jit: &mut JITState,
+ _ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Do nothing
+ KeepCompiling
+}
+
+fn gen_pop(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Decrement SP
+ ctx.stack_pop(1);
+ KeepCompiling
+}
+
+fn gen_dup(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let dup_val = ctx.stack_pop(0);
+ let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0));
+
+ let loc0 = ctx.stack_push_mapping((mapping, tmp_type));
+ mov(cb, REG0, dup_val);
+ mov(cb, loc0, REG0);
+
+ KeepCompiling
+}
+
+// duplicate stack top n elements
+fn gen_dupn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ // In practice, this seems to be used only for n == 2
+ if n != 2 {
+ return CantCompile;
+ }
+
+ let opnd1: X86Opnd = ctx.stack_opnd(1);
+ let opnd0: X86Opnd = ctx.stack_opnd(0);
+
+ let mapping1 = ctx.get_opnd_mapping(StackOpnd(1));
+ let mapping0 = ctx.get_opnd_mapping(StackOpnd(0));
+
+ let dst1: X86Opnd = ctx.stack_push_mapping(mapping1);
+ mov(cb, REG0, opnd1);
+ mov(cb, dst1, REG0);
+
+ let dst0: X86Opnd = ctx.stack_push_mapping(mapping0);
+ mov(cb, REG0, opnd0);
+ mov(cb, dst0, REG0);
+
+ KeepCompiling
+}
+
+// Swap top 2 stack entries
+fn gen_swap(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ stack_swap(ctx, cb, 0, 1, REG0, REG1);
+ KeepCompiling
+}
+
+fn stack_swap(
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ offset0: u16,
+ offset1: u16,
+ _reg0: X86Opnd,
+ _reg1: X86Opnd,
+) {
+ let opnd0 = ctx.stack_opnd(offset0 as i32);
+ let opnd1 = ctx.stack_opnd(offset1 as i32);
+
+ let mapping0 = ctx.get_opnd_mapping(StackOpnd(offset0));
+ let mapping1 = ctx.get_opnd_mapping(StackOpnd(offset1));
+
+ mov(cb, REG0, opnd0);
+ mov(cb, REG1, opnd1);
+ mov(cb, opnd0, REG1);
+ mov(cb, opnd1, REG0);
+
+ ctx.set_opnd_mapping(StackOpnd(offset0), mapping1);
+ ctx.set_opnd_mapping(StackOpnd(offset1), mapping0);
+}
+
+fn gen_putnil(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ jit_putobject(jit, ctx, cb, Qnil);
+ KeepCompiling
+}
+
+fn jit_putobject(jit: &mut JITState, ctx: &mut Context, cb: &mut CodeBlock, arg: VALUE) {
+ let val_type: Type = Type::from(arg);
+ let stack_top = ctx.stack_push(val_type);
+
+ if arg.special_const_p() {
+ // Immediates will not move and do not need to be tracked for GC
+ // Thanks to this we can mov directly to memory when possible.
+ let imm = imm_opnd(arg.as_i64());
+
+ // 64-bit immediates can't be directly written to memory
+ if imm.num_bits() <= 32 {
+ mov(cb, stack_top, imm);
+ } else {
+ mov(cb, REG0, imm);
+ mov(cb, stack_top, REG0);
+ }
+ } else {
+ // Load the value to push into REG0
+ // Note that this value may get moved by the GC
+ jit_mov_gc_ptr(jit, cb, REG0, arg);
+
+ // Write argument at SP
+ mov(cb, stack_top, REG0);
+ }
+}
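+
+// Example of the immediate-size split above (values for illustration only):
+// the fixnum 7 is encoded as (7 << 1) | 1 = 15, which fits in a 32-bit
+// immediate and can be stored straight into the stack slot. A wider encoding,
+// say a fixnum that needs more than 32 bits, has to be staged through REG0
+// because x86-64 `mov mem, imm` only accepts a sign-extended 32-bit immediate.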
+
+fn gen_putobject_int2fix(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let opcode = jit.opcode;
+ let cst_val: usize = if opcode == OP_PUTOBJECT_INT2FIX_0_ {
+ 0
+ } else {
+ 1
+ };
+
+ jit_putobject(jit, ctx, cb, VALUE::fixnum_from_usize(cst_val));
+ KeepCompiling
+}
+
+fn gen_putobject(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let arg: VALUE = jit_get_arg(jit, 0);
+
+ jit_putobject(jit, ctx, cb, arg);
+ KeepCompiling
+}
+
+fn gen_putself(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Load self from CFP
+ let cf_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG_CFP, RUBY_OFFSET_CFP_SELF);
+ mov(cb, REG0, cf_opnd);
+
+ // Write it on the stack
+ let stack_top: X86Opnd = ctx.stack_push_self();
+ mov(cb, stack_top, REG0);
+
+ KeepCompiling
+}
+
+fn gen_putspecialobject(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let object_type = jit_get_arg(jit, 0);
+
+ if object_type == VALUE(VM_SPECIAL_OBJECT_VMCORE) {
+ let stack_top: X86Opnd = ctx.stack_push(Type::UnknownHeap);
+ jit_mov_gc_ptr(jit, cb, REG0, unsafe { rb_mRubyVMFrozenCore });
+ mov(cb, stack_top, REG0);
+ KeepCompiling
+ } else {
+ // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
+ // VM_SPECIAL_OBJECT_CONST_BASE
+ CantCompile
+ }
+}
+
+// set Nth stack entry to stack top
+fn gen_setn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ let top_val: X86Opnd = ctx.stack_pop(0);
+ let dst_opnd: X86Opnd = ctx.stack_opnd(n.try_into().unwrap());
+ mov(cb, REG0, top_val);
+ mov(cb, dst_opnd, REG0);
+
+ let mapping = ctx.get_opnd_mapping(StackOpnd(0));
+ ctx.set_opnd_mapping(StackOpnd(n.try_into().unwrap()), mapping);
+
+ KeepCompiling
+}
+
+// get nth stack value, then push it
+fn gen_topn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ let top_n_val = ctx.stack_opnd(n.try_into().unwrap());
+ let mapping = ctx.get_opnd_mapping(StackOpnd(n.try_into().unwrap()));
+
+ let loc0 = ctx.stack_push_mapping(mapping);
+ mov(cb, REG0, top_n_val);
+ mov(cb, loc0, REG0);
+
+ KeepCompiling
+}
+
+// Pop n values off the stack
+fn gen_adjuststack(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let nval: VALUE = jit_get_arg(jit, 0);
+ let VALUE(n) = nval;
+
+ ctx.stack_pop(n);
+ KeepCompiling
+}
+
+fn gen_opt_plus(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Add arg0 + arg1 and test for overflow
+ mov(cb, REG0, arg0);
+ sub(cb, REG0, imm_opnd(1));
+ add(cb, REG0, arg1);
+ jo_ptr(cb, side_exit);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
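+
+// The arithmetic above relies on the fixnum encoding VALUE(n) == 2n + 1.
+// For example (illustration only), adding 2 and 3: the operands are the
+// tagged words 5 and 7, and 5 - 1 + 7 == 11 == 2 * 5 + 1, the tagged form of
+// 5. Subtracting one tag bit before the add keeps the result correctly
+// tagged, and `jo` catches signed overflow so the interpreter can fall back
+// to Bignum arithmetic on the side exit.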
+
+// new array initialized from top N values
+fn gen_newarray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let n = jit_get_arg(jit, 0).as_u32();
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let offset_magnitude = SIZEOF_VALUE as u32 * n;
+ let values_ptr = ctx.sp_opnd(-(offset_magnitude as isize));
+
+ // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], imm_opnd(n.into()));
+ lea(cb, C_ARG_REGS[2], values_ptr);
+ call_ptr(cb, REG0, rb_ec_ary_new_from_values as *const u8);
+
+ ctx.stack_pop(n.as_usize());
+ let stack_ret = ctx.stack_push(Type::Array);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// dup array
+fn gen_duparray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let ary = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_ary_resurrect(VALUE ary);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
+ call_ptr(cb, REG0, rb_ary_resurrect as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Array);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// dup hash
+fn gen_duphash(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let hash = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_hash_resurrect(VALUE hash);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
+ call_ptr(cb, REG0, rb_hash_resurrect as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Hash);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// call to_a on the array on the stack
+fn gen_splatarray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let flag = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let ary_opnd = ctx.stack_pop(1);
+
+ // Call rb_vm_splat_array(flag, ary)
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
+ mov(cb, C_ARG_REGS[1], ary_opnd);
+ call_ptr(cb, REG1, rb_vm_splat_array as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Array);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// new range initialized from top 2 values
+fn gen_newrange(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let flag = jit_get_arg(jit, 0);
+
+ // rb_range_new() allocates and can raise
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // val = rb_range_new(low, high, (int)flag);
+ mov(cb, C_ARG_REGS[0], ctx.stack_opnd(1));
+ mov(cb, C_ARG_REGS[1], ctx.stack_opnd(0));
+ mov(cb, C_ARG_REGS[2], uimm_opnd(flag.into()));
+ call_ptr(cb, REG0, rb_range_new as *const u8);
+
+ ctx.stack_pop(2);
+ let stack_ret = ctx.stack_push(Type::UnknownHeap);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn guard_object_is_heap(
+ cb: &mut CodeBlock,
+ object_opnd: X86Opnd,
+ _ctx: &mut Context,
+ side_exit: CodePtr,
+) {
+ add_comment(cb, "guard object is heap");
+
+ // Test that the object is not an immediate
+ test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+ jnz_ptr(cb, side_exit);
+
+ // Test that the object is not false or nil
+ cmp(cb, object_opnd, uimm_opnd(Qnil.into()));
+ jbe_ptr(cb, side_exit);
+}
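+
+// Sketch of why the two checks above are sufficient: every special constant
+// (fixnum, flonum, static symbol, true, ...) sets at least one bit of
+// RUBY_IMMEDIATE_MASK, so the TEST/JNZ pair filters those out. The only
+// remaining non-heap values are Qfalse and Qnil, and both compare
+// unsigned-below-or-equal to Qnil, which is what the CMP/JBE pair exits on.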
+
+fn guard_object_is_array(
+ cb: &mut CodeBlock,
+ object_opnd: X86Opnd,
+ flags_opnd: X86Opnd,
+ _ctx: &mut Context,
+ side_exit: CodePtr,
+) {
+ add_comment(cb, "guard object is array");
+
+ // Pull out the type mask
+ mov(
+ cb,
+ flags_opnd,
+ mem_opnd(
+ 8 * SIZEOF_VALUE as u8,
+ object_opnd,
+ RUBY_OFFSET_RBASIC_FLAGS,
+ ),
+ );
+ and(cb, flags_opnd, uimm_opnd(RUBY_T_MASK as u64));
+
+ // Compare the result with T_ARRAY
+ cmp(cb, flags_opnd, uimm_opnd(RUBY_T_ARRAY as u64));
+ jne_ptr(cb, side_exit);
+}
+
+// push enough nils onto the stack to fill out an array
+fn gen_expandarray(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let flag = jit_get_arg(jit, 1);
+ let VALUE(flag_value) = flag;
+
+ // If this instruction has the splat flag, then bail out.
+ if flag_value & 0x01 != 0 {
+ incr_counter!(expandarray_splat);
+ return CantCompile;
+ }
+
+ // If this instruction has the postarg flag, then bail out.
+ if flag_value & 0x02 != 0 {
+ incr_counter!(expandarray_postarg);
+ return CantCompile;
+ }
+
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // num is the number of requested values. If there aren't enough in the
+ // array then we're going to push nils to make up the difference.
+ let num = jit_get_arg(jit, 0);
+ let array_type = ctx.get_opnd_type(StackOpnd(0));
+ let array_opnd = ctx.stack_pop(1);
+
+ if matches!(array_type, Type::Nil) {
+ // special case for a, b = nil pattern
+ // push N nils onto the stack
+ for _i in 0..(num.into()) {
+ let push_opnd = ctx.stack_push(Type::Nil);
+ mov(cb, push_opnd, uimm_opnd(Qnil.into()));
+ }
+ return KeepCompiling;
+ }
+
+ // Move the array from the stack into REG0 and check that it's an array.
+ mov(cb, REG0, array_opnd);
+ guard_object_is_heap(
+ cb,
+ REG0,
+ ctx,
+ counted_exit!(ocb, side_exit, expandarray_not_array),
+ );
+ guard_object_is_array(
+ cb,
+ REG0,
+ REG1,
+ ctx,
+ counted_exit!(ocb, side_exit, expandarray_not_array),
+ );
+
+ // If we don't actually want any values, then just return.
+ if num == VALUE(0) {
+ return KeepCompiling;
+ }
+
+ // Pull out the embed flag to check if it's an embedded array.
+ let flags_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RBASIC_FLAGS);
+ mov(cb, REG1, flags_opnd);
+
+ // Move the length of the embedded array into REG1.
+ and(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_MASK as u64));
+ shr(cb, REG1, uimm_opnd(RARRAY_EMBED_LEN_SHIFT as u64));
+
+ // Conditionally move the length of the heap array into REG1.
+ test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64));
+ let array_len_opnd = mem_opnd(
+ (8 * size_of::<std::os::raw::c_long>()) as u8,
+ REG0,
+ RUBY_OFFSET_RARRAY_AS_HEAP_LEN,
+ );
+ cmovz(cb, REG1, array_len_opnd);
+
+ // Only handle the case where the number of values in the array is greater
+ // than or equal to the number of values requested.
+ cmp(cb, REG1, uimm_opnd(num.into()));
+ jl_ptr(cb, counted_exit!(ocb, side_exit, expandarray_rhs_too_small));
+
+ // Load the address of the embedded array into REG1.
+ // (struct RArray *)(obj)->as.ary
+ let ary_opnd = mem_opnd((8 * SIZEOF_VALUE) as u8, REG0, RUBY_OFFSET_RARRAY_AS_ARY);
+ lea(cb, REG1, ary_opnd);
+
+ // Conditionally load the address of the heap array into REG1.
+ // (struct RArray *)(obj)->as.heap.ptr
+ test(cb, flags_opnd, uimm_opnd(RARRAY_EMBED_FLAG as u64));
+ let heap_ptr_opnd = mem_opnd(
+ (8 * size_of::<usize>()) as u8,
+ REG0,
+ RUBY_OFFSET_RARRAY_AS_HEAP_PTR,
+ );
+ cmovz(cb, REG1, heap_ptr_opnd);
+
+ // Loop backward through the array and push each element onto the stack.
+ for i in (0..(num.as_i32())).rev() {
+ let top = ctx.stack_push(Type::Unknown);
+ mov(cb, REG0, mem_opnd(64, REG1, i * (SIZEOF_VALUE as i32)));
+ mov(cb, top, REG0);
+ }
+
+ KeepCompiling
+}
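+
+// Note on the embedded/heap handling above: small arrays store their elements
+// inline in the RArray slot (RARRAY_EMBED_FLAG set, length packed into the
+// flags word), while larger ones point to a separate buffer via
+// as.heap.ptr/len. The code computes the embedded variant first and then uses
+// cmovz to overwrite both the length and the element pointer with the heap
+// variant when the embed flag turns out to be clear, avoiding a branch.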
+
+fn gen_getlocal_wc0(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Compute the offset from BP to the local
+ let slot_idx = jit_get_arg(jit, 0).as_i32();
+ let offs: i32 = -(SIZEOF_VALUE as i32) * slot_idx;
+ let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx);
+
+ // Load environment pointer EP (level 0) from CFP
+ gen_get_ep(cb, REG0, 0);
+
+ // Load the local from the EP
+ mov(cb, REG0, mem_opnd(64, REG0, offs));
+
+ // Write the local at SP
+ let stack_top = ctx.stack_push_local(local_idx.as_usize());
+ mov(cb, stack_top, REG0);
+
+ KeepCompiling
+}
+
+// Compute the index of a local variable from its slot index
+fn slot_to_local_idx(iseq: IseqPtr, slot_idx: i32) -> u32 {
+ // Layout illustration
+ // This is an array of VALUE
+ // | VM_ENV_DATA_SIZE |
+ // v v
+ // low addr <+-------+-------+-------+-------+------------------+
+ // |local 0|local 1| ... |local n| .... |
+ // +-------+-------+-------+-------+------------------+
+ // ^ ^ ^ ^
+ // +-------+---local_table_size----+ cfp->ep--+
+ // | |
+ // +------------------slot_idx----------------+
+ //
+ // See usages of local_var_name() from iseq.c for similar calculation.
+
+ // Equivalent of iseq->body->local_table_size
+ let local_table_size: i32 = unsafe { get_iseq_body_local_table_size(iseq) }
+ .try_into()
+ .unwrap();
+ let op = slot_idx - (VM_ENV_DATA_SIZE as i32);
+ let local_idx = local_table_size - op - 1;
+ assert!(local_idx >= 0 && local_idx < local_table_size);
+ local_idx.try_into().unwrap()
+}
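+
+// Worked example for the mapping above, assuming VM_ENV_DATA_SIZE == 3 as in
+// current CRuby: with local_table_size == 2, local 0 sits at slot_idx 4 and
+// local 1 at slot_idx 3, so local_idx = 2 - (slot_idx - 3) - 1 recovers 0 and
+// 1 respectively. The assert guards against slot indices that fall outside
+// the local table.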
+
+// Get EP at level from CFP
+fn gen_get_ep(cb: &mut CodeBlock, reg: X86Opnd, level: u32) {
+ // Load environment pointer EP from CFP
+ let ep_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP);
+ mov(cb, reg, ep_opnd);
+
+ for _ in (0..level).rev() {
+ // Get the previous EP from the current EP
+ // See GET_PREV_EP(ep) macro
+ // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
+ let offs = (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32);
+ mov(cb, reg, mem_opnd(64, reg, offs));
+ and(cb, reg, imm_opnd(!0x03));
+ }
+}
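+
+// Each hop above follows the environment's parent pointer stored in
+// ep[VM_ENV_DATA_INDEX_SPECVAL]. The low bits of that word are used as tag
+// bits (see the GET_PREV_EP() macro), which is why they are masked off with
+// & !0x03 after every load. `level` is the number of lexical scopes to walk
+// up, e.g. getlocal_WC_1 reads a local from the directly enclosing scope.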
+
+fn gen_getlocal_generic(
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ local_idx: u32,
+ level: u32,
+) -> CodegenStatus {
+ gen_get_ep(cb, REG0, level);
+
+ // Load the local from the block
+ // val = *(vm_get_ep(GET_EP(), level) - idx);
+ let offs = -(SIZEOF_VALUE as i32 * local_idx as i32);
+ mov(cb, REG0, mem_opnd(64, REG0, offs));
+
+ // Write the local at SP
+ let stack_top = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_top, REG0);
+
+ KeepCompiling
+}
+
+fn gen_getlocal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0);
+ let level = jit_get_arg(jit, 1);
+ gen_getlocal_generic(ctx, cb, idx.as_u32(), level.as_u32())
+}
+
+fn gen_getlocal_wc1(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0);
+ gen_getlocal_generic(ctx, cb, idx.as_u32(), 1)
+}
+
+fn gen_setlocal_wc0(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ /*
+ vm_env_write(const VALUE *ep, int index, VALUE v)
+ {
+ VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
+ if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
+ VM_STACK_ENV_WRITE(ep, index, v);
+ }
+ else {
+ vm_env_write_slowpath(ep, index, v);
+ }
+ }
+ */
+
+ let slot_idx = jit_get_arg(jit, 0).as_i32();
+ let local_idx = slot_to_local_idx(jit.get_iseq(), slot_idx).as_usize();
+
+ // Load environment pointer EP (level 0) from CFP
+ gen_get_ep(cb, REG0, 0);
+
+ // flags & VM_ENV_FLAG_WB_REQUIRED
+ let flags_opnd = mem_opnd(
+ 64,
+ REG0,
+ SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
+ );
+ test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED as i64));
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
+ jnz_ptr(cb, side_exit);
+
+ // Set the type of the local variable in the context
+ let temp_type = ctx.get_opnd_type(StackOpnd(0));
+ ctx.set_local_type(local_idx, temp_type);
+
+ // Pop the value to write from the stack
+ let stack_top = ctx.stack_pop(1);
+ mov(cb, REG1, stack_top);
+
+ // Write the value at the environment pointer
+ let offs: i32 = -8 * slot_idx;
+ mov(cb, mem_opnd(64, REG0, offs), REG1);
+
+ KeepCompiling
+}
+
+fn gen_setlocal_generic(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ local_idx: i32,
+ level: u32,
+) -> CodegenStatus {
+ // Load environment pointer EP at level
+ gen_get_ep(cb, REG0, level);
+
+ // flags & VM_ENV_FLAG_WB_REQUIRED
+ let flags_opnd = mem_opnd(
+ 64,
+ REG0,
+ SIZEOF_VALUE as i32 * VM_ENV_DATA_INDEX_FLAGS as i32,
+ );
+ test(cb, flags_opnd, uimm_opnd(VM_ENV_FLAG_WB_REQUIRED.into()));
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
+ jnz_ptr(cb, side_exit);
+
+ // Pop the value to write from the stack
+ let stack_top = ctx.stack_pop(1);
+ mov(cb, REG1, stack_top);
+
+ // Write the value at the environment pointer
+ let offs = -(SIZEOF_VALUE as i32 * local_idx);
+ mov(cb, mem_opnd(64, REG0, offs), REG1);
+
+ KeepCompiling
+}
+
+fn gen_setlocal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0).as_i32();
+ let level = jit_get_arg(jit, 1).as_u32();
+ gen_setlocal_generic(jit, ctx, cb, ocb, idx, level)
+}
+
+fn gen_setlocal_wc1(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let idx = jit_get_arg(jit, 0).as_i32();
+ gen_setlocal_generic(jit, ctx, cb, ocb, idx, 1)
+}
+
+// new hash initialized from top N values
+fn gen_newhash(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let num: i64 = jit_get_arg(jit, 0).as_i64();
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ if num != 0 {
+ // val = rb_hash_new_with_size(num / 2);
+ mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
+ call_ptr(cb, REG0, rb_hash_new_with_size as *const u8);
+
+ // save the allocated hash as we want to push it after insertion
+ push(cb, RAX);
+ push(cb, RAX); // alignment
+
+ // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
+ mov(cb, C_ARG_REGS[0], imm_opnd(num));
+ lea(
+ cb,
+ C_ARG_REGS[1],
+ ctx.stack_opnd((num - 1).try_into().unwrap()),
+ );
+ mov(cb, C_ARG_REGS[2], RAX);
+ call_ptr(cb, REG0, rb_hash_bulk_insert as *const u8);
+
+ pop(cb, RAX); // alignment
+ pop(cb, RAX);
+
+ ctx.stack_pop(num.try_into().unwrap());
+ let stack_ret = ctx.stack_push(Type::Hash);
+ mov(cb, stack_ret, RAX);
+ } else {
+ // val = rb_hash_new();
+ call_ptr(cb, REG0, rb_hash_new as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Hash);
+ mov(cb, stack_ret, RAX);
+ }
+
+ KeepCompiling
+}
+
+fn gen_putstring(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let put_val = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because the callee will allocate
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
+ call_ptr(cb, REG0, rb_ec_str_resurrect as *const u8);
+
+ let stack_top = ctx.stack_push(Type::String);
+ mov(cb, stack_top, RAX);
+
+ KeepCompiling
+}
+
+// Push Qtrue or Qfalse depending on whether the given keyword was supplied by
+// the caller
+fn gen_checkkeyword(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // When a keyword is unspecified past index 32, a hash will be used
+ // instead. This can only happen in iseqs taking more than 32 keywords.
+ if unsafe { (*get_iseq_body_param_keyword(jit.iseq)).num >= 32 } {
+ return CantCompile;
+ }
+
+ // The EP offset to the undefined bits local
+ let bits_offset = jit_get_arg(jit, 0).as_i32();
+
+ // The index of the keyword we want to check
+ let index: i64 = jit_get_arg(jit, 1).as_i64();
+
+ // Load environment pointer EP
+ gen_get_ep(cb, REG0, 0);
+
+ // VALUE kw_bits = *(ep - bits);
+ let bits_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * -bits_offset);
+
+ // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
+ // if ((b & (0x01 << idx))) {
+ //
+ // We can skip the FIX2ULONG conversion by shifting the bit we test
+ let bit_test: i64 = 0x01 << (index + 1);
+ test(cb, bits_opnd, imm_opnd(bit_test));
+ mov(cb, REG0, uimm_opnd(Qfalse.into()));
+ mov(cb, REG1, uimm_opnd(Qtrue.into()));
+ cmovz(cb, REG0, REG1);
+
+ let stack_ret = ctx.stack_push(Type::UnknownImm);
+ mov(cb, stack_ret, REG0);
+
+ KeepCompiling
+}
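+
+// Bit-twiddling note for the check above: the `kw_bits` local is a Fixnum
+// whose unboxed bit i is set when keyword i was left unspecified. Because of
+// the fixnum tag, bit i of the integer lives at bit i + 1 of the boxed word,
+// so testing 1 << (index + 1) against the raw VALUE avoids untagging. For
+// example (illustration only), index == 2 tests bit 3: if it is set the
+// keyword was not supplied, cmovz does not fire, and Qfalse is pushed.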
+
+fn gen_jnz_to_target0(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => jnz_ptr(cb, target0),
+ }
+}
+
+fn gen_jz_to_target0(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => jz_ptr(cb, target0),
+ }
+}
+
+fn gen_jbe_to_target0(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => jbe_ptr(cb, target0),
+ }
+}
+
+// Generate a jump to a stub that recompiles the current YARV instruction on failure.
+// When depth_limit is exceeded, generate a jump to a side exit.
+fn jit_chain_guard(
+ jcc: JCCKinds,
+ jit: &JITState,
+ ctx: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ depth_limit: i32,
+ side_exit: CodePtr,
+) {
+ let target0_gen_fn = match jcc {
+ JCC_JNE | JCC_JNZ => gen_jnz_to_target0,
+ JCC_JZ | JCC_JE => gen_jz_to_target0,
+ JCC_JBE | JCC_JNA => gen_jbe_to_target0,
+ };
+
+ if (ctx.get_chain_depth() as i32) < depth_limit {
+ let mut deeper = ctx.clone();
+ deeper.increment_chain_depth();
+ let bid = BlockId {
+ iseq: jit.iseq,
+ idx: jit.insn_idx,
+ };
+
+ gen_branch(jit, ctx, cb, ocb, bid, &deeper, None, None, target0_gen_fn);
+ } else {
+ target0_gen_fn(cb, side_exit, None, BranchShape::Default);
+ }
+}
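+
+// How the chain guard above behaves, in rough terms: while the context's
+// chain_depth is below depth_limit, a failed guard branches to a fresh
+// version of the same instruction whose context records chain_depth + 1, so
+// the next compilation can re-specialize on whatever receiver class or value
+// type actually showed up. Once the limit is hit the site is treated as
+// megamorphic and the failure path becomes a plain jump to the side exit.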
+
+// up to 5 different classes, and embedded or not for each
+pub const GET_IVAR_MAX_DEPTH: i32 = 10;
+
+// hashes and arrays
+pub const OPT_AREF_MAX_CHAIN_DEPTH: i32 = 2;
+
+// up to 5 different classes
+pub const SEND_MAX_DEPTH: i32 = 5;
+
+// Codegen for setting an instance variable.
+// Preconditions:
+// - receiver is in REG0
+// - receiver has the same class as CLASS_OF(comptime_receiver)
+// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
+fn gen_set_ivar(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ recv: VALUE,
+ ivar_name: ID,
+) -> CodegenStatus {
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let val_opnd = ctx.stack_pop(1);
+ let recv_opnd = ctx.stack_pop(1);
+
+ let ivar_index: u32 = unsafe { rb_obj_ensure_iv_index_mapping(recv, ivar_name) };
+
+ // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
+ mov(cb, C_ARG_REGS[0], recv_opnd);
+ mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index.into()));
+ mov(cb, C_ARG_REGS[2], val_opnd);
+ call_ptr(cb, REG0, rb_vm_set_ivar_idx as *const u8);
+
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, RAX);
+
+ KeepCompiling
+}
+
+// Codegen for getting an instance variable.
+// Preconditions:
+// - receiver is in REG0
+// - receiver has the same class as CLASS_OF(comptime_receiver)
+// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
+fn gen_get_ivar(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ max_chain_depth: i32,
+ comptime_receiver: VALUE,
+ ivar_name: ID,
+ reg0_opnd: InsnOpnd,
+ side_exit: CodePtr,
+) -> CodegenStatus {
+ let comptime_val_klass = comptime_receiver.class_of();
+ let starting_context = ctx.clone(); // make a copy for use with jit_chain_guard
+
+ // Check if the comptime class uses a custom allocator
+ let custom_allocator = unsafe { rb_get_alloc_func(comptime_val_klass) };
+ let uses_custom_allocator = match custom_allocator {
+ Some(alloc_fun) => {
+ let allocate_instance = rb_class_allocate_instance as *const u8;
+ alloc_fun as *const u8 != allocate_instance
+ }
+ None => false,
+ };
+
+ // Check if the comptime receiver is a T_OBJECT
+ let receiver_t_object = unsafe { RB_TYPE_P(comptime_receiver, RUBY_T_OBJECT) };
+
+ // If the class uses the default allocator, instances should all be T_OBJECT
+ // NOTE: This assumes nobody changes the allocator of the class after allocation.
+ // Eventually, we can encode whether an object is T_OBJECT or not
+ // inside object shapes.
+ if !receiver_t_object || uses_custom_allocator {
+ // General case. Call rb_ivar_get().
+ // VALUE rb_ivar_get(VALUE obj, ID id)
+ add_comment(cb, "call rb_ivar_get()");
+
+ // The function could raise exceptions.
+ jit_prepare_routine_call(jit, ctx, cb, REG1);
+
+ mov(cb, C_ARG_REGS[0], REG0);
+ mov(cb, C_ARG_REGS[1], uimm_opnd(ivar_name));
+ call_ptr(cb, REG1, rb_ivar_get as *const u8);
+
+ if reg0_opnd != SelfOpnd {
+ ctx.stack_pop(1);
+ }
+ // Push the ivar on the stack
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, RAX);
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ /*
+ // FIXME:
+ // This check was added because of a failure in a test involving the
+ // Nokogiri Document class where we see a T_DATA that still has the default
+ // allocator.
+ // Aaron Patterson argues that this is a bug in the C extension, because
+ // people could call .allocate() on the class and still get a T_OBJECT
+ // For now I added an extra dynamic check that the receiver is T_OBJECT
+ // so we can safely pass all the tests in Shopify Core.
+ //
+ // Guard that the receiver is T_OBJECT
+ // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
+ add_comment(cb, "guard receiver is T_OBJECT");
+ mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
+ and(cb, REG1, imm_opnd(RUBY_T_MASK));
+ cmp(cb, REG1, imm_opnd(T_OBJECT));
+ jit_chain_guard(JCC_JNE, jit, &starting_context, cb, ocb, max_chain_depth, side_exit);
+ */
+
+ // FIXME: Mapping the index could fail when there are too many ivar names. If we're
+ // compiling for a branch stub, that can cause the exception to be thrown from the
+ // wrong PC.
+ let ivar_index =
+ unsafe { rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name) }.as_usize();
+
+ // Pop receiver if it's on the temp stack
+ if reg0_opnd != SelfOpnd {
+ ctx.stack_pop(1);
+ }
+
+ // Compile time self is embedded and the ivar index lands within the object
+ let test_result = unsafe { FL_TEST_RAW(comptime_receiver, VALUE(ROBJECT_EMBED)) != VALUE(0) };
+ if test_result && ivar_index < ROBJECT_EMBED_LEN_MAX {
+ // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
+
+ // Guard that self is embedded
+ // TODO: BT and JC would be shorter
+ add_comment(cb, "guard embedded getivar");
+ let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS);
+ test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64));
+ let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic);
+ jit_chain_guard(
+ JCC_JZ,
+ jit,
+ &starting_context,
+ cb,
+ ocb,
+ max_chain_depth,
+ side_exit,
+ );
+
+ // Load the variable
+ let offs = RUBY_OFFSET_ROBJECT_AS_ARY + (ivar_index * SIZEOF_VALUE) as i32;
+ let ivar_opnd = mem_opnd(64, REG0, offs);
+ mov(cb, REG1, ivar_opnd);
+
+ // Guard that the variable is not Qundef
+ cmp(cb, REG1, uimm_opnd(Qundef.into()));
+ mov(cb, REG0, uimm_opnd(Qnil.into()));
+ cmove(cb, REG1, REG0);
+
+ // Push the ivar on the stack
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, REG1);
+ } else {
+ // Compile time value is *not* embedded.
+
+ // Guard that value is *not* embedded
+ // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
+ add_comment(cb, "guard extended getivar");
+ let flags_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS);
+ test(cb, flags_opnd, uimm_opnd(ROBJECT_EMBED as u64));
+ let side_exit = counted_exit!(ocb, side_exit, getivar_megamorphic);
+ jit_chain_guard(
+ JCC_JNZ,
+ jit,
+ &starting_context,
+ cb,
+ ocb,
+ max_chain_depth,
+ side_exit,
+ );
+
+ // Check that the extended table is big enough
+ if ivar_index >= ROBJECT_EMBED_LEN_MAX + 1 {
+ // Check that the slot is inside the extended table (num_slots > index)
+ let num_slots = mem_opnd(32, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV);
+
+ cmp(cb, num_slots, uimm_opnd(ivar_index as u64));
+ jle_ptr(cb, counted_exit!(ocb, side_exit, getivar_idx_out_of_range));
+ }
+
+ // Get a pointer to the extended table
+ let tbl_opnd = mem_opnd(64, REG0, RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR);
+ mov(cb, REG0, tbl_opnd);
+
+ // Read the ivar from the extended table
+ let ivar_opnd = mem_opnd(64, REG0, (SIZEOF_VALUE * ivar_index) as i32);
+ mov(cb, REG0, ivar_opnd);
+
+ // Check that the ivar is not Qundef
+ cmp(cb, REG0, uimm_opnd(Qundef.into()));
+ mov(cb, REG1, uimm_opnd(Qnil.into()));
+ cmove(cb, REG0, REG1);
+
+ // Push the ivar on the stack
+ let out_opnd = ctx.stack_push(Type::Unknown);
+ mov(cb, out_opnd, REG0);
+ }
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_getinstancevariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let ivar_name = jit_get_arg(jit, 0).as_u64();
+
+ let comptime_val = jit_peek_at_self(jit);
+ let comptime_val_klass = comptime_val.class_of();
+
+ // Generate a side exit
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Guard that the receiver has the same class as the one from compile time.
+ mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF));
+
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ comptime_val_klass,
+ SelfOpnd,
+ comptime_val,
+ GET_IVAR_MAX_DEPTH,
+ side_exit,
+ );
+
+ gen_get_ivar(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ GET_IVAR_MAX_DEPTH,
+ comptime_val,
+ ivar_name,
+ SelfOpnd,
+ side_exit,
+ )
+}
+
+fn gen_setinstancevariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let id = jit_get_arg(jit, 0);
+ let ic = jit_get_arg(jit, 1).as_u64(); // type IVC
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let val_opnd = ctx.stack_pop(1);
+
+ // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
+ mov(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+ mov(cb, C_ARG_REGS[3], val_opnd);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(id.into()));
+ mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic as *const u8));
+ let iseq = VALUE(jit.iseq as usize);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], iseq);
+ call_ptr(cb, REG0, rb_vm_setinstancevariable as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_defined(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let op_type = jit_get_arg(jit, 0);
+ let obj = jit_get_arg(jit, 1);
+ let pushval = jit_get_arg(jit, 2);
+
+ // Save the PC and SP because the callee may allocate
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Get the operands from the stack
+ let v_opnd = ctx.stack_pop(1);
+
+ // Call vm_defined(ec, reg_cfp, op_type, obj, v)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(cb, C_ARG_REGS[1], REG_CFP);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(op_type.into()));
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], obj);
+ mov(cb, C_ARG_REGS[4], v_opnd);
+ call_ptr(cb, REG0, rb_vm_defined as *const u8);
+
+ // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
+ // val = pushval;
+ // }
+ jit_mov_gc_ptr(jit, cb, REG1, pushval);
+ cmp(cb, AL, imm_opnd(0));
+ mov(cb, RAX, uimm_opnd(Qnil.into()));
+ cmovnz(cb, RAX, REG1);
+
+ // Push the return value onto the stack
+ let out_type = if pushval.special_const_p() {
+ Type::UnknownImm
+ } else {
+ Type::Unknown
+ };
+ let stack_ret = ctx.stack_push(out_type);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_checktype(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let type_val = jit_get_arg(jit, 0).as_u32();
+
+ // Only three types are emitted by compile.c at the moment
+ if let RUBY_T_STRING | RUBY_T_ARRAY | RUBY_T_HASH = type_val {
+ let val_type = ctx.get_opnd_type(StackOpnd(0));
+ let val = ctx.stack_pop(1);
+
+ // Check if we know from type information
+ match (type_val, val_type) {
+ (RUBY_T_STRING, Type::String)
+ | (RUBY_T_ARRAY, Type::Array)
+ | (RUBY_T_HASH, Type::Hash) => {
+ // guaranteed type match
+ let stack_ret = ctx.stack_push(Type::True);
+ mov(cb, stack_ret, uimm_opnd(Qtrue.as_u64()));
+ return KeepCompiling;
+ }
+ _ if val_type.is_imm() || val_type.is_specific() => {
+ // guaranteed not to match T_STRING/T_ARRAY/T_HASH
+ let stack_ret = ctx.stack_push(Type::False);
+ mov(cb, stack_ret, uimm_opnd(Qfalse.as_u64()));
+ return KeepCompiling;
+ }
+ _ => (),
+ }
+
+ mov(cb, REG0, val);
+ mov(cb, REG1, uimm_opnd(Qfalse.as_u64()));
+
+ let ret = cb.new_label("ret".to_string());
+
+ if !val_type.is_heap() {
+ // if (SPECIAL_CONST_P(val)) {
+ // Return Qfalse via REG1 if not on heap
+ test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+ jnz_label(cb, ret);
+ cmp(cb, REG0, uimm_opnd(Qnil.as_u64()));
+ jbe_label(cb, ret);
+ }
+
+ // Check type on object
+ mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_FLAGS));
+ and(cb, REG0, uimm_opnd(RUBY_T_MASK as u64));
+ cmp(cb, REG0, uimm_opnd(type_val as u64));
+ mov(cb, REG0, uimm_opnd(Qtrue.as_u64()));
+ // REG1 contains Qfalse from above
+ cmove(cb, REG1, REG0);
+
+ cb.write_label(ret);
+ let stack_ret = ctx.stack_push(Type::UnknownImm);
+ mov(cb, stack_ret, REG1);
+ cb.link_labels();
+
+ KeepCompiling
+ } else {
+ CantCompile
+ }
+}
+
+fn gen_concatstrings(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let n = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we are allocating
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * n.as_isize()));
+
+ // call rb_str_concat_literals(long n, const VALUE *strings);
+ mov(cb, C_ARG_REGS[0], imm_opnd(n.into()));
+ lea(cb, C_ARG_REGS[1], values_ptr);
+ call_ptr(cb, REG0, rb_str_concat_literals as *const u8);
+
+ ctx.stack_pop(n.as_usize());
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn guard_two_fixnums(ctx: &mut Context, cb: &mut CodeBlock, side_exit: CodePtr) {
+ // Get the stack operand types
+ let arg1_type = ctx.get_opnd_type(StackOpnd(0));
+ let arg0_type = ctx.get_opnd_type(StackOpnd(1));
+
+ if arg0_type.is_heap() || arg1_type.is_heap() {
+ jmp_ptr(cb, side_exit);
+ return;
+ }
+
+ if arg0_type != Type::Fixnum && arg0_type.is_specific() {
+ jmp_ptr(cb, side_exit);
+ return;
+ }
+
+ if arg1_type != Type::Fixnum && arg1_type.is_specific() {
+ jmp_ptr(cb, side_exit);
+ return;
+ }
+
+ assert!(!arg0_type.is_heap());
+ assert!(!arg1_type.is_heap());
+ assert!(arg0_type == Type::Fixnum || arg0_type.is_unknown());
+ assert!(arg1_type == Type::Fixnum || arg1_type.is_unknown());
+
+ // Get stack operands without popping them
+ let arg1 = ctx.stack_opnd(0);
+ let arg0 = ctx.stack_opnd(1);
+
+ // If not fixnums, fall back
+ if arg0_type != Type::Fixnum {
+ add_comment(cb, "guard arg0 fixnum");
+ test(cb, arg0, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
+ jz_ptr(cb, side_exit);
+ }
+ if arg1_type != Type::Fixnum {
+ add_comment(cb, "guard arg1 fixnum");
+ test(cb, arg1, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
+ jz_ptr(cb, side_exit);
+ }
+
+ // Set stack types in context
+ ctx.upgrade_opnd_type(StackOpnd(0), Type::Fixnum);
+ ctx.upgrade_opnd_type(StackOpnd(1), Type::Fixnum);
+}
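+
+// The guard above leans on context types to skip work: if either operand is
+// already known to be something other than a Fixnum we jump straight to the
+// side exit, and if it is already known to be a Fixnum no runtime check is
+// emitted at all. The runtime check itself is just TEST against
+// RUBY_FIXNUM_FLAG, since fixnums set the lowest bit (e.g. 3 is encoded as
+// 0b111) while heap object pointers are aligned and have it clear.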
+
+// Conditional move operation used by comparison operators
+type CmovFn = fn(cb: &mut CodeBlock, opnd0: X86Opnd, opnd1: X86Opnd) -> ();
+
+fn gen_fixnum_cmp(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ cmov_op: CmovFn,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize based on a runtime receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_LT) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Compare the arguments
+ xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
+ mov(cb, REG1, arg0);
+ cmp(cb, REG1, arg1);
+ mov(cb, REG1, uimm_opnd(Qtrue.into()));
+ cmov_op(cb, REG0, REG1);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Unknown);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_lt(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovl)
+}
+
+fn gen_opt_le(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovle)
+}
+
+fn gen_opt_ge(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovge)
+}
+
+fn gen_opt_gt(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ gen_fixnum_cmp(jit, ctx, cb, ocb, cmovg)
+}
+
+// Implements specialized equality for either two fixnum or two strings
+// Returns true if code was generated, otherwise false
+fn gen_equality_specialized(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ side_exit: CodePtr,
+) -> bool {
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ let a_opnd = ctx.stack_opnd(1);
+ let b_opnd = ctx.stack_opnd(0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_EQ) {
+ // if overridden, emit the generic version
+ return false;
+ }
+
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ mov(cb, REG0, a_opnd);
+ cmp(cb, REG0, b_opnd);
+
+ mov(cb, REG0, imm_opnd(Qfalse.into()));
+ mov(cb, REG1, imm_opnd(Qtrue.into()));
+ cmove(cb, REG0, REG1);
+
+ // Push the output on the stack
+ ctx.stack_pop(2);
+ let dst = ctx.stack_push(Type::UnknownImm);
+ mov(cb, dst, REG0);
+
+ true
+ } else if unsafe { comptime_a.class_of() == rb_cString && comptime_b.class_of() == rb_cString }
+ {
+ if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_EQ) {
+ // if overridden, emit the generic version
+ return false;
+ }
+
+ // Load a and b in preparation for call later
+ mov(cb, C_ARG_REGS[0], a_opnd);
+ mov(cb, C_ARG_REGS[1], b_opnd);
+
+ // Guard that a is a String
+ mov(cb, REG0, C_ARG_REGS[0]);
+ unsafe {
+ // Use of rb_cString here requires an unsafe block
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ rb_cString,
+ StackOpnd(1),
+ comptime_a,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+ }
+
+ let ret = cb.new_label("ret".to_string());
+
+ // If they are equal by identity, return true
+ cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
+ mov(cb, RAX, imm_opnd(Qtrue.into()));
+ je_label(cb, ret);
+
+ // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
+ if ctx.get_opnd_type(StackOpnd(0)) != Type::String {
+ mov(cb, REG0, C_ARG_REGS[1]);
+ // Note: any T_STRING is valid here, but we check for a ::String for simplicity
+ // To pass a mutable static variable (rb_cString) requires an unsafe block
+ unsafe {
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ rb_cString,
+ StackOpnd(0),
+ comptime_b,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+ }
+ }
+
+ // Call rb_str_eql_internal(a, b)
+ call_ptr(cb, REG0, rb_str_eql_internal as *const u8);
+
+ // Push the output on the stack
+ cb.write_label(ret);
+ ctx.stack_pop(2);
+ let dst = ctx.stack_push(Type::UnknownImm);
+ mov(cb, dst, RAX);
+ cb.link_labels();
+
+ true
+ } else {
+ false
+ }
+}
+
+fn gen_opt_eq(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize based on a runtime receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if gen_equality_specialized(jit, ctx, cb, ocb, side_exit) {
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_neq(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // opt_neq is passed two rb_call_data as arguments:
+ // first for ==, second for !=
+ let cd = jit_get_arg(jit, 1).as_ptr();
+ return gen_send_general(jit, ctx, cb, ocb, cd, None);
+}
+
+fn gen_opt_aref(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr();
+ let argc = unsafe { vm_ci_argc((*cd).ci) };
+
+ // Only JIT one-arg calls like `ary[6]`
+ if argc != 1 {
+ gen_counter_incr!(cb, oaref_argc_not_one);
+ return CantCompile;
+ }
+
+ // Defer compilation so we can specialize based on a runtime receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ // Remember the context on entry for adding guard chains
+ let starting_context = ctx.clone();
+
+ // Specialize based on compile-time values
+ let comptime_idx = jit_peek_at_stack(jit, ctx, 0);
+ let comptime_recv = jit_peek_at_stack(jit, ctx, 1);
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_idx.fixnum_p() {
+ if !assume_bop_not_redefined(jit, ocb, ARRAY_REDEFINED_OP_FLAG, BOP_AREF) {
+ return CantCompile;
+ }
+
+ // Pop the stack operands
+ let idx_opnd = ctx.stack_pop(1);
+ let recv_opnd = ctx.stack_pop(1);
+ mov(cb, REG0, recv_opnd);
+
+ // if (SPECIAL_CONST_P(recv)) {
+ // Bail if receiver is not a heap object
+ test(cb, REG0, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+ jnz_ptr(cb, side_exit);
+ cmp(cb, REG0, uimm_opnd(Qfalse.into()));
+ je_ptr(cb, side_exit);
+ cmp(cb, REG0, uimm_opnd(Qnil.into()));
+ je_ptr(cb, side_exit);
+
+ // Bail if recv has a class other than ::Array.
+ // BOP_AREF check above is only good for ::Array.
+ mov(cb, REG1, mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS));
+ mov(cb, REG0, uimm_opnd(unsafe { rb_cArray }.into()));
+ cmp(cb, REG0, REG1);
+ jit_chain_guard(
+ JCC_JNE,
+ jit,
+ &starting_context,
+ cb,
+ ocb,
+ OPT_AREF_MAX_CHAIN_DEPTH,
+ side_exit,
+ );
+
+ // Bail if idx is not a FIXNUM
+ mov(cb, REG1, idx_opnd);
+ test(cb, REG1, uimm_opnd(RUBY_FIXNUM_FLAG as u64));
+ jz_ptr(cb, counted_exit!(ocb, side_exit, oaref_arg_not_fixnum));
+
+ // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
+ // It never raises or allocates, so we don't need to write to cfp->pc.
+ {
+ mov(cb, RDI, recv_opnd);
+ sar(cb, REG1, uimm_opnd(1)); // Convert fixnum to int
+ mov(cb, RSI, REG1);
+ call_ptr(cb, REG0, rb_ary_entry_internal as *const u8);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+ }
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ } else if comptime_recv.class_of() == unsafe { rb_cHash } {
+ if !assume_bop_not_redefined(jit, ocb, HASH_REDEFINED_OP_FLAG, BOP_AREF) {
+ return CantCompile;
+ }
+
+ let key_opnd = ctx.stack_opnd(0);
+ let recv_opnd = ctx.stack_opnd(1);
+
+ // Guard that the receiver is a hash
+ mov(cb, REG0, recv_opnd);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cHash },
+ StackOpnd(1),
+ comptime_recv,
+ OPT_AREF_MAX_CHAIN_DEPTH,
+ side_exit,
+ );
+
+ // Setup arguments for rb_hash_aref().
+ mov(cb, C_ARG_REGS[0], REG0);
+ mov(cb, C_ARG_REGS[1], key_opnd);
+
+ // Prepare to call rb_hash_aref(). It might call #hash on the key.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ call_ptr(cb, REG0, rb_hash_aref as *const u8);
+
+ // Pop the key and the receiver
+ ctx.stack_pop(2);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Jump to next instruction. This allows guard chains to share the same successor.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+ } else {
+ // General case. Call the [] method.
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_aset(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_recv = jit_peek_at_stack(jit, ctx, 2);
+ let comptime_key = jit_peek_at_stack(jit, ctx, 1);
+
+ // Get the operands from the stack
+ let recv = ctx.stack_opnd(2);
+ let key = ctx.stack_opnd(1);
+ let val = ctx.stack_opnd(0);
+
+ if comptime_recv.class_of() == unsafe { rb_cArray } && comptime_key.fixnum_p() {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Guard receiver is an Array
+ mov(cb, REG0, recv);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cArray },
+ StackOpnd(2),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+
+ // Guard key is a fixnum
+ mov(cb, REG0, key);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cInteger },
+ StackOpnd(1),
+ comptime_key,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+
+ // Call rb_ary_store
+ mov(cb, C_ARG_REGS[0], recv);
+ mov(cb, C_ARG_REGS[1], key);
+ sar(cb, C_ARG_REGS[1], uimm_opnd(1)); // FIX2LONG(key)
+ mov(cb, C_ARG_REGS[2], val);
+
+ // We might allocate or raise
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ call_ptr(cb, REG0, rb_ary_store as *const u8);
+
+ // rb_ary_store returns void
+ // stored value should still be on stack
+ mov(cb, REG0, ctx.stack_opnd(0));
+
+ // Push the return value onto the stack
+ ctx.stack_pop(3);
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, REG0);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ } else if comptime_recv.class_of() == unsafe { rb_cHash } {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Guard receiver is a Hash
+ mov(cb, REG0, recv);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ unsafe { rb_cHash },
+ StackOpnd(2),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+
+ // Call rb_hash_aset
+ mov(cb, C_ARG_REGS[0], recv);
+ mov(cb, C_ARG_REGS[1], key);
+ mov(cb, C_ARG_REGS[2], val);
+
+ // We might allocate or raise
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ call_ptr(cb, REG0, rb_hash_aset as *const u8);
+
+ // Push the return value onto the stack
+ ctx.stack_pop(3);
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+ } else {
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_and(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_AND) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Do the bitwise and arg0 & arg1
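+ // Both operands carry the fixnum tag bit (lowest bit set), and AND
+ // preserves it, so the result is already a valid tagged fixnum and
+ // there is no overflow to worry about.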
+ mov(cb, REG0, arg0);
+ and(cb, REG0, arg1);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_or(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_OR) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Do the bitwise or arg0 | arg1
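+ // As with opt_and, the tag bit stays set under OR, so no re-tagging
+ // or overflow check is needed.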
+ mov(cb, REG0, arg0);
+ or(cb, REG0, arg1);
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_minus(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Defer compilation so we can specialize on a runtime `self`
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_a = jit_peek_at_stack(jit, ctx, 1);
+ let comptime_b = jit_peek_at_stack(jit, ctx, 0);
+
+ if comptime_a.fixnum_p() && comptime_b.fixnum_p() {
+ // Create a side-exit to fall back to the interpreter
+ // Note: we generate the side-exit before popping operands from the stack
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS) {
+ return CantCompile;
+ }
+
+ // Check that both operands are fixnums
+ guard_two_fixnums(ctx, cb, side_exit);
+
+ // Get the operands and destination from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Subtract arg0 - arg1 and test for overflow
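+ // With tagged fixnums (a value n is stored as 2n+1), (2a+1) - (2b+1)
+ // equals 2(a-b), so we add 1 afterwards to restore the tag bit. The jo
+ // below side-exits if the result overflows the fixnum range.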
+ mov(cb, REG0, arg0);
+ sub(cb, REG0, arg1);
+ jo_ptr(cb, side_exit);
+ add(cb, REG0, imm_opnd(1));
+
+ // Push the output on the stack
+ let dst = ctx.stack_push(Type::Fixnum);
+ mov(cb, dst, REG0);
+
+ KeepCompiling
+ } else {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+ }
+}
+
+fn gen_opt_mult(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_div(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_mod(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Save the PC and SP because the callee may allocate bignums
+ // Note that this modifies REG_SP, which is why we do it first
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Get the operands from the stack
+ let arg1 = ctx.stack_pop(1);
+ let arg0 = ctx.stack_pop(1);
+
+ // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
+ mov(cb, C_ARG_REGS[0], arg0);
+ mov(cb, C_ARG_REGS[1], arg1);
+ call_ptr(cb, REG0, rb_vm_opt_mod as *const u8);
+
+ // If val == Qundef, bail to do a method call
+ cmp(cb, RAX, imm_opnd(Qundef.as_i64()));
+ je_ptr(cb, side_exit);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_opt_ltlt(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_nil_p(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_empty_p(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Delegate to send, call the method on the recv
+ gen_opt_send_without_block(jit, ctx, cb, ocb)
+}
+
+fn gen_opt_str_freeze(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_FREEZE) {
+ return CantCompile;
+ }
+
+ let str = jit_get_arg(jit, 0);
+ jit_mov_gc_ptr(jit, cb, REG0, str);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, REG0);
+
+ KeepCompiling
+}
+
+fn gen_opt_str_uminus(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !assume_bop_not_redefined(jit, ocb, STRING_REDEFINED_OP_FLAG, BOP_UMINUS) {
+ return CantCompile;
+ }
+
+ let str = jit_get_arg(jit, 0);
+ jit_mov_gc_ptr(jit, cb, REG0, str);
+
+ // Push the return value onto the stack
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, REG0);
+
+ KeepCompiling
+}
+
+fn gen_opt_not(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_size(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_length(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_regexpmatch2(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ return gen_opt_send_without_block(jit, ctx, cb, ocb);
+}
+
+fn gen_opt_case_dispatch(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ _cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Normally this instruction would look up the key in a hash and jump to an
+ // offset based on that.
+ // Instead we can take the fallback case and continue with the next
+ // instruction.
+ // We'd hope that our jitted code will be sufficiently fast without the
+ // hash lookup, at least for small hashes, but it's worth revisiting this
+ // assumption in the future.
+
+ ctx.stack_pop(1);
+
+ KeepCompiling // continue with the next instruction
+}
+
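+// Emit the branch for branchif depending on which target, if any, is the
+// fallthrough block: Next0 means target0 immediately follows the branch,
+// Next1 means target1 does, and Default means neither does, so both a
+// conditional and an unconditional jump are emitted.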
+fn gen_branchif_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ assert!(target1.is_some());
+ match shape {
+ BranchShape::Next0 => {
+ jz_ptr(cb, target1.unwrap());
+ }
+ BranchShape::Next1 => {
+ jnz_ptr(cb, target0);
+ }
+ BranchShape::Default => {
+ jnz_ptr(cb, target0);
+ jmp_ptr(cb, target1.unwrap());
+ }
+ }
+}
+
+fn gen_branchif(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Test if any bit (outside of the Qnil bit) is on
+ // RUBY_Qfalse /* ...0000 0000 */
+ // RUBY_Qnil /* ...0000 1000 */
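+ // Qfalse and Qnil are the only falsey values and both become zero under
+ // this mask, while every truthy value keeps at least one bit set.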
+ let val_opnd = ctx.stack_pop(1);
+ test(cb, val_opnd, imm_opnd(!Qnil.as_i64()));
+
+ // Get the branch target instruction offsets
+ let next_idx = jit_next_insn_idx(jit);
+ let jump_idx = (next_idx as i32) + jump_offset;
+ let next_block = BlockId {
+ iseq: jit.iseq,
+ idx: next_idx,
+ };
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx as u32,
+ };
+
+ // Generate the branch instructions
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ jump_block,
+ ctx,
+ Some(next_block),
+ Some(ctx),
+ gen_branchif_branch,
+ );
+
+ EndBlock
+}
+
+fn gen_branchunless_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 => jnz_ptr(cb, target1.unwrap()),
+ BranchShape::Next1 => jz_ptr(cb, target0),
+ BranchShape::Default => {
+ jz_ptr(cb, target0);
+ jmp_ptr(cb, target1.unwrap());
+ }
+ }
+}
+
+fn gen_branchunless(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Test if any bit (outside of the Qnil bit) is on
+ // RUBY_Qfalse /* ...0000 0000 */
+ // RUBY_Qnil /* ...0000 1000 */
+ let val_opnd = ctx.stack_pop(1);
+ test(cb, val_opnd, imm_opnd(!Qnil.as_i64()));
+
+ // Get the branch target instruction offsets
+ let next_idx = jit_next_insn_idx(jit) as i32;
+ let jump_idx = next_idx + jump_offset;
+ let next_block = BlockId {
+ iseq: jit.iseq,
+ idx: next_idx.try_into().unwrap(),
+ };
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx.try_into().unwrap(),
+ };
+
+ // Generate the branch instructions
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ jump_block,
+ ctx,
+ Some(next_block),
+ Some(ctx),
+ gen_branchunless_branch,
+ );
+
+ EndBlock
+}
+
+fn gen_branchnil_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 => jne_ptr(cb, target1.unwrap()),
+ BranchShape::Next1 => je_ptr(cb, target0),
+ BranchShape::Default => {
+ je_ptr(cb, target0);
+ jmp_ptr(cb, target1.unwrap());
+ }
+ }
+}
+
+fn gen_branchnil(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Test if the value is Qnil
+ // RUBY_Qnil /* ...0000 1000 */
+ let val_opnd = ctx.stack_pop(1);
+ cmp(cb, val_opnd, uimm_opnd(Qnil.into()));
+
+ // Get the branch target instruction offsets
+ let next_idx = jit_next_insn_idx(jit) as i32;
+ let jump_idx = next_idx + jump_offset;
+ let next_block = BlockId {
+ iseq: jit.iseq,
+ idx: next_idx.try_into().unwrap(),
+ };
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx.try_into().unwrap(),
+ };
+
+ // Generate the branch instructions
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ jump_block,
+ ctx,
+ Some(next_block),
+ Some(ctx),
+ gen_branchnil_branch,
+ );
+
+ EndBlock
+}
+
+fn gen_jump(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0).as_i32();
+
+ // Check for interrupts, but only on backward branches that may create loops
+ if jump_offset < 0 {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+ gen_check_ints(cb, side_exit);
+ }
+
+ // Get the branch target instruction offsets
+ let jump_idx = (jit_next_insn_idx(jit) as i32) + jump_offset;
+ let jump_block = BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx as u32,
+ };
+
+ // Generate the jump instruction
+ gen_direct_jump(jit, ctx, jump_block, cb);
+
+ EndBlock
+}
+
+/// Guard that self or a stack operand has the same class as `known_klass`, using
+/// `sample_instance` to speculate about the shape of the runtime value.
+/// FIXNUM and on-heap integers are treated as if they have distinct classes, and
+/// the guard generated for one will fail for the other.
+///
+/// Recompile as a contingency if possible, or take a side exit as a last resort.
+fn jit_guard_known_klass(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ known_klass: VALUE,
+ insn_opnd: InsnOpnd,
+ sample_instance: VALUE,
+ max_chain_depth: i32,
+ side_exit: CodePtr,
+) -> bool {
+ let val_type = ctx.get_opnd_type(insn_opnd);
+
+ if unsafe { known_klass == rb_cNilClass } {
+ assert!(!val_type.is_heap());
+ if val_type != Type::Nil {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is nil");
+ cmp(cb, REG0, imm_opnd(Qnil.into()));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::Nil);
+ }
+ } else if unsafe { known_klass == rb_cTrueClass } {
+ assert!(!val_type.is_heap());
+ if val_type != Type::True {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is true");
+ cmp(cb, REG0, imm_opnd(Qtrue.into()));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::True);
+ }
+ } else if unsafe { known_klass == rb_cFalseClass } {
+ assert!(!val_type.is_heap());
+ if val_type != Type::False {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is false");
+ assert!(Qfalse.as_i32() == 0);
+ test(cb, REG0, REG0);
+ jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::False);
+ }
+ } else if unsafe { known_klass == rb_cInteger } && sample_instance.fixnum_p() {
+ assert!(!val_type.is_heap());
+ // We will guard fixnum and bignum as though they were separate classes
+ // BIGNUM can be handled by the general else case below
+ if val_type != Type::Fixnum || !val_type.is_imm() {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is fixnum");
+ test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG as i64));
+ jit_chain_guard(JCC_JZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ ctx.upgrade_opnd_type(insn_opnd, Type::Fixnum);
+ }
+ } else if unsafe { known_klass == rb_cSymbol } && sample_instance.static_sym_p() {
+ assert!(!val_type.is_heap());
+ // We will guard STATIC vs DYNAMIC as though they were separate classes
+ // DYNAMIC symbols can be handled by the general else case below
+ if val_type != Type::ImmSymbol || !val_type.is_imm() {
+ assert!(val_type.is_unknown());
+
+ add_comment(cb, "guard object is static symbol");
+ assert!(RUBY_SPECIAL_SHIFT == 8);
+ cmp(cb, REG0_8, uimm_opnd(RUBY_SYMBOL_FLAG as u64));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ ctx.upgrade_opnd_type(insn_opnd, Type::ImmSymbol);
+ }
+ } else if unsafe { known_klass == rb_cFloat } && sample_instance.flonum_p() {
+ assert!(!val_type.is_heap());
+ if val_type != Type::Flonum || !val_type.is_imm() {
+ assert!(val_type.is_unknown());
+
+ // We will guard flonum vs heap float as though they were separate classes
+ add_comment(cb, "guard object is flonum");
+ mov(cb, REG1, REG0);
+ and(cb, REG1, uimm_opnd(RUBY_FLONUM_MASK as u64));
+ cmp(cb, REG1, uimm_opnd(RUBY_FLONUM_FLAG as u64));
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ ctx.upgrade_opnd_type(insn_opnd, Type::Flonum);
+ }
+ } else if unsafe {
+ FL_TEST(known_klass, VALUE(RUBY_FL_SINGLETON)) != VALUE(0)
+ && sample_instance == rb_attr_get(known_klass, id__attached__ as ID)
+ } {
+ // Singleton classes are attached to one specific object, so we can
+ // avoid one memory access (and potentially the is_heap check) by
+ // looking for the expected object directly.
+ // Note that in case the sample instance has a singleton class that
+ // doesn't attach to the sample instance, it means the sample instance
+ // has an empty singleton class that hasn't been materialized yet. In
+ // this case, comparing against the sample instance doesn't guarantee
+ // that its singleton class is empty, so we can't avoid the memory
+ // access. As an example, `Object.new.singleton_class` is an object in
+ // this situation.
+ add_comment(cb, "guard known object with singleton class");
+ // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
+ jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
+ cmp(cb, REG0, REG1);
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ } else {
+ assert!(!val_type.is_imm());
+
+ // Check that the receiver is a heap object
+ // Note: if we get here, the class doesn't have immediate instances.
+ if !val_type.is_heap() {
+ add_comment(cb, "guard not immediate");
+ assert!(Qfalse.as_i32() < Qnil.as_i32());
+ test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK as i64));
+ jit_chain_guard(JCC_JNZ, jit, ctx, cb, ocb, max_chain_depth, side_exit);
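+ // Anything that passes the immediate-mask test and is still <= Qnil can
+ // only be Qfalse or Qnil, so a single unsigned compare rejects both
+ // non-heap values at once.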
+ cmp(cb, REG0, imm_opnd(Qnil.into()));
+ jit_chain_guard(JCC_JBE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+
+ ctx.upgrade_opnd_type(insn_opnd, Type::UnknownHeap);
+ }
+
+ let klass_opnd = mem_opnd(64, REG0, RUBY_OFFSET_RBASIC_KLASS);
+
+ // Bail if receiver class is different from known_klass
+ // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
+ add_comment(cb, "guard known class");
+ jit_mov_gc_ptr(jit, cb, REG1, known_klass);
+ cmp(cb, klass_opnd, REG1);
+ jit_chain_guard(JCC_JNE, jit, ctx, cb, ocb, max_chain_depth, side_exit);
+ }
+
+ true
+}
+
+// Generate ancestry guard for protected callee.
+// Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
+fn jit_protected_callee_ancestry_guard(
+ jit: &mut JITState,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ cme: *const rb_callable_method_entry_t,
+ side_exit: CodePtr,
+) {
+ // See vm_call_method().
+ mov(
+ cb,
+ C_ARG_REGS[0],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+ let def_class = unsafe { (*cme).defined_class };
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], def_class);
+ // Note: PC isn't written to current control frame as rb_obj_is_kind_of() shouldn't raise.
+ // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
+
+ call_ptr(cb, REG0, rb_obj_is_kind_of as *mut u8);
+ test(cb, RAX, RAX);
+ jz_ptr(
+ cb,
+ counted_exit!(ocb, side_exit, send_se_protected_check_failed),
+ );
+}
+
+// Codegen for rb_obj_not().
+// Note, caller is responsible for generating all the right guards, including
+// arity guards.
+fn jit_rb_obj_not(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ let recv_opnd = ctx.get_opnd_type(StackOpnd(0));
+
+ if recv_opnd == Type::Nil || recv_opnd == Type::False {
+ add_comment(cb, "rb_obj_not(nil_or_false)");
+ ctx.stack_pop(1);
+ let out_opnd = ctx.stack_push(Type::True);
+ mov(cb, out_opnd, uimm_opnd(Qtrue.into()));
+ } else if recv_opnd.is_heap() || recv_opnd.is_specific() {
+ // Note: recv_opnd != Type::Nil && recv_opnd != Type::False.
+ add_comment(cb, "rb_obj_not(truthy)");
+ ctx.stack_pop(1);
+ let out_opnd = ctx.stack_push(Type::False);
+ mov(cb, out_opnd, uimm_opnd(Qfalse.into()));
+ } else {
+ // jit_guard_known_klass() already ran on the receiver, which should
+ // have deduced the type of the receiver. This case should be
+ // rare if not unreachable.
+ return false;
+ }
+ true
+}
+
+// Codegen for rb_true()
+fn jit_rb_true(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "nil? == true");
+ ctx.stack_pop(1);
+ let stack_ret = ctx.stack_push(Type::True);
+ mov(cb, stack_ret, uimm_opnd(Qtrue.into()));
+ true
+}
+
+// Codegen for rb_false()
+fn jit_rb_false(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "nil? == false");
+ ctx.stack_pop(1);
+ let stack_ret = ctx.stack_push(Type::False);
+ mov(cb, stack_ret, uimm_opnd(Qfalse.into()));
+ true
+}
+
+// Codegen for rb_obj_equal()
+// object identity comparison
+fn jit_rb_obj_equal(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "equal?");
+ let obj1 = ctx.stack_pop(1);
+ let obj2 = ctx.stack_pop(1);
+
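+ // Branchless select: compare the operands, load Qtrue, then overwrite it
+ // with Qfalse via cmovne when they are not equal (mov does not clobber
+ // the flags set by cmp).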
+ mov(cb, REG0, obj1);
+ cmp(cb, REG0, obj2);
+ mov(cb, REG0, uimm_opnd(Qtrue.into()));
+ mov(cb, REG1, uimm_opnd(Qfalse.into()));
+ cmovne(cb, REG0, REG1);
+
+ let stack_ret = ctx.stack_push(Type::UnknownImm);
+ mov(cb, stack_ret, REG0);
+ true
+}
+
+fn jit_rb_str_bytesize(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "String#bytesize");
+
+ let recv = ctx.stack_pop(1);
+ mov(cb, C_ARG_REGS[0], recv);
+ call_ptr(cb, REG0, rb_str_bytesize as *const u8);
+
+ let out_opnd = ctx.stack_push(Type::Fixnum);
+ mov(cb, out_opnd, RAX);
+
+ true
+}
+
+// Codegen for rb_str_to_s()
+// When String#to_s is called on a String instance, the method returns self and
+// most of the overhead comes from setting up the method call. We observed that
+// this situation happens a lot in some workloads.
+fn jit_rb_str_to_s(
+ _jit: &mut JITState,
+ _ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ known_recv_class: *const VALUE,
+) -> bool {
+ if !known_recv_class.is_null() && unsafe { *known_recv_class == rb_cString } {
+ add_comment(cb, "to_s on plain string");
+ // The method returns the receiver, which is already on the stack.
+ // No stack movement.
+ return true;
+ }
+ false
+}
+
+fn jit_thread_s_current(
+ _jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+ _ci: *const rb_callinfo,
+ _cme: *const rb_callable_method_entry_t,
+ _block: Option<IseqPtr>,
+ _argc: i32,
+ _known_recv_class: *const VALUE,
+) -> bool {
+ add_comment(cb, "Thread.current");
+ ctx.stack_pop(1);
+
+ // ec->thread_ptr
+ let ec_thread_ptr = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_THREAD_PTR);
+ mov(cb, REG0, ec_thread_ptr);
+
+ // thread->self
+ let thread_self = mem_opnd(64, REG0, RUBY_OFFSET_THREAD_SELF);
+ mov(cb, REG0, thread_self);
+
+ let stack_ret = ctx.stack_push(Type::UnknownHeap);
+ mov(cb, stack_ret, REG0);
+ true
+}
+
+// Check if we know how to codegen for a particular cfunc method
+fn lookup_cfunc_codegen(def: *const rb_method_definition_t) -> Option<MethodGenFn> {
+ let method_serial = unsafe { get_def_method_serial(def) };
+
+ CodegenGlobals::look_up_codegen_method(method_serial)
+}
+
+// Is anyone listening for :c_call and :c_return event currently?
+fn c_method_tracing_currently_enabled(jit: &JITState) -> bool {
+ // Defer to C implementation in yjit.c
+ unsafe {
+ rb_c_method_tracing_currently_enabled(jit.ec.unwrap() as *mut rb_execution_context_struct)
+ }
+}
+
+// Similar to args_kw_argv_to_hash. It is called at runtime from within the
+// generated assembly to build a Ruby hash of the passed keyword arguments. The
+// keys are the Symbol objects associated with the keywords and the values are
+// the actual values. In the representation, both keys and values are VALUEs.
+unsafe extern "C" fn build_kwhash(ci: *const rb_callinfo, sp: *const VALUE) -> VALUE {
+ let kw_arg = vm_ci_kwarg(ci);
+ let kw_len: usize = get_cikw_keyword_len(kw_arg).try_into().unwrap();
+ let hash = rb_hash_new_with_size(kw_len as u64);
+
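+ // The keyword values are the topmost kw_len entries below sp, laid out in
+ // the same order as the keys recorded in the callinfo.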
+ for kwarg_idx in 0..kw_len {
+ let key = get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap());
+ let val = sp.sub(kw_len).add(kwarg_idx).read();
+ rb_hash_aset(hash, key, val);
+ }
+ hash
+}
+
+fn gen_send_cfunc(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<IseqPtr>,
+ argc: i32,
+ recv_known_klass: *const VALUE,
+) -> CodegenStatus {
+ let cfunc = unsafe { get_cme_def_body_cfunc(cme) };
+ let cfunc_argc = unsafe { get_mct_argc(cfunc) };
+
+ // If the function expects a Ruby array of arguments (cfunc_argc == -2)
+ if cfunc_argc < 0 && cfunc_argc != -1 {
+ gen_counter_incr!(cb, send_cfunc_ruby_array_varg);
+ return CantCompile;
+ }
+
+ let kw_arg = unsafe { vm_ci_kwarg(ci) };
+ let kw_arg_num = if kw_arg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(kw_arg) }
+ };
+
+ // Number of args which will be passed through to the callee
+ // This is adjusted by the kwargs being combined into a hash.
+ let passed_argc = if kw_arg.is_null() {
+ argc
+ } else {
+ argc - kw_arg_num + 1
+ };
+
+ // If the argument count doesn't match
+ if cfunc_argc >= 0 && cfunc_argc != passed_argc {
+ gen_counter_incr!(cb, send_cfunc_argc_mismatch);
+ return CantCompile;
+ }
+
+ // Don't JIT functions that need C stack arguments for now
+ if cfunc_argc >= 0 && passed_argc + 1 > (C_ARG_REGS.len() as i32) {
+ gen_counter_incr!(cb, send_cfunc_toomany_args);
+ return CantCompile;
+ }
+
+ if c_method_tracing_currently_enabled(jit) {
+ // Don't JIT if tracing c_call or c_return
+ gen_counter_incr!(cb, send_cfunc_tracing);
+ return CantCompile;
+ }
+
+ // Delegate to codegen for C methods if we have it.
+ if kw_arg.is_null() {
+ let codegen_p = lookup_cfunc_codegen(unsafe { (*cme).def });
+ if codegen_p.is_some() {
+ let known_cfunc_codegen = codegen_p.unwrap();
+ if known_cfunc_codegen(jit, ctx, cb, ocb, ci, cme, block, argc, recv_known_klass) {
+ // cfunc codegen generated code. Terminate the block so
+ // there aren't multiple calls in the same block.
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+ }
+ }
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Check for interrupts
+ gen_check_ints(cb, side_exit);
+
+ // Stack overflow check
+ // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
+ // REG_CFP <= REG_SP + 4 * SIZEOF_VALUE + sizeof(rb_control_frame_t)
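+ // The control frame size is doubled here because vm_push_frame() checks
+ // the limit against an already-decremented cfp.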
+ add_comment(cb, "stack overflow check");
+ lea(
+ cb,
+ REG0,
+ ctx.sp_opnd((SIZEOF_VALUE * 4 + 2 * RUBY_SIZEOF_CONTROL_FRAME) as isize),
+ );
+ cmp(cb, REG_CFP, REG0);
+ jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow));
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+
+ // Store incremented PC into current control frame in case callee raises.
+ jit_save_pc(jit, cb, REG0);
+
+ if let Some(block_iseq) = block {
+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+ // with cfp->block_code.
+ jit_mov_gc_ptr(jit, cb, REG0, VALUE(block_iseq as usize));
+ let block_code_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE);
+ mov(cb, block_code_opnd, REG0);
+ }
+
+ // Increment the stack pointer by 3 (in the callee)
+ // sp += 3
+ lea(cb, REG0, ctx.sp_opnd((SIZEOF_VALUE as isize) * 3));
+
+ // Write method entry at sp[-3]
+ // sp[-3] = me;
+ // Put compile time cme into REG1. It's assumed to be valid because we are notified when
+ // any cme we depend on becomes outdated. See yjit_method_lookup_change().
+ jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize));
+ mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
+
+ // Write block handler at sp[-2]
+ // sp[-2] = block_handler;
+ if let Some(_block_iseq) = block {
+ // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
+ let cfp_self = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF);
+ lea(cb, REG1, cfp_self);
+ or(cb, REG1, imm_opnd(1));
+ mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
+ } else {
+ let dst_opnd = mem_opnd(64, REG0, 8 * -2);
+ mov(cb, dst_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into()));
+ }
+
+ // Write env flags at sp[-1]
+ // sp[-1] = frame_type;
+ let mut frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
+ if !kw_arg.is_null() {
+ frame_type |= VM_FRAME_FLAG_CFRAME_KW
+ }
+ mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into()));
+
+ // Allocate a new CFP (ec->cfp--)
+ let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
+ sub(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+
+ // Setup the new frame
+ // *cfp = (const struct rb_control_frame_struct) {
+ // .pc = 0,
+ // .sp = sp,
+ // .iseq = 0,
+ // .self = recv,
+ // .ep = sp - 1,
+ // .block_code = 0,
+ // .__bp__ = sp,
+ // };
+
+ // Can we re-use ec_cfp_opnd from above?
+ let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
+ mov(cb, REG1, ec_cfp_opnd);
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_PC), imm_opnd(0));
+
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SP), REG0);
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_ISEQ), imm_opnd(0));
+ mov(
+ cb,
+ mem_opnd(64, REG1, RUBY_OFFSET_CFP_BLOCK_CODE),
+ imm_opnd(0),
+ );
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_BP), REG0);
+ sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64));
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_EP), REG0);
+ mov(cb, REG0, recv);
+ mov(cb, mem_opnd(64, REG1, RUBY_OFFSET_CFP_SELF), REG0);
+
+ /*
+ // Verify that we are calling the right function
+ if (YJIT_CHECK_MODE > 0) { // TODO: will we have a YJIT_CHECK_MODE?
+ // Call check_cfunc_dispatch
+ mov(cb, C_ARG_REGS[0], recv);
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
+ mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
+ call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
+ }
+ */
+
+ if !kw_arg.is_null() {
+ // Build a hash from all kwargs passed
+ jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], VALUE(ci as usize));
+ lea(cb, C_ARG_REGS[1], ctx.sp_opnd(0));
+ call_ptr(cb, REG0, build_kwhash as *const u8);
+
+ // Replace the stack location at the start of kwargs with the new hash
+ let stack_opnd = ctx.stack_opnd(argc - passed_argc);
+ mov(cb, stack_opnd, RAX);
+ }
+
+ // Copy SP into RAX because REG_SP will get overwritten
+ lea(cb, RAX, ctx.sp_opnd(0));
+
+ // Pop the C function arguments from the stack (in the caller)
+ ctx.stack_pop((argc + 1).try_into().unwrap());
+
+ // Write interpreter SP into CFP.
+ // Needed in case the callee yields to the block.
+ gen_save_sp(cb, ctx);
+
+ // Non-variadic method
+ if cfunc_argc >= 0 {
+ // Copy the arguments from the stack to the C argument registers
+ // self is the 0th argument and is at index argc from the stack top
+ for i in 0..=passed_argc as usize {
+ // "as usize?" Yeah, you can't index an array by an i32.
+ let stack_opnd = mem_opnd(64, RAX, -(argc + 1 - (i as i32)) * SIZEOF_VALUE_I32);
+ let c_arg_reg = C_ARG_REGS[i];
+ mov(cb, c_arg_reg, stack_opnd);
+ }
+ }
+
+ // Variadic method
+ if cfunc_argc == -1 {
+ // The method gets a pointer to the first argument
+ // rb_f_puts(int argc, VALUE *argv, VALUE recv)
+ mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc.into()));
+ lea(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, RAX, -(argc) * SIZEOF_VALUE_I32),
+ );
+ mov(
+ cb,
+ C_ARG_REGS[2],
+ mem_opnd(64, RAX, -(argc + 1) * SIZEOF_VALUE_I32),
+ );
+ }
+
+ // Call the C function
+ // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
+ // cfunc comes from compile-time cme->def, which we assume to be stable.
+ // Invalidation logic is in yjit_method_lookup_change()
+ add_comment(cb, "call C function");
+ call_ptr(cb, REG0, unsafe { get_mct_func(cfunc) });
+
+ // Record code position for TracePoint patching. See full_cfunc_return().
+ record_global_inval_patch(cb, CodegenGlobals::get_outline_full_cfunc_return_pos());
+
+ // Push the return value on the Ruby stack
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Pop the stack frame (ec->cfp++)
+ // Can we reuse ec_cfp_opnd from above?
+ let ec_cfp_opnd = mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP);
+ add(cb, ec_cfp_opnd, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+
+ // cfunc calls may corrupt types
+ ctx.clear_local_types();
+
+ // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
+ // which allows for sharing the same successor.
+
+ // Jump (fall through) to the call continuation block
+ // We do this to end the current block after the call
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_return_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ match shape {
+ BranchShape::Next0 | BranchShape::Next1 => unreachable!(),
+ BranchShape::Default => {
+ mov(cb, REG0, code_ptr_opnd(target0));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_JIT_RETURN), REG0);
+ }
+ }
+}
+
+fn gen_send_iseq(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<IseqPtr>,
+ argc: i32,
+) -> CodegenStatus {
+ let iseq = unsafe { get_def_iseq_ptr((*cme).def) };
+ let mut argc = argc;
+
+ // When you have keyword arguments, there is an extra object that gets
+ // placed on the stack that represents a bitmap of the keywords that were not
+ // specified at the call site. We need to keep track of the fact that this
+ // value is present on the stack in order to properly set up the callee's
+ // stack pointer.
+ let doing_kw_call = unsafe { get_iseq_flags_has_kw(iseq) };
+ let supplying_kws = unsafe { vm_ci_flag(ci) & VM_CALL_KWARG } != 0;
+
+ if unsafe { vm_ci_flag(ci) } & VM_CALL_TAILCALL != 0 {
+ // We can't handle tailcalls
+ gen_counter_incr!(cb, send_iseq_tailcall);
+ return CantCompile;
+ }
+
+ // No support for callees with these parameters yet as they require allocation
+ // or complex handling.
+ if unsafe {
+ get_iseq_flags_has_rest(iseq)
+ || get_iseq_flags_has_post(iseq)
+ || get_iseq_flags_has_kwrest(iseq)
+ } {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // If we have keyword arguments being passed to a callee that only takes
+ // positionals, then we need to allocate a hash. For now we're going to
+ // call that too complex and bail.
+ if supplying_kws && !unsafe { get_iseq_flags_has_kw(iseq) } {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // If we have a method accepting no kwargs (**nil), exit if we have passed
+ // it any kwargs.
+ if supplying_kws && unsafe { get_iseq_flags_has_accepts_no_kwarg(iseq) } {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // For computing number of locals to set up for the callee
+ let mut num_params = unsafe { get_iseq_body_param_size(iseq) };
+
+ // Block parameter handling. This mirrors setup_parameters_complex().
+ if unsafe { get_iseq_flags_has_block(iseq) } {
+ if unsafe { get_iseq_body_local_iseq(iseq) == iseq } {
+ num_params -= 1;
+ } else {
+ // In this case (param.flags.has_block && local_iseq != iseq),
+ // the block argument is set up as a local variable and requires
+ // materialization (allocation). Bail.
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+ }
+
+ let mut start_pc_offset = 0;
+ let required_num = unsafe { get_iseq_body_param_lead_num(iseq) };
+
+ // This struct represents the metadata about the caller-specified
+ // keyword arguments.
+ let kw_arg = unsafe { vm_ci_kwarg(ci) };
+ let kw_arg_num = if kw_arg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(kw_arg) }
+ };
+
+ // Arity handling and optional parameter setup
+ let opts_filled = argc - required_num - kw_arg_num;
+ let opt_num = unsafe { get_iseq_body_param_opt_num(iseq) };
+ let opts_missing: i32 = opt_num - opts_filled;
+
+ if opts_filled < 0 || opts_filled > opt_num {
+ gen_counter_incr!(cb, send_iseq_arity_error);
+ return CantCompile;
+ }
+
+ // If we have unfilled optional arguments and keyword arguments then we
+ // would need to adjust the argument locations to account for that.
+ // For now we aren't handling this case.
+ if doing_kw_call && opts_missing > 0 {
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
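+ // opt_table[opts_filled] gives the instruction offset in the callee where
+ // execution should start, so that only the defaults for the unfilled
+ // optional parameters get evaluated.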
+ if opt_num > 0 {
+ num_params -= opts_missing as u32;
+ unsafe {
+ let opt_table = get_iseq_body_param_opt_table(iseq);
+ start_pc_offset = (*opt_table.offset(opts_filled as isize)).as_u32();
+ }
+ }
+
+ if doing_kw_call {
+ // Here we're calling a method with keyword arguments and specifying
+ // keyword arguments at this call site.
+
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+ let keyword_num: usize = unsafe { (*keyword).num }.try_into().unwrap();
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+
+ let mut required_kwargs_filled = 0;
+
+ if keyword_num > 30 {
+ // We have so many keywords that (1 << num) encoded as a FIXNUM
+ // (which shifts it left one more) no longer fits inside a 32-bit
+ // immediate.
+ gen_counter_incr!(cb, send_iseq_complex_callee);
+ return CantCompile;
+ }
+
+ // Check that the kwargs being passed are valid
+ if supplying_kws {
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ // SAFETY: see compile.c for sizing of this slice.
+ let callee_kwargs = unsafe { slice::from_raw_parts((*keyword).table, keyword_num) };
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let kw_arg_keyword_len: usize =
+ unsafe { get_cikw_keyword_len(kw_arg) }.try_into().unwrap();
+ let mut caller_kwargs: Vec<ID> = vec![0; kw_arg_keyword_len];
+ for kwarg_idx in 0..kw_arg_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(kw_arg, kwarg_idx.try_into().unwrap()) };
+ caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+
+ // First, we're going to be sure that the names of every
+ // caller-specified keyword argument correspond to a name in the
+ // list of callee-specified keyword parameters.
+ for caller_kwarg in caller_kwargs {
+ let search_result = callee_kwargs
+ .iter()
+ .enumerate() // inject element index
+ .find(|(_, &kwarg)| kwarg == caller_kwarg);
+
+ match search_result {
+ None => {
+ // If the keyword was never found, then we know we have a
+ // mismatch in the names of the keyword arguments, so we need to
+ // bail.
+ gen_counter_incr!(cb, send_iseq_kwargs_mismatch);
+ return CantCompile;
+ }
+ Some((callee_idx, _)) if callee_idx < keyword_required_num => {
+ // Keep a count to ensure all required kwargs are specified
+ required_kwargs_filled += 1;
+ }
+ _ => (),
+ }
+ }
+ }
+ assert!(required_kwargs_filled <= keyword_required_num);
+ if required_kwargs_filled != keyword_required_num {
+ gen_counter_incr!(cb, send_iseq_kwargs_mismatch);
+ return CantCompile;
+ }
+ }
+
+ // Number of locals that are not parameters
+ let num_locals = unsafe { get_iseq_body_local_table_size(iseq) as i32 } - (num_params as i32);
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Check for interrupts
+ gen_check_ints(cb, side_exit);
+
+ let leaf_builtin_raw = unsafe { rb_leaf_builtin_function(iseq) };
+ let leaf_builtin: Option<*const rb_builtin_function> = if leaf_builtin_raw.is_null() {
+ None
+ } else {
+ Some(leaf_builtin_raw)
+ };
+ if let (None, Some(builtin_info)) = (block, leaf_builtin) {
+ let builtin_argc = unsafe { (*builtin_info).argc };
+ if builtin_argc + 1 /* for self */ + 1 /* for ec */ <= (C_ARG_REGS.len() as i32) {
+ add_comment(cb, "inlined leaf builtin");
+
+ // Call the builtin func (ec, recv, arg1, arg2, ...)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+
+ // Copy self and arguments
+ for i in 0..=builtin_argc {
+ let stack_opnd = ctx.stack_opnd(builtin_argc - i);
+ let idx: usize = (i + 1).try_into().unwrap();
+ let c_arg_reg = C_ARG_REGS[idx];
+ mov(cb, c_arg_reg, stack_opnd);
+ }
+ ctx.stack_pop((builtin_argc + 1).try_into().unwrap());
+ let builtin_func_ptr = unsafe { (*builtin_info).func_ptr as *const u8 };
+ call_ptr(cb, REG0, builtin_func_ptr);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Note: assuming that the leaf builtin doesn't change local variables here.
+ // Seems like a safe assumption.
+
+ return KeepCompiling;
+ }
+ }
+
+ // Stack overflow check
+ // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
+ // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
+ add_comment(cb, "stack overflow check");
+ let stack_max: i32 = unsafe { get_iseq_body_stack_max(iseq) }.try_into().unwrap();
+ let locals_offs =
+ (SIZEOF_VALUE as i32) * (num_locals + stack_max) + 2 * (RUBY_SIZEOF_CONTROL_FRAME as i32);
+ lea(cb, REG0, ctx.sp_opnd(locals_offs as isize));
+ cmp(cb, REG_CFP, REG0);
+ jle_ptr(cb, counted_exit!(ocb, side_exit, send_se_cf_overflow));
+
+ if doing_kw_call {
+ // Here we're calling a method with keyword arguments and specifying
+ // keyword arguments at this call site.
+
+ // Number of positional arguments the callee expects before the first
+ // keyword argument
+ let args_before_kw = required_num + opt_num;
+
+ // This struct represents the metadata about the caller-specified
+ // keyword arguments.
+ let ci_kwarg = unsafe { vm_ci_kwarg(ci) };
+ let caller_keyword_len: usize = if ci_kwarg.is_null() {
+ 0
+ } else {
+ unsafe { get_cikw_keyword_len(ci_kwarg) }
+ .try_into()
+ .unwrap()
+ };
+
+ // This struct represents the metadata about the callee-specified
+ // keyword parameters.
+ let keyword = unsafe { get_iseq_body_param_keyword(iseq) };
+
+ add_comment(cb, "keyword args");
+
+ // This is the list of keyword arguments that the callee specified
+ // in its initial declaration.
+ let callee_kwargs = unsafe { (*keyword).table };
+ let total_kwargs: usize = unsafe { (*keyword).num }.try_into().unwrap();
+
+ // Here we're going to build up a list of the IDs that correspond to
+ // the caller-specified keyword arguments. If they're not in the
+ // same order as the order specified in the callee declaration, then
+ // we're going to need to generate some code to swap values around
+ // on the stack.
+ let mut caller_kwargs: Vec<ID> = vec![0; total_kwargs];
+
+ for kwarg_idx in 0..caller_keyword_len {
+ let sym = unsafe { get_cikw_keywords_idx(ci_kwarg, kwarg_idx.try_into().unwrap()) };
+ caller_kwargs[kwarg_idx] = unsafe { rb_sym2id(sym) };
+ }
+ let mut kwarg_idx = caller_keyword_len;
+
+ let mut unspecified_bits = 0;
+
+ let keyword_required_num: usize = unsafe { (*keyword).required_num }.try_into().unwrap();
+ for callee_idx in keyword_required_num..total_kwargs {
+ let mut already_passed = false;
+ let callee_kwarg = unsafe { *(callee_kwargs.offset(callee_idx.try_into().unwrap())) };
+
+ for caller_idx in 0..caller_keyword_len {
+ if caller_kwargs[caller_idx] == callee_kwarg {
+ already_passed = true;
+ break;
+ }
+ }
+
+ if !already_passed {
+ // Reserve space on the stack for each default value we'll be
+ // filling in (which is done in the next loop). Also increments
+ // argc so that the callee's SP is recorded correctly.
+ argc += 1;
+ let default_arg = ctx.stack_push(Type::Unknown);
+
+ // callee_idx - keyword->required_num is used in a couple of places below.
+ let req_num: isize = unsafe { (*keyword).required_num }.try_into().unwrap();
+ let callee_idx_isize: isize = callee_idx.try_into().unwrap();
+ let extra_args = callee_idx_isize - req_num;
+
+ //VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
+ let mut default_value = unsafe { *((*keyword).default_values.offset(extra_args)) };
+
+ if default_value == Qundef {
+ // Qundef means that this value is not constant and must be
+ // recalculated at runtime, so we record it in unspecified_bits
+ // (Qnil is then used as a placeholder instead of Qundef).
+ unspecified_bits |= 0x01 << extra_args;
+ default_value = Qnil;
+ }
+
+ jit_mov_gc_ptr(jit, cb, REG0, default_value);
+ mov(cb, default_arg, REG0);
+
+ caller_kwargs[kwarg_idx] = callee_kwarg;
+ kwarg_idx += 1;
+ }
+ }
+
+ assert!(kwarg_idx == total_kwargs);
+
+ // Next, we're going to loop through every keyword that was
+ // specified by the caller and make sure that it's in the correct
+ // place. If it's not we're going to swap it around with another one.
+ for kwarg_idx in 0..total_kwargs {
+ let kwarg_idx_isize: isize = kwarg_idx.try_into().unwrap();
+ let callee_kwarg = unsafe { *(callee_kwargs.offset(kwarg_idx_isize)) };
+
+ // If the argument is already in the right order, then we don't
+ // need to generate any code since the expected value is already
+ // in the right place on the stack.
+ if callee_kwarg == caller_kwargs[kwarg_idx] {
+ continue;
+ }
+
+ // In this case the argument is not in the right place, so we
+ // need to find its position where it _should_ be and swap with
+ // that location.
+ for swap_idx in (kwarg_idx + 1)..total_kwargs {
+ if callee_kwarg == caller_kwargs[swap_idx] {
+ // First we're going to generate the code that is going
+ // to perform the actual swapping at runtime.
+ let swap_idx_i32: i32 = swap_idx.try_into().unwrap();
+ let kwarg_idx_i32: i32 = kwarg_idx.try_into().unwrap();
+ let offset0: u16 = (argc - 1 - swap_idx_i32 - args_before_kw)
+ .try_into()
+ .unwrap();
+ let offset1: u16 = (argc - 1 - kwarg_idx_i32 - args_before_kw)
+ .try_into()
+ .unwrap();
+ stack_swap(ctx, cb, offset0, offset1, REG1, REG0);
+
+ // Next we're going to do some bookkeeping on our end so
+ // that we know the order that the arguments are
+ // actually in now.
+ let tmp = caller_kwargs[kwarg_idx];
+ caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
+ caller_kwargs[swap_idx] = tmp;
+
+ break;
+ }
+ }
+ }
+
+ // Keyword arguments cause a special extra local variable to be
+ // pushed onto the stack that represents the parameters that weren't
+ // explicitly given a value and have a non-constant default.
+ let unspec_opnd = uimm_opnd(VALUE::fixnum_from_usize(unspecified_bits).as_u64());
+ mov(cb, ctx.stack_opnd(-1), unspec_opnd);
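+ // Hypothetical example: for `def m(k1: 1, k2: compute)` called as
+ // `m(k1: 5)`, k1 needs no default, while k2's default is non-constant,
+ // so its bit is set in unspecified_bits and Qnil was pushed above as a
+ // placeholder for the callee to compute.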
+ }
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+
+ // Store the updated SP on the current frame (pop arguments and receiver)
+ add_comment(cb, "store caller sp");
+ lea(
+ cb,
+ REG0,
+ ctx.sp_opnd((SIZEOF_VALUE as isize) * -((argc as isize) + 1)),
+ );
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0);
+
+ // Store the next PC in the current frame
+ jit_save_pc(jit, cb, REG0);
+
+ if let Some(block_val) = block {
+ // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
+ // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
+ // with cfp->block_code.
+ let gc_ptr = VALUE(block_val as usize);
+ jit_mov_gc_ptr(jit, cb, REG0, gc_ptr);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE), REG0);
+ }
+
+ // Adjust the callee's stack pointer
+ let offs =
+ (SIZEOF_VALUE as isize) * (3 + (num_locals as isize) + if doing_kw_call { 1 } else { 0 });
+ lea(cb, REG0, ctx.sp_opnd(offs));
+
+ // Initialize local variables to Qnil
+ for i in 0..num_locals {
+ let offs = (SIZEOF_VALUE as i32) * (i - num_locals - 3);
+ mov(cb, mem_opnd(64, REG0, offs), uimm_opnd(Qnil.into()));
+ }
+
+ add_comment(cb, "push env");
+ // Put compile time cme into REG1. It's assumed to be valid because we are notified when
+ // any cme we depend on becomes outdated. See yjit_method_lookup_change().
+ jit_mov_gc_ptr(jit, cb, REG1, VALUE(cme as usize));
+ // Write method entry at sp[-3]
+ // sp[-3] = me;
+ mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
+
+ // Write block handler at sp[-2]
+ // sp[-2] = block_handler;
+ match block {
+ Some(_) => {
+ // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
+ lea(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF));
+ or(cb, REG1, imm_opnd(1));
+ mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
+ }
+ None => {
+ mov(
+ cb,
+ mem_opnd(64, REG0, 8 * -2),
+ uimm_opnd(VM_BLOCK_HANDLER_NONE.into()),
+ );
+ }
+ }
+
+ // Write env flags at sp[-1]
+ // sp[-1] = frame_type;
+ let frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
+ mov(cb, mem_opnd(64, REG0, 8 * -1), uimm_opnd(frame_type.into()));
+
+ add_comment(cb, "push callee CFP");
+ // Allocate a new CFP (ec->cfp--)
+ sub(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+ mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP);
+
+ // Setup the new frame
+ // *cfp = (const struct rb_control_frame_struct) {
+ // .pc = pc,
+ // .sp = sp,
+ // .iseq = iseq,
+ // .self = recv,
+ // .ep = sp - 1,
+ // .block_code = 0,
+ // .__bp__ = sp,
+ // };
+ mov(cb, REG1, recv);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF), REG1);
+ mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP), REG0);
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BP), REG0);
+ sub(cb, REG0, uimm_opnd(SIZEOF_VALUE as u64));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP), REG0);
+ jit_mov_gc_ptr(jit, cb, REG0, VALUE(iseq as usize));
+ mov(cb, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ), REG0);
+ mov(
+ cb,
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_BLOCK_CODE),
+ imm_opnd(0),
+ );
+
+ // No need to set cfp->pc since the callee sets it whenever calling into routines
+ // that could look at it through jit_save_pc().
+ // mov(cb, REG0, const_ptr_opnd(start_pc));
+ // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
+
+ // Stub so we can return to JITted code
+ let return_block = BlockId {
+ iseq: jit.iseq,
+ idx: jit_next_insn_idx(jit),
+ };
+
+ // Create a context for the callee
+ let mut callee_ctx = Context::new(); // Was DEFAULT_CTX
+
+ // Set the argument types in the callee's context
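+ // The arguments sit on top of the caller's stack with the first argument
+ // deepest, so argument arg_idx is at stack offset argc - arg_idx - 1 and
+ // becomes local arg_idx in the callee.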
+ for arg_idx in 0..argc {
+ let stack_offs: u16 = (argc - arg_idx - 1).try_into().unwrap();
+ let arg_type = ctx.get_opnd_type(StackOpnd(stack_offs));
+ callee_ctx.set_local_type(arg_idx.try_into().unwrap(), arg_type);
+ }
+
+ let recv_type = ctx.get_opnd_type(StackOpnd(argc.try_into().unwrap()));
+ callee_ctx.upgrade_opnd_type(SelfOpnd, recv_type);
+
+ // The callee might change locals through Kernel#binding and other means.
+ ctx.clear_local_types();
+
+ // Pop arguments and receiver in return context, push the return value
+ // After the return, sp_offset will be 1. The codegen for leave writes
+ // the return value in case of JIT-to-JIT return.
+ let mut return_ctx = ctx.clone();
+ return_ctx.stack_pop((argc + 1).try_into().unwrap());
+ return_ctx.stack_push(Type::Unknown);
+ return_ctx.set_sp_offset(1);
+ return_ctx.reset_chain_depth();
+
+ // Write the JIT return address on the callee frame
+ gen_branch(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ return_block,
+ &return_ctx,
+ Some(return_block),
+ Some(&return_ctx),
+ gen_return_branch,
+ );
+
+ //print_str(cb, "calling Ruby func:");
+ //print_str(cb, rb_id2name(vm_ci_mid(ci)));
+
+ // Directly jump to the entry point of the callee
+ gen_direct_jump(
+ jit,
+ &callee_ctx,
+ BlockId {
+ iseq: iseq,
+ idx: start_pc_offset,
+ },
+ cb,
+ );
+
+ EndBlock
+}
+
+fn gen_struct_aref(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ comptime_recv: VALUE,
+ _comptime_recv_klass: VALUE,
+) -> CodegenStatus {
+ if unsafe { vm_ci_argc(ci) } != 0 {
+ return CantCompile;
+ }
+
+ let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
+ .try_into()
+ .unwrap();
+
+ // Confidence checks
+ assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
+ assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
+
+ // We are going to use an encoding that takes a 4-byte immediate which
+ // limits the offset to INT32_MAX.
+ {
+ let native_off = (off as i64) * (SIZEOF_VALUE as i64);
+ if native_off > (i32::MAX as i64) {
+ return CantCompile;
+ }
+ }
+
+ // All structs from the same Struct class should have the same
+ // length. So if our comptime_recv is embedded, all runtime
+ // structs of the same class should be as well, and vice versa.
+ let embedded = unsafe { FL_TEST_RAW(comptime_recv, VALUE(RSTRUCT_EMBED_LEN_MASK)) };
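+ // Embedded structs store their members inline in the RStruct object;
+ // larger ones keep them behind a heap pointer. Knowing which layout the
+ // comptime receiver uses lets us commit to one access pattern here.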
+
+ add_comment(cb, "struct aref");
+
+ let recv = ctx.stack_pop(1);
+
+ mov(cb, REG0, recv);
+
+ if embedded != VALUE(0) {
+ let ary_elt = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_ARY + (8 * off));
+ mov(cb, REG0, ary_elt);
+ } else {
+ let rstruct_ptr = mem_opnd(64, REG0, RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR);
+ mov(cb, REG0, rstruct_ptr);
+ mov(cb, REG0, mem_opnd(64, REG0, (SIZEOF_VALUE as i32) * off));
+ }
+
+ let ret = ctx.stack_push(Type::Unknown);
+ mov(cb, ret, REG0);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_struct_aset(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ comptime_recv: VALUE,
+ _comptime_recv_klass: VALUE,
+) -> CodegenStatus {
+ if unsafe { vm_ci_argc(ci) } != 1 {
+ return CantCompile;
+ }
+
+ let off: i32 = unsafe { get_cme_def_body_optimized_index(cme) }
+ .try_into()
+ .unwrap();
+
+ // Confidence checks
+ assert!(unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRUCT) });
+ assert!((off as i64) < unsafe { RSTRUCT_LEN(comptime_recv) });
+
+ add_comment(cb, "struct aset");
+
+ let val = ctx.stack_pop(1);
+ let recv = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[0], recv);
+ mov(cb, C_ARG_REGS[1], imm_opnd(off as i64));
+ mov(cb, C_ARG_REGS[2], val);
+ call_ptr(cb, REG0, RSTRUCT_SET as *const u8);
+
+ let ret = ctx.stack_push(Type::Unknown);
+ mov(cb, ret, RAX);
+
+ jump_to_next_insn(jit, ctx, cb, ocb);
+ EndBlock
+}
+
+fn gen_send_general(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ cd: *const rb_call_data,
+ block: Option<IseqPtr>,
+) -> CodegenStatus {
+ // Relevant definitions:
+ // rb_execution_context_t : vm_core.h
+ // invoker, cfunc logic : method.h, vm_method.c
+ // rb_callinfo : vm_callinfo.h
+ // rb_callable_method_entry_t : method.h
+ // vm_call_cfunc_with_frame : vm_insnhelper.c
+ //
+ // For a general overview for how the interpreter calls methods,
+ // see vm_call_method().
+
+ let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
+ let argc = unsafe { vm_ci_argc(ci) };
+ let mid = unsafe { vm_ci_mid(ci) };
+ let flags = unsafe { vm_ci_flag(ci) };
+
+ // Don't JIT calls with keyword splat
+ if flags & VM_CALL_KW_SPLAT != 0 {
+ gen_counter_incr!(cb, send_kw_splat);
+ return CantCompile;
+ }
+
+ // Don't JIT calls that aren't simple
+ // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
+ if flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr!(cb, send_args_splat);
+ return CantCompile;
+ }
+ if flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ gen_counter_incr!(cb, send_block_arg);
+ return CantCompile;
+ }
+
+ // Defer compilation so we can specialize on class of receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize);
+ let comptime_recv_klass = comptime_recv.class_of();
+
+ // Guard that the receiver has the same class as the one from compile time
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+ let recv_opnd = StackOpnd(argc.try_into().unwrap());
+ mov(cb, REG0, recv);
+ if !jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ comptime_recv_klass,
+ recv_opnd,
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ ) {
+ return CantCompile;
+ }
+
+ // Do method lookup
+ let mut cme = unsafe { rb_callable_method_entry(comptime_recv_klass, mid) };
+ if cme.is_null() {
+ // TODO: counter
+ return CantCompile;
+ }
+
+ let visi = unsafe { METHOD_ENTRY_VISI(cme) };
+ match visi {
+ METHOD_VISI_PUBLIC => {
+ // Can always call public methods
+ }
+ METHOD_VISI_PRIVATE => {
+ if flags & VM_CALL_FCALL == 0 {
+ // Can only call private methods with FCALL callsites.
+ // (at the moment they are callsites without a receiver or an explicit `self` receiver)
+ return CantCompile;
+ }
+ }
+ METHOD_VISI_PROTECTED => {
+ jit_protected_callee_ancestry_guard(jit, cb, ocb, cme, side_exit);
+ }
+ _ => {
+ panic!("cmes should always have a visibility!");
+ }
+ }
+
+ // Register block for invalidation
+ //assert!(cme->called_id == mid);
+ assume_method_lookup_stable(jit, ocb, comptime_recv_klass, cme);
+
+ // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
+ loop {
+ let def_type = unsafe { get_cme_def_type(cme) };
+ match def_type {
+ VM_METHOD_TYPE_ISEQ => {
+ return gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc);
+ }
+ VM_METHOD_TYPE_CFUNC => {
+ return gen_send_cfunc(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ ci,
+ cme,
+ block,
+ argc,
+ &comptime_recv_klass,
+ );
+ }
+ VM_METHOD_TYPE_IVAR => {
+ if argc != 0 {
+ // Argument count mismatch. Getters take no arguments.
+ gen_counter_incr!(cb, send_getter_arity);
+ return CantCompile;
+ }
+
+ if c_method_tracing_currently_enabled(jit) {
+ // Can't generate code for firing c_call and c_return events
+ // :attr-tracing:
+ // Handling the C method tracing events for attr_accessor
+ // methods is easier than regular C methods as we know the
+ // "method" we are calling into never enables those tracing
+ // events. Once global invalidation runs, the code for the
+ // attr_accessor is invalidated and we exit at the closest
+ // instruction boundary which is always outside of the body of
+ // the attr_accessor code.
+ gen_counter_incr!(cb, send_cfunc_tracing);
+ return CantCompile;
+ }
+
+ mov(cb, REG0, recv);
+ let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
+
+ return gen_get_ivar(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ SEND_MAX_DEPTH,
+ comptime_recv,
+ ivar_name,
+ recv_opnd,
+ side_exit,
+ );
+ }
+ VM_METHOD_TYPE_ATTRSET => {
+ if flags & VM_CALL_KWARG != 0 {
+ gen_counter_incr!(cb, send_attrset_kwargs);
+ return CantCompile;
+ } else if argc != 1 || unsafe { !RB_TYPE_P(comptime_recv, RUBY_T_OBJECT) } {
+ gen_counter_incr!(cb, send_ivar_set_method);
+ return CantCompile;
+ } else if c_method_tracing_currently_enabled(jit) {
+ // Can't generate code for firing c_call and c_return events
+ // See :attr-tracing:
+ gen_counter_incr!(cb, send_cfunc_tracing);
+ return CantCompile;
+ } else {
+ let ivar_name = unsafe { get_cme_def_body_attr_id(cme) };
+ return gen_set_ivar(jit, ctx, cb, comptime_recv, ivar_name);
+ }
+ }
+ // Block method, e.g. define_method(:foo) { :my_block }
+ VM_METHOD_TYPE_BMETHOD => {
+ gen_counter_incr!(cb, send_bmethod);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_ZSUPER => {
+ gen_counter_incr!(cb, send_zsuper_method);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_ALIAS => {
+ // Retrieve the aliased method and re-enter the switch
+ cme = unsafe { rb_aliased_callable_method_entry(cme) };
+ continue;
+ }
+ VM_METHOD_TYPE_UNDEF => {
+ gen_counter_incr!(cb, send_undef_method);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_NOTIMPLEMENTED => {
+ gen_counter_incr!(cb, send_not_implemented_method);
+ return CantCompile;
+ }
+ // Send family of methods, e.g. call/apply
+ VM_METHOD_TYPE_OPTIMIZED => {
+ let opt_type = unsafe { get_cme_def_body_optimized_type(cme) };
+ match opt_type {
+ OPTIMIZED_METHOD_TYPE_SEND => {
+ gen_counter_incr!(cb, send_optimized_method_send);
+ return CantCompile;
+ }
+ OPTIMIZED_METHOD_TYPE_CALL => {
+ gen_counter_incr!(cb, send_optimized_method_call);
+ return CantCompile;
+ }
+ OPTIMIZED_METHOD_TYPE_BLOCK_CALL => {
+ gen_counter_incr!(cb, send_optimized_method_block_call);
+ return CantCompile;
+ }
+ OPTIMIZED_METHOD_TYPE_STRUCT_AREF => {
+ return gen_struct_aref(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ ci,
+ cme,
+ comptime_recv,
+ comptime_recv_klass,
+ );
+ }
+ OPTIMIZED_METHOD_TYPE_STRUCT_ASET => {
+ return gen_struct_aset(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ ci,
+ cme,
+ comptime_recv,
+ comptime_recv_klass,
+ );
+ }
+ _ => {
+ panic!("unknown optimized method type!")
+ }
+ }
+ }
+ VM_METHOD_TYPE_MISSING => {
+ gen_counter_incr!(cb, send_missing_method);
+ return CantCompile;
+ }
+ VM_METHOD_TYPE_REFINED => {
+ gen_counter_incr!(cb, send_refined_method);
+ return CantCompile;
+ }
+ _ => {
+ unreachable!();
+ }
+ }
+ }
+}
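+
+// Note on the loop above: it only repeats for VM_METHOD_TYPE_ALIAS entries.
+// For example, `alias bar foo` gives bar an alias method entry;
+// rb_aliased_callable_method_entry() resolves it to foo's underlying entry and
+// the match is re-entered with that definition type.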
+
+fn gen_opt_send_without_block(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd = jit_get_arg(jit, 0).as_ptr();
+
+ gen_send_general(jit, ctx, cb, ocb, cd, None)
+}
+
+fn gen_send(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd = jit_get_arg(jit, 0).as_ptr();
+ let block = jit_get_arg(jit, 1).as_optional_ptr();
+ return gen_send_general(jit, ctx, cb, ocb, cd, block);
+}
+
+fn gen_invokesuper(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let cd: *const rb_call_data = jit_get_arg(jit, 0).as_ptr();
+ let block: Option<IseqPtr> = jit_get_arg(jit, 1).as_optional_ptr();
+
+ // Defer compilation so we can specialize on class of receiver
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let me = unsafe { rb_vm_frame_method_entry(get_ec_cfp(jit.ec.unwrap())) };
+ if me.is_null() {
+ return CantCompile;
+ }
+
+ // FIXME: We should track and invalidate this block when this cme is invalidated
+ let current_defined_class = unsafe { (*me).defined_class };
+ let mid = unsafe { get_def_original_id((*me).def) };
+
+ if me != unsafe { rb_callable_method_entry(current_defined_class, (*me).called_id) } {
+ // Though we likely could generate this call (we only need the method entry
+ // to remain valid), assume_method_lookup_stable below also requires that
+ // the method lookup itself matches.
+ return CantCompile;
+ }
+
+ // vm_search_normal_superclass
+ let rbasic_ptr: *const RBasic = current_defined_class.as_ptr();
+ if current_defined_class.builtin_type() == RUBY_T_ICLASS
+ && unsafe { FL_TEST_RAW((*rbasic_ptr).klass, VALUE(RMODULE_IS_REFINEMENT)) != VALUE(0) }
+ {
+ return CantCompile;
+ }
+ let comptime_superclass =
+ unsafe { rb_class_get_superclass(RCLASS_ORIGIN(current_defined_class)) };
+
+ let ci = unsafe { get_call_data_ci(cd) };
+ let argc = unsafe { vm_ci_argc(ci) };
+
+ let ci_flags = unsafe { vm_ci_flag(ci) };
+
+ // Don't JIT calls that aren't simple
+ // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
+ if ci_flags & VM_CALL_ARGS_SPLAT != 0 {
+ gen_counter_incr!(cb, send_args_splat);
+ return CantCompile;
+ }
+ if ci_flags & VM_CALL_KWARG != 0 {
+ gen_counter_incr!(cb, send_keywords);
+ return CantCompile;
+ }
+ if ci_flags & VM_CALL_KW_SPLAT != 0 {
+ gen_counter_incr!(cb, send_kw_splat);
+ return CantCompile;
+ }
+ if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 {
+ gen_counter_incr!(cb, send_block_arg);
+ return CantCompile;
+ }
+
+ // Ensure we haven't rebound this method onto an incompatible class.
+ // In the interpreter we try to avoid making this check by performing some
+ // cheaper calculations first, but since we specialize on the method entry
+ // and only have to do this once at compile time, it is fine to always
+ // check and side-exit.
+ let comptime_recv = jit_peek_at_stack(jit, ctx, argc as isize);
+ if unsafe { rb_obj_is_kind_of(comptime_recv, current_defined_class) } == VALUE(0) {
+ return CantCompile;
+ }
+
+ // Do method lookup
+ let cme = unsafe { rb_callable_method_entry(comptime_superclass, mid) };
+
+ if cme.is_null() {
+ return CantCompile;
+ }
+
+ // Check that we'll be able to write this method dispatch before generating checks
+ let cme_def_type = unsafe { get_cme_def_type(cme) };
+ if cme_def_type != VM_METHOD_TYPE_ISEQ && cme_def_type != VM_METHOD_TYPE_CFUNC {
+ // others unimplemented
+ return CantCompile;
+ }
+
+ // Guard that the receiver has the same class as the one from compile time
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ let cfp = unsafe { get_ec_cfp(jit.ec.unwrap()) };
+ let ep = unsafe { get_cfp_ep(cfp) };
+ let cref_me = unsafe { *ep.offset(VM_ENV_DATA_INDEX_ME_CREF.try_into().unwrap()) };
+ let me_as_value = VALUE(me as usize);
+ if cref_me != me_as_value {
+ // This will be the case for super within a block
+ return CantCompile;
+ }
+
+ add_comment(cb, "guard known me");
+ mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ let ep_me_opnd = mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_ME_CREF as i32),
+ );
+ jit_mov_gc_ptr(jit, cb, REG1, me_as_value);
+ cmp(cb, ep_me_opnd, REG1);
+ jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_me_changed));
+
+ if block.is_none() {
+ // Guard no block passed
+ // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
+ // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
+ //
+ // TODO: this could properly forward the current block handler, but
+ // would require changes to gen_send_*
+ add_comment(cb, "guard no block given");
+ // EP is in REG0 from above
+ let ep_specval_opnd = mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
+ );
+ cmp(cb, ep_specval_opnd, uimm_opnd(VM_BLOCK_HANDLER_NONE.into()));
+ jne_ptr(cb, counted_exit!(ocb, side_exit, invokesuper_block));
+ }
+
+ // Points to the receiver operand on the stack
+ let recv = ctx.stack_opnd(argc);
+ mov(cb, REG0, recv);
+
+ // We need to assume that both our current method entry and the super
+ // method entry we invoke remain stable
+ assume_method_lookup_stable(jit, ocb, current_defined_class, me);
+ assume_method_lookup_stable(jit, ocb, comptime_superclass, cme);
+
+ // Method calls may corrupt types
+ ctx.clear_local_types();
+
+ match cme_def_type {
+ VM_METHOD_TYPE_ISEQ => gen_send_iseq(jit, ctx, cb, ocb, ci, cme, block, argc),
+ VM_METHOD_TYPE_CFUNC => {
+ gen_send_cfunc(jit, ctx, cb, ocb, ci, cme, block, argc, ptr::null())
+ }
+ _ => unreachable!(),
+ }
+}
+
+fn gen_leave(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Only the return value should be on the stack
+ assert!(ctx.get_stack_size() == 1);
+
+ // Create a side-exit to fall back to the interpreter
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Load environment pointer EP from CFP
+ mov(cb, REG1, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+
+ // Check for interrupts
+ add_comment(cb, "check for interrupts");
+ gen_check_ints(cb, counted_exit!(ocb, side_exit, leave_se_interrupt));
+
+ // Load the return value
+ mov(cb, REG0, ctx.stack_pop(1));
+
+ // Pop the current frame (ec->cfp++)
+ // Note: the return PC is already in the previous CFP
+ add_comment(cb, "pop stack frame");
+ add(cb, REG_CFP, uimm_opnd(RUBY_SIZEOF_CONTROL_FRAME as u64));
+ mov(cb, mem_opnd(64, REG_EC, RUBY_OFFSET_EC_CFP), REG_CFP);
+
+ // Reload REG_SP for the caller and write the return value.
+ // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
+ mov(cb, REG_SP, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SP));
+ mov(cb, mem_opnd(64, REG_SP, 0), REG0);
+
+ // Jump to the JIT return address on the frame that was just popped
+ let offset_to_jit_return =
+ -(RUBY_SIZEOF_CONTROL_FRAME as i32) + (RUBY_OFFSET_CFP_JIT_RETURN as i32);
+ jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
+
+ EndBlock
+}
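+
+// Worked example for the offset above: after `add REG_CFP, RUBY_SIZEOF_CONTROL_FRAME`,
+// REG_CFP points at the caller's frame, so the frame that was just popped sits at
+// REG_CFP - RUBY_SIZEOF_CONTROL_FRAME, and its jit_return slot at
+// REG_CFP - RUBY_SIZEOF_CONTROL_FRAME + RUBY_OFFSET_CFP_JIT_RETURN, which is
+// exactly the negative offset_to_jit_return computed above.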
+
+fn gen_getglobal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let gid = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we might make a Ruby call for warning
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64()));
+
+ call_ptr(cb, REG0, rb_gvar_get as *const u8);
+
+ let top = ctx.stack_push(Type::Unknown);
+ mov(cb, top, RAX);
+
+ KeepCompiling
+}
+
+fn gen_setglobal(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let gid = jit_get_arg(jit, 0);
+
+ // Save the PC and SP because we might make a Ruby call for
+ // Kernel#set_trace_var
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ mov(cb, C_ARG_REGS[0], imm_opnd(gid.as_i64()));
+
+ let val = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[1], val);
+
+ call_ptr(cb, REG0, rb_gvar_set as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_anytostring(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Save the PC and SP because rb_obj_as_string_result() below may allocate
+ // (it builds a String when the value is not already one)
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let str = ctx.stack_pop(1);
+ let val = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[0], str);
+ mov(cb, C_ARG_REGS[1], val);
+
+ call_ptr(cb, REG0, rb_obj_as_string_result as *const u8);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::String);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_objtostring(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ if !jit_at_current_insn(jit) {
+ defer_compilation(jit, ctx, cb, ocb);
+ return EndBlock;
+ }
+
+ let recv = ctx.stack_opnd(0);
+ let comptime_recv = jit_peek_at_stack(jit, ctx, 0);
+
+ if unsafe { RB_TYPE_P(comptime_recv, RUBY_T_STRING) } {
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ mov(cb, REG0, recv);
+ jit_guard_known_klass(
+ jit,
+ ctx,
+ cb,
+ ocb,
+ comptime_recv.class_of(),
+ StackOpnd(0),
+ comptime_recv,
+ SEND_MAX_DEPTH,
+ side_exit,
+ );
+ // No work needed. The string value is already on the top of the stack.
+ KeepCompiling
+ } else {
+ let cd = jit_get_arg(jit, 0).as_ptr();
+ gen_send_general(jit, ctx, cb, ocb, cd, None)
+ }
+}
+
+fn gen_intern(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // Save the PC and SP because we might allocate
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let str = ctx.stack_pop(1);
+
+ mov(cb, C_ARG_REGS[0], str);
+
+ call_ptr(cb, REG0, rb_str_intern as *const u8);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+fn gen_toregexp(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let opt = jit_get_arg(jit, 0).as_i64();
+ let cnt = jit_get_arg(jit, 1).as_usize();
+
+ // Save the PC and SP because this allocates an object and could
+ // raise an exception.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let values_ptr = ctx.sp_opnd(-((SIZEOF_VALUE as isize) * (cnt as isize)));
+ ctx.stack_pop(cnt);
+
+ mov(cb, C_ARG_REGS[0], imm_opnd(0));
+ mov(cb, C_ARG_REGS[1], imm_opnd(cnt.try_into().unwrap()));
+ lea(cb, C_ARG_REGS[2], values_ptr);
+ call_ptr(cb, REG0, rb_ary_tmp_new_from_values as *const u8);
+
+ // Save the array so we can clear it later
+ push(cb, RAX);
+ push(cb, RAX); // Alignment
+ mov(cb, C_ARG_REGS[0], RAX);
+ mov(cb, C_ARG_REGS[1], imm_opnd(opt));
+ call_ptr(cb, REG0, rb_reg_new_ary as *const u8);
+
+ // The actual regex is in RAX now. Pop the temp array from
+ // rb_ary_tmp_new_from_values into C arg regs so we can clear it
+ pop(cb, REG1); // Alignment
+ pop(cb, C_ARG_REGS[0]);
+
+ // The value we want to push on the stack is in RAX right now
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ // Clear the temp array.
+ call_ptr(cb, REG0, rb_ary_clear as *const u8);
+
+ KeepCompiling
+}
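+
+// Note on the push/pop pair above: pushing RAX twice keeps the machine stack
+// 16-byte aligned for the following C call (each push is 8 bytes, so an even
+// number of pushes preserves alignment), and it also parks the temporary array
+// so it can be popped back into C_ARG_REGS[0] for the rb_ary_clear() call.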
+
+fn gen_getspecial(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // This takes two arguments, key and type
+ // key is only used when type == 0
+ // A non-zero type determines which type of backref to fetch
+ //rb_num_t key = jit_get_arg(jit, 0);
+ let rtype = jit_get_arg(jit, 1).as_u64();
+
+ if rtype == 0 {
+ // not yet implemented
+ return CantCompile;
+ } else if rtype & 0x01 != 0 {
+ // Fetch a "special" backref based on a char encoded by shifting by 1
+
+ // Can raise if matchdata uninitialized
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_backref_get()
+ add_comment(cb, "rb_backref_get");
+ call_ptr(cb, REG0, rb_backref_get as *const u8);
+ mov(cb, C_ARG_REGS[0], RAX);
+
+ let rt_u8: u8 = (rtype >> 1).try_into().unwrap();
+ match rt_u8.into() {
+ '&' => {
+ add_comment(cb, "rb_reg_last_match");
+ call_ptr(cb, REG0, rb_reg_last_match as *const u8);
+ }
+ '`' => {
+ add_comment(cb, "rb_reg_match_pre");
+ call_ptr(cb, REG0, rb_reg_match_pre as *const u8);
+ }
+ '\'' => {
+ add_comment(cb, "rb_reg_match_post");
+ call_ptr(cb, REG0, rb_reg_match_post as *const u8);
+ }
+ '+' => {
+ add_comment(cb, "rb_reg_match_last");
+ call_ptr(cb, REG0, rb_reg_match_last as *const u8);
+ }
+ _ => panic!("invalid back-ref"),
+ }
+
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+ } else {
+ // Fetch the N-th match from the last backref based on type shifted by 1
+
+ // Can raise if matchdata uninitialized
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // call rb_backref_get()
+ add_comment(cb, "rb_backref_get");
+ call_ptr(cb, REG0, rb_backref_get as *const u8);
+
+ // rb_reg_nth_match((int)(type >> 1), backref);
+ add_comment(cb, "rb_reg_nth_match");
+ mov(
+ cb,
+ C_ARG_REGS[0],
+ imm_opnd((rtype >> 1).try_into().unwrap()),
+ );
+ mov(cb, C_ARG_REGS[1], RAX);
+ call_ptr(cb, REG0, rb_reg_nth_match as *const u8);
+
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+ }
+}
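+
+// Illustrative sketch of how the getspecial operand is decoded above, as plain
+// Rust. The enum and helper are hypothetical (for illustration only).
+#[allow(dead_code)]
+enum GetSpecialKindSketch {
+ KeyLookup, // rtype == 0: looked up via the `key` operand (not compiled above)
+ SpecialChar(u8), // odd rtype: one of '&', '`', '\'', '+', encoded shifted by 1
+ NthMatch(u64), // even nonzero rtype: N-th match group, encoded shifted by 1
+}
+
+#[allow(dead_code)]
+fn decode_getspecial_sketch(rtype: u64) -> GetSpecialKindSketch {
+ if rtype == 0 {
+ GetSpecialKindSketch::KeyLookup
+ } else if rtype & 0x01 != 0 {
+ // Truncating cast; the code above uses try_into().unwrap()
+ GetSpecialKindSketch::SpecialChar((rtype >> 1) as u8)
+ } else {
+ GetSpecialKindSketch::NthMatch(rtype >> 1)
+ }
+}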
+
+fn gen_getclassvariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // rb_vm_getclassvariable can raise exceptions.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ);
+ mov(cb, C_ARG_REGS[0], cfp_iseq_opnd);
+ mov(cb, C_ARG_REGS[1], REG_CFP);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64()));
+ mov(cb, C_ARG_REGS[3], uimm_opnd(jit_get_arg(jit, 1).as_u64()));
+
+ call_ptr(cb, REG0, rb_vm_getclassvariable as *const u8);
+
+ let stack_top = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_top, RAX);
+
+ KeepCompiling
+}
+
+fn gen_setclassvariable(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // rb_vm_setclassvariable can raise exceptions.
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ let cfp_iseq_opnd = mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_ISEQ);
+ mov(cb, C_ARG_REGS[0], cfp_iseq_opnd);
+ mov(cb, C_ARG_REGS[1], REG_CFP);
+ mov(cb, C_ARG_REGS[2], uimm_opnd(jit_get_arg(jit, 0).as_u64()));
+ mov(cb, C_ARG_REGS[3], ctx.stack_pop(1));
+ mov(cb, C_ARG_REGS[4], uimm_opnd(jit_get_arg(jit, 1).as_u64()));
+
+ call_ptr(cb, REG0, rb_vm_setclassvariable as *const u8);
+
+ KeepCompiling
+}
+
+fn gen_opt_getinlinecache(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let jump_offset = jit_get_arg(jit, 0);
+ let const_cache_as_value = jit_get_arg(jit, 1);
+ let ic: *const iseq_inline_constant_cache = const_cache_as_value.as_ptr();
+
+ // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
+ let ice = unsafe { (*ic).entry };
+ if ice.is_null() {
+ // In this case, leave a block that unconditionally side exits
+ // for the interpreter to invalidate.
+ return CantCompile;
+ }
+
+ // Make sure there is an exit for this block as the interpreter might want
+ // to invalidate this block from yjit_constant_ic_update().
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ if !unsafe { (*ice).ic_cref }.is_null() {
+ // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // Call function to verify the cache. It doesn't allocate or call methods.
+ mov(cb, C_ARG_REGS[0], const_ptr_opnd(ic as *const u8));
+ mov(cb, C_ARG_REGS[1], mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ call_ptr(cb, REG0, rb_vm_ic_hit_p as *const u8);
+
+ // Check the result. _Bool is one byte in SysV.
+ test(cb, AL, AL);
+ jz_ptr(cb, counted_exit!(ocb, side_exit, opt_getinlinecache_miss));
+
+ // Push ic->entry->value
+ mov(cb, REG0, const_ptr_opnd(ic as *mut u8));
+ mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_IC_ENTRY));
+ let stack_top = ctx.stack_push(Type::Unknown);
+ mov(cb, REG0, mem_opnd(64, REG0, RUBY_OFFSET_ICE_VALUE));
+ mov(cb, stack_top, REG0);
+ } else {
+ // Optimize for single ractor mode.
+ // FIXME: This leaks when st_insert raises NoMemoryError
+ if !assume_single_ractor_mode(jit, ocb) {
+ return CantCompile;
+ }
+
+ // Invalidate output code on any constant writes associated with
+ // constants referenced within the current block.
+ assume_stable_constant_names(jit, ocb);
+
+ jit_putobject(jit, ctx, cb, unsafe { (*ice).value });
+ }
+
+ // Jump over the code for filling the cache
+ let jump_idx = jit_next_insn_idx(jit) + jump_offset.as_u32();
+ gen_direct_jump(
+ jit,
+ ctx,
+ BlockId {
+ iseq: jit.iseq,
+ idx: jump_idx,
+ },
+ cb,
+ );
+ EndBlock
+}
+
+// Push the explicit block parameter onto the temporary stack. Part of the
+// interpreter's scheme for avoiding Proc allocations when delegating
+// explicit block parameters.
+fn gen_getblockparamproxy(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ // A mirror of the interpreter code, checking for the case
+ // where it pushes rb_block_param_proxy.
+ let side_exit = get_side_exit(jit, ocb, ctx);
+
+ // EP level
+ let level = jit_get_arg(jit, 1).as_u32();
+
+ // Load environment pointer EP from CFP
+ gen_get_ep(cb, REG0, level);
+
+ // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
+ let flag_check = mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_FLAGS as i32),
+ );
+ test(
+ cb,
+ flag_check,
+ uimm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM.into()),
+ );
+ jnz_ptr(cb, counted_exit!(ocb, side_exit, gbpp_block_param_modified));
+
+ // Load the block handler for the current frame
+ // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
+ mov(
+ cb,
+ REG0,
+ mem_opnd(
+ 64,
+ REG0,
+ (SIZEOF_VALUE as i32) * (VM_ENV_DATA_INDEX_SPECVAL as i32),
+ ),
+ );
+
+ // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
+ and(cb, REG0_8, imm_opnd(0x3));
+
+ // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
+ cmp(cb, REG0_8, imm_opnd(0x1));
+ jnz_ptr(
+ cb,
+ counted_exit!(ocb, side_exit, gbpp_block_handler_not_iseq),
+ );
+
+ // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
+ mov(
+ cb,
+ REG0,
+ const_ptr_opnd(unsafe { rb_block_param_proxy }.as_ptr()),
+ );
+ assert!(!unsafe { rb_block_param_proxy }.special_const_p());
+ let top = ctx.stack_push(Type::UnknownHeap);
+ mov(cb, top, REG0);
+
+ KeepCompiling
+}
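+
+// Illustrative sketch of the tag test performed on the block handler above, as
+// plain Rust. The helper is hypothetical; 0x3 and 0x1 are the same mask and tag
+// value used in the generated code.
+#[allow(dead_code)]
+fn block_handler_is_iseq_sketch(block_handler: usize) -> bool {
+ // VM_BH_ISEQ_BLOCK_P(): an iseq block handler is a tagged pointer whose low
+ // two bits are 0b01. A null handler (no block given) fails this check too.
+ (block_handler & 0x3) == 0x1
+}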
+
+fn gen_invokebuiltin(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr();
+ let bf_argc: usize = unsafe { (*bf).argc }.try_into().expect("non negative argc");
+
+ // ec, self, and arguments
+ if bf_argc + 2 > C_ARG_REGS.len() {
+ return CantCompile;
+ }
+
+ // If the calls don't allocate, do they need up to date PC, SP?
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ // Call the builtin func (ec, recv, arg1, arg2, ...)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+
+ // Copy arguments from locals
+ for i in 0..bf_argc {
+ let stack_opnd = ctx.stack_opnd((bf_argc - i - 1) as i32);
+ let c_arg_reg = C_ARG_REGS[2 + i];
+ mov(cb, c_arg_reg, stack_opnd);
+ }
+
+ call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8);
+
+ // Push the return value
+ ctx.stack_pop(bf_argc);
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
+
+// opt_invokebuiltin_delegate calls a builtin function, like
+// invokebuiltin does, but instead of taking arguments from the top of the
+// stack, it uses the argument locals (and self) of the current method.
+fn gen_opt_invokebuiltin_delegate(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ _ocb: &mut OutlinedCb,
+) -> CodegenStatus {
+ let bf: *const rb_builtin_function = jit_get_arg(jit, 0).as_ptr();
+ let bf_argc = unsafe { (*bf).argc };
+ let start_index = jit_get_arg(jit, 1).as_i32();
+
+ // ec, self, and arguments
+ if bf_argc + 2 > (C_ARG_REGS.len() as i32) {
+ return CantCompile;
+ }
+
+ // If the calls don't allocate, do they need up to date PC, SP?
+ jit_prepare_routine_call(jit, ctx, cb, REG0);
+
+ if bf_argc > 0 {
+ // Load environment pointer EP from CFP
+ mov(cb, REG0, mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_EP));
+ }
+
+ // Call the builtin func (ec, recv, arg1, arg2, ...)
+ mov(cb, C_ARG_REGS[0], REG_EC);
+ mov(
+ cb,
+ C_ARG_REGS[1],
+ mem_opnd(64, REG_CFP, RUBY_OFFSET_CFP_SELF),
+ );
+
+ // Copy arguments from locals
+ for i in 0..bf_argc {
+ let table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
+ let offs: i32 = -(table_size as i32) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i;
+ let local_opnd = mem_opnd(64, REG0, offs * (SIZEOF_VALUE as i32));
+ let offs: usize = (i + 2) as usize;
+ let c_arg_reg = C_ARG_REGS[offs];
+ mov(cb, c_arg_reg, local_opnd);
+ }
+ call_ptr(cb, REG0, unsafe { (*bf).func_ptr } as *const u8);
+
+ // Push the return value
+ let stack_ret = ctx.stack_push(Type::Unknown);
+ mov(cb, stack_ret, RAX);
+
+ KeepCompiling
+}
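+
+// Illustrative sketch of the EP-relative slot index computed in the loop above.
+// The helper is hypothetical; the formula mirrors the `offs` calculation.
+#[allow(dead_code)]
+fn delegate_local_ep_index_sketch(local_table_size: i32, start_index: i32, i: i32) -> i32 {
+ // Locals sit below the EP: the first one is at EP[-(table_size + VM_ENV_DATA_SIZE) + 1],
+ // and `start_index + i` selects which argument local to forward.
+ -(local_table_size) - (VM_ENV_DATA_SIZE as i32) + 1 + start_index + i
+}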
+
+/// Maps a YARV opcode to a code generation function (if supported)
+fn get_gen_fn(opcode: VALUE) -> Option<InsnGenFn> {
+ let VALUE(opcode) = opcode;
+ assert!(opcode < VM_INSTRUCTION_SIZE);
+
+ match opcode {
+ OP_NOP => Some(gen_nop),
+ OP_POP => Some(gen_pop),
+ OP_DUP => Some(gen_dup),
+ OP_DUPN => Some(gen_dupn),
+ OP_SWAP => Some(gen_swap),
+ OP_PUTNIL => Some(gen_putnil),
+ OP_PUTOBJECT => Some(gen_putobject),
+ OP_PUTOBJECT_INT2FIX_0_ => Some(gen_putobject_int2fix),
+ OP_PUTOBJECT_INT2FIX_1_ => Some(gen_putobject_int2fix),
+ OP_PUTSELF => Some(gen_putself),
+ OP_PUTSPECIALOBJECT => Some(gen_putspecialobject),
+ OP_SETN => Some(gen_setn),
+ OP_TOPN => Some(gen_topn),
+ OP_ADJUSTSTACK => Some(gen_adjuststack),
+ OP_GETLOCAL => Some(gen_getlocal),
+ OP_GETLOCAL_WC_0 => Some(gen_getlocal_wc0),
+ OP_GETLOCAL_WC_1 => Some(gen_getlocal_wc1),
+ OP_SETLOCAL => Some(gen_setlocal),
+ OP_SETLOCAL_WC_0 => Some(gen_setlocal_wc0),
+ OP_SETLOCAL_WC_1 => Some(gen_setlocal_wc1),
+ OP_OPT_PLUS => Some(gen_opt_plus),
+ OP_OPT_MINUS => Some(gen_opt_minus),
+ OP_OPT_AND => Some(gen_opt_and),
+ OP_OPT_OR => Some(gen_opt_or),
+ OP_NEWHASH => Some(gen_newhash),
+ OP_DUPHASH => Some(gen_duphash),
+ OP_NEWARRAY => Some(gen_newarray),
+ OP_DUPARRAY => Some(gen_duparray),
+ OP_CHECKTYPE => Some(gen_checktype),
+ OP_OPT_LT => Some(gen_opt_lt),
+ OP_OPT_LE => Some(gen_opt_le),
+ OP_OPT_GT => Some(gen_opt_gt),
+ OP_OPT_GE => Some(gen_opt_ge),
+ OP_OPT_MOD => Some(gen_opt_mod),
+ OP_OPT_STR_FREEZE => Some(gen_opt_str_freeze),
+ OP_OPT_STR_UMINUS => Some(gen_opt_str_uminus),
+ OP_SPLATARRAY => Some(gen_splatarray),
+ OP_NEWRANGE => Some(gen_newrange),
+ OP_PUTSTRING => Some(gen_putstring),
+ OP_EXPANDARRAY => Some(gen_expandarray),
+ OP_DEFINED => Some(gen_defined),
+ OP_CHECKKEYWORD => Some(gen_checkkeyword),
+ OP_CONCATSTRINGS => Some(gen_concatstrings),
+ OP_GETINSTANCEVARIABLE => Some(gen_getinstancevariable),
+ OP_SETINSTANCEVARIABLE => Some(gen_setinstancevariable),
+
+ OP_OPT_EQ => Some(gen_opt_eq),
+ OP_OPT_NEQ => Some(gen_opt_neq),
+ OP_OPT_AREF => Some(gen_opt_aref),
+ OP_OPT_ASET => Some(gen_opt_aset),
+ OP_OPT_MULT => Some(gen_opt_mult),
+ OP_OPT_DIV => Some(gen_opt_div),
+ OP_OPT_LTLT => Some(gen_opt_ltlt),
+ OP_OPT_NIL_P => Some(gen_opt_nil_p),
+ OP_OPT_EMPTY_P => Some(gen_opt_empty_p),
+ OP_OPT_NOT => Some(gen_opt_not),
+ OP_OPT_SIZE => Some(gen_opt_size),
+ OP_OPT_LENGTH => Some(gen_opt_length),
+ OP_OPT_REGEXPMATCH2 => Some(gen_opt_regexpmatch2),
+ OP_OPT_GETINLINECACHE => Some(gen_opt_getinlinecache),
+ OP_INVOKEBUILTIN => Some(gen_invokebuiltin),
+ OP_OPT_INVOKEBUILTIN_DELEGATE => Some(gen_opt_invokebuiltin_delegate),
+ OP_OPT_INVOKEBUILTIN_DELEGATE_LEAVE => Some(gen_opt_invokebuiltin_delegate),
+ OP_OPT_CASE_DISPATCH => Some(gen_opt_case_dispatch),
+ OP_BRANCHIF => Some(gen_branchif),
+ OP_BRANCHUNLESS => Some(gen_branchunless),
+ OP_BRANCHNIL => Some(gen_branchnil),
+ OP_JUMP => Some(gen_jump),
+
+ OP_GETBLOCKPARAMPROXY => Some(gen_getblockparamproxy),
+ OP_OPT_SEND_WITHOUT_BLOCK => Some(gen_opt_send_without_block),
+ OP_SEND => Some(gen_send),
+ OP_INVOKESUPER => Some(gen_invokesuper),
+ OP_LEAVE => Some(gen_leave),
+
+ OP_GETGLOBAL => Some(gen_getglobal),
+ OP_SETGLOBAL => Some(gen_setglobal),
+ OP_ANYTOSTRING => Some(gen_anytostring),
+ OP_OBJTOSTRING => Some(gen_objtostring),
+ OP_INTERN => Some(gen_intern),
+ OP_TOREGEXP => Some(gen_toregexp),
+ OP_GETSPECIAL => Some(gen_getspecial),
+ OP_GETCLASSVARIABLE => Some(gen_getclassvariable),
+ OP_SETCLASSVARIABLE => Some(gen_setclassvariable),
+
+ // Unimplemented opcode, YJIT won't generate code for this yet
+ _ => None,
+ }
+}
+
+// Return true when the codegen function generates code.
+// known_recv_class is non-null when the caller has used jit_guard_known_klass().
+// See yjit_reg_method().
+type MethodGenFn = fn(
+ jit: &mut JITState,
+ ctx: &mut Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ ci: *const rb_callinfo,
+ cme: *const rb_callable_method_entry_t,
+ block: Option<IseqPtr>,
+ argc: i32,
+ known_recv_class: *const VALUE,
+) -> bool;
+
+/// Global state needed for code generation
+pub struct CodegenGlobals {
+ /// Inline code block (fast path)
+ inline_cb: CodeBlock,
+
+ /// Outlined code block (slow path)
+ outlined_cb: OutlinedCb,
+
+ /// Code for exiting back to the interpreter from the leave instruction
+ leave_exit_code: CodePtr,
+
+ // Code for exiting a YJIT frame from branch_stub_hit().
+ // Filled in by gen_code_for_exit_from_stub().
+ stub_exit_code: CodePtr,
+
+ // Code for full logic of returning from C method and exiting to the interpreter
+ outline_full_cfunc_return_pos: CodePtr,
+
+ /// For implementing global code invalidation
+ global_inval_patches: Vec<CodepagePatch>,
+
+ /// For implementing global code invalidation. The number of bytes counting from the beginning
+ /// of the inline code block that should not be changed. After patching for global invalidation,
+ /// no one should make changes to the invalidated code region anymore. This is used to
+ /// break out of the invalidation race when there are multiple ractors.
+
+ // Methods for generating code for hardcoded (usually C) methods
+ method_codegen_table: HashMap<u64, MethodGenFn>,
+}
+
+/// For implementing global code invalidation. A position in the inline
+/// codeblock to patch into a JMP rel32 which jumps into some code in
+/// the outlined codeblock to exit to the interpreter.
+pub struct CodepagePatch {
+ pub inline_patch_pos: CodePtr,
+ pub outlined_target_pos: CodePtr,
+}
+
+/// Private singleton instance of the codegen globals
+static mut CODEGEN_GLOBALS: Option<CodegenGlobals> = None;
+
+impl CodegenGlobals {
+ /// Initialize the codegen globals
+ pub fn init() {
+ // The exec_mem_size option is in MiB; convert it to bytes
+ let mem_size = get_option!(exec_mem_size) * 1024 * 1024;
+
+ #[cfg(not(test))]
+ let (mut cb, mut ocb) = {
+ let page_size = unsafe { rb_yjit_get_page_size() }.as_usize();
+ let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) };
+ let cb = CodeBlock::new(mem_block, mem_size / 2, page_size);
+ let ocb = OutlinedCb::wrap(CodeBlock::new(
+ unsafe { mem_block.add(mem_size / 2) },
+ mem_size / 2,
+ page_size,
+ ));
+ (cb, ocb)
+ };
+
+ // In test mode we're not linking with the C code
+ // so we don't allocate executable memory
+ #[cfg(test)]
+ let mut cb = CodeBlock::new_dummy(mem_size / 2);
+ #[cfg(test)]
+ let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(mem_size / 2));
+
+ let leave_exit_code = gen_leave_exit(&mut ocb);
+
+ let stub_exit_code = gen_code_for_exit_from_stub(&mut ocb);
+
+ // Generate full exit code for C func
+ let cfunc_exit_code = gen_full_cfunc_return(&mut ocb);
+
+ // Mark all code memory as executable
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ let mut codegen_globals = CodegenGlobals {
+ inline_cb: cb,
+ outlined_cb: ocb,
+ leave_exit_code: leave_exit_code,
+ stub_exit_code: stub_exit_code,
+ outline_full_cfunc_return_pos: cfunc_exit_code,
+ global_inval_patches: Vec::new(),
+ inline_frozen_bytes: 0,
+ method_codegen_table: HashMap::new(),
+ };
+
+ // Register the method codegen functions
+ codegen_globals.reg_method_codegen_fns();
+
+ // Initialize the codegen globals instance
+ unsafe {
+ CODEGEN_GLOBALS = Some(codegen_globals);
+ }
+ }
+
+ // Register a specialized codegen function for a particular method. Note that
+ // if the function returns true, the code it generates runs without a
+ // control frame and without interrupt checks. To avoid creating observable
+ // behavior changes, the codegen function should only target simple code paths
+ // that do not allocate and do not make method calls.
+ fn yjit_reg_method(&mut self, klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
+ let id_string = std::ffi::CString::new(mid_str).expect("couldn't convert to CString!");
+ let mid = unsafe { rb_intern(id_string.as_ptr()) };
+ let me = unsafe { rb_method_entry_at(klass, mid) };
+
+ if me.is_null() {
+ panic!("undefined optimized method!");
+ }
+
+ // For now, only cfuncs are supported
+ //RUBY_ASSERT(me && me->def);
+ //RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
+
+ let method_serial = unsafe {
+ let def = (*me).def;
+ get_def_method_serial(def)
+ };
+
+ self.method_codegen_table.insert(method_serial, gen_fn);
+ }
+
+ /// Register codegen functions for some Ruby core methods
+ fn reg_method_codegen_fns(&mut self) {
+ unsafe {
+ // Specialization for C methods. See yjit_reg_method() for details.
+ self.yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
+
+ self.yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
+ self.yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
+
+ self.yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
+ self.yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
+
+ // rb_str_to_s() methods in string.c
+ self.yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
+ self.yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
+ self.yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
+
+ // Thread.current
+ self.yjit_reg_method(
+ rb_singleton_class(rb_cThread),
+ "current",
+ jit_thread_s_current,
+ );
+ }
+ }
+
+ /// Get a mutable reference to the codegen globals instance
+ pub fn get_instance() -> &'static mut CodegenGlobals {
+ unsafe { CODEGEN_GLOBALS.as_mut().unwrap() }
+ }
+
+ /// Get a mutable reference to the inline code block
+ pub fn get_inline_cb() -> &'static mut CodeBlock {
+ &mut CodegenGlobals::get_instance().inline_cb
+ }
+
+ /// Get a mutable reference to the outlined code block
+ pub fn get_outlined_cb() -> &'static mut OutlinedCb {
+ &mut CodegenGlobals::get_instance().outlined_cb
+ }
+
+ pub fn get_leave_exit_code() -> CodePtr {
+ CodegenGlobals::get_instance().leave_exit_code
+ }
+
+ pub fn get_stub_exit_code() -> CodePtr {
+ CodegenGlobals::get_instance().stub_exit_code
+ }
+
+ pub fn push_global_inval_patch(i_pos: CodePtr, o_pos: CodePtr) {
+ let patch = CodepagePatch {
+ inline_patch_pos: i_pos,
+ outlined_target_pos: o_pos,
+ };
+ CodegenGlobals::get_instance()
+ .global_inval_patches
+ .push(patch);
+ }
+
+ // Drain the list of patches and return it
+ pub fn take_global_inval_patches() -> Vec<CodepagePatch> {
+ let globals = CodegenGlobals::get_instance();
+ mem::take(&mut globals.global_inval_patches)
+ }
+
+ pub fn get_inline_frozen_bytes() -> usize {
+ CodegenGlobals::get_instance().inline_frozen_bytes
+ }
+
+ pub fn set_inline_frozen_bytes(frozen_bytes: usize) {
+ CodegenGlobals::get_instance().inline_frozen_bytes = frozen_bytes;
+ }
+
+ pub fn get_outline_full_cfunc_return_pos() -> CodePtr {
+ CodegenGlobals::get_instance().outline_full_cfunc_return_pos
+ }
+
+ pub fn look_up_codegen_method(method_serial: u64) -> Option<MethodGenFn> {
+ let table = &CodegenGlobals::get_instance().method_codegen_table;
+
+ let option_ref = table.get(&method_serial);
+ match option_ref {
+ None => None,
+ Some(&mgf) => Some(mgf), // Deref
+ }
+ }
+}
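+
+// Sketch of how the table above is consumed by a caller that has the method
+// serial of the cme being invoked (names here are illustrative):
+//
+// if let Some(gen_fn) = CodegenGlobals::look_up_codegen_method(method_serial) {
+// // Try the specialized codegen; fall back to the generic cfunc path
+// // when it declines by returning false.
+// }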
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn setup_codegen() -> (JITState, Context, CodeBlock, OutlinedCb) {
+ let block = Block::new(BLOCKID_NULL, &Context::default());
+
+ return (
+ JITState::new(&block),
+ Context::new(),
+ CodeBlock::new_dummy(256 * 1024),
+ OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)),
+ );
+ }
+
+ #[test]
+ fn test_gen_leave_exit() {
+ let mut ocb = OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024));
+ gen_leave_exit(&mut ocb);
+ assert!(ocb.unwrap().get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_exit() {
+ let (_, ctx, mut cb, _) = setup_codegen();
+ gen_exit(0 as *mut VALUE, &ctx, &mut cb);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_get_side_exit() {
+ let (mut jit, ctx, _, mut ocb) = setup_codegen();
+ get_side_exit(&mut jit, &mut ocb, &ctx);
+ assert!(ocb.unwrap().get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_check_ints() {
+ let (_, _ctx, mut cb, mut ocb) = setup_codegen();
+ let side_exit = ocb.unwrap().get_write_ptr();
+ gen_check_ints(&mut cb, side_exit);
+ }
+
+ #[test]
+ fn test_gen_nop() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let status = gen_nop(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(context.diff(&Context::new()), 0);
+ assert_eq!(cb.get_write_pos(), 0);
+ }
+
+ #[test]
+ fn test_gen_pop() {
+ let (mut jit, _, mut cb, mut ocb) = setup_codegen();
+ let mut context = Context::new_with_stack_size(1);
+ let status = gen_pop(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(context.diff(&Context::new()), 0);
+ }
+
+ #[test]
+ fn test_gen_dup() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ let status = gen_dup(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ // Did we duplicate the type information for the Fixnum type?
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(0)));
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1)));
+
+ assert!(cb.get_write_pos() > 0); // Write some movs
+ }
+
+ #[test]
+ fn test_gen_dupn() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ context.stack_push(Type::Flonum);
+
+ let mut value_array: [u64; 2] = [0, 2]; // We only compile for n == 2
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_dupn(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(3)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Fixnum, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() > 0); // Write some movs
+ }
+
+ #[test]
+ fn test_gen_swap() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ context.stack_push(Type::Flonum);
+
+ let status = gen_swap(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+ let (_, tmp_type_next) = context.get_opnd_mapping(StackOpnd(1));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ assert_eq!(tmp_type_next, Type::Flonum);
+ }
+
+ #[test]
+ fn test_putnil() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let status = gen_putnil(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Nil);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_putobject_qtrue() {
+ // Test gen_putobject with Qtrue
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+
+ let mut value_array: [u64; 2] = [0, Qtrue.into()];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::True);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_putobject_fixnum() {
+ // Test gen_putobject with a Fixnum to test another conditional branch
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+
+ // The Fixnum 7 is encoded as 7 * 2 + 1, or 15
+ let mut value_array: [u64; 2] = [0, 15];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_int2fix() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ jit.opcode = OP_PUTOBJECT_INT2FIX_0_;
+ let status = gen_putobject_int2fix(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ // Right now we're not testing the generated machine code to make sure a literal 1 or 0 was pushed. I've checked locally.
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::Fixnum);
+ }
+
+ #[test]
+ fn test_putself() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ let status = gen_putself(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_setn() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Fixnum);
+ context.stack_push(Type::Flonum);
+ context.stack_push(Type::String);
+
+ let mut value_array: [u64; 2] = [0, 2];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_setn(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::String, context.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::String, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() > 0);
+ }
+
+ #[test]
+ fn test_gen_topn() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Flonum);
+ context.stack_push(Type::String);
+
+ let mut value_array: [u64; 2] = [0, 1];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_topn(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(2)));
+ assert_eq!(Type::String, context.get_opnd_type(StackOpnd(1)));
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() > 0); // Write some movs
+ }
+
+ #[test]
+ fn test_gen_adjuststack() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ context.stack_push(Type::Flonum);
+ context.stack_push(Type::String);
+ context.stack_push(Type::Fixnum);
+
+ let mut value_array: [u64; 3] = [0, 2, 0];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_adjuststack(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ assert_eq!(status, KeepCompiling);
+
+ assert_eq!(Type::Flonum, context.get_opnd_type(StackOpnd(0)));
+
+ assert!(cb.get_write_pos() == 0); // No instructions written
+ }
+
+ #[test]
+ fn test_gen_leave() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+ // Push return value
+ context.stack_push(Type::Fixnum);
+ gen_leave(&mut jit, &mut context, &mut cb, &mut ocb);
+ }
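+
+ // Illustrative sketch mirroring test_putobject_qtrue above: exercises the
+ // other special-constant branch of gen_putobject with Qfalse.
+ #[test]
+ fn test_putobject_qfalse() {
+ let (mut jit, mut context, mut cb, mut ocb) = setup_codegen();
+
+ let mut value_array: [u64; 2] = [0, Qfalse.into()];
+ let pc: *mut VALUE = &mut value_array as *mut u64 as *mut VALUE;
+ jit.set_pc(pc);
+
+ let status = gen_putobject(&mut jit, &mut context, &mut cb, &mut ocb);
+
+ let (_, tmp_type_top) = context.get_opnd_mapping(StackOpnd(0));
+
+ assert_eq!(status, KeepCompiling);
+ assert_eq!(tmp_type_top, Type::False);
+ assert!(cb.get_write_pos() > 0);
+ }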
+}
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
new file mode 100644
index 0000000000..5ea3ee8193
--- /dev/null
+++ b/yjit/src/core.rs
@@ -0,0 +1,2071 @@
+use crate::asm::x86_64::*;
+use crate::asm::*;
+use crate::codegen::*;
+use crate::cruby::*;
+use crate::options::*;
+use crate::stats::*;
+use crate::utils::*;
+use core::ffi::c_void;
+use std::cell::*;
+use std::hash::{Hash, Hasher};
+use std::mem;
+use std::mem::size_of;
+use std::ptr;
+use std::rc::{Rc, Weak};
+use InsnOpnd::*;
+use TempMapping::*;
+
+// Maximum number of temp value types we keep track of
+pub const MAX_TEMP_TYPES: usize = 8;
+
+// Maximum number of local variable types we keep track of
+const MAX_LOCAL_TYPES: usize = 8;
+
+// Represent the type of a value (local/stack/self) in YJIT
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Type {
+ Unknown,
+ UnknownImm,
+ UnknownHeap,
+ Nil,
+ True,
+ False,
+ Fixnum,
+ Flonum,
+ Array,
+ Hash,
+ ImmSymbol,
+ HeapSymbol,
+ String,
+}
+
+// Default initialization
+impl Default for Type {
+ fn default() -> Self {
+ Type::Unknown
+ }
+}
+
+impl Type {
+ /// This returns an appropriate Type based on a known value
+ pub fn from(val: VALUE) -> Type {
+ if val.special_const_p() {
+ if val.fixnum_p() {
+ Type::Fixnum
+ } else if val.nil_p() {
+ Type::Nil
+ } else if val == Qtrue {
+ Type::True
+ } else if val == Qfalse {
+ Type::False
+ } else if val.static_sym_p() {
+ Type::ImmSymbol
+ } else if val.flonum_p() {
+ Type::Flonum
+ } else {
+ unreachable!()
+ }
+ } else {
+ match val.builtin_type() {
+ RUBY_T_ARRAY => Type::Array,
+ RUBY_T_HASH => Type::Hash,
+ RUBY_T_STRING => Type::String,
+ _ => Type::UnknownHeap,
+ }
+ }
+ }
+
+ /// Check if the type is an immediate
+ pub fn is_imm(&self) -> bool {
+ match self {
+ Type::UnknownImm => true,
+ Type::Nil => true,
+ Type::True => true,
+ Type::False => true,
+ Type::Fixnum => true,
+ Type::Flonum => true,
+ Type::ImmSymbol => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true when the type is not specific.
+ pub fn is_unknown(&self) -> bool {
+ match self {
+ Type::Unknown | Type::UnknownImm | Type::UnknownHeap => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true when we know the VALUE is a specific handle type,
+ /// such as a static symbol ([Type::ImmSymbol], i.e. true from RB_STATIC_SYM_P()).
+ /// Opposite of [Self::is_unknown].
+ pub fn is_specific(&self) -> bool {
+ !self.is_unknown()
+ }
+
+ /// Check if the type is a heap object
+ pub fn is_heap(&self) -> bool {
+ match self {
+ Type::UnknownHeap => true,
+ Type::Array => true,
+ Type::Hash => true,
+ Type::HeapSymbol => true,
+ Type::String => true,
+ _ => false,
+ }
+ }
+
+ /// Compute a difference between two value types
+ /// Returns 0 if the two are the same
+ /// Returns > 0 if different but compatible
+ /// Returns usize::MAX if incompatible
+ pub fn diff(self, dst: Self) -> usize {
+ // Perfect match, difference is zero
+ if self == dst {
+ return 0;
+ }
+
+ // Any type can flow into an unknown type
+ if dst == Type::Unknown {
+ return 1;
+ }
+
+ // Specific heap type into unknown heap type is imperfect but valid
+ if self.is_heap() && dst == Type::UnknownHeap {
+ return 1;
+ }
+
+ // Specific immediate type into unknown immediate type is imperfect but valid
+ if self.is_imm() && dst == Type::UnknownImm {
+ return 1;
+ }
+
+ // Incompatible types
+ return usize::MAX;
+ }
+
+ /// Upgrade this type into a more specific compatible type
+ /// The new type must be compatible and at least as specific as the previously known type.
+ fn upgrade(&mut self, src: Self) {
+ // Here we're checking that src is more specific than self
+ assert!(src.diff(*self) != usize::MAX);
+ *self = src;
+ }
+}
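+
+// A few concrete values of the distance above, as a mental model (these follow
+// directly from the rules in diff()):
+//
+// Type::Fixnum.diff(Type::Fixnum) == 0 (exact match)
+// Type::Fixnum.diff(Type::Unknown) == 1 (anything widens to Unknown)
+// Type::Fixnum.diff(Type::UnknownImm) == 1 (immediates widen to UnknownImm)
+// Type::Fixnum.diff(Type::Flonum) == usize::MAX (incompatible)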
+
+// Potential mapping of a value on the temporary stack to
+// self, a local variable or constant so that we can track its type
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
+pub enum TempMapping {
+ MapToStack, // Normal stack value
+ MapToSelf, // Temp maps to the self operand
+ MapToLocal(u8), // Temp maps to a local variable with index
+ //ConstMapping, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue)
+}
+
+impl Default for TempMapping {
+ fn default() -> Self {
+ MapToStack
+ }
+}
+
+// Operand to a bytecode instruction
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum InsnOpnd {
+ // The value is self
+ SelfOpnd,
+
+ // Temporary stack operand with stack index
+ StackOpnd(u16),
+}
+
+/// Code generation context
+/// Contains information we can use to specialize/optimize code
+/// There are a lot of context objects so we try to keep the size small.
+#[derive(Copy, Clone, Default, Debug)]
+pub struct Context {
+ // Number of values currently on the temporary stack
+ stack_size: u16,
+
+ // Offset of the JIT SP relative to the interpreter SP
+ // This represents how far the JIT's SP is from the "real" SP
+ sp_offset: i16,
+
+ // Depth of this block in the sidechain (e.g. an inline-cache chain)
+ chain_depth: u8,
+
+ // Local variable types we keep track of
+ local_types: [Type; MAX_LOCAL_TYPES],
+
+ // Temporary variable types we keep track of
+ temp_types: [Type; MAX_TEMP_TYPES],
+
+ // Type we track for self
+ self_type: Type,
+
+ // Mapping of temp stack entries to types we track
+ temp_mapping: [TempMapping; MAX_TEMP_TYPES],
+}
+
+/// Tuple of (iseq, idx) used to identify basic blocks
+/// There are a lot of blockid objects so we try to keep the size small.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct BlockId {
+ /// Instruction sequence
+ pub iseq: IseqPtr,
+
+ /// Index in the iseq where the block starts
+ pub idx: u32,
+}
+
+/// Null block id constant
+pub const BLOCKID_NULL: BlockId = BlockId {
+ iseq: ptr::null(),
+ idx: 0,
+};
+
+/// Branch code shape enumeration
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum BranchShape {
+ Next0, // Target 0 is next
+ Next1, // Target 1 is next
+ Default, // Neither target is next
+}
+
+// Branch code generation function signature
+type BranchGenFn =
+ fn(cb: &mut CodeBlock, target0: CodePtr, target1: Option<CodePtr>, shape: BranchShape) -> ();
+
+/// Store info about an outgoing branch in a code segment
+/// Note: care must be taken to minimize the size of branch objects
+struct Branch {
+ // Block this is attached to
+ block: BlockRef,
+
+ // Positions where the generated code starts and ends
+ start_addr: Option<CodePtr>,
+ end_addr: Option<CodePtr>,
+
+ // Context right after the branch instruction
+ src_ctx: Context,
+
+ // Branch target blocks and their contexts
+ targets: [BlockId; 2],
+ target_ctxs: [Context; 2],
+ blocks: [Option<BlockRef>; 2],
+
+ // Jump target addresses
+ dst_addrs: [Option<CodePtr>; 2],
+
+ // Branch code generation function
+ gen_fn: BranchGenFn,
+
+ // Shape of the branch
+ shape: BranchShape,
+}
+
+impl std::fmt::Debug for Branch {
+ fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ // TODO: expand this if needed. #[derive(Debug)] on Branch gave a
+ // strange error related to BranchGenFn
+ formatter
+ .debug_struct("Branch")
+ .field("start", &self.start_addr)
+ .field("end", &self.end_addr)
+ .field("targets", &self.targets)
+ .finish()
+ }
+}
+
+impl Branch {
+ // Compute the size of the branch code
+ fn code_size(&self) -> usize {
+ (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize)
+ }
+}
+
+// In case this block is invalidated, these two pieces of info
+// help to remove all pointers to this block in the system.
+#[derive(Debug)]
+pub struct CmeDependency {
+ pub receiver_klass: VALUE,
+ pub callee_cme: *const rb_callable_method_entry_t,
+}
+
+/// Basic block version
+/// Represents a portion of an iseq compiled with a given context
+/// Note: care must be taken to minimize the size of Block objects
+#[derive(Debug)]
+pub struct Block {
+ // Bytecode sequence (iseq, idx) this is a version of
+ blockid: BlockId,
+
+ // Index one past the last instruction for this block in the iseq
+ end_idx: u32,
+
+ // Context at the start of the block
+ // This should never be mutated
+ ctx: Context,
+
+ // Positions where the generated code starts and ends
+ start_addr: Option<CodePtr>,
+ end_addr: Option<CodePtr>,
+
+ // List of incoming branches (from predecessors)
+ // These are reference counted (ownership shared between predecessor and successors)
+ incoming: Vec<BranchRef>,
+
+ // NOTE: we might actually be able to store the branches here without refcounting;
+ // however, using a RefCell makes it easy to get a pointer to Branch objects
+ //
+ // List of outgoing branches (to successors)
+ outgoing: Vec<BranchRef>,
+
+ // FIXME: should these be code pointers instead?
+ // Offsets for GC managed objects in the mainline code block
+ gc_object_offsets: Vec<u32>,
+
+ // CME dependencies of this block, to help to remove all pointers to this
+ // block in the system.
+ cme_dependencies: Vec<CmeDependency>,
+
+ // Code address of an exit for `ctx` and `blockid`.
+ // Used for block invalidation.
+ pub entry_exit: Option<CodePtr>,
+}
+
+/// Reference-counted pointer to a block that can be borrowed mutably.
+/// Wrapped so we could implement [Hash] and [Eq] for use with stdlib collections.
+#[derive(Debug)]
+pub struct BlockRef(Rc<RefCell<Block>>);
+
+/// Reference-counted pointer to a branch that can be borrowed mutably
+type BranchRef = Rc<RefCell<Branch>>;
+
+/// List of block versions for a given blockid
+type VersionList = Vec<BlockRef>;
+
+/// Map from iseq indices to lists of versions for that given blockid
+/// An instance of this is stored on each iseq
+type VersionMap = Vec<VersionList>;
+
+impl BlockRef {
+ /// Constructor
+ pub fn new(rc: Rc<RefCell<Block>>) -> Self {
+ Self(rc)
+ }
+
+ /// Borrow the block through [RefCell].
+ pub fn borrow(&self) -> Ref<'_, Block> {
+ self.0.borrow()
+ }
+
+ /// Borrow the block for mutation through [RefCell].
+ pub fn borrow_mut(&self) -> RefMut<'_, Block> {
+ self.0.borrow_mut()
+ }
+}
+
+impl Clone for BlockRef {
+ /// Clone the [Rc]
+ fn clone(&self) -> Self {
+ Self(self.0.clone())
+ }
+}
+
+impl Hash for BlockRef {
+ /// Hash the reference by hashing the pointer
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ let rc_ptr = Rc::as_ptr(&self.0);
+ rc_ptr.hash(state);
+ }
+}
+
+impl PartialEq for BlockRef {
+ /// Equality defined by allocation identity
+ fn eq(&self, other: &Self) -> bool {
+ Rc::ptr_eq(&self.0, &other.0)
+ }
+}
+
+/// It's comparison by identity, so all the requirements are satisfied
+impl Eq for BlockRef {}
+
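+// Editor's sketch (not part of the original change): a small test module showing
+// how the identity-based Hash/Eq implementations above let BlockRef serve as a
+// key in stdlib collections. It assumes that a null iseq pointer is acceptable
+// for a BlockId that is only constructed and never executed or verified.
+#[cfg(test)]
+mod blockref_identity_sketch {
+    use super::*;
+    use std::collections::HashSet;
+
+    #[test]
+    fn blockrefs_compare_and_hash_by_allocation_identity() {
+        // Hypothetical block id: a null iseq, used only to construct a Block
+        let blockid = BlockId { iseq: std::ptr::null(), idx: 0 };
+        let blockref = Block::new(blockid, &Context::default());
+
+        // Clones of the Rc point at the same allocation, so they compare equal
+        // to each other and hash identically.
+        let mut set = HashSet::new();
+        set.insert(blockref.clone());
+        assert!(set.contains(&blockref));
+        assert!(blockref == blockref.clone());
+    }
+}
+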
+/// This is all the data YJIT stores on an iseq.
+/// It is lazily allocated on the Rust side (see [get_iseq_payload]) and attached
+/// to the iseq; C code passes the payload pointer back to us when calling into
+/// YJIT (e.g. for marking and freeing).
+#[derive(Default)]
+pub struct IseqPayload {
+ version_map: VersionMap,
+}
+
+impl IseqPayload {
+ /// Remove all block versions from the payload and then return them as an iterator
+ pub fn take_all_blocks(&mut self) -> impl Iterator<Item = BlockRef> {
+ // Empty the blocks
+ let version_map = mem::take(&mut self.version_map);
+
+ // Turn it into an iterator that owns the blocks and return
+        version_map.into_iter().flatten()
+ }
+}
+
+/// Get the payload for an iseq. For safety it's up to the caller to ensure the returned `&mut`
+/// upholds aliasing rules and that the argument is a valid iseq.
+pub unsafe fn load_iseq_payload(iseq: IseqPtr) -> Option<&'static mut IseqPayload> {
+ let payload = rb_iseq_get_yjit_payload(iseq);
+ let payload: *mut IseqPayload = payload.cast();
+ payload.as_mut()
+}
+
+/// Get the payload object associated with an iseq. Create one if none exists.
+fn get_iseq_payload(iseq: IseqPtr) -> &'static mut IseqPayload {
+ use core::ffi::c_void;
+ type VoidPtr = *mut c_void;
+
+ let payload_non_null = unsafe {
+ let payload = rb_iseq_get_yjit_payload(iseq);
+ if payload.is_null() {
+ // Increment the compiled iseq count
+ incr_counter!(compiled_iseq_count);
+
+ // Allocate a new payload with Box and transfer ownership to the GC.
+ // We drop the payload with Box::from_raw when the GC frees the iseq and calls us.
+ // NOTE(alan): Sometimes we read from an iseq without ever writing to it.
+ // We allocate in those cases anyways.
+ let new_payload = Box::into_raw(Box::new(IseqPayload::default()));
+ rb_iseq_set_yjit_payload(iseq, new_payload as VoidPtr);
+
+ new_payload
+ } else {
+ payload as *mut IseqPayload
+ }
+ };
+
+ // SAFETY: we should have the VM lock and all other Ruby threads should be asleep. So we have
+ // exclusive mutable access.
+ // Hmm, nothing seems to stop calling this on the same
+ // iseq twice, though, which violates aliasing rules.
+ unsafe { payload_non_null.as_mut() }.unwrap()
+}
+
+/// Free the per-iseq payload
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_free(payload: *mut c_void) {
+ let payload = {
+ if payload.is_null() {
+ // Nothing to free.
+ return;
+ } else {
+ payload as *mut IseqPayload
+ }
+ };
+
+ use crate::invariants;
+
+ // Take ownership of the payload with Box::from_raw().
+ // It drops right before this function returns.
+ // SAFETY: We got the pointer from Box::into_raw().
+ let payload = unsafe { Box::from_raw(payload) };
+
+ // Remove all blocks in the payload from global invariants table.
+ for versions in &payload.version_map {
+ for block in versions {
+ invariants::block_assumptions_free(&block);
+ }
+ }
+}
+
+/// GC callback for marking GC objects in the per-iseq payload.
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_mark(payload: *mut c_void) {
+ let payload = if payload.is_null() {
+ // Nothing to mark.
+ return;
+ } else {
+ // SAFETY: It looks like the GC takes the VM lock while marking
+ // so we should be satisfying aliasing rules here.
+ unsafe { &*(payload as *const IseqPayload) }
+ };
+
+ // For marking VALUEs written into the inline code block.
+ // We don't write VALUEs in the outlined block.
+ let cb: &CodeBlock = CodegenGlobals::get_inline_cb();
+
+ for versions in &payload.version_map {
+ for block in versions {
+ let block = block.borrow();
+
+ unsafe { rb_gc_mark_movable(block.blockid.iseq.into()) };
+
+ // Mark method entry dependencies
+ for cme_dep in &block.cme_dependencies {
+ unsafe { rb_gc_mark_movable(cme_dep.receiver_klass) };
+ unsafe { rb_gc_mark_movable(cme_dep.callee_cme.into()) };
+ }
+
+ // Mark outgoing branch entries
+ for branch in &block.outgoing {
+ let branch = branch.borrow();
+ for target in &branch.targets {
+ unsafe { rb_gc_mark_movable(target.iseq.into()) };
+ }
+ }
+
+ // Walk over references to objects in generated code.
+ for offset in &block.gc_object_offsets {
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ // Creating an unaligned pointer is well defined unlike in C.
+ let value_address = value_address as *const VALUE;
+
+ // SAFETY: these point to YJIT's code buffer
+ unsafe {
+ let object = value_address.read_unaligned();
+ rb_gc_mark_movable(object);
+ };
+ }
+ }
+ }
+}
+
+/// GC callback for updating GC objects in the per-iseq payload.
+/// This is a mirror of [rb_yjit_iseq_mark].
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_update_references(payload: *mut c_void) {
+ let payload = if payload.is_null() {
+ // Nothing to update.
+ return;
+ } else {
+ // SAFETY: It looks like the GC takes the VM lock while updating references
+ // so we should be satisfying aliasing rules here.
+ unsafe { &*(payload as *const IseqPayload) }
+ };
+
+ // Evict other threads from generated code since we are about to patch them.
+ // Also acts as an assert that we hold the VM lock.
+ unsafe { rb_vm_barrier() };
+
+ // For updating VALUEs written into the inline code block.
+ let cb = CodegenGlobals::get_inline_cb();
+
+ for versions in &payload.version_map {
+ for block in versions {
+ let mut block = block.borrow_mut();
+
+ block.blockid.iseq = unsafe { rb_gc_location(block.blockid.iseq.into()) }.as_iseq();
+
+ // Update method entry dependencies
+ for cme_dep in &mut block.cme_dependencies {
+ cme_dep.receiver_klass = unsafe { rb_gc_location(cme_dep.receiver_klass) };
+ cme_dep.callee_cme = unsafe { rb_gc_location(cme_dep.callee_cme.into()) }.as_cme();
+ }
+
+ // Update outgoing branch entries
+ for branch in &block.outgoing {
+ let mut branch = branch.borrow_mut();
+ for target in &mut branch.targets {
+ target.iseq = unsafe { rb_gc_location(target.iseq.into()) }.as_iseq();
+ }
+ }
+
+ // Walk over references to objects in generated code.
+ for offset in &block.gc_object_offsets {
+ let offset_to_value = offset.as_usize();
+ let value_address: *const u8 = cb.get_ptr(offset_to_value).raw_ptr();
+ // Creating an unaligned pointer is well defined unlike in C.
+ let value_address = value_address as *mut VALUE;
+
+ // SAFETY: these point to YJIT's code buffer
+ let object = unsafe { value_address.read_unaligned() };
+ let new_addr = unsafe { rb_gc_location(object) };
+
+ // Only write when the VALUE moves, to be CoW friendly.
+ if new_addr != object {
+ // Possibly unlock the page we need to update
+ cb.mark_position_writable(offset_to_value);
+
+ // Object could cross a page boundary, so unlock there as well
+ cb.mark_position_writable(offset_to_value + size_of::<VALUE>() - 1);
+
+ // SAFETY: we just made this address writable
+ unsafe { value_address.write_unaligned(new_addr) };
+ }
+ }
+ }
+ }
+
+ // Note that we would have returned already if YJIT is off.
+ cb.mark_all_executable();
+
+ // I guess we need to make the outlined block executable as well because
+ // we don't split the two at exact page boundaries.
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
+}
+
+/// Get all blocks for a particular place in an iseq.
+fn get_version_list(blockid: BlockId) -> &'static mut VersionList {
+ let payload = get_iseq_payload(blockid.iseq);
+ let insn_idx = blockid.idx.as_usize();
+
+ // Expand the version map as necessary
+ if insn_idx >= payload.version_map.len() {
+ payload
+ .version_map
+ .resize(insn_idx + 1, VersionList::default());
+ }
+
+ return payload.version_map.get_mut(insn_idx).unwrap();
+}
+
+/// Take all of the blocks for a particular place in an iseq
+pub fn take_version_list(blockid: BlockId) -> VersionList {
+ let payload = get_iseq_payload(blockid.iseq);
+ let insn_idx = blockid.idx.as_usize();
+
+ if insn_idx >= payload.version_map.len() {
+ VersionList::default()
+ } else {
+ mem::take(&mut payload.version_map[insn_idx])
+ }
+}
+
+/// Count the number of block versions matching a given blockid
+fn get_num_versions(blockid: BlockId) -> usize {
+ let insn_idx = blockid.idx.as_usize();
+ let payload = get_iseq_payload(blockid.iseq);
+
+ payload
+ .version_map
+ .get(insn_idx)
+ .map(|versions| versions.len())
+ .unwrap_or(0)
+}
+
+/// Get a list of block versions generated for an iseq
+/// This is used for disassembly (see disasm.rs)
+pub fn get_iseq_block_list(iseq: IseqPtr) -> Vec<BlockRef> {
+ let payload = get_iseq_payload(iseq);
+
+ let mut blocks = Vec::<BlockRef>::new();
+
+ // For each instruction index
+ for insn_idx in 0..payload.version_map.len() {
+ let version_list = &payload.version_map[insn_idx];
+
+ // For each version at this instruction index
+ for version in version_list {
+ // Clone the block ref and add it to the list
+ blocks.push(version.clone());
+ }
+ }
+
+ return blocks;
+}
+
+/// Retrieve a basic block version for an (iseq, idx) tuple
+/// This will return None if no version is found
+fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
+ let versions = get_version_list(blockid);
+
+ // Best match found
+ let mut best_version: Option<BlockRef> = None;
+ let mut best_diff = usize::MAX;
+
+ // For each version matching the blockid
+ for blockref in versions.iter_mut() {
+ let block = blockref.borrow();
+ let diff = ctx.diff(&block.ctx);
+
+ // Note that we always prefer the first matching
+ // version found because of inline-cache chains
+ if diff < best_diff {
+ best_version = Some(blockref.clone());
+ best_diff = diff;
+ }
+ }
+
+ // If greedy versioning is enabled
+ if get_option!(greedy_versioning) {
+ // If we're below the version limit, don't settle for an imperfect match
+ if versions.len() + 1 < get_option!(max_versions) && best_diff > 0 {
+ return None;
+ }
+ }
+
+ return best_version;
+}
+
+/// Produce a generic context when the block version limit is hit for a blockid
+pub fn limit_block_versions(blockid: BlockId, ctx: &Context) -> Context {
+ // Guard chains implement limits separately, do nothing
+ if ctx.chain_depth > 0 {
+ return *ctx;
+ }
+
+ // If this block version we're about to add will hit the version limit
+ if get_num_versions(blockid) + 1 >= get_option!(max_versions) {
+ // Produce a generic context that stores no type information,
+ // but still respects the stack_size and sp_offset constraints.
+ // This new context will then match all future requests.
+ let mut generic_ctx = Context::default();
+ generic_ctx.stack_size = ctx.stack_size;
+ generic_ctx.sp_offset = ctx.sp_offset;
+
+        // Return this generic context in place of the incoming one
+ return generic_ctx;
+ }
+
+ return *ctx;
+}
+
+/// Keep track of a block version. Block should be fully constructed.
+/// Uses `cb` for running write barriers.
+fn add_block_version(blockref: &BlockRef, cb: &CodeBlock) {
+ let block = blockref.borrow();
+
+ // Function entry blocks must have stack size 0
+ assert!(!(block.blockid.idx == 0 && block.ctx.stack_size > 0));
+
+ let version_list = get_version_list(block.blockid);
+
+ version_list.push(blockref.clone());
+
+ // By writing the new block to the iseq, the iseq now
+ // contains new references to Ruby objects. Run write barriers.
+ let iseq: VALUE = block.blockid.iseq.into();
+ for dep in block.iter_cme_deps() {
+ obj_written!(iseq, dep.receiver_klass);
+ obj_written!(iseq, dep.callee_cme.into());
+ }
+
+ // Run write barriers for all objects in generated code.
+ for offset in &block.gc_object_offsets {
+ let value_address: *const u8 = cb.get_ptr(offset.as_usize()).raw_ptr();
+ // Creating an unaligned pointer is well defined unlike in C.
+ let value_address: *const VALUE = value_address.cast();
+
+ let object = unsafe { value_address.read_unaligned() };
+ obj_written!(iseq, object);
+ }
+
+ incr_counter!(compiled_block_count);
+}
+
+/// Remove a block version from the version map of its parent ISEQ
+fn remove_block_version(blockref: &BlockRef) {
+ let block = blockref.borrow();
+ let version_list = get_version_list(block.blockid);
+
+ // Retain the versions that are not this one
+ version_list.retain(|other| blockref != other);
+}
+
+//===========================================================================
+// I put the implementation of traits for core.rs types below
+// We can move these closer to the above structs later if we want.
+//===========================================================================
+
+impl Block {
+ pub fn new(blockid: BlockId, ctx: &Context) -> BlockRef {
+ let block = Block {
+ blockid,
+ end_idx: 0,
+ ctx: *ctx,
+ start_addr: None,
+ end_addr: None,
+ incoming: Vec::new(),
+ outgoing: Vec::new(),
+ gc_object_offsets: Vec::new(),
+ cme_dependencies: Vec::new(),
+ entry_exit: None,
+ };
+
+ // Wrap the block in a reference counted refcell
+ // so that the block ownership can be shared
+ BlockRef::new(Rc::new(RefCell::new(block)))
+ }
+
+ pub fn get_blockid(&self) -> BlockId {
+ self.blockid
+ }
+
+ pub fn get_end_idx(&self) -> u32 {
+ self.end_idx
+ }
+
+ pub fn get_ctx(&self) -> Context {
+ self.ctx
+ }
+
+ pub fn get_start_addr(&self) -> Option<CodePtr> {
+ self.start_addr
+ }
+
+ pub fn get_end_addr(&self) -> Option<CodePtr> {
+ self.end_addr
+ }
+
+ /// Get an immutable iterator over cme dependencies
+ pub fn iter_cme_deps(&self) -> std::slice::Iter<'_, CmeDependency> {
+ self.cme_dependencies.iter()
+ }
+
+ /// Set the starting address in the generated code for the block
+ /// This can be done only once for a block
+ pub fn set_start_addr(&mut self, addr: CodePtr) {
+ assert!(self.start_addr.is_none());
+ self.start_addr = Some(addr);
+ }
+
+    /// Set the end address in the generated code for the block
+ /// This can be done only once for a block
+ pub fn set_end_addr(&mut self, addr: CodePtr) {
+ // The end address can only be set after the start address is set
+ assert!(self.start_addr.is_some());
+
+ // TODO: assert constraint that blocks can shrink but not grow in length
+ self.end_addr = Some(addr);
+ }
+
+    /// Set the index one past the last instruction in the block
+ /// This can be done only once for a block
+ pub fn set_end_idx(&mut self, end_idx: u32) {
+ assert!(self.end_idx == 0);
+ self.end_idx = end_idx;
+ }
+
+ pub fn add_gc_object_offset(self: &mut Block, ptr_offset: u32) {
+ self.gc_object_offsets.push(ptr_offset);
+ }
+
+ /// Instantiate a new CmeDependency struct and add it to the list of
+ /// dependencies for this block.
+ pub fn add_cme_dependency(
+ &mut self,
+ receiver_klass: VALUE,
+ callee_cme: *const rb_callable_method_entry_t,
+ ) {
+ self.cme_dependencies.push(CmeDependency {
+ receiver_klass,
+ callee_cme,
+ });
+ }
+
+    /// Compute the size of the block code
+ pub fn code_size(&self) -> usize {
+ (self.end_addr.unwrap().raw_ptr() as usize) - (self.start_addr.unwrap().raw_ptr() as usize)
+ }
+}
+
+impl Context {
+ pub fn new_with_stack_size(size: i16) -> Self {
+ return Context {
+ stack_size: size as u16,
+ sp_offset: size,
+ chain_depth: 0,
+ local_types: [Type::Unknown; MAX_LOCAL_TYPES],
+ temp_types: [Type::Unknown; MAX_TEMP_TYPES],
+ self_type: Type::Unknown,
+ temp_mapping: [MapToStack; MAX_TEMP_TYPES],
+ };
+ }
+
+ pub fn new() -> Self {
+ return Self::new_with_stack_size(0);
+ }
+
+ pub fn get_stack_size(&self) -> u16 {
+ self.stack_size
+ }
+
+ pub fn get_sp_offset(&self) -> i16 {
+ self.sp_offset
+ }
+
+ pub fn set_sp_offset(&mut self, offset: i16) {
+ self.sp_offset = offset;
+ }
+
+ pub fn get_chain_depth(&self) -> u8 {
+ self.chain_depth
+ }
+
+ pub fn reset_chain_depth(&mut self) {
+ self.chain_depth = 0;
+ }
+
+ pub fn increment_chain_depth(&mut self) {
+ self.chain_depth += 1;
+ }
+
+ /// Get an operand for the adjusted stack pointer address
+ pub fn sp_opnd(&self, offset_bytes: isize) -> X86Opnd {
+ let offset = ((self.sp_offset as isize) * (SIZEOF_VALUE as isize)) + offset_bytes;
+ let offset = offset as i32;
+ return mem_opnd(64, REG_SP, offset);
+ }
+
+ /// Push one new value on the temp stack with an explicit mapping
+ /// Return a pointer to the new stack top
+ pub fn stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> X86Opnd {
+        // If type propagation is disabled, store no types other than Unknown
+        if get_option!(no_type_prop) && temp_type != Type::Unknown {
+            return self.stack_push_mapping((mapping, Type::Unknown));
+        }
+
+ let stack_size: usize = self.stack_size.into();
+
+ // Keep track of the type and mapping of the value
+ if stack_size < MAX_TEMP_TYPES {
+ self.temp_mapping[stack_size] = mapping;
+ self.temp_types[stack_size] = temp_type;
+
+ if let MapToLocal(idx) = mapping {
+ assert!((idx as usize) < MAX_LOCAL_TYPES);
+ }
+ }
+
+ self.stack_size += 1;
+ self.sp_offset += 1;
+
+ // SP points just above the topmost value
+ let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32);
+ return mem_opnd(64, REG_SP, offset);
+ }
+
+ /// Push one new value on the temp stack
+ /// Return a pointer to the new stack top
+ pub fn stack_push(&mut self, val_type: Type) -> X86Opnd {
+ return self.stack_push_mapping((MapToStack, val_type));
+ }
+
+ /// Push the self value on the stack
+ pub fn stack_push_self(&mut self) -> X86Opnd {
+ return self.stack_push_mapping((MapToSelf, Type::Unknown));
+ }
+
+ /// Push a local variable on the stack
+ pub fn stack_push_local(&mut self, local_idx: usize) -> X86Opnd {
+ if local_idx >= MAX_LOCAL_TYPES {
+ return self.stack_push(Type::Unknown);
+ }
+
+ return self.stack_push_mapping((MapToLocal(local_idx as u8), Type::Unknown));
+ }
+
+    /// Pop N values off the stack
+    /// Return a pointer to the stack top before the pop operation
+ pub fn stack_pop(&mut self, n: usize) -> X86Opnd {
+ assert!(n <= self.stack_size.into());
+
+ // SP points just above the topmost value
+ let offset = ((self.sp_offset as i32) - 1) * (SIZEOF_VALUE as i32);
+ let top = mem_opnd(64, REG_SP, offset);
+
+ // Clear the types of the popped values
+ for i in 0..n {
+ let idx: usize = (self.stack_size as usize) - i - 1;
+
+ if idx < MAX_TEMP_TYPES {
+ self.temp_types[idx] = Type::Unknown;
+ self.temp_mapping[idx] = MapToStack;
+ }
+ }
+
+ self.stack_size -= n as u16;
+ self.sp_offset -= n as i16;
+
+ return top;
+ }
+
+ /// Get an operand pointing to a slot on the temp stack
+ pub fn stack_opnd(&self, idx: i32) -> X86Opnd {
+ // SP points just above the topmost value
+ let offset = ((self.sp_offset as i32) - 1 - idx) * (SIZEOF_VALUE as i32);
+ let opnd = mem_opnd(64, REG_SP, offset);
+ return opnd;
+ }
+
+ /// Get the type of an instruction operand
+ pub fn get_opnd_type(&self, opnd: InsnOpnd) -> Type {
+ match opnd {
+ SelfOpnd => self.self_type,
+ StackOpnd(idx) => {
+ let idx = idx as u16;
+ assert!(idx < self.stack_size);
+ let stack_idx: usize = (self.stack_size - 1 - idx).into();
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return Type::Unknown;
+ }
+
+ let mapping = self.temp_mapping[stack_idx];
+
+ match mapping {
+ MapToSelf => self.self_type,
+ MapToStack => self.temp_types[(self.stack_size - 1 - idx) as usize],
+ MapToLocal(idx) => {
+ assert!((idx as usize) < MAX_LOCAL_TYPES);
+ return self.local_types[idx as usize];
+ }
+ }
+ }
+ }
+ }
+
+ /// Get the currently tracked type for a local variable
+ pub fn get_local_type(&self, idx: usize) -> Type {
+        if idx >= MAX_LOCAL_TYPES {
+ return Type::Unknown;
+ }
+
+ return self.local_types[idx];
+ }
+
+ /// Upgrade (or "learn") the type of an instruction operand
+ /// This value must be compatible and at least as specific as the previously known type.
+ /// If this value originated from self, or an lvar, the learned type will be
+ /// propagated back to its source.
+ pub fn upgrade_opnd_type(&mut self, opnd: InsnOpnd, opnd_type: Type) {
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ match opnd {
+ SelfOpnd => self.self_type.upgrade(opnd_type),
+ StackOpnd(idx) => {
+ let idx = idx as u16;
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return;
+ }
+
+ let mapping = self.temp_mapping[stack_idx];
+
+ match mapping {
+ MapToSelf => self.self_type.upgrade(opnd_type),
+ MapToStack => self.temp_types[stack_idx].upgrade(opnd_type),
+ MapToLocal(idx) => {
+ let idx = idx as usize;
+ assert!(idx < MAX_LOCAL_TYPES);
+ self.local_types[idx].upgrade(opnd_type);
+ }
+ }
+ }
+ }
+ }
+
+    /// Get both the type and mapping (where the value originates) of an operand.
+    /// This can be used with stack_push_mapping or set_opnd_mapping to copy
+    /// a stack value's type while maintaining the mapping.
+ pub fn get_opnd_mapping(&self, opnd: InsnOpnd) -> (TempMapping, Type) {
+ let opnd_type = self.get_opnd_type(opnd);
+
+ match opnd {
+ SelfOpnd => (MapToSelf, opnd_type),
+ StackOpnd(idx) => {
+ let idx = idx as u16;
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ if stack_idx < MAX_TEMP_TYPES {
+ (self.temp_mapping[stack_idx], opnd_type)
+ } else {
+                    // We can't know the source of this stack operand, so we assume it is
+                    // a stack-only temporary; its type will be Type::Unknown
+ assert!(opnd_type == Type::Unknown);
+ (MapToStack, opnd_type)
+ }
+ }
+ }
+ }
+
+ /// Overwrite both the type and mapping of a stack operand.
+ pub fn set_opnd_mapping(&mut self, opnd: InsnOpnd, (mapping, opnd_type): (TempMapping, Type)) {
+ match opnd {
+ SelfOpnd => unreachable!("self always maps to self"),
+ StackOpnd(idx) => {
+ assert!(idx < self.stack_size);
+ let stack_idx = (self.stack_size - 1 - idx) as usize;
+
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ // If outside of tracked range, do nothing
+ if stack_idx >= MAX_TEMP_TYPES {
+ return;
+ }
+
+ self.temp_mapping[stack_idx] = mapping;
+
+                // Only used when mapping == MapToStack
+ self.temp_types[stack_idx] = opnd_type;
+ }
+ }
+ }
+
+ /// Set the type of a local variable
+ pub fn set_local_type(&mut self, local_idx: usize, local_type: Type) {
+ let ctx = self;
+
+ // If type propagation is disabled, store no types
+ if get_option!(no_type_prop) {
+ return;
+ }
+
+ if local_idx >= MAX_LOCAL_TYPES {
+ return;
+ }
+
+ // If any values on the stack map to this local we must detach them
+ for (i, mapping) in ctx.temp_mapping.iter_mut().enumerate() {
+ *mapping = match *mapping {
+ MapToStack => MapToStack,
+ MapToSelf => MapToSelf,
+ MapToLocal(idx) => {
+ if idx as usize == local_idx {
+ ctx.temp_types[i] = ctx.local_types[idx as usize];
+ MapToStack
+ } else {
+ MapToLocal(idx)
+ }
+ }
+ }
+ }
+
+ ctx.local_types[local_idx] = local_type;
+ }
+
+ /// Erase local variable type information
+ /// eg: because of a call we can't track
+ pub fn clear_local_types(&mut self) {
+ // When clearing local types we must detach any stack mappings to those
+ // locals. Even if local values may have changed, stack values will not.
+ for (i, mapping) in self.temp_mapping.iter_mut().enumerate() {
+ *mapping = match *mapping {
+ MapToStack => MapToStack,
+ MapToSelf => MapToSelf,
+ MapToLocal(idx) => {
+ self.temp_types[i] = self.local_types[idx as usize];
+ MapToStack
+ }
+ }
+ }
+
+ // Clear the local types
+ self.local_types = [Type::default(); MAX_LOCAL_TYPES];
+ }
+
+ /// Compute a difference score for two context objects
+ /// Returns 0 if the two contexts are the same
+ /// Returns > 0 if different but compatible
+ /// Returns usize::MAX if incompatible
+ pub fn diff(&self, dst: &Context) -> usize {
+ // Self is the source context (at the end of the predecessor)
+ let src = self;
+
+ // Can only lookup the first version in the chain
+ if dst.chain_depth != 0 {
+ return usize::MAX;
+ }
+
+ // Blocks with depth > 0 always produce new versions
+ // Sidechains cannot overlap
+ if src.chain_depth != 0 {
+ return usize::MAX;
+ }
+
+ if dst.stack_size != src.stack_size {
+ return usize::MAX;
+ }
+
+ if dst.sp_offset != src.sp_offset {
+ return usize::MAX;
+ }
+
+ // Difference sum
+ let mut diff = 0;
+
+ // Check the type of self
+ let self_diff = src.self_type.diff(dst.self_type);
+
+ if self_diff == usize::MAX {
+ return usize::MAX;
+ }
+
+ diff += self_diff;
+
+ // For each local type we track
+ for i in 0..src.local_types.len() {
+ let t_src = src.local_types[i];
+ let t_dst = dst.local_types[i];
+ let temp_diff = t_src.diff(t_dst);
+
+ if temp_diff == usize::MAX {
+ return usize::MAX;
+ }
+
+ diff += temp_diff;
+ }
+
+ // For each value on the temp stack
+ for i in 0..src.stack_size {
+ let (src_mapping, src_type) = src.get_opnd_mapping(StackOpnd(i));
+ let (dst_mapping, dst_type) = dst.get_opnd_mapping(StackOpnd(i));
+
+ // If the two mappings aren't the same
+ if src_mapping != dst_mapping {
+ if dst_mapping == MapToStack {
+ // We can safely drop information about the source of the temp
+ // stack operand.
+ diff += 1;
+ } else {
+ return usize::MAX;
+ }
+ }
+
+ let temp_diff = src_type.diff(dst_type);
+
+ if temp_diff == usize::MAX {
+ return usize::MAX;
+ }
+
+ diff += temp_diff;
+ }
+
+ return diff;
+ }
+}
+
+impl BlockId {
+ /// Print Ruby source location for debugging
+ #[cfg(debug_assertions)]
+ pub fn dump_src_loc(&self) {
+ unsafe { rb_yjit_dump_iseq_loc(self.iseq, self.idx) }
+ }
+}
+
+/// See [gen_block_series_body]. This simply counts compilation failures.
+fn gen_block_series(
+ blockid: BlockId,
+ start_ctx: &Context,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> Option<BlockRef> {
+ let result = gen_block_series_body(blockid, start_ctx, ec, cb, ocb);
+ if result.is_none() {
+ incr_counter!(compilation_failure);
+ }
+
+ result
+}
+
+/// Immediately compile a series of block versions at a starting point and
+/// return the starting block.
+fn gen_block_series_body(
+ blockid: BlockId,
+ start_ctx: &Context,
+ ec: EcPtr,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) -> Option<BlockRef> {
+ // Keep track of all blocks compiled in this batch
+ const EXPECTED_BATCH_SIZE: usize = 4;
+ let mut batch = Vec::with_capacity(EXPECTED_BATCH_SIZE);
+
+ // Generate code for the first block
+ let first_block = gen_single_block(blockid, start_ctx, ec, cb, ocb).ok()?;
+ batch.push(first_block.clone()); // Keep track of this block version
+
+ // Add the block version to the VersionMap for this ISEQ
+ add_block_version(&first_block, cb);
+
+ // Loop variable
+ let mut last_blockref = first_block.clone();
+ loop {
+ // Get the last outgoing branch from the previous block.
+ let last_branchref = {
+ let last_block = last_blockref.borrow();
+ match last_block.outgoing.last() {
+ Some(branch) => branch.clone(),
+ None => {
+ break;
+ } // If last block has no branches, stop.
+ }
+ };
+ let mut last_branch = last_branchref.borrow_mut();
+
+ // gen_direct_jump() can request a block to be placed immediately after by
+ // leaving `None`s in the `dst_addrs` array.
+ match &last_branch.dst_addrs {
+ [None, None] => (),
+ _ => {
+ break;
+ } // If there is no next block to compile, stop
+ };
+
+ // Get id and context for the new block
+ let requested_id = last_branch.targets[0];
+ let requested_ctx = &last_branch.target_ctxs[0];
+ assert_ne!(
+ last_branch.targets[0], BLOCKID_NULL,
+ "block id must be filled"
+ );
+
+ // Generate new block using context from the last branch.
+ let result = gen_single_block(requested_id, requested_ctx, ec, cb, ocb);
+
+ // If the block failed to compile
+ if result.is_err() {
+ // Remove previously compiled block
+ // versions from the version map
+ for blockref in &batch {
+ // FIXME: should be deallocating resources here too
+ // e.g. invariants, etc.
+ //free_block(blockref)
+
+ remove_block_version(blockref);
+ }
+
+ // Stop compiling
+ return None;
+ }
+
+ let new_blockref = result.unwrap();
+
+ // Add the block version to the VersionMap for this ISEQ
+ add_block_version(&new_blockref, cb);
+
+ // Connect the last branch and the new block
+ last_branch.blocks[0] = Some(new_blockref.clone());
+ last_branch.dst_addrs[0] = new_blockref.borrow().start_addr;
+ new_blockref
+ .borrow_mut()
+ .incoming
+ .push(last_branchref.clone());
+
+ // This block should immediately follow the last branch
+ assert!(new_blockref.borrow().start_addr == last_branch.end_addr);
+
+ // Track the block
+ batch.push(new_blockref.clone());
+
+ // Repeat with newest block
+ last_blockref = new_blockref;
+ }
+
+ Some(first_block)
+}
+
+/// Generate a block version that is an entry point inserted into an iseq
+/// NOTE: this function assumes that the VM lock has been taken
+pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> Option<CodePtr> {
+ // Compute the current instruction index based on the current PC
+ let insn_idx: u32 = unsafe {
+ let pc_zero = rb_iseq_pc_at_idx(iseq, 0);
+ let ec_pc = get_cfp_pc(get_ec_cfp(ec));
+ ec_pc.offset_from(pc_zero).try_into().ok()?
+ };
+
+ // The entry context makes no assumptions about types
+ let blockid = BlockId {
+ iseq,
+ idx: insn_idx,
+ };
+
+ // Get the inline and outlined code blocks
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+    // Write the interpreter entry prologue. Might be None when out of memory.
+ let code_ptr = gen_entry_prologue(cb, iseq, insn_idx);
+
+ // Try to generate code for the entry block
+ let block = gen_block_series(blockid, &Context::default(), ec, cb, ocb);
+
+ cb.mark_all_executable();
+ ocb.unwrap().mark_all_executable();
+
+ match block {
+ // Compilation failed
+ None => return None,
+
+ // If the block contains no Ruby instructions
+ Some(block) => {
+ let block = block.borrow();
+ if block.end_idx == insn_idx {
+ return None;
+ }
+ }
+ }
+
+ // Compilation successful and block not empty
+ return code_ptr;
+}
+
+/// Generate code for a branch, possibly rewriting and changing the size of it
+fn regenerate_branch(cb: &mut CodeBlock, branch: &mut Branch) {
+ // FIXME
+ /*
+ if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+ // Generating this branch would modify frozen bytes. Do nothing.
+ return;
+ }
+ */
+
+ let old_write_pos = cb.get_write_pos();
+
+ let mut block = branch.block.borrow_mut();
+ let branch_terminates_block = branch.end_addr == block.end_addr;
+
+ // Rewrite the branch
+ assert!(branch.dst_addrs[0].is_some());
+ cb.set_write_ptr(branch.start_addr.unwrap());
+ (branch.gen_fn)(
+ cb,
+ branch.dst_addrs[0].unwrap(),
+ branch.dst_addrs[1],
+ branch.shape,
+ );
+ branch.end_addr = Some(cb.get_write_ptr());
+
+ // The block may have shrunk after the branch is rewritten
+ if branch_terminates_block {
+ // Adjust block size
+ block.end_addr = branch.end_addr;
+ }
+
+    // cb.write_pos is both a write cursor and a marker for the end of
+    // everything written out so far. Leave cb.write_pos at the end of the
+    // block before returning. This function only ever bumps or retains the
+    // end-of-block marker since that's what the majority of callers want. When
+    // the branch sits at the very end of the codeblock and it shrinks after
+    // regeneration, it's up to the caller to drop bytes off the end to
+    // not leave a gap and implement branch.shape.
+ if old_write_pos > cb.get_write_pos() {
+        // We rewound cb.write_pos to generate the branch; now restore it.
+ cb.set_pos(old_write_pos);
+ } else {
+ // The branch sits at the end of cb and consumed some memory.
+ // Keep cb.write_pos.
+ }
+}
+
+/// Create a new outgoing branch entry for a block
+fn make_branch_entry(block: BlockRef, src_ctx: &Context, gen_fn: BranchGenFn) -> BranchRef {
+ let branch = Branch {
+ // Block this is attached to
+ block: block.clone(),
+
+ // Positions where the generated code starts and ends
+ start_addr: None,
+ end_addr: None,
+
+ // Context right after the branch instruction
+ src_ctx: *src_ctx,
+
+ // Branch target blocks and their contexts
+ targets: [BLOCKID_NULL, BLOCKID_NULL],
+ target_ctxs: [Context::default(), Context::default()],
+ blocks: [None, None],
+
+ // Jump target addresses
+ dst_addrs: [None, None],
+
+ // Branch code generation function
+ gen_fn: gen_fn,
+
+ // Shape of the branch
+ shape: BranchShape::Default,
+ };
+
+ // Add to the list of outgoing branches for the block
+ let branchref = Rc::new(RefCell::new(branch));
+ block.borrow_mut().outgoing.push(branchref.clone());
+
+ return branchref;
+}
+
+/// Generated code calls this function with the SysV calling convention.
+/// See [get_branch_target].
+extern "sysv64" fn branch_stub_hit(
+ branch_ptr: *const c_void,
+ target_idx: u32,
+ ec: EcPtr,
+) -> *const u8 {
+ with_vm_lock(src_loc!(), || {
+ branch_stub_hit_body(branch_ptr, target_idx, ec)
+ })
+}
+
+/// Called by the generated code when a branch stub is executed
+/// Triggers compilation of branches and code patching
+fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -> *const u8 {
+ assert!(!branch_ptr.is_null());
+
+ //branch_ptr is actually:
+ //branch_ptr: *const RefCell<Branch>
+ let branch_rc = unsafe { BranchRef::from_raw(branch_ptr as *const RefCell<Branch>) };
+
+ // We increment the strong count because we want to keep the reference owned
+ // by the branch stub alive. Return branch stubs can be hit multiple times.
+ unsafe { Rc::increment_strong_count(branch_ptr) };
+
+ let mut branch = branch_rc.borrow_mut();
+ let branch_size_on_entry = branch.code_size();
+
+ let target_idx: usize = target_idx.as_usize();
+ let target = branch.targets[target_idx];
+ let target_ctx = branch.target_ctxs[target_idx];
+
+ let target_branch_shape = match target_idx {
+ 0 => BranchShape::Next0,
+ 1 => BranchShape::Next1,
+ _ => unreachable!("target_idx < 2 must always hold"),
+ };
+
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ // If this branch has already been patched, return the dst address
+ // Note: ractors can cause the same stub to be hit multiple times
+ if let Some(_) = branch.blocks[target_idx] {
+ return branch.dst_addrs[target_idx].unwrap().raw_ptr();
+ }
+
+ let (cfp, original_interp_sp) = unsafe {
+ let cfp = get_ec_cfp(ec);
+ let original_interp_sp = get_cfp_sp(cfp);
+
+ let reconned_pc = rb_iseq_pc_at_idx(rb_cfp_get_iseq(cfp), target.idx);
+ let reconned_sp = original_interp_sp.offset(target_ctx.sp_offset.into());
+
+ // Update the PC in the current CFP, because it may be out of sync in JITted code
+ rb_set_cfp_pc(cfp, reconned_pc);
+
+ // :stub-sp-flush:
+        // Generated code does stack operations without modifying cfp->sp, while
+        // cfp->sp tells the GC which values on the stack to root. Generated code
+ // generally takes care of updating cfp->sp when it calls runtime routines that
+ // could trigger GC, but it's inconvenient to do it before calling this function.
+ // So we do it here instead.
+ rb_set_cfp_sp(cfp, reconned_sp);
+
+ (cfp, original_interp_sp)
+ };
+
+ // Try to find an existing compiled version of this block
+ let mut block = find_block_version(target, &target_ctx);
+
+ // If this block hasn't yet been compiled
+ if block.is_none() {
+ let branch_old_shape = branch.shape;
+ let mut branch_modified = false;
+
+ // If the new block can be generated right after the branch (at cb->write_pos)
+ if Some(cb.get_write_ptr()) == branch.end_addr {
+ // This branch should be terminating its block
+ assert!(branch.end_addr == branch.block.borrow().end_addr);
+
+ // Change the branch shape to indicate the target block will be placed next
+ branch.shape = target_branch_shape;
+
+ // Rewrite the branch with the new, potentially more compact shape
+ regenerate_branch(cb, &mut branch);
+ branch_modified = true;
+
+ // Ensure that the branch terminates the codeblock just like
+ // before entering this if block. This drops bytes off the end
+ // in case we shrank the branch when regenerating.
+ cb.set_write_ptr(branch.end_addr.unwrap());
+ }
+
+ // Compile the new block version
+ drop(branch); // Stop mutable RefCell borrow since GC might borrow branch for marking
+ block = gen_block_series(target, &target_ctx, ec, cb, ocb);
+ branch = branch_rc.borrow_mut();
+
+ if block.is_none() && branch_modified {
+ // We couldn't generate a new block for the branch, but we modified the branch.
+ // Restore the branch by regenerating it.
+ branch.shape = branch_old_shape;
+ regenerate_branch(cb, &mut branch);
+ }
+ }
+
+ // Finish building the new block
+ let dst_addr = match block {
+ Some(block_rc) => {
+ let mut block: RefMut<_> = block_rc.borrow_mut();
+
+ // Branch shape should reflect layout
+ assert!(!(branch.shape == target_branch_shape && block.start_addr != branch.end_addr));
+
+ // Add this branch to the list of incoming branches for the target
+ block.incoming.push(branch_rc.clone());
+
+ // Update the branch target address
+ let dst_addr = block.start_addr;
+ branch.dst_addrs[target_idx] = dst_addr;
+
+ // Mark this branch target as patched (no longer a stub)
+ branch.blocks[target_idx] = Some(block_rc.clone());
+
+ // Rewrite the branch with the new jump target address
+ mem::drop(block); // end mut borrow
+ regenerate_branch(cb, &mut branch);
+
+ // Restore interpreter sp, since the code hitting the stub expects the original.
+ unsafe { rb_set_cfp_sp(cfp, original_interp_sp) };
+
+ block_rc.borrow().start_addr.unwrap()
+ }
+ None => {
+ // Failed to service the stub by generating a new block so now we
+ // need to exit to the interpreter at the stubbed location. We are
+ // intentionally *not* restoring original_interp_sp. At the time of
+ // writing, reconstructing interpreter state only involves setting
+ // cfp->sp and cfp->pc. We set both before trying to generate the
+ // block. All there is left to do to exit is to pop the native
+ // frame. We do that in code_for_exit_from_stub.
+ CodegenGlobals::get_stub_exit_code()
+ }
+ };
+
+ ocb.unwrap().mark_all_executable();
+ cb.mark_all_executable();
+
+ let new_branch_size = branch.code_size();
+ assert!(
+ new_branch_size <= branch_size_on_entry,
+ "branch stubs should never enlarge branches"
+ );
+
+ // Return a pointer to the compiled block version
+ dst_addr.raw_ptr()
+}
+
+/// Get a block version or stub corresponding to a branch target
+fn get_branch_target(
+ target: BlockId,
+ ctx: &Context,
+ branchref: &BranchRef,
+ target_idx: u32,
+ ocb: &mut OutlinedCb,
+) -> Option<CodePtr> {
+ let maybe_block = find_block_version(target, ctx);
+
+ // If the block already exists
+ if let Some(blockref) = maybe_block {
+ let mut block = blockref.borrow_mut();
+
+ // Add an incoming branch into this block
+ block.incoming.push(branchref.clone());
+ let mut branch = branchref.borrow_mut();
+ branch.blocks[target_idx.as_usize()] = Some(blockref.clone());
+
+ // Return a pointer to the compiled code for the block
+ return block.start_addr;
+ }
+
+ let ocb = ocb.unwrap();
+
+ // Generate an outlined stub that will call branch_stub_hit()
+ let stub_addr = ocb.get_write_ptr();
+
+ // Get a raw pointer to the branch while keeping the reference count alive
+ // Here clone increments the strong count by 1
+ // This means the branch stub owns its own reference to the branch
+ let branch_ptr: *const RefCell<Branch> = BranchRef::into_raw(branchref.clone());
+
+    // Call branch_stub_hit(branch_ptr, target_idx, ec)
+ mov(ocb, C_ARG_REGS[2], REG_EC);
+ mov(ocb, C_ARG_REGS[1], uimm_opnd(target_idx as u64));
+ mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch_ptr as *const u8));
+ call_ptr(ocb, REG0, branch_stub_hit as *mut u8);
+
+ // Jump to the address returned by the
+ // branch_stub_hit call
+ jmp_rm(ocb, RAX);
+
+ if ocb.has_dropped_bytes() {
+ None // No space
+ } else {
+ Some(stub_addr)
+ }
+}
+
+pub fn gen_branch(
+ jit: &JITState,
+ src_ctx: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+ target0: BlockId,
+ ctx0: &Context,
+ target1: Option<BlockId>,
+ ctx1: Option<&Context>,
+ gen_fn: BranchGenFn,
+) {
+ assert!(target0 != BLOCKID_NULL);
+
+ let branchref = make_branch_entry(jit.get_block(), src_ctx, gen_fn);
+
+ // Get the branch targets or stubs
+ let dst_addr0 = get_branch_target(target0, ctx0, &branchref, 0, ocb);
+ let dst_addr1 = if ctx1.is_some() {
+ get_branch_target(target1.unwrap(), ctx1.unwrap(), &branchref, 1, ocb)
+ } else {
+ None
+ };
+
+ let mut branch = branchref.borrow_mut();
+
+    // Set the branch target addresses
+ branch.dst_addrs[0] = dst_addr0;
+ branch.dst_addrs[1] = dst_addr1;
+
+ branch.targets[0] = target0;
+ if target1.is_some() {
+ branch.targets[1] = target1.unwrap();
+ }
+ branch.target_ctxs[0] = *ctx0;
+ branch.target_ctxs[1] = if ctx1.is_some() {
+ *ctx1.unwrap()
+ } else {
+ Context::default()
+ };
+
+ // Call the branch generation function
+ branch.start_addr = Some(cb.get_write_ptr());
+ regenerate_branch(cb, &mut branch);
+}
+
+fn gen_jump_branch(
+ cb: &mut CodeBlock,
+ target0: CodePtr,
+ _target1: Option<CodePtr>,
+ shape: BranchShape,
+) {
+ if shape == BranchShape::Next1 {
+ panic!("Branch shape Next1 not allowed in gen_jump_branch!");
+ }
+
+ if shape == BranchShape::Default {
+ jmp_ptr(cb, target0);
+ }
+}
+
+pub fn gen_direct_jump(jit: &JITState, ctx: &Context, target0: BlockId, cb: &mut CodeBlock) {
+ assert!(target0 != BLOCKID_NULL);
+
+ let branchref = make_branch_entry(jit.get_block(), ctx, gen_jump_branch);
+ let mut branch = branchref.borrow_mut();
+
+ branch.targets[0] = target0;
+ branch.target_ctxs[0] = *ctx;
+
+ let maybe_block = find_block_version(target0, ctx);
+
+ // If the block already exists
+ if let Some(blockref) = maybe_block {
+ let mut block = blockref.borrow_mut();
+
+ block.incoming.push(branchref.clone());
+
+ branch.dst_addrs[0] = block.start_addr;
+ branch.blocks[0] = Some(blockref.clone());
+ branch.shape = BranchShape::Default;
+
+ // Call the branch generation function
+ branch.start_addr = Some(cb.get_write_ptr());
+ gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
+ branch.end_addr = Some(cb.get_write_ptr());
+ } else {
+ // This None target address signals gen_block_series() to compile the
+ // target block right after this one (fallthrough).
+ branch.dst_addrs[0] = None;
+ branch.shape = BranchShape::Next0;
+ branch.start_addr = Some(cb.get_write_ptr());
+ branch.end_addr = Some(cb.get_write_ptr());
+ }
+}
+
+/// Create a stub to force the code up to this point to be executed
+pub fn defer_compilation(
+ jit: &JITState,
+ cur_ctx: &Context,
+ cb: &mut CodeBlock,
+ ocb: &mut OutlinedCb,
+) {
+ if cur_ctx.chain_depth != 0 {
+ panic!("Double defer!");
+ }
+
+ let mut next_ctx = cur_ctx.clone();
+
+ if next_ctx.chain_depth >= u8::MAX {
+ panic!("max block version chain depth reached!");
+ }
+
+ next_ctx.chain_depth += 1;
+
+ let block_rc = jit.get_block();
+ let branch_rc = make_branch_entry(jit.get_block(), cur_ctx, gen_jump_branch);
+ let mut branch = branch_rc.borrow_mut();
+ let block = block_rc.borrow();
+
+ branch.target_ctxs[0] = next_ctx;
+ branch.targets[0] = BlockId {
+ iseq: block.blockid.iseq,
+ idx: jit.get_insn_idx(),
+ };
+ branch.dst_addrs[0] = get_branch_target(branch.targets[0], &next_ctx, &branch_rc, 0, ocb);
+
+ // Call the branch generation function
+ branch.start_addr = Some(cb.get_write_ptr());
+ gen_jump_branch(cb, branch.dst_addrs[0].unwrap(), None, BranchShape::Default);
+ branch.end_addr = Some(cb.get_write_ptr());
+}
+
+// Remove all references to a block then free it.
+fn free_block(blockref: &BlockRef) {
+ use crate::invariants::*;
+
+ block_assumptions_free(blockref);
+
+ let block = blockref.borrow();
+
+ // Remove this block from the predecessor's targets
+ for pred_branchref in &block.incoming {
+ // Branch from the predecessor to us
+ let mut pred_branch = pred_branchref.borrow_mut();
+
+ // If this is us, nullify the target block
+ for pred_succ_ref in &mut pred_branch.blocks {
+ if let Some(pred_succ) = pred_succ_ref {
+ if pred_succ == blockref {
+ *pred_succ_ref = None;
+ }
+ }
+ }
+ }
+
+ // For each outgoing branch
+ for out_branchref in &block.outgoing {
+ let out_branch = out_branchref.borrow();
+
+ // For each successor block
+ for succ in &out_branch.blocks {
+ if let Some(succ) = succ {
+ // Remove outgoing branch from the successor's incoming list
+ let mut succ_block = succ.borrow_mut();
+ succ_block
+ .incoming
+ .retain(|succ_incoming| !Rc::ptr_eq(succ_incoming, out_branchref));
+ }
+ }
+ }
+
+ // No explicit deallocation here as blocks are ref-counted.
+}
+
+// Some runtime checks for integrity of a program location
+pub fn verify_blockid(blockid: BlockId) {
+ unsafe {
+ assert!(rb_IMEMO_TYPE_P(blockid.iseq.into(), imemo_iseq) != 0);
+ assert!(blockid.idx < get_iseq_encoded_size(blockid.iseq));
+ }
+}
+
+// Invalidate one specific block version
+pub fn invalidate_block_version(blockref: &BlockRef) {
+ //ASSERT_vm_locking();
+
+ // TODO: want to assert that all other ractors are stopped here. Can't patch
+ // machine code that some other thread is running.
+
+ let block = blockref.borrow();
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ verify_blockid(block.blockid);
+
+ // Remove this block from the version array
+ remove_block_version(blockref);
+
+ // Get a pointer to the generated code for this block
+ let code_ptr = block.start_addr;
+
+    // Make the start of the block do an exit. This handles OOM situations
+    // and some cases where we can't efficiently patch incoming branches.
+    // Do this first, since in case there is a fallthrough branch into this
+    // block, the patching loop below can overwrite the start of the block.
+    // In those situations, there are hopefully no jumps to the start of the block
+    // after patching, as the start of the block would be in the middle of something
+    // generated by Branch::gen_fn.
+ {
+ let block_start = block
+ .start_addr
+ .expect("invalidation needs constructed block");
+ let block_end = block
+ .end_addr
+ .expect("invalidation needs constructed block");
+ let block_entry_exit = block
+ .entry_exit
+ .expect("invalidation needs the entry_exit field");
+
+ if block_start == block_entry_exit {
+ // Some blocks exit on entry. Patching a jump to the entry at the
+ // entry makes an infinite loop.
+ } else {
+ // TODO(alan)
+ // if (block.start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) // Don't patch frozen code region
+
+ // Patch in a jump to block.entry_exit.
+ let cur_pos = cb.get_write_ptr();
+ cb.set_write_ptr(block_start);
+ jmp_ptr(cb, block_entry_exit);
+ assert!(
+ cb.get_write_ptr() < block_end,
+ "invalidation wrote past end of block"
+ );
+ cb.set_write_ptr(cur_pos);
+ }
+ }
+
+ // For each incoming branch
+ for branchref in &block.incoming {
+ let mut branch = branchref.borrow_mut();
+ let target_idx = if branch.dst_addrs[0] == code_ptr {
+ 0
+ } else {
+ 1
+ };
+ assert_eq!(branch.dst_addrs[target_idx], code_ptr);
+ assert_eq!(blockref, branch.blocks[target_idx].as_ref().unwrap());
+
+ // Mark this target as being a stub
+ branch.blocks[target_idx] = None;
+
+ // TODO(alan):
+ // Don't patch frozen code region
+ // if (branch.start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
+ // continue;
+ // }
+
+ // Create a stub for this branch target
+ mem::drop(branch); // end RefCell borrow as get_branch_target() can borrow the branch.
+ let mut branch_target =
+ get_branch_target(block.blockid, &block.ctx, branchref, target_idx as u32, ocb);
+
+ if branch_target.is_none() {
+ // We were unable to generate a stub (e.g. OOM). Use the block's
+ // exit instead of a stub for the block. It's important that we
+ // still patch the branch in this situation so stubs are unique
+ // to branches. Think about what could go wrong if we run out of
+ // memory in the middle of this loop.
+ branch_target = block.entry_exit;
+ }
+
+ branch = branchref.borrow_mut();
+ branch.dst_addrs[target_idx] = branch_target;
+
+ // Check if the invalidated block immediately follows
+ let target_next = block.start_addr == branch.end_addr;
+
+ if target_next {
+ // The new block will no longer be adjacent.
+ // Note that we could be enlarging the branch and writing into the
+ // start of the block being invalidated.
+ branch.shape = BranchShape::Default;
+ }
+
+ // Rewrite the branch with the new jump target address
+ regenerate_branch(cb, &mut branch);
+
+ if target_next && branch.end_addr > block.end_addr {
+ dbg!(
+ branch.block.borrow().blockid.idx,
+ block.blockid.idx,
+ branch.end_addr,
+ block.end_addr,
+ block.code_size()
+ );
+ panic!("yjit invalidate rewrote branch past end of invalidated block");
+ }
+ }
+
+ // Clear out the JIT func so that we can recompile later and so the
+ // interpreter will run the iseq.
+ //
+ // Only clear the jit_func when we're invalidating the JIT entry block.
+ // We only support compiling iseqs from index 0 right now. So entry
+ // points will always have an instruction index of 0. We'll need to
+ // change this in the future when we support optional parameters because
+ // they enter the function with a non-zero PC
+ if block.blockid.idx == 0 {
+ unsafe { rb_iseq_reset_jit_func(block.blockid.iseq) };
+ }
+
+ // TODO:
+ // May want to recompile a new entry point (for interpreter entry blocks)
+ // This isn't necessary for correctness
+
+ // FIXME:
+ // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
+
+ free_block(blockref);
+
+ ocb.unwrap().mark_all_executable();
+ cb.mark_all_executable();
+
+ incr_counter!(invalidation_count);
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core::*;
+
+ #[test]
+ fn types() {
+ // Valid src => dst
+ assert_eq!(Type::Unknown.diff(Type::Unknown), 0);
+ assert_eq!(Type::UnknownImm.diff(Type::UnknownImm), 0);
+ assert_ne!(Type::UnknownImm.diff(Type::Unknown), usize::MAX);
+ assert_ne!(Type::Fixnum.diff(Type::Unknown), usize::MAX);
+ assert_ne!(Type::Fixnum.diff(Type::UnknownImm), usize::MAX);
+
+ // Invalid src => dst
+ assert_eq!(Type::Unknown.diff(Type::UnknownImm), usize::MAX);
+ assert_eq!(Type::Unknown.diff(Type::Fixnum), usize::MAX);
+ assert_eq!(Type::Fixnum.diff(Type::UnknownHeap), usize::MAX);
+ }
+
+ #[test]
+ fn context() {
+ // Valid src => dst
+ assert_eq!(Context::default().diff(&Context::default()), 0);
+
+ // Try pushing an operand and getting its type
+ let mut ctx = Context::default();
+ ctx.stack_push(Type::Fixnum);
+ let top_type = ctx.get_opnd_type(StackOpnd(0));
+ assert!(top_type == Type::Fixnum);
+
+ // TODO: write more tests for Context type diff
+ }
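+
+    // Editor's sketch (not part of the original change): extra tests exercising
+    // the temp stack type tracking and the directionality of Context::diff,
+    // using only APIs defined in this file.
+    #[test]
+    fn context_tracks_local_mapping_on_stack() {
+        let mut ctx = Context::default();
+        ctx.stack_push_local(0);
+
+        // The pushed slot remembers that it came from local 0
+        let (mapping, _) = ctx.get_opnd_mapping(StackOpnd(0));
+        assert!(mapping == TempMapping::MapToLocal(0));
+    }
+
+    #[test]
+    fn context_diff_is_directional_for_stack_types() {
+        let mut src = Context::default();
+        src.stack_push(Type::Fixnum);
+
+        let mut dst = Context::default();
+        dst.stack_push(Type::Unknown);
+
+        // A known Fixnum can flow into a slot that expects an unknown value,
+        // but an unknown value cannot flow into a slot that expects a Fixnum.
+        assert_ne!(src.diff(&dst), usize::MAX);
+        assert_eq!(dst.diff(&src), usize::MAX);
+    }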
+}
diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs
new file mode 100644
index 0000000000..9e386ea871
--- /dev/null
+++ b/yjit/src/cruby.rs
@@ -0,0 +1,919 @@
+//! This module deals with making relevant C functions available to Rust YJIT.
+//! Some of the C functions we use are ones we maintain, some are public C
+//! extension APIs, and some are internal CRuby APIs.
+//!
+//! ## General notes about linking
+//!
+//! The YJIT crate compiles to a native static library, which for our purposes
+//! we can understand as a collection of object files. On ELF platforms at least,
+//! object files can refer to "external symbols", which we can loosely understand
+//! as assembly labels that refer to code defined in other object files and are
+//! resolved when linking. When we link, say to produce miniruby, the linker
+//! resolves these symbols and fills in a concrete address for each usage of a
+//! C function in the Rust static library.
+//!
+//! By declaring external functions and using them, we are asserting that the
+//! symbols we use have a definition in one of the object files we pass to the
+//! linker. Using a function declared here that has no definition anywhere
+//! causes a linking error.
+//!
+//! There is more going on during linking, and this section makes a lot of
+//! simplifications, but hopefully it gives a good enough working mental model.
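+//!
+//! As an illustrative sketch (the function below is hypothetical, not a real
+//! CRuby symbol), a declaration like the following tells the Rust compiler that
+//! the symbol exists somewhere, and leaves it to the linker to find a definition
+//! for it in one of the object files:
+//!
+//! ```ignore
+//! extern "C" {
+//!     // Resolved at link time against whichever object file defines it
+//!     fn rb_example_function(arg: std::os::raw::c_int) -> std::os::raw::c_int;
+//! }
+//! ```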
+//!
+//! ## Difference from example in the Rustonomicon
+//!
+//! You might be wondering why this is different from the [FFI example]
+//! in the Nomicon, an official book about Unsafe Rust.
+//!
+//! There is no `#[link]` attribute because we are not linking against an external
+//! library, but rather implicitly asserting that we'll supply a concrete definition
+//! for all C functions we call, similar to how pure C projects put functions
+//! across different compilation units and link them together.
+//!
+//! TODO(alan): is the model different enough on Windows that this setup is unworkable?
+//! Seems prudent to at least learn more about Windows binary tooling before
+//! committing to a design.
+//!
+//! Alan recommends reading the Nomicon cover to cover; it is not very long,
+//! especially for something that can save hours of debugging Undefined Behavior
+//! (UB) down the road.
+//!
+//! UB can cause Safe Rust to crash, at which point it's hard to tell which
+//! usage of `unsafe` in the codebase invokes it. Providing a safe Rust interface
+//! that wraps `unsafe` Rust is a good technique, but it requires practice and
+//! knowledge about what's well defined and what's undefined.
+//!
+//! For an extremely advanced example of building safe primitives using Unsafe Rust,
+//! see the [GhostCell] paper. Some parts of the paper assume less background knowledge
+//! than other parts, so there should be learning opportunities in it for all experience
+//! levels.
+//!
+//! ## Binding generation
+//!
+//! For the moment, declarations on the Rust side are hand written. The code is boilerplate
+//! and could be generated automatically with custom tooling that depends on
+//! rust-lang/rust-bindgen. The output Rust code could be checked into version control
+//! and verified on CI, similar to `make update-deps`.
+//!
+//! Upsides for this design:
+//! - The YJIT static lib that links with miniruby and friends will not need bindgen
+//!   as a dependency at all. This is an important property so Ruby end users can
+//!   build a YJIT-enabled Ruby with no internet connection using a release tarball
+//! - Less hand-typed boilerplate
+//! - Helps reduce the risk of C definitions and Rust declarations going out of sync,
+//!   since CI verifies synchronicity
+//!
+//! Downsides and known unknowns:
+//! - Using rust-bindgen this way seems unusual. We might be depending on parts
+//! that the project is not committed to maintaining
+//! - This setup assumes rust-bindgen gives deterministic output, which can't be taken
+//! for granted
+//! - YJIT contributors will need to install libclang on their system to get rust-bindgen
+//! to work if they want to run the generation tool locally
+//!
+//! The elephant in the room is that we'll still need to use Unsafe Rust to call C functions,
+//! and the binding generation can't magically save us from learning Unsafe Rust.
+//!
+//!
+//! [FFI example]: https://doc.rust-lang.org/nomicon/ffi.html
+//! [GhostCell]: http://plv.mpi-sws.org/rustbelt/ghostcell/
+
+// CRuby types use snake_case. Allow them so we use one name across languages.
+#![allow(non_camel_case_types)]
+// A lot of imported CRuby globals aren't all-caps
+#![allow(non_upper_case_globals)]
+
+use std::convert::From;
+use std::ffi::CString;
+use std::os::raw::{c_char, c_int, c_long, c_uint, c_void};
+use std::panic::{catch_unwind, UnwindSafe};
+
+// We check that we can do this with the configure script and a couple of
+// static asserts. u64 and not usize to play nice with lowering to x86.
+pub type size_t = u64;
+
+/// A type alias for the redefinition flags coming from CRuby. These are just
+/// shifted 1s but not explicitly an enum.
+pub type RedefinitionFlag = u32;
+
+// Textually include output from rust-bindgen as suggested by its user guide.
+include!("cruby_bindings.inc.rs");
+
+// TODO: For #defines that affect memory layout, we need to check for them
+// on build and fail if they're wrong. e.g. USE_FLONUM *must* be true.
+
+// TODO:
+// Temporary, these external bindings will likely be auto-generated
+// and textually included in this file
+extern "C" {
+ #[link_name = "rb_yjit_alloc_exec_mem"] // we can rename functions with this attribute
+ pub fn alloc_exec_mem(mem_size: u32) -> *mut u8;
+
+ #[link_name = "rb_insn_name"]
+ pub fn raw_insn_name(insn: VALUE) -> *const c_char;
+
+ #[link_name = "rb_insn_len"]
+ pub fn raw_insn_len(v: VALUE) -> c_int;
+
+ #[link_name = "rb_yarv_class_of"]
+ pub fn CLASS_OF(v: VALUE) -> VALUE;
+
+ #[link_name = "rb_get_ec_cfp"]
+ pub fn get_ec_cfp(ec: EcPtr) -> CfpPtr;
+
+ #[link_name = "rb_get_cfp_pc"]
+ pub fn get_cfp_pc(cfp: CfpPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_cfp_sp"]
+ pub fn get_cfp_sp(cfp: CfpPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_cfp_self"]
+ pub fn get_cfp_self(cfp: CfpPtr) -> VALUE;
+
+ #[link_name = "rb_get_cfp_ep"]
+ pub fn get_cfp_ep(cfp: CfpPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_cme_def_type"]
+ pub fn get_cme_def_type(cme: *const rb_callable_method_entry_t) -> rb_method_type_t;
+
+ #[link_name = "rb_get_cme_def_method_serial"]
+ pub fn get_cme_def_method_serial(cme: *const rb_callable_method_entry_t) -> u64;
+
+ #[link_name = "rb_get_cme_def_body_attr_id"]
+ pub fn get_cme_def_body_attr_id(cme: *const rb_callable_method_entry_t) -> ID;
+
+ #[link_name = "rb_get_cme_def_body_optimized_type"]
+ pub fn get_cme_def_body_optimized_type(
+ cme: *const rb_callable_method_entry_t,
+ ) -> method_optimized_type;
+
+ #[link_name = "rb_get_cme_def_body_optimized_index"]
+ pub fn get_cme_def_body_optimized_index(cme: *const rb_callable_method_entry_t) -> c_uint;
+
+ #[link_name = "rb_get_cme_def_body_cfunc"]
+ pub fn get_cme_def_body_cfunc(cme: *const rb_callable_method_entry_t)
+ -> *mut rb_method_cfunc_t;
+
+ #[link_name = "rb_get_def_method_serial"]
+ /// While this returns a uintptr_t in C, we always use it as a Rust u64
+ pub fn get_def_method_serial(def: *const rb_method_definition_t) -> u64;
+
+ #[link_name = "rb_get_def_original_id"]
+ pub fn get_def_original_id(def: *const rb_method_definition_t) -> ID;
+
+ #[link_name = "rb_get_mct_argc"]
+ pub fn get_mct_argc(mct: *const rb_method_cfunc_t) -> c_int;
+
+ #[link_name = "rb_get_mct_func"]
+ pub fn get_mct_func(mct: *const rb_method_cfunc_t) -> *const u8;
+
+ #[link_name = "rb_get_def_iseq_ptr"]
+ pub fn get_def_iseq_ptr(def: *const rb_method_definition_t) -> IseqPtr;
+
+ #[link_name = "rb_iseq_encoded_size"]
+ pub fn get_iseq_encoded_size(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_body_local_iseq"]
+ pub fn get_iseq_body_local_iseq(iseq: IseqPtr) -> IseqPtr;
+
+ #[link_name = "rb_get_iseq_body_iseq_encoded"]
+ pub fn get_iseq_body_iseq_encoded(iseq: IseqPtr) -> *mut VALUE;
+
+ #[link_name = "rb_get_iseq_body_builtin_inline_p"]
+ pub fn get_iseq_body_builtin_inline_p(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_body_stack_max"]
+ pub fn get_iseq_body_stack_max(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_flags_has_opt"]
+ pub fn get_iseq_flags_has_opt(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_kw"]
+ pub fn get_iseq_flags_has_kw(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_rest"]
+ pub fn get_iseq_flags_has_rest(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_post"]
+ pub fn get_iseq_flags_has_post(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_kwrest"]
+ pub fn get_iseq_flags_has_kwrest(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_block"]
+ pub fn get_iseq_flags_has_block(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_flags_has_accepts_no_kwarg"]
+ pub fn get_iseq_flags_has_accepts_no_kwarg(iseq: IseqPtr) -> bool;
+
+ #[link_name = "rb_get_iseq_body_local_table_size"]
+ pub fn get_iseq_body_local_table_size(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_body_param_keyword"]
+ pub fn get_iseq_body_param_keyword(iseq: IseqPtr) -> *const rb_seq_param_keyword_struct;
+
+ #[link_name = "rb_get_iseq_body_param_size"]
+ pub fn get_iseq_body_param_size(iseq: IseqPtr) -> c_uint;
+
+ #[link_name = "rb_get_iseq_body_param_lead_num"]
+ pub fn get_iseq_body_param_lead_num(iseq: IseqPtr) -> c_int;
+
+ #[link_name = "rb_get_iseq_body_param_opt_num"]
+ pub fn get_iseq_body_param_opt_num(iseq: IseqPtr) -> c_int;
+
+ #[link_name = "rb_get_iseq_body_param_opt_table"]
+ pub fn get_iseq_body_param_opt_table(iseq: IseqPtr) -> *const VALUE;
+
+ #[link_name = "rb_get_cikw_keyword_len"]
+ pub fn get_cikw_keyword_len(cikw: *const rb_callinfo_kwarg) -> c_int;
+
+ #[link_name = "rb_get_cikw_keywords_idx"]
+ pub fn get_cikw_keywords_idx(cikw: *const rb_callinfo_kwarg, idx: c_int) -> VALUE;
+
+ #[link_name = "rb_get_call_data_ci"]
+ pub fn get_call_data_ci(cd: *const rb_call_data) -> *const rb_callinfo;
+
+ #[link_name = "rb_yarv_str_eql_internal"]
+ pub fn rb_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE;
+
+ #[link_name = "rb_yarv_ary_entry_internal"]
+ pub fn rb_ary_entry_internal(ary: VALUE, offset: c_long) -> VALUE;
+
+ #[link_name = "rb_FL_TEST"]
+ pub fn FL_TEST(obj: VALUE, flags: VALUE) -> VALUE;
+
+ #[link_name = "rb_FL_TEST_RAW"]
+ pub fn FL_TEST_RAW(obj: VALUE, flags: VALUE) -> VALUE;
+
+ #[link_name = "rb_RB_TYPE_P"]
+ pub fn RB_TYPE_P(obj: VALUE, t: ruby_value_type) -> bool;
+
+ #[link_name = "rb_BASIC_OP_UNREDEFINED_P"]
+ pub fn BASIC_OP_UNREDEFINED_P(bop: ruby_basic_operators, klass: RedefinitionFlag) -> bool;
+
+ #[link_name = "rb_RSTRUCT_LEN"]
+ pub fn RSTRUCT_LEN(st: VALUE) -> c_long;
+
+ #[link_name = "rb_RSTRUCT_SET"]
+ pub fn RSTRUCT_SET(st: VALUE, k: c_int, v: VALUE);
+
+    // Ruby only defines these in vm_insnhelper.c, not in any header.
+    // Parsing that file with bindgen would result in a lot of duplicate definitions.
+ pub fn rb_vm_opt_mod(recv: VALUE, obj: VALUE) -> VALUE;
+ pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE;
+ pub fn rb_vm_defined(
+ ec: EcPtr,
+ reg_cfp: CfpPtr,
+ op_type: rb_num_t,
+ obj: VALUE,
+ v: VALUE,
+ ) -> bool;
+ pub fn rb_vm_set_ivar_idx(obj: VALUE, idx: u32, val: VALUE) -> VALUE;
+ pub fn rb_vm_setinstancevariable(iseq: IseqPtr, obj: VALUE, id: ID, val: VALUE, ic: IVC);
+ pub fn rb_aliased_callable_method_entry(
+ me: *const rb_callable_method_entry_t,
+ ) -> *const rb_callable_method_entry_t;
+ pub fn rb_iseq_only_optparam_p(iseq: IseqPtr) -> bool;
+ pub fn rb_iseq_only_kwparam_p(iseq: IseqPtr) -> bool;
+ pub fn rb_vm_getclassvariable(iseq: IseqPtr, cfp: CfpPtr, id: ID, ic: ICVARC) -> VALUE;
+ pub fn rb_vm_setclassvariable(
+ iseq: IseqPtr,
+ cfp: CfpPtr,
+ id: ID,
+ val: VALUE,
+ ic: ICVARC,
+ ) -> VALUE;
+ pub fn rb_vm_ic_hit_p(ic: IC, reg_ep: *const VALUE) -> bool;
+
+ #[link_name = "rb_vm_ci_argc"]
+ pub fn vm_ci_argc(ci: *const rb_callinfo) -> c_int;
+
+ #[link_name = "rb_vm_ci_mid"]
+ pub fn vm_ci_mid(ci: *const rb_callinfo) -> ID;
+
+ #[link_name = "rb_vm_ci_flag"]
+ pub fn vm_ci_flag(ci: *const rb_callinfo) -> c_uint;
+
+ #[link_name = "rb_vm_ci_kwarg"]
+ pub fn vm_ci_kwarg(ci: *const rb_callinfo) -> *const rb_callinfo_kwarg;
+
+ #[link_name = "rb_METHOD_ENTRY_VISI"]
+ pub fn METHOD_ENTRY_VISI(me: *const rb_callable_method_entry_t) -> rb_method_visibility_t;
+
+ pub fn rb_yjit_branch_stub_hit(
+ branch_ptr: *const c_void,
+ target_idx: u32,
+ ec: EcPtr,
+ ) -> *const c_void;
+
+ pub fn rb_str_bytesize(str: VALUE) -> VALUE;
+
+ #[link_name = "rb_RCLASS_ORIGIN"]
+ pub fn RCLASS_ORIGIN(v: VALUE) -> VALUE;
+}
+
+/// Helper to get the name of a YARV instruction as an owned Rust String
+pub fn insn_name(opcode: usize) -> String {
+ use std::ffi::CStr;
+
+ unsafe {
+ // Look up Ruby's NULL-terminated insn name string
+ let op_name = raw_insn_name(VALUE(opcode));
+
+        // Convert the op name C string to a Rust string slice
+ let op_name = CStr::from_ptr(op_name).to_str().unwrap();
+
+ // Convert into an owned string
+ op_name.to_string()
+ }
+}
+
+#[allow(unused_variables)]
+pub fn insn_len(opcode: usize) -> u32 {
+ #[cfg(test)]
+ panic!("insn_len is a CRuby function, and we don't link against CRuby for Rust testing!");
+
+ #[cfg(not(test))]
+ unsafe {
+ raw_insn_len(VALUE(opcode)).try_into().unwrap()
+ }
+}
+
+/// Opaque iseq type for opaque iseq pointers from vm_core.h
+/// See: <https://doc.rust-lang.org/nomicon/ffi.html#representing-opaque-structs>
+#[repr(C)]
+pub struct rb_iseq_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// An object handle similar to VALUE in the C code. Our methods assume
+/// that this is a handle. Sometimes the C code briefly uses VALUE as
+/// an unsigned integer type and doesn't necessarily store valid handles, but
+/// thankfully those cases are rare and don't cross the FFI boundary.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+#[repr(transparent)] // same size and alignment as simply `usize`
+pub struct VALUE(pub usize);
+
+/// Pointer to an ISEQ
+pub type IseqPtr = *const rb_iseq_t;
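+
+// The opaque-struct pattern above means an rb_iseq_t is never constructed or
+// inspected from Rust; the only meaningful uses are passing the pointer around
+// and handing it back to C. A hypothetical illustration:
+#[allow(dead_code)]
+fn example_opaque_iseq_use(iseq: IseqPtr) -> bool {
+    // We can store, compare and null-check the pointer, but not look inside it.
+    !iseq.is_null()
+}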
+
+/// Opaque execution-context type from vm_core.h
+#[repr(C)]
+pub struct rb_execution_context_struct {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+/// Alias for rb_execution_context_struct used by CRuby sometimes
+pub type rb_execution_context_t = rb_execution_context_struct;
+
+/// Pointer to an execution context (rb_execution_context_struct)
+pub type EcPtr = *const rb_execution_context_struct;
+
+// From method.h
+#[repr(C)]
+pub struct rb_method_definition_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+type rb_method_definition_struct = rb_method_definition_t;
+
+/// Opaque cfunc type from method.h
+#[repr(C)]
+pub struct rb_method_cfunc_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque FILE type from the C standard library
+#[repr(C)]
+pub struct FILE {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque call-cache type from vm_callinfo.h
+#[repr(C)]
+pub struct rb_callcache {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque call-info type from vm_callinfo.h
+#[repr(C)]
+pub struct rb_callinfo_kwarg {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Opaque control_frame (CFP) struct from vm_core.h
+#[repr(C)]
+pub struct rb_control_frame_struct {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+/// Pointer to a control frame pointer (CFP)
+pub type CfpPtr = *mut rb_control_frame_struct;
+
+/// Opaque struct from vm_core.h
+#[repr(C)]
+pub struct rb_cref_t {
+ _data: [u8; 0],
+ _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
+}
+
+impl VALUE {
+ /// Dump info about the value to the console similarly to rp(VALUE)
+ pub fn dump_info(self) {
+ unsafe { rb_obj_info_dump(self) }
+ }
+
+ /// Return whether the value is truthy or falsy in Ruby -- only nil and false are falsy.
+ pub fn test(self) -> bool {
+ let VALUE(cval) = self;
+ let VALUE(qnilval) = Qnil;
+ (cval & !qnilval) != 0
+ }
+
+ /// Return true if the number is an immediate integer, flonum or static symbol
+ fn immediate_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 7) != 0
+ }
+
+ /// Return true if the value is a Ruby immediate integer, flonum, static symbol, nil or false
+ pub fn special_const_p(self) -> bool {
+ self.immediate_p() || !self.test()
+ }
+
+ /// Return true if the value is a Ruby Fixnum (immediate-size integer)
+ pub fn fixnum_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 1) == 1
+ }
+
+ /// Return true if the value is an immediate Ruby floating-point number (flonum)
+ pub fn flonum_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 3) == 2
+ }
+
+ /// Return true for a static (non-heap) Ruby symbol
+ pub fn static_sym_p(self) -> bool {
+ let VALUE(cval) = self;
+ (cval & 0xff) == RUBY_SYMBOL_FLAG
+ }
+
+ /// Returns true or false depending on whether the value is nil
+ pub fn nil_p(self) -> bool {
+ self == Qnil
+ }
+
+ /// Read the flags bits from the RBasic object, then return a Ruby type enum (e.g. RUBY_T_ARRAY)
+ pub fn builtin_type(self) -> ruby_value_type {
+ assert!(!self.special_const_p());
+
+ let VALUE(cval) = self;
+ let rbasic_ptr = cval as *const RBasic;
+ let flags_bits: usize = unsafe { (*rbasic_ptr).flags }.as_usize();
+ (flags_bits & (RUBY_T_MASK as usize)) as ruby_value_type
+ }
+
+ pub fn class_of(self) -> VALUE {
+ unsafe { CLASS_OF(self) }
+ }
+
+ pub fn as_isize(self) -> isize {
+ let VALUE(is) = self;
+ is as isize
+ }
+
+ pub fn as_i32(self) -> i32 {
+ self.as_i64().try_into().unwrap()
+ }
+
+ pub fn as_u32(self) -> u32 {
+ let VALUE(i) = self;
+ i.try_into().unwrap()
+ }
+
+ pub fn as_i64(self) -> i64 {
+ let VALUE(i) = self;
+ i as i64
+ }
+
+ pub fn as_u64(self) -> u64 {
+ let VALUE(i) = self;
+ i.try_into().unwrap()
+ }
+
+ pub fn as_usize(self) -> usize {
+ let VALUE(us) = self;
+ us as usize
+ }
+
+ pub fn as_ptr<T>(self) -> *const T {
+ let VALUE(us) = self;
+ us as *const T
+ }
+
+ pub fn as_mut_ptr<T>(self) -> *mut T {
+ let VALUE(us) = self;
+ us as *mut T
+ }
+
+    /// For working with opaque pointers and encoding a null check.
+    /// Similar to [std::ptr::NonNull], but for `*const T`. `NonNull<T>`
+    /// is for `*mut T` while our C functions are set up to use `*const T`.
+ /// Casting from `NonNull<T>` to `*const T` is too noisy.
+ pub fn as_optional_ptr<T>(self) -> Option<*const T> {
+ let ptr: *const T = self.as_ptr();
+
+ if ptr.is_null() {
+ None
+ } else {
+ Some(ptr)
+ }
+ }
+
+ /// Assert that `self` is an iseq in debug builds
+ pub fn as_iseq(self) -> IseqPtr {
+ let ptr: IseqPtr = self.as_ptr();
+
+ #[cfg(debug_assertions)]
+ if !ptr.is_null() {
+ unsafe { rb_assert_iseq_handle(self) }
+ }
+
+ ptr
+ }
+
+ /// Assert that `self` is a method entry in debug builds
+ pub fn as_cme(self) -> *const rb_callable_method_entry_t {
+ let ptr: *const rb_callable_method_entry_t = self.as_ptr();
+
+ #[cfg(debug_assertions)]
+ if !ptr.is_null() {
+ unsafe { rb_assert_cme_handle(self) }
+ }
+
+ ptr
+ }
+}
+
+impl VALUE {
+ pub fn fixnum_from_usize(item: usize) -> Self {
+ assert!(item <= (RUBY_FIXNUM_MAX as usize)); // An unsigned will always be greater than RUBY_FIXNUM_MIN
+ let k: usize = item.wrapping_add(item.wrapping_add(1));
+ VALUE(k)
+ }
+}
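+
+// Hypothetical test module, included purely to illustrate the tagging arithmetic
+// above. It assumes the usual 64-bit CRuby layout that the constants later in this
+// file encode (Qnil = 8, Qtrue = 20, fixnums tagged with the low bit) and needs no
+// CRuby linkage to run.
+#[cfg(test)]
+mod value_tagging_examples {
+    use super::*;
+
+    #[test]
+    fn fixnum_encoding_is_2n_plus_1() {
+        // fixnum_from_usize(n) stores 2 * n + 1: the payload shifted left once
+        // with the low tag bit set.
+        let three = VALUE::fixnum_from_usize(3);
+        assert_eq!(three, VALUE(7));
+        assert!(three.fixnum_p());
+        assert!(three.special_const_p());
+    }
+
+    #[test]
+    fn only_nil_and_false_are_falsy() {
+        assert!(!Qnil.test());
+        assert!(!Qfalse.test());
+        assert!(Qtrue.test());
+        assert!(VALUE::fixnum_from_usize(0).test());
+    }
+}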
+
+impl From<IseqPtr> for VALUE {
+ /// For `.into()` convenience
+ fn from(iseq: IseqPtr) -> Self {
+ VALUE(iseq as usize)
+ }
+}
+
+impl From<*const rb_callable_method_entry_t> for VALUE {
+ /// For `.into()` convenience
+ fn from(cme: *const rb_callable_method_entry_t) -> Self {
+ VALUE(cme as usize)
+ }
+}
+
+impl From<VALUE> for u64 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ uimm as u64
+ }
+}
+
+impl From<VALUE> for i64 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ assert!(uimm <= (i64::MAX as usize));
+ uimm as i64
+ }
+}
+
+impl From<VALUE> for i32 {
+ fn from(value: VALUE) -> Self {
+ let VALUE(uimm) = value;
+ assert!(uimm <= (i32::MAX as usize));
+ uimm as i32
+ }
+}
+
+/// Produce a Ruby string from a Rust string slice
+pub fn rust_str_to_ruby(str: &str) -> VALUE {
+ unsafe { rb_utf8_str_new(str.as_ptr() as *const i8, str.len() as i64) }
+}
+
+/// Produce a Ruby symbol from a Rust string slice
+pub fn rust_str_to_sym(str: &str) -> VALUE {
+ let c_str = CString::new(str).unwrap();
+ let c_ptr: *const c_char = c_str.as_ptr();
+
+ unsafe { rb_id2sym(rb_intern(c_ptr)) }
+}
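+
+// Hypothetical sketch, only meaningful on a live VM: turning Rust string slices
+// into Ruby objects with the two helpers above.
+#[allow(dead_code)]
+fn example_string_conversion() -> (VALUE, VALUE) {
+    let rb_str = rust_str_to_ruby("yjit"); // Ruby String "yjit"
+    let rb_sym = rust_str_to_sym("yjit"); // Ruby Symbol :yjit
+    (rb_str, rb_sym)
+}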
+
+/// A location in Rust code for integrating with debugging facilities defined in C.
+/// Use the [src_loc!] macro to create an instance.
+pub struct SourceLocation {
+ pub file: CString,
+ pub line: c_int,
+}
+
+/// Make a [SourceLocation] at the current spot.
+macro_rules! src_loc {
+ () => {
+ // NOTE(alan): `CString::new` allocates so we might want to limit this to debug builds.
+ $crate::cruby::SourceLocation {
+ file: std::ffi::CString::new(file!()).unwrap(), // ASCII source file paths
+ line: line!().try_into().unwrap(), // not that many lines
+ }
+ };
+}
+
+pub(crate) use src_loc;
+
+/// Run GC write barrier. Required after making a new edge in the object reference
+/// graph from `old` to `young`.
+macro_rules! obj_written {
+ ($old: expr, $young: expr) => {
+ let (old, young): (VALUE, VALUE) = ($old, $young);
+ let src_loc = $crate::cruby::src_loc!();
+ unsafe { rb_yjit_obj_written(old, young, src_loc.file.as_ptr(), src_loc.line) };
+ };
+}
+pub(crate) use obj_written;
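+
+// Hypothetical usage sketch: after creating a new reference from `old` to `young`
+// (both valid heap objects on a live VM), the barrier is reported like this.
+#[allow(dead_code)]
+fn example_write_barrier(old: VALUE, young: VALUE) {
+    obj_written!(old, young);
+}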
+
+/// Acquire the VM lock, make sure all other Ruby threads are asleep, and then run
+/// some code while holding the lock. Returns whatever `func` returns.
+/// Use with [src_loc!].
+///
+/// Required for code patching in the presence of ractors.
+pub fn with_vm_lock<F, R>(loc: SourceLocation, func: F) -> R
+where
+ F: FnOnce() -> R + UnwindSafe,
+{
+ let file = loc.file.as_ptr();
+ let line = loc.line;
+ let mut recursive_lock_level: c_uint = 0;
+
+ unsafe { rb_yjit_vm_lock_then_barrier(&mut recursive_lock_level, file, line) };
+
+ let ret = match catch_unwind(func) {
+ Ok(result) => result,
+ Err(_) => {
+ // Theoretically we can recover from some of these panics,
+ // but it's too late if the unwind reaches here.
+ use std::{io, process, str};
+
+ let _ = catch_unwind(|| {
+ // IO functions can panic too.
+ eprintln!(
+ "YJIT panicked while holding VM lock acquired at {}:{}. Aborting...",
+ str::from_utf8(loc.file.as_bytes()).unwrap_or("<not utf8>"),
+ line,
+ );
+ });
+ process::abort();
+ }
+ };
+
+ unsafe { rb_yjit_vm_unlock(&mut recursive_lock_level, file, line) };
+
+ ret
+}
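+
+// Hypothetical sketch of the intended call pattern: pair src_loc!() with
+// with_vm_lock() so the acquisition site is reported if the closure panics while
+// the other Ruby threads are stopped.
+#[allow(dead_code)]
+fn example_with_vm_lock() -> usize {
+    with_vm_lock(src_loc!(), || {
+        // Code patching or other work needing exclusive VM access goes here.
+        0
+    })
+}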
+
+// Non-idiomatic capitalization for consistency with CRuby code
+#[allow(non_upper_case_globals)]
+pub const Qfalse: VALUE = VALUE(0);
+#[allow(non_upper_case_globals)]
+pub const Qnil: VALUE = VALUE(8);
+#[allow(non_upper_case_globals)]
+pub const Qtrue: VALUE = VALUE(20);
+#[allow(non_upper_case_globals)]
+pub const Qundef: VALUE = VALUE(52);
+
+pub const RUBY_SYMBOL_FLAG: usize = 0x0c;
+
+pub const RUBY_LONG_MIN: isize = std::os::raw::c_long::MIN as isize;
+pub const RUBY_LONG_MAX: isize = std::os::raw::c_long::MAX as isize;
+
+pub const RUBY_FIXNUM_MIN: isize = RUBY_LONG_MIN / 2;
+pub const RUBY_FIXNUM_MAX: isize = RUBY_LONG_MAX / 2;
+pub const RUBY_FIXNUM_FLAG: usize = 0x1;
+
+pub const RUBY_FLONUM_FLAG: usize = 0x2;
+pub const RUBY_FLONUM_MASK: usize = 0x3;
+
+pub const RUBY_IMMEDIATE_MASK: usize = 0x7;
+
+pub const RUBY_SPECIAL_SHIFT: usize = 8;
+
+// Constants from vm_core.h
+pub const VM_SPECIAL_OBJECT_VMCORE: usize = 0x1;
+pub const VM_ENV_DATA_INDEX_SPECVAL: isize = -1;
+pub const VM_ENV_DATA_INDEX_FLAGS: isize = 0;
+pub const VM_ENV_DATA_SIZE: usize = 3;
+
+// From vm_callinfo.h
+pub const VM_CALL_ARGS_SPLAT: u32 = 1 << VM_CALL_ARGS_SPLAT_bit;
+pub const VM_CALL_ARGS_BLOCKARG: u32 = 1 << VM_CALL_ARGS_BLOCKARG_bit;
+pub const VM_CALL_FCALL: u32 = 1 << VM_CALL_FCALL_bit;
+pub const VM_CALL_KWARG: u32 = 1 << VM_CALL_KWARG_bit;
+pub const VM_CALL_KW_SPLAT: u32 = 1 << VM_CALL_KW_SPLAT_bit;
+pub const VM_CALL_TAILCALL: u32 = 1 << VM_CALL_TAILCALL_bit;
+
+pub const SIZEOF_VALUE: usize = 8;
+pub const SIZEOF_VALUE_I32: i32 = SIZEOF_VALUE as i32;
+
+pub const RUBY_FL_SINGLETON: usize = RUBY_FL_USER_0;
+
+pub const ROBJECT_EMBED: usize = RUBY_FL_USER_1;
+pub const ROBJECT_EMBED_LEN_MAX: usize = 3; // This is a complex calculation in ruby/internal/core/robject.h
+
+pub const RMODULE_IS_REFINEMENT: usize = RUBY_FL_USER_3;
+
+// Constants from include/ruby/internal/fl_type.h
+pub const RUBY_FL_USHIFT: usize = 12;
+pub const RUBY_FL_USER_0: usize = 1 << (RUBY_FL_USHIFT + 0);
+pub const RUBY_FL_USER_1: usize = 1 << (RUBY_FL_USHIFT + 1);
+pub const RUBY_FL_USER_2: usize = 1 << (RUBY_FL_USHIFT + 2);
+pub const RUBY_FL_USER_3: usize = 1 << (RUBY_FL_USHIFT + 3);
+pub const RUBY_FL_USER_4: usize = 1 << (RUBY_FL_USHIFT + 4);
+pub const RUBY_FL_USER_5: usize = 1 << (RUBY_FL_USHIFT + 5);
+pub const RUBY_FL_USER_6: usize = 1 << (RUBY_FL_USHIFT + 6);
+pub const RUBY_FL_USER_7: usize = 1 << (RUBY_FL_USHIFT + 7);
+pub const RUBY_FL_USER_8: usize = 1 << (RUBY_FL_USHIFT + 8);
+pub const RUBY_FL_USER_9: usize = 1 << (RUBY_FL_USHIFT + 9);
+pub const RUBY_FL_USER_10: usize = 1 << (RUBY_FL_USHIFT + 10);
+pub const RUBY_FL_USER_11: usize = 1 << (RUBY_FL_USHIFT + 11);
+pub const RUBY_FL_USER_12: usize = 1 << (RUBY_FL_USHIFT + 12);
+pub const RUBY_FL_USER_13: usize = 1 << (RUBY_FL_USHIFT + 13);
+pub const RUBY_FL_USER_14: usize = 1 << (RUBY_FL_USHIFT + 14);
+pub const RUBY_FL_USER_15: usize = 1 << (RUBY_FL_USHIFT + 15);
+pub const RUBY_FL_USER_16: usize = 1 << (RUBY_FL_USHIFT + 16);
+pub const RUBY_FL_USER_17: usize = 1 << (RUBY_FL_USHIFT + 17);
+pub const RUBY_FL_USER_18: usize = 1 << (RUBY_FL_USHIFT + 18);
+pub const RUBY_FL_USER_19: usize = 1 << (RUBY_FL_USHIFT + 19);
+
+// Constants from include/ruby/internal/core/rarray.h
+pub const RARRAY_EMBED_FLAG: usize = RUBY_FL_USER_1;
+pub const RARRAY_EMBED_LEN_SHIFT: usize = RUBY_FL_USHIFT + 3;
+pub const RARRAY_EMBED_LEN_MASK: usize = RUBY_FL_USER_3 | RUBY_FL_USER_4;
+
+// From internal/struct.h
+pub const RSTRUCT_EMBED_LEN_MASK: usize = RUBY_FL_USER_2 | RUBY_FL_USER_1;
+
+// From iseq.h
+pub const ISEQ_TRANSLATED: usize = RUBY_FL_USER_7;
+
+// We'll need to encode a lot of Ruby struct/field offsets as constants unless we want to
+// redeclare all the Ruby C structs and write our own offsetof macro. For now, we use constants.
+pub const RUBY_OFFSET_RBASIC_FLAGS: i32 = 0; // struct RBasic, field "flags"
+pub const RUBY_OFFSET_RBASIC_KLASS: i32 = 8; // struct RBasic, field "klass"
+pub const RUBY_OFFSET_RARRAY_AS_HEAP_LEN: i32 = 16; // struct RArray, subfield "as.heap.len"
+pub const RUBY_OFFSET_RARRAY_AS_HEAP_PTR: i32 = 32; // struct RArray, subfield "as.heap.ptr"
+pub const RUBY_OFFSET_RARRAY_AS_ARY: i32 = 16; // struct RArray, subfield "as.ary"
+
+pub const RUBY_OFFSET_RSTRUCT_AS_HEAP_PTR: i32 = 24; // struct RStruct, subfield "as.heap.ptr"
+pub const RUBY_OFFSET_RSTRUCT_AS_ARY: i32 = 16; // struct RStruct, subfield "as.ary"
+
+pub const RUBY_OFFSET_ROBJECT_AS_ARY: i32 = 16; // struct RObject, subfield "as.ary"
+pub const RUBY_OFFSET_ROBJECT_AS_HEAP_NUMIV: i32 = 16; // struct RObject, subfield "as.heap.numiv"
+pub const RUBY_OFFSET_ROBJECT_AS_HEAP_IVPTR: i32 = 24; // struct RObject, subfield "as.heap.ivptr"
+
+// Constants from rb_control_frame_t vm_core.h
+pub const RUBY_OFFSET_CFP_PC: i32 = 0;
+pub const RUBY_OFFSET_CFP_SP: i32 = 8;
+pub const RUBY_OFFSET_CFP_ISEQ: i32 = 16;
+pub const RUBY_OFFSET_CFP_SELF: i32 = 24;
+pub const RUBY_OFFSET_CFP_EP: i32 = 32;
+pub const RUBY_OFFSET_CFP_BLOCK_CODE: i32 = 40;
+pub const RUBY_OFFSET_CFP_BP: i32 = 48; // field __bp__
+pub const RUBY_OFFSET_CFP_JIT_RETURN: i32 = 56;
+pub const RUBY_SIZEOF_CONTROL_FRAME: usize = 64;
+
+// Constants from rb_execution_context_t vm_core.h
+pub const RUBY_OFFSET_EC_CFP: i32 = 16;
+pub const RUBY_OFFSET_EC_INTERRUPT_FLAG: i32 = 32; // rb_atomic_t (u32)
+pub const RUBY_OFFSET_EC_INTERRUPT_MASK: i32 = 36; // rb_atomic_t (u32)
+pub const RUBY_OFFSET_EC_THREAD_PTR: i32 = 48;
+
+// Constants from rb_thread_t in vm_core.h
+pub const RUBY_OFFSET_THREAD_SELF: i32 = 16;
+
+// Constants from iseq_inline_constant_cache (IC) and iseq_inline_constant_cache_entry (ICE) in vm_core.h
+pub const RUBY_OFFSET_IC_ENTRY: i32 = 0;
+pub const RUBY_OFFSET_ICE_VALUE: i32 = 8;
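+
+// Hypothetical helper, included only to illustrate how these offset constants are
+// consumed: a field is read by adding the constant to a raw object pointer rather
+// than by going through a redeclared C struct. This reads the same RBasic flags
+// word that builtin_type() above reaches through the RBasic struct.
+#[allow(dead_code)]
+unsafe fn example_read_rbasic_flags(obj: VALUE) -> usize {
+    // `obj` must be a heap object, i.e. !obj.special_const_p().
+    let base: *const u8 = obj.as_ptr();
+    let flags_ptr = base.offset(RUBY_OFFSET_RBASIC_FLAGS as isize) as *const usize;
+    *flags_ptr
+}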
+
+// TODO: need to dynamically autogenerate constants for all the YARV opcodes from insns.def
+// TODO: typing of these adds unnecessary casting
+pub const OP_NOP: usize = 0;
+pub const OP_GETLOCAL: usize = 1;
+pub const OP_SETLOCAL: usize = 2;
+pub const OP_GETBLOCKPARAM: usize = 3;
+pub const OP_SETBLOCKPARAM: usize = 4;
+pub const OP_GETBLOCKPARAMPROXY: usize = 5;
+pub const OP_GETSPECIAL: usize = 6;
+pub const OP_SETSPECIAL: usize = 7;
+pub const OP_GETINSTANCEVARIABLE: usize = 8;
+pub const OP_SETINSTANCEVARIABLE: usize = 9;
+pub const OP_GETCLASSVARIABLE: usize = 10;
+pub const OP_SETCLASSVARIABLE: usize = 11;
+pub const OP_GETCONSTANT: usize = 12;
+pub const OP_SETCONSTANT: usize = 13;
+pub const OP_GETGLOBAL: usize = 14;
+pub const OP_SETGLOBAL: usize = 15;
+pub const OP_PUTNIL: usize = 16;
+pub const OP_PUTSELF: usize = 17;
+pub const OP_PUTOBJECT: usize = 18;
+pub const OP_PUTSPECIALOBJECT: usize = 19;
+pub const OP_PUTSTRING: usize = 20;
+pub const OP_CONCATSTRINGS: usize = 21;
+pub const OP_ANYTOSTRING: usize = 22;
+pub const OP_TOREGEXP: usize = 23;
+pub const OP_INTERN: usize = 24;
+pub const OP_NEWARRAY: usize = 25;
+pub const OP_NEWARRAYKWSPLAT: usize = 26;
+pub const OP_DUPARRAY: usize = 27;
+pub const OP_DUPHASH: usize = 28;
+pub const OP_EXPANDARRAY: usize = 29;
+pub const OP_CONCATARRAY: usize = 30;
+pub const OP_SPLATARRAY: usize = 31;
+pub const OP_NEWHASH: usize = 32;
+pub const OP_NEWRANGE: usize = 33;
+pub const OP_POP: usize = 34;
+pub const OP_DUP: usize = 35;
+pub const OP_DUPN: usize = 36;
+pub const OP_SWAP: usize = 37;
+pub const OP_TOPN: usize = 38;
+pub const OP_SETN: usize = 39;
+pub const OP_ADJUSTSTACK: usize = 40;
+pub const OP_DEFINED: usize = 41;
+pub const OP_CHECKMATCH: usize = 42;
+pub const OP_CHECKKEYWORD: usize = 43;
+pub const OP_CHECKTYPE: usize = 44;
+pub const OP_DEFINECLASS: usize = 45;
+pub const OP_DEFINEMETHOD: usize = 46;
+pub const OP_DEFINESMETHOD: usize = 47;
+pub const OP_SEND: usize = 48;
+pub const OP_OPT_SEND_WITHOUT_BLOCK: usize = 49;
+pub const OP_OBJTOSTRING: usize = 50;
+pub const OP_OPT_STR_FREEZE: usize = 51;
+pub const OP_OPT_NIL_P: usize = 52;
+pub const OP_OPT_STR_UMINUS: usize = 53;
+pub const OP_OPT_NEWARRAY_MAX: usize = 54;
+pub const OP_OPT_NEWARRAY_MIN: usize = 55;
+pub const OP_INVOKESUPER: usize = 56;
+pub const OP_INVOKEBLOCK: usize = 57;
+pub const OP_LEAVE: usize = 58;
+pub const OP_THROW: usize = 59;
+pub const OP_JUMP: usize = 60;
+pub const OP_BRANCHIF: usize = 61;
+pub const OP_BRANCHUNLESS: usize = 62;
+pub const OP_BRANCHNIL: usize = 63;
+pub const OP_OPT_GETINLINECACHE: usize = 64;
+pub const OP_OPT_SETINLINECACHE: usize = 65;
+pub const OP_ONCE: usize = 66;
+pub const OP_OPT_CASE_DISPATCH: usize = 67;
+pub const OP_OPT_PLUS: usize = 68;
+pub const OP_OPT_MINUS: usize = 69;
+pub const OP_OPT_MULT: usize = 70;
+pub const OP_OPT_DIV: usize = 71;
+pub const OP_OPT_MOD: usize = 72;
+pub const OP_OPT_EQ: usize = 73;
+pub const OP_OPT_NEQ: usize = 74;
+pub const OP_OPT_LT: usize = 75;
+pub const OP_OPT_LE: usize = 76;
+pub const OP_OPT_GT: usize = 77;
+pub const OP_OPT_GE: usize = 78;
+pub const OP_OPT_LTLT: usize = 79;
+pub const OP_OPT_AND: usize = 80;
+pub const OP_OPT_OR: usize = 81;
+pub const OP_OPT_AREF: usize = 82;
+pub const OP_OPT_ASET: usize = 83;
+pub const OP_OPT_ASET_WITH: usize = 84;
+pub const OP_OPT_AREF_WITH: usize = 85;
+pub const OP_OPT_LENGTH: usize = 86;
+pub const OP_OPT_SIZE: usize = 87;
+pub const OP_OPT_EMPTY_P: usize = 88;
+pub const OP_OPT_SUCC: usize = 89;
+pub const OP_OPT_NOT: usize = 90;
+pub const OP_OPT_REGEXPMATCH2: usize = 91;
+pub const OP_INVOKEBUILTIN: usize = 92;
+pub const OP_OPT_INVOKEBUILTIN_DELEGATE: usize = 93;
+pub const OP_OPT_INVOKEBUILTIN_DELEGATE_LEAVE: usize = 94;
+pub const OP_GETLOCAL_WC_0: usize = 95;
+pub const OP_GETLOCAL_WC_1: usize = 96;
+pub const OP_SETLOCAL_WC_0: usize = 97;
+pub const OP_SETLOCAL_WC_1: usize = 98;
+pub const OP_PUTOBJECT_INT2FIX_0_: usize = 99;
+pub const OP_PUTOBJECT_INT2FIX_1_: usize = 100;
+
+pub const VM_INSTRUCTION_SIZE: usize = 202;
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
new file mode 100644
index 0000000000..2be42f5c63
--- /dev/null
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -0,0 +1,783 @@
+/* automatically generated by rust-bindgen 0.59.2 */
+
+pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
+pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
+pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
+pub const ARRAY_REDEFINED_OP_FLAG: u32 = 8;
+pub const HASH_REDEFINED_OP_FLAG: u32 = 16;
+pub const SYMBOL_REDEFINED_OP_FLAG: u32 = 64;
+pub const TIME_REDEFINED_OP_FLAG: u32 = 128;
+pub const REGEXP_REDEFINED_OP_FLAG: u32 = 256;
+pub const NIL_REDEFINED_OP_FLAG: u32 = 512;
+pub const TRUE_REDEFINED_OP_FLAG: u32 = 1024;
+pub const FALSE_REDEFINED_OP_FLAG: u32 = 2048;
+pub const PROC_REDEFINED_OP_FLAG: u32 = 4096;
+pub const VM_ENV_DATA_INDEX_ME_CREF: i32 = -2;
+pub const VM_BLOCK_HANDLER_NONE: u32 = 0;
+pub type ID = ::std::os::raw::c_ulong;
+extern "C" {
+ pub fn rb_singleton_class(obj: VALUE) -> VALUE;
+}
+pub type rb_alloc_func_t = ::std::option::Option<unsafe extern "C" fn(klass: VALUE) -> VALUE>;
+extern "C" {
+ pub fn rb_get_alloc_func(klass: VALUE) -> rb_alloc_func_t;
+}
+#[repr(C)]
+pub struct RBasic {
+ pub flags: VALUE,
+ pub klass: VALUE,
+}
+pub const RUBY_T_NONE: ruby_value_type = 0;
+pub const RUBY_T_OBJECT: ruby_value_type = 1;
+pub const RUBY_T_CLASS: ruby_value_type = 2;
+pub const RUBY_T_MODULE: ruby_value_type = 3;
+pub const RUBY_T_FLOAT: ruby_value_type = 4;
+pub const RUBY_T_STRING: ruby_value_type = 5;
+pub const RUBY_T_REGEXP: ruby_value_type = 6;
+pub const RUBY_T_ARRAY: ruby_value_type = 7;
+pub const RUBY_T_HASH: ruby_value_type = 8;
+pub const RUBY_T_STRUCT: ruby_value_type = 9;
+pub const RUBY_T_BIGNUM: ruby_value_type = 10;
+pub const RUBY_T_FILE: ruby_value_type = 11;
+pub const RUBY_T_DATA: ruby_value_type = 12;
+pub const RUBY_T_MATCH: ruby_value_type = 13;
+pub const RUBY_T_COMPLEX: ruby_value_type = 14;
+pub const RUBY_T_RATIONAL: ruby_value_type = 15;
+pub const RUBY_T_NIL: ruby_value_type = 17;
+pub const RUBY_T_TRUE: ruby_value_type = 18;
+pub const RUBY_T_FALSE: ruby_value_type = 19;
+pub const RUBY_T_SYMBOL: ruby_value_type = 20;
+pub const RUBY_T_FIXNUM: ruby_value_type = 21;
+pub const RUBY_T_UNDEF: ruby_value_type = 22;
+pub const RUBY_T_IMEMO: ruby_value_type = 26;
+pub const RUBY_T_NODE: ruby_value_type = 27;
+pub const RUBY_T_ICLASS: ruby_value_type = 28;
+pub const RUBY_T_ZOMBIE: ruby_value_type = 29;
+pub const RUBY_T_MOVED: ruby_value_type = 30;
+pub const RUBY_T_MASK: ruby_value_type = 31;
+pub type ruby_value_type = u32;
+pub type st_data_t = ::std::os::raw::c_ulong;
+pub type st_index_t = st_data_t;
+extern "C" {
+ pub fn rb_class_get_superclass(klass: VALUE) -> VALUE;
+}
+extern "C" {
+ pub static mut rb_mKernel: VALUE;
+}
+extern "C" {
+ pub static mut rb_cBasicObject: VALUE;
+}
+extern "C" {
+ pub static mut rb_cArray: VALUE;
+}
+extern "C" {
+ pub static mut rb_cFalseClass: VALUE;
+}
+extern "C" {
+ pub static mut rb_cFloat: VALUE;
+}
+extern "C" {
+ pub static mut rb_cHash: VALUE;
+}
+extern "C" {
+ pub static mut rb_cInteger: VALUE;
+}
+extern "C" {
+ pub static mut rb_cModule: VALUE;
+}
+extern "C" {
+ pub static mut rb_cNilClass: VALUE;
+}
+extern "C" {
+ pub static mut rb_cString: VALUE;
+}
+extern "C" {
+ pub static mut rb_cSymbol: VALUE;
+}
+extern "C" {
+ pub static mut rb_cThread: VALUE;
+}
+extern "C" {
+ pub static mut rb_cTrueClass: VALUE;
+}
+extern "C" {
+ pub fn rb_ary_new_capa(capa: ::std::os::raw::c_long) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ary_store(ary: VALUE, key: ::std::os::raw::c_long, val: VALUE);
+}
+extern "C" {
+ pub fn rb_ary_resurrect(ary: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ary_clear(ary: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_new() -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_aref(hash: VALUE, key: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_aset(hash: VALUE, key: VALUE, val: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_bulk_insert(argc: ::std::os::raw::c_long, argv: *const VALUE, hash: VALUE);
+}
+extern "C" {
+ pub fn rb_sym2id(obj: VALUE) -> ID;
+}
+extern "C" {
+ pub fn rb_id2sym(id: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_intern(name: *const ::std::os::raw::c_char) -> ID;
+}
+extern "C" {
+ pub fn rb_gc_mark(obj: VALUE);
+}
+extern "C" {
+ pub fn rb_gc_mark_movable(obj: VALUE);
+}
+extern "C" {
+ pub fn rb_gc_location(obj: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_is_kind_of(obj: VALUE, klass: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_backref_get() -> VALUE;
+}
+extern "C" {
+ pub fn rb_range_new(beg: VALUE, end: VALUE, excl: ::std::os::raw::c_int) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_nth_match(n: ::std::os::raw::c_int, md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_last_match(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_match_pre(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_match_post(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_reg_match_last(md: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_utf8_str_new(
+ ptr: *const ::std::os::raw::c_char,
+ len: ::std::os::raw::c_long,
+ ) -> VALUE;
+}
+extern "C" {
+ pub fn rb_str_intern(str_: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ivar_get(obj: VALUE, name: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_attr_get(obj: VALUE, name: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_info_dump(obj: VALUE);
+}
+extern "C" {
+ pub fn rb_reg_new_ary(ary: VALUE, options: ::std::os::raw::c_int) -> VALUE;
+}
+pub const idDot2: ruby_method_ids = 128;
+pub const idDot3: ruby_method_ids = 129;
+pub const idUPlus: ruby_method_ids = 132;
+pub const idUMinus: ruby_method_ids = 133;
+pub const idPow: ruby_method_ids = 134;
+pub const idCmp: ruby_method_ids = 135;
+pub const idPLUS: ruby_method_ids = 43;
+pub const idMINUS: ruby_method_ids = 45;
+pub const idMULT: ruby_method_ids = 42;
+pub const idDIV: ruby_method_ids = 47;
+pub const idMOD: ruby_method_ids = 37;
+pub const idLTLT: ruby_method_ids = 136;
+pub const idGTGT: ruby_method_ids = 137;
+pub const idLT: ruby_method_ids = 60;
+pub const idLE: ruby_method_ids = 138;
+pub const idGT: ruby_method_ids = 62;
+pub const idGE: ruby_method_ids = 139;
+pub const idEq: ruby_method_ids = 140;
+pub const idEqq: ruby_method_ids = 141;
+pub const idNeq: ruby_method_ids = 142;
+pub const idNot: ruby_method_ids = 33;
+pub const idAnd: ruby_method_ids = 38;
+pub const idOr: ruby_method_ids = 124;
+pub const idBackquote: ruby_method_ids = 96;
+pub const idEqTilde: ruby_method_ids = 143;
+pub const idNeqTilde: ruby_method_ids = 144;
+pub const idAREF: ruby_method_ids = 145;
+pub const idASET: ruby_method_ids = 146;
+pub const idCOLON2: ruby_method_ids = 147;
+pub const idANDOP: ruby_method_ids = 148;
+pub const idOROP: ruby_method_ids = 149;
+pub const idANDDOT: ruby_method_ids = 150;
+pub const tPRESERVED_ID_BEGIN: ruby_method_ids = 150;
+pub const idNilP: ruby_method_ids = 151;
+pub const idNULL: ruby_method_ids = 152;
+pub const idEmptyP: ruby_method_ids = 153;
+pub const idEqlP: ruby_method_ids = 154;
+pub const idRespond_to: ruby_method_ids = 155;
+pub const idRespond_to_missing: ruby_method_ids = 156;
+pub const idIFUNC: ruby_method_ids = 157;
+pub const idCFUNC: ruby_method_ids = 158;
+pub const id_core_set_method_alias: ruby_method_ids = 159;
+pub const id_core_set_variable_alias: ruby_method_ids = 160;
+pub const id_core_undef_method: ruby_method_ids = 161;
+pub const id_core_define_method: ruby_method_ids = 162;
+pub const id_core_define_singleton_method: ruby_method_ids = 163;
+pub const id_core_set_postexe: ruby_method_ids = 164;
+pub const id_core_hash_merge_ptr: ruby_method_ids = 165;
+pub const id_core_hash_merge_kwd: ruby_method_ids = 166;
+pub const id_core_raise: ruby_method_ids = 167;
+pub const id_core_sprintf: ruby_method_ids = 168;
+pub const id_debug_created_info: ruby_method_ids = 169;
+pub const tPRESERVED_ID_END: ruby_method_ids = 170;
+pub const tTOKEN_LOCAL_BEGIN: ruby_method_ids = 169;
+pub const tMax: ruby_method_ids = 170;
+pub const tMin: ruby_method_ids = 171;
+pub const tFreeze: ruby_method_ids = 172;
+pub const tInspect: ruby_method_ids = 173;
+pub const tIntern: ruby_method_ids = 174;
+pub const tObject_id: ruby_method_ids = 175;
+pub const tConst_added: ruby_method_ids = 176;
+pub const tConst_missing: ruby_method_ids = 177;
+pub const tMethodMissing: ruby_method_ids = 178;
+pub const tMethod_added: ruby_method_ids = 179;
+pub const tSingleton_method_added: ruby_method_ids = 180;
+pub const tMethod_removed: ruby_method_ids = 181;
+pub const tSingleton_method_removed: ruby_method_ids = 182;
+pub const tMethod_undefined: ruby_method_ids = 183;
+pub const tSingleton_method_undefined: ruby_method_ids = 184;
+pub const tLength: ruby_method_ids = 185;
+pub const tSize: ruby_method_ids = 186;
+pub const tGets: ruby_method_ids = 187;
+pub const tSucc: ruby_method_ids = 188;
+pub const tEach: ruby_method_ids = 189;
+pub const tProc: ruby_method_ids = 190;
+pub const tLambda: ruby_method_ids = 191;
+pub const tSend: ruby_method_ids = 192;
+pub const t__send__: ruby_method_ids = 193;
+pub const t__attached__: ruby_method_ids = 194;
+pub const t__recursive_key__: ruby_method_ids = 195;
+pub const tInitialize: ruby_method_ids = 196;
+pub const tInitialize_copy: ruby_method_ids = 197;
+pub const tInitialize_clone: ruby_method_ids = 198;
+pub const tInitialize_dup: ruby_method_ids = 199;
+pub const tTo_int: ruby_method_ids = 200;
+pub const tTo_ary: ruby_method_ids = 201;
+pub const tTo_str: ruby_method_ids = 202;
+pub const tTo_sym: ruby_method_ids = 203;
+pub const tTo_hash: ruby_method_ids = 204;
+pub const tTo_proc: ruby_method_ids = 205;
+pub const tTo_io: ruby_method_ids = 206;
+pub const tTo_a: ruby_method_ids = 207;
+pub const tTo_s: ruby_method_ids = 208;
+pub const tTo_i: ruby_method_ids = 209;
+pub const tTo_f: ruby_method_ids = 210;
+pub const tTo_r: ruby_method_ids = 211;
+pub const tBt: ruby_method_ids = 212;
+pub const tBt_locations: ruby_method_ids = 213;
+pub const tCall: ruby_method_ids = 214;
+pub const tMesg: ruby_method_ids = 215;
+pub const tException: ruby_method_ids = 216;
+pub const tLocals: ruby_method_ids = 217;
+pub const tNOT: ruby_method_ids = 218;
+pub const tAND: ruby_method_ids = 219;
+pub const tOR: ruby_method_ids = 220;
+pub const tDiv: ruby_method_ids = 221;
+pub const tDivmod: ruby_method_ids = 222;
+pub const tFdiv: ruby_method_ids = 223;
+pub const tQuo: ruby_method_ids = 224;
+pub const tName: ruby_method_ids = 225;
+pub const tNil: ruby_method_ids = 226;
+pub const tUScore: ruby_method_ids = 227;
+pub const tNUMPARAM_1: ruby_method_ids = 228;
+pub const tNUMPARAM_2: ruby_method_ids = 229;
+pub const tNUMPARAM_3: ruby_method_ids = 230;
+pub const tNUMPARAM_4: ruby_method_ids = 231;
+pub const tNUMPARAM_5: ruby_method_ids = 232;
+pub const tNUMPARAM_6: ruby_method_ids = 233;
+pub const tNUMPARAM_7: ruby_method_ids = 234;
+pub const tNUMPARAM_8: ruby_method_ids = 235;
+pub const tNUMPARAM_9: ruby_method_ids = 236;
+pub const tTOKEN_LOCAL_END: ruby_method_ids = 237;
+pub const tTOKEN_INSTANCE_BEGIN: ruby_method_ids = 236;
+pub const tTOKEN_INSTANCE_END: ruby_method_ids = 237;
+pub const tTOKEN_GLOBAL_BEGIN: ruby_method_ids = 236;
+pub const tLASTLINE: ruby_method_ids = 237;
+pub const tBACKREF: ruby_method_ids = 238;
+pub const tERROR_INFO: ruby_method_ids = 239;
+pub const tTOKEN_GLOBAL_END: ruby_method_ids = 240;
+pub const tTOKEN_CONST_BEGIN: ruby_method_ids = 239;
+pub const tTOKEN_CONST_END: ruby_method_ids = 240;
+pub const tTOKEN_CLASS_BEGIN: ruby_method_ids = 239;
+pub const tTOKEN_CLASS_END: ruby_method_ids = 240;
+pub const tTOKEN_ATTRSET_BEGIN: ruby_method_ids = 239;
+pub const tTOKEN_ATTRSET_END: ruby_method_ids = 240;
+pub const tNEXT_ID: ruby_method_ids = 240;
+pub const idMax: ruby_method_ids = 2721;
+pub const idMin: ruby_method_ids = 2737;
+pub const idFreeze: ruby_method_ids = 2753;
+pub const idInspect: ruby_method_ids = 2769;
+pub const idIntern: ruby_method_ids = 2785;
+pub const idObject_id: ruby_method_ids = 2801;
+pub const idConst_added: ruby_method_ids = 2817;
+pub const idConst_missing: ruby_method_ids = 2833;
+pub const idMethodMissing: ruby_method_ids = 2849;
+pub const idMethod_added: ruby_method_ids = 2865;
+pub const idSingleton_method_added: ruby_method_ids = 2881;
+pub const idMethod_removed: ruby_method_ids = 2897;
+pub const idSingleton_method_removed: ruby_method_ids = 2913;
+pub const idMethod_undefined: ruby_method_ids = 2929;
+pub const idSingleton_method_undefined: ruby_method_ids = 2945;
+pub const idLength: ruby_method_ids = 2961;
+pub const idSize: ruby_method_ids = 2977;
+pub const idGets: ruby_method_ids = 2993;
+pub const idSucc: ruby_method_ids = 3009;
+pub const idEach: ruby_method_ids = 3025;
+pub const idProc: ruby_method_ids = 3041;
+pub const idLambda: ruby_method_ids = 3057;
+pub const idSend: ruby_method_ids = 3073;
+pub const id__send__: ruby_method_ids = 3089;
+pub const id__attached__: ruby_method_ids = 3105;
+pub const id__recursive_key__: ruby_method_ids = 3121;
+pub const idInitialize: ruby_method_ids = 3137;
+pub const idInitialize_copy: ruby_method_ids = 3153;
+pub const idInitialize_clone: ruby_method_ids = 3169;
+pub const idInitialize_dup: ruby_method_ids = 3185;
+pub const idTo_int: ruby_method_ids = 3201;
+pub const idTo_ary: ruby_method_ids = 3217;
+pub const idTo_str: ruby_method_ids = 3233;
+pub const idTo_sym: ruby_method_ids = 3249;
+pub const idTo_hash: ruby_method_ids = 3265;
+pub const idTo_proc: ruby_method_ids = 3281;
+pub const idTo_io: ruby_method_ids = 3297;
+pub const idTo_a: ruby_method_ids = 3313;
+pub const idTo_s: ruby_method_ids = 3329;
+pub const idTo_i: ruby_method_ids = 3345;
+pub const idTo_f: ruby_method_ids = 3361;
+pub const idTo_r: ruby_method_ids = 3377;
+pub const idBt: ruby_method_ids = 3393;
+pub const idBt_locations: ruby_method_ids = 3409;
+pub const idCall: ruby_method_ids = 3425;
+pub const idMesg: ruby_method_ids = 3441;
+pub const idException: ruby_method_ids = 3457;
+pub const idLocals: ruby_method_ids = 3473;
+pub const idNOT: ruby_method_ids = 3489;
+pub const idAND: ruby_method_ids = 3505;
+pub const idOR: ruby_method_ids = 3521;
+pub const idDiv: ruby_method_ids = 3537;
+pub const idDivmod: ruby_method_ids = 3553;
+pub const idFdiv: ruby_method_ids = 3569;
+pub const idQuo: ruby_method_ids = 3585;
+pub const idName: ruby_method_ids = 3601;
+pub const idNil: ruby_method_ids = 3617;
+pub const idUScore: ruby_method_ids = 3633;
+pub const idNUMPARAM_1: ruby_method_ids = 3649;
+pub const idNUMPARAM_2: ruby_method_ids = 3665;
+pub const idNUMPARAM_3: ruby_method_ids = 3681;
+pub const idNUMPARAM_4: ruby_method_ids = 3697;
+pub const idNUMPARAM_5: ruby_method_ids = 3713;
+pub const idNUMPARAM_6: ruby_method_ids = 3729;
+pub const idNUMPARAM_7: ruby_method_ids = 3745;
+pub const idNUMPARAM_8: ruby_method_ids = 3761;
+pub const idNUMPARAM_9: ruby_method_ids = 3777;
+pub const idLASTLINE: ruby_method_ids = 3799;
+pub const idBACKREF: ruby_method_ids = 3815;
+pub const idERROR_INFO: ruby_method_ids = 3831;
+pub const tLAST_OP_ID: ruby_method_ids = 169;
+pub const idLAST_OP_ID: ruby_method_ids = 10;
+pub type ruby_method_ids = u32;
+extern "C" {
+ pub fn rb_ary_tmp_new_from_values(
+ arg1: VALUE,
+ arg2: ::std::os::raw::c_long,
+ arg3: *const VALUE,
+ ) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ec_ary_new_from_values(
+ ec: *mut rb_execution_context_struct,
+ n: ::std::os::raw::c_long,
+ elts: *const VALUE,
+ ) -> VALUE;
+}
+pub type rb_serial_t = ::std::os::raw::c_ulonglong;
+extern "C" {
+ pub fn rb_class_allocate_instance(klass: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_info(obj: VALUE) -> *const ::std::os::raw::c_char;
+}
+pub const imemo_env: imemo_type = 0;
+pub const imemo_cref: imemo_type = 1;
+pub const imemo_svar: imemo_type = 2;
+pub const imemo_throw_data: imemo_type = 3;
+pub const imemo_ifunc: imemo_type = 4;
+pub const imemo_memo: imemo_type = 5;
+pub const imemo_ment: imemo_type = 6;
+pub const imemo_iseq: imemo_type = 7;
+pub const imemo_tmpbuf: imemo_type = 8;
+pub const imemo_ast: imemo_type = 9;
+pub const imemo_parser_strterm: imemo_type = 10;
+pub const imemo_callinfo: imemo_type = 11;
+pub const imemo_callcache: imemo_type = 12;
+pub const imemo_constcache: imemo_type = 13;
+pub type imemo_type = u32;
+pub const METHOD_VISI_UNDEF: rb_method_visibility_t = 0;
+pub const METHOD_VISI_PUBLIC: rb_method_visibility_t = 1;
+pub const METHOD_VISI_PRIVATE: rb_method_visibility_t = 2;
+pub const METHOD_VISI_PROTECTED: rb_method_visibility_t = 3;
+pub const METHOD_VISI_MASK: rb_method_visibility_t = 3;
+pub type rb_method_visibility_t = u32;
+#[repr(C)]
+pub struct rb_method_entry_struct {
+ pub flags: VALUE,
+ pub defined_class: VALUE,
+ pub def: *mut rb_method_definition_struct,
+ pub called_id: ID,
+ pub owner: VALUE,
+}
+pub type rb_method_entry_t = rb_method_entry_struct;
+#[repr(C)]
+pub struct rb_callable_method_entry_struct {
+ pub flags: VALUE,
+ pub defined_class: VALUE,
+ pub def: *mut rb_method_definition_struct,
+ pub called_id: ID,
+ pub owner: VALUE,
+}
+pub type rb_callable_method_entry_t = rb_callable_method_entry_struct;
+pub const VM_METHOD_TYPE_ISEQ: rb_method_type_t = 0;
+pub const VM_METHOD_TYPE_CFUNC: rb_method_type_t = 1;
+pub const VM_METHOD_TYPE_ATTRSET: rb_method_type_t = 2;
+pub const VM_METHOD_TYPE_IVAR: rb_method_type_t = 3;
+pub const VM_METHOD_TYPE_BMETHOD: rb_method_type_t = 4;
+pub const VM_METHOD_TYPE_ZSUPER: rb_method_type_t = 5;
+pub const VM_METHOD_TYPE_ALIAS: rb_method_type_t = 6;
+pub const VM_METHOD_TYPE_UNDEF: rb_method_type_t = 7;
+pub const VM_METHOD_TYPE_NOTIMPLEMENTED: rb_method_type_t = 8;
+pub const VM_METHOD_TYPE_OPTIMIZED: rb_method_type_t = 9;
+pub const VM_METHOD_TYPE_MISSING: rb_method_type_t = 10;
+pub const VM_METHOD_TYPE_REFINED: rb_method_type_t = 11;
+pub type rb_method_type_t = u32;
+pub const OPTIMIZED_METHOD_TYPE_SEND: method_optimized_type = 0;
+pub const OPTIMIZED_METHOD_TYPE_CALL: method_optimized_type = 1;
+pub const OPTIMIZED_METHOD_TYPE_BLOCK_CALL: method_optimized_type = 2;
+pub const OPTIMIZED_METHOD_TYPE_STRUCT_AREF: method_optimized_type = 3;
+pub const OPTIMIZED_METHOD_TYPE_STRUCT_ASET: method_optimized_type = 4;
+pub const OPTIMIZED_METHOD_TYPE__MAX: method_optimized_type = 5;
+pub type method_optimized_type = u32;
+extern "C" {
+ pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
+}
+extern "C" {
+ pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
+}
+pub type rb_num_t = ::std::os::raw::c_ulong;
+#[repr(C)]
+pub struct iseq_inline_constant_cache_entry {
+ pub flags: VALUE,
+ pub value: VALUE,
+ pub _unused1: VALUE,
+ pub _unused2: VALUE,
+ pub ic_cref: *const rb_cref_t,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct iseq_inline_constant_cache {
+ pub entry: *mut iseq_inline_constant_cache_entry,
+ pub get_insn_idx: ::std::os::raw::c_uint,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct iseq_inline_iv_cache_entry {
+ pub entry: *mut rb_iv_index_tbl_entry,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct iseq_inline_cvar_cache_entry {
+ pub entry: *mut rb_cvar_class_tbl_entry,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword {
+ pub num: ::std::os::raw::c_int,
+ pub required_num: ::std::os::raw::c_int,
+ pub bits_start: ::std::os::raw::c_int,
+ pub rest_start: ::std::os::raw::c_int,
+ pub table: *const ID,
+ pub default_values: *mut VALUE,
+}
+pub const BOP_PLUS: ruby_basic_operators = 0;
+pub const BOP_MINUS: ruby_basic_operators = 1;
+pub const BOP_MULT: ruby_basic_operators = 2;
+pub const BOP_DIV: ruby_basic_operators = 3;
+pub const BOP_MOD: ruby_basic_operators = 4;
+pub const BOP_EQ: ruby_basic_operators = 5;
+pub const BOP_EQQ: ruby_basic_operators = 6;
+pub const BOP_LT: ruby_basic_operators = 7;
+pub const BOP_LE: ruby_basic_operators = 8;
+pub const BOP_LTLT: ruby_basic_operators = 9;
+pub const BOP_AREF: ruby_basic_operators = 10;
+pub const BOP_ASET: ruby_basic_operators = 11;
+pub const BOP_LENGTH: ruby_basic_operators = 12;
+pub const BOP_SIZE: ruby_basic_operators = 13;
+pub const BOP_EMPTY_P: ruby_basic_operators = 14;
+pub const BOP_NIL_P: ruby_basic_operators = 15;
+pub const BOP_SUCC: ruby_basic_operators = 16;
+pub const BOP_GT: ruby_basic_operators = 17;
+pub const BOP_GE: ruby_basic_operators = 18;
+pub const BOP_NOT: ruby_basic_operators = 19;
+pub const BOP_NEQ: ruby_basic_operators = 20;
+pub const BOP_MATCH: ruby_basic_operators = 21;
+pub const BOP_FREEZE: ruby_basic_operators = 22;
+pub const BOP_UMINUS: ruby_basic_operators = 23;
+pub const BOP_MAX: ruby_basic_operators = 24;
+pub const BOP_MIN: ruby_basic_operators = 25;
+pub const BOP_CALL: ruby_basic_operators = 26;
+pub const BOP_AND: ruby_basic_operators = 27;
+pub const BOP_OR: ruby_basic_operators = 28;
+pub const BOP_LAST_: ruby_basic_operators = 29;
+pub type ruby_basic_operators = u32;
+pub type rb_control_frame_t = rb_control_frame_struct;
+extern "C" {
+ pub static mut rb_mRubyVMFrozenCore: VALUE;
+}
+extern "C" {
+ pub static mut rb_block_param_proxy: VALUE;
+}
+pub type IC = *mut iseq_inline_constant_cache;
+pub type IVC = *mut iseq_inline_iv_cache_entry;
+pub type ICVARC = *mut iseq_inline_cvar_cache_entry;
+pub const VM_FRAME_MAGIC_METHOD: vm_frame_env_flags = 286326785;
+pub const VM_FRAME_MAGIC_BLOCK: vm_frame_env_flags = 572653569;
+pub const VM_FRAME_MAGIC_CLASS: vm_frame_env_flags = 858980353;
+pub const VM_FRAME_MAGIC_TOP: vm_frame_env_flags = 1145307137;
+pub const VM_FRAME_MAGIC_CFUNC: vm_frame_env_flags = 1431633921;
+pub const VM_FRAME_MAGIC_IFUNC: vm_frame_env_flags = 1717960705;
+pub const VM_FRAME_MAGIC_EVAL: vm_frame_env_flags = 2004287489;
+pub const VM_FRAME_MAGIC_RESCUE: vm_frame_env_flags = 2022178817;
+pub const VM_FRAME_MAGIC_DUMMY: vm_frame_env_flags = 2040070145;
+pub const VM_FRAME_MAGIC_MASK: vm_frame_env_flags = 2147418113;
+pub const VM_FRAME_FLAG_FINISH: vm_frame_env_flags = 32;
+pub const VM_FRAME_FLAG_BMETHOD: vm_frame_env_flags = 64;
+pub const VM_FRAME_FLAG_CFRAME: vm_frame_env_flags = 128;
+pub const VM_FRAME_FLAG_LAMBDA: vm_frame_env_flags = 256;
+pub const VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM: vm_frame_env_flags = 512;
+pub const VM_FRAME_FLAG_CFRAME_KW: vm_frame_env_flags = 1024;
+pub const VM_FRAME_FLAG_PASSED: vm_frame_env_flags = 2048;
+pub const VM_ENV_FLAG_LOCAL: vm_frame_env_flags = 2;
+pub const VM_ENV_FLAG_ESCAPED: vm_frame_env_flags = 4;
+pub const VM_ENV_FLAG_WB_REQUIRED: vm_frame_env_flags = 8;
+pub const VM_ENV_FLAG_ISOLATED: vm_frame_env_flags = 16;
+pub type vm_frame_env_flags = u32;
+extern "C" {
+ pub fn rb_vm_frame_method_entry(
+ cfp: *const rb_control_frame_t,
+ ) -> *const rb_callable_method_entry_t;
+}
+pub const VM_CALL_ARGS_SPLAT_bit: vm_call_flag_bits = 0;
+pub const VM_CALL_ARGS_BLOCKARG_bit: vm_call_flag_bits = 1;
+pub const VM_CALL_FCALL_bit: vm_call_flag_bits = 2;
+pub const VM_CALL_VCALL_bit: vm_call_flag_bits = 3;
+pub const VM_CALL_ARGS_SIMPLE_bit: vm_call_flag_bits = 4;
+pub const VM_CALL_BLOCKISEQ_bit: vm_call_flag_bits = 5;
+pub const VM_CALL_KWARG_bit: vm_call_flag_bits = 6;
+pub const VM_CALL_KW_SPLAT_bit: vm_call_flag_bits = 7;
+pub const VM_CALL_TAILCALL_bit: vm_call_flag_bits = 8;
+pub const VM_CALL_SUPER_bit: vm_call_flag_bits = 9;
+pub const VM_CALL_ZSUPER_bit: vm_call_flag_bits = 10;
+pub const VM_CALL_OPT_SEND_bit: vm_call_flag_bits = 11;
+pub const VM_CALL_KW_SPLAT_MUT_bit: vm_call_flag_bits = 12;
+pub const VM_CALL__END: vm_call_flag_bits = 13;
+pub type vm_call_flag_bits = u32;
+#[repr(C)]
+pub struct rb_callinfo {
+ pub flags: VALUE,
+ pub kwarg: *const rb_callinfo_kwarg,
+ pub mid: VALUE,
+ pub flag: VALUE,
+ pub argc: VALUE,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_call_data {
+ pub ci: *const rb_callinfo,
+ pub cc: *const rb_callcache,
+}
+extern "C" {
+ pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_str_concat_literals(num: size_t, strary: *const VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_ec_str_resurrect(ec: *mut rb_execution_context_struct, str_: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_new_with_size(size: st_index_t) -> VALUE;
+}
+extern "C" {
+ pub fn rb_hash_resurrect(hash: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_obj_ensure_iv_index_mapping(obj: VALUE, id: ID) -> u32;
+}
+extern "C" {
+ pub fn rb_gvar_get(arg1: ID) -> VALUE;
+}
+extern "C" {
+ pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE;
+}
+extern "C" {
+ pub fn rb_vm_insn_decode(encoded: VALUE) -> ::std::os::raw::c_int;
+}
+#[repr(C)]
+pub struct rb_iv_index_tbl_entry {
+ pub index: u32,
+ pub class_serial: rb_serial_t,
+ pub class_value: VALUE,
+}
+#[repr(C)]
+pub struct rb_cvar_class_tbl_entry {
+ pub index: u32,
+ pub global_cvar_state: rb_serial_t,
+ pub class_value: VALUE,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct rb_builtin_function {
+ pub func_ptr: *const ::std::os::raw::c_void,
+ pub argc: ::std::os::raw::c_int,
+ pub index: ::std::os::raw::c_int,
+ pub name: *const ::std::os::raw::c_char,
+ pub compiler: ::std::option::Option<
+ unsafe extern "C" fn(
+ arg1: *mut FILE,
+ arg2: ::std::os::raw::c_long,
+ arg3: ::std::os::raw::c_uint,
+ arg4: bool,
+ ),
+ >,
+}
+extern "C" {
+ pub fn rb_vm_insn_addr2opcode(addr: *const ::std::os::raw::c_void) -> ::std::os::raw::c_int;
+}
+pub type rb_iseq_each_i = ::std::option::Option<
+ unsafe extern "C" fn(
+ code: *mut VALUE,
+ insn: VALUE,
+ index: size_t,
+ data: *mut ::std::os::raw::c_void,
+ ) -> bool,
+>;
+extern "C" {
+ pub fn rb_iseq_each(
+ iseq: *const rb_iseq_t,
+ start_index: size_t,
+ iterator: rb_iseq_each_i,
+ data: *mut ::std::os::raw::c_void,
+ );
+}
+extern "C" {
+ pub fn rb_iseqw_to_iseq(iseqw: VALUE) -> *const rb_iseq_t;
+}
+extern "C" {
+ pub fn rb_vm_barrier();
+}
+extern "C" {
+ pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+}
+extern "C" {
+ pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
+}
+extern "C" {
+ pub fn rb_yjit_get_page_size() -> u32;
+}
+extern "C" {
+ pub fn rb_c_method_tracing_currently_enabled(ec: *mut rb_execution_context_t) -> bool;
+}
+extern "C" {
+ pub fn rb_full_cfunc_return(ec: *mut rb_execution_context_t, return_value: VALUE);
+}
+extern "C" {
+ pub fn rb_iseq_get_yjit_payload(iseq: *const rb_iseq_t) -> *mut ::std::os::raw::c_void;
+}
+extern "C" {
+ pub fn rb_iseq_set_yjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
+}
+extern "C" {
+ pub fn rb_iseq_reset_jit_func(iseq: *const rb_iseq_t);
+}
+extern "C" {
+ pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
+}
+extern "C" {
+ pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
+}
+pub type rb_seq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
+extern "C" {
+ pub fn rb_leaf_invokebuiltin_iseq_p(iseq: *const rb_iseq_t) -> bool;
+}
+extern "C" {
+ pub fn rb_leaf_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function;
+}
+extern "C" {
+ pub fn rb_set_cfp_pc(cfp: *mut rb_control_frame_struct, pc: *const VALUE);
+}
+extern "C" {
+ pub fn rb_set_cfp_sp(cfp: *mut rb_control_frame_struct, sp: *mut VALUE);
+}
+extern "C" {
+ pub fn rb_cfp_get_iseq(cfp: *mut rb_control_frame_struct) -> *mut rb_iseq_t;
+}
+extern "C" {
+ pub fn rb_yjit_dump_iseq_loc(iseq: *const rb_iseq_t, insn_idx: u32);
+}
+extern "C" {
+ pub fn rb_yjit_multi_ractor_p() -> bool;
+}
+extern "C" {
+ pub fn rb_assert_iseq_handle(handle: VALUE);
+}
+extern "C" {
+ pub fn rb_IMEMO_TYPE_P(imemo: VALUE, imemo_type: imemo_type) -> ::std::os::raw::c_int;
+}
+extern "C" {
+ pub fn rb_assert_cme_handle(handle: VALUE);
+}
+pub type iseq_callback = ::std::option::Option<unsafe extern "C" fn(arg1: *const rb_iseq_t)>;
+extern "C" {
+ pub fn rb_yjit_for_each_iseq(callback: iseq_callback);
+}
+extern "C" {
+ pub fn rb_yjit_obj_written(
+ old: VALUE,
+ young: VALUE,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+}
+extern "C" {
+ pub fn rb_yjit_vm_lock_then_barrier(
+ recursive_lock_level: *mut ::std::os::raw::c_uint,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+}
+extern "C" {
+ pub fn rb_yjit_vm_unlock(
+ recursive_lock_level: *mut ::std::os::raw::c_uint,
+ file: *const ::std::os::raw::c_char,
+ line: ::std::os::raw::c_int,
+ );
+}
diff --git a/yjit/src/disasm.rs b/yjit/src/disasm.rs
new file mode 100644
index 0000000000..97edc786bc
--- /dev/null
+++ b/yjit/src/disasm.rs
@@ -0,0 +1,218 @@
+use crate::asm::*;
+use crate::codegen::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::yjit::yjit_enabled_p;
+use std::fmt::Write;
+
+/// Primitive called in yjit.rb
+/// Produce a string representing the disassembly for an ISEQ
+#[no_mangle]
+pub extern "C" fn rb_yjit_disasm_iseq(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE {
+ #[cfg(not(feature = "disasm"))]
+ {
+ let _ = iseqw;
+ return Qnil;
+ }
+
+ #[cfg(feature = "disasm")]
+ {
+ // TODO:
+ //if unsafe { CLASS_OF(iseqw) != rb_cISeq } {
+ // return Qnil;
+ //}
+
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ // Get the iseq pointer from the wrapper
+ let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
+
+ let out_string = disasm_iseq(iseq);
+
+ return rust_str_to_ruby(&out_string);
+ }
+}
+
+#[cfg(feature = "disasm")]
+fn disasm_iseq(iseq: IseqPtr) -> String {
+ let mut out = String::from("");
+
+ // Get a list of block versions generated for this iseq
+ let mut block_list = get_iseq_block_list(iseq);
+
+    // Get the inline code block that holds the generated code for this iseq
+ let global_cb = CodegenGlobals::get_inline_cb();
+
+ // Sort the blocks by increasing start addresses
+ block_list.sort_by(|a, b| {
+ use std::cmp::Ordering;
+
+ // Get the start addresses for each block
+ let addr_a = a.borrow().get_start_addr().unwrap().raw_ptr();
+ let addr_b = b.borrow().get_start_addr().unwrap().raw_ptr();
+
+ if addr_a < addr_b {
+ Ordering::Less
+ } else if addr_a == addr_b {
+ Ordering::Equal
+ } else {
+ Ordering::Greater
+ }
+ });
+
+ // Compute total code size in bytes for all blocks in the function
+ let mut total_code_size = 0;
+ for blockref in &block_list {
+ total_code_size += blockref.borrow().code_size();
+ }
+
+ // Initialize capstone
+ extern crate capstone;
+ use capstone::prelude::*;
+ let cs = Capstone::new()
+ .x86()
+ .mode(arch::x86::ArchMode::Mode64)
+ .syntax(arch::x86::ArchSyntax::Intel)
+ .build()
+ .unwrap();
+
+ out.push_str(&format!("NUM BLOCK VERSIONS: {}\n", block_list.len()));
+ out.push_str(&format!(
+ "TOTAL INLINE CODE SIZE: {} bytes\n",
+ total_code_size
+ ));
+
+ // For each block, sorted by increasing start address
+ for block_idx in 0..block_list.len() {
+ let block = block_list[block_idx].borrow();
+ let blockid = block.get_blockid();
+ let end_idx = block.get_end_idx();
+ let start_addr = block.get_start_addr().unwrap().raw_ptr();
+ let end_addr = block.get_end_addr().unwrap().raw_ptr();
+ let code_size = block.code_size();
+
+ // Write some info about the current block
+ let block_ident = format!(
+ "BLOCK {}/{}, ISEQ RANGE [{},{}), {} bytes ",
+ block_idx + 1,
+ block_list.len(),
+ blockid.idx,
+ end_idx,
+ code_size
+ );
+ out.push_str(&format!("== {:=<60}\n", block_ident));
+
+ // Disassemble the instructions
+ let code_slice = unsafe { std::slice::from_raw_parts(start_addr, code_size) };
+ let insns = cs.disasm_all(code_slice, start_addr as u64).unwrap();
+
+ // For each instruction in this block
+ for insn in insns.as_ref() {
+ // Comments for this block
+ if let Some(comment_list) = global_cb.comments_at(insn.address() as usize) {
+ for comment in comment_list {
+ out.push_str(&format!(" \x1b[1m# {}\x1b[0m\n", comment));
+ }
+ }
+ out.push_str(&format!(" {}\n", insn));
+ }
+
+ // If this is not the last block
+ if block_idx < block_list.len() - 1 {
+ // Compute the size of the gap between this block and the next
+ let next_block = block_list[block_idx + 1].borrow();
+ let next_start_addr = next_block.get_start_addr().unwrap().raw_ptr();
+ let gap_size = (next_start_addr as usize) - (end_addr as usize);
+
+ // Log the size of the gap between the blocks if nonzero
+ if gap_size > 0 {
+ out.push_str(&format!("... {} byte gap ...\n", gap_size));
+ }
+ }
+ }
+
+ return out;
+}
+
+/// Primitive called in yjit.rb
+/// Produce a list of instructions compiled for an iseq
+#[no_mangle]
+pub extern "C" fn rb_yjit_insns_compiled(_ec: EcPtr, _ruby_self: VALUE, iseqw: VALUE) -> VALUE {
+ {
+ // TODO:
+ //if unsafe { CLASS_OF(iseqw) != rb_cISeq } {
+ // return Qnil;
+ //}
+
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ // Get the iseq pointer from the wrapper
+ let iseq = unsafe { rb_iseqw_to_iseq(iseqw) };
+
+ // Get the list of instructions compiled
+ let insn_vec = insns_compiled(iseq);
+
+ unsafe {
+ let insn_ary = rb_ary_new_capa((insn_vec.len() * 2) as i64);
+
+ // For each instruction compiled
+ for idx in 0..insn_vec.len() {
+ let op_name = &insn_vec[idx].0;
+ let insn_idx = insn_vec[idx].1;
+
+ let op_sym = rust_str_to_sym(&op_name);
+
+ // Store the instruction index and opcode symbol
+ rb_ary_store(
+ insn_ary,
+ (2 * idx + 0) as i64,
+ VALUE::fixnum_from_usize(insn_idx as usize),
+ );
+ rb_ary_store(insn_ary, (2 * idx + 1) as i64, op_sym);
+ }
+
+ insn_ary
+ }
+ }
+}
+
+fn insns_compiled(iseq: IseqPtr) -> Vec<(String, u32)> {
+ let mut insn_vec = Vec::new();
+
+ // Get a list of block versions generated for this iseq
+ let block_list = get_iseq_block_list(iseq);
+
+ // For each block associated with this iseq
+ for blockref in &block_list {
+ let block = blockref.borrow();
+ let start_idx = block.get_blockid().idx;
+ let end_idx = block.get_end_idx();
+ assert!(end_idx <= unsafe { get_iseq_encoded_size(iseq) });
+
+ // For each YARV instruction in the block
+ let mut insn_idx = start_idx;
+ while insn_idx < end_idx {
+ // Get the current pc and opcode
+ let pc = unsafe { rb_iseq_pc_at_idx(iseq, insn_idx) };
+ // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes.
+ let opcode: usize = unsafe { rb_iseq_opcode_at_pc(iseq, pc) }
+ .try_into()
+ .unwrap();
+
+ // Get the mnemonic for this opcode
+ let op_name = insn_name(opcode);
+
+ // Add the instruction to the list
+ insn_vec.push((op_name, insn_idx));
+
+ // Move to the next instruction
+ insn_idx += insn_len(opcode);
+ }
+ }
+
+ return insn_vec;
+}
diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs
new file mode 100644
index 0000000000..262121a488
--- /dev/null
+++ b/yjit/src/invariants.rs
@@ -0,0 +1,585 @@
+//! Code to track assumptions made during code generation and invalidate
+//! generated code if and when these assumptions are invalidated.
+
+use crate::asm::OutlinedCb;
+use crate::codegen::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::options::*;
+use crate::stats::*;
+use crate::utils::IntoUsize;
+use crate::yjit::yjit_enabled_p;
+
+use std::collections::{HashMap, HashSet};
+use std::mem;
+use std::os::raw::c_void;
+
+// Invariants to track:
+// assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)
+// assume_method_lookup_stable(comptime_recv_klass, cme, jit);
+// assume_single_ractor_mode(jit)
+// assume_stable_global_constant_state(jit);
+
+/// Used to track all of the various block references that contain assumptions
+/// about the state of the virtual machine.
+pub struct Invariants {
+ /// Tracks block assumptions about callable method entry validity.
+ cme_validity: HashMap<*const rb_callable_method_entry_t, HashSet<BlockRef>>,
+
+ /// Tracks block assumptions about method lookup. Maps a class to a table
+ /// from method ID to a set of blocks. While a block `b` is in the table,
+ /// b->callee_cme == rb_callable_method_entry(klass, mid).
+ method_lookup: HashMap<VALUE, HashMap<ID, HashSet<BlockRef>>>,
+
+ /// A map from a class and its associated basic operator to a set of blocks
+ /// that are assuming that the operator is not redefined. This is used for
+ /// quick access to all of the blocks that are making this assumption when
+ /// the operator is redefined.
+ basic_operator_blocks: HashMap<(RedefinitionFlag, ruby_basic_operators), HashSet<BlockRef>>,
+
+ /// A map from a block to a set of classes and their associated basic
+ /// operators that the block is assuming are not redefined. This is used for
+ /// quick access to all of the assumptions that a block is making when it
+ /// needs to be invalidated.
+ block_basic_operators: HashMap<BlockRef, HashSet<(RedefinitionFlag, ruby_basic_operators)>>,
+
+ /// Tracks the set of blocks that are assuming the interpreter is running
+ /// with only one ractor. This is important for things like accessing
+ /// constants which can have different semantics when multiple ractors are
+ /// running.
+ single_ractor: HashSet<BlockRef>,
+
+ /// A map from an ID to the set of blocks that are assuming a constant with
+ /// that ID as part of its name has not been redefined. For example, if
+ /// a constant `A::B` is redefined, then all blocks that are assuming that
+ /// `A` and `B` have not be redefined must be invalidated.
+ constant_state_blocks: HashMap<ID, HashSet<BlockRef>>,
+
+ /// A map from a block to a set of IDs that it is assuming have not been
+ /// redefined.
+ block_constant_states: HashMap<BlockRef, HashSet<ID>>,
+}
+
+/// Private singleton instance of the invariants global struct.
+static mut INVARIANTS: Option<Invariants> = None;
+
+impl Invariants {
+ pub fn init() {
+ // Wrapping this in unsafe to assign directly to a global.
+ unsafe {
+ INVARIANTS = Some(Invariants {
+ cme_validity: HashMap::new(),
+ method_lookup: HashMap::new(),
+ basic_operator_blocks: HashMap::new(),
+ block_basic_operators: HashMap::new(),
+ single_ractor: HashSet::new(),
+ constant_state_blocks: HashMap::new(),
+ block_constant_states: HashMap::new(),
+ });
+ }
+ }
+
+ /// Get a mutable reference to the global invariants instance
+ pub fn get_instance() -> &'static mut Invariants {
+ unsafe { INVARIANTS.as_mut().unwrap() }
+ }
+}
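+
+// Rough usage sketch (illustrative, not prescriptive): the singleton is set up
+// once during boot from rb_yjit_init_rust() in yjit.rs, before any of the
+// assume_* helpers below touch it.
+//
+//     Invariants::init();                    // once, while initializing YJIT
+//     let inv = Invariants::get_instance();  // afterwards, from codegen paths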
+
+/// A public function that can be called from within the code generation
+/// functions to ensure that the block being generated is invalidated when the
+/// basic operator is redefined.
+pub fn assume_bop_not_redefined(
+ jit: &mut JITState,
+ ocb: &mut OutlinedCb,
+ klass: RedefinitionFlag,
+ bop: ruby_basic_operators,
+) -> bool {
+ if unsafe { BASIC_OP_UNREDEFINED_P(bop, klass) } {
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ let invariants = Invariants::get_instance();
+ invariants
+ .basic_operator_blocks
+ .entry((klass, bop))
+ .or_insert(HashSet::new())
+ .insert(jit.get_block());
+ invariants
+ .block_basic_operators
+ .entry(jit.get_block())
+ .or_insert(HashSet::new())
+ .insert((klass, bop));
+
+ return true;
+ } else {
+ return false;
+ }
+}
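+
+// Rough usage sketch, mirroring the invariant list at the top of this file;
+// the real call sites live in codegen.rs and the CantCompile fallback named
+// here is only illustrative:
+//
+//     if !assume_bop_not_redefined(jit, ocb, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS) {
+//         return CantCompile; // fall back to the interpreter for opt_plus
+//     }
+//     // ...emit fixnum-specialized addition...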
+
+// Remember that a block assumes that
+// `rb_callable_method_entry(receiver_klass, cme->called_id) == cme` and that
+// `cme` is valid.
+// When either of these assumptions becomes invalid, rb_yjit_method_lookup_change() or
+// rb_yjit_cme_invalidate() invalidates the block.
+//
+// @raise NoMemoryError
+pub fn assume_method_lookup_stable(
+ jit: &mut JITState,
+ ocb: &mut OutlinedCb,
+ receiver_klass: VALUE,
+ callee_cme: *const rb_callable_method_entry_t,
+) {
+ // RUBY_ASSERT(rb_callable_method_entry(receiver_klass, cme->called_id) == cme);
+ // RUBY_ASSERT_ALWAYS(RB_TYPE_P(receiver_klass, T_CLASS) || RB_TYPE_P(receiver_klass, T_ICLASS));
+ // RUBY_ASSERT_ALWAYS(!rb_objspace_garbage_object_p(receiver_klass));
+
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ let block = jit.get_block();
+ block
+ .borrow_mut()
+ .add_cme_dependency(receiver_klass, callee_cme);
+
+ Invariants::get_instance()
+ .cme_validity
+ .entry(callee_cme)
+ .or_insert(HashSet::new())
+ .insert(block.clone());
+
+ let mid = unsafe { (*callee_cme).called_id };
+ Invariants::get_instance()
+ .method_lookup
+ .entry(receiver_klass)
+ .or_insert(HashMap::new())
+ .entry(mid)
+ .or_insert(HashSet::new())
+ .insert(block.clone());
+}
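+
+// Rough usage sketch, mirroring the invariant list at the top of this file;
+// a send-style codegen path would register its compile-time lookup result:
+//
+//     assume_method_lookup_stable(jit, ocb, comptime_recv_klass, cme);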
+
+/// Tracks that a block is assuming it is operating in single-ractor mode.
+#[must_use]
+pub fn assume_single_ractor_mode(jit: &mut JITState, ocb: &mut OutlinedCb) -> bool {
+ if unsafe { rb_yjit_multi_ractor_p() } {
+ false
+ } else {
+ jit_ensure_block_entry_exit(jit, ocb);
+ Invariants::get_instance()
+ .single_ractor
+ .insert(jit.get_block());
+ true
+ }
+}
+
+/// Walk through the ISEQ to go from the current opt_getinlinecache to the
+/// subsequent opt_setinlinecache and find all of the name components that are
+/// associated with this constant (which correspond to the getconstant
+/// arguments).
+pub fn assume_stable_constant_names(jit: &mut JITState, ocb: &mut OutlinedCb) {
+ /// Tracks that a block is assuming that the name component of a constant
+ /// has not changed since the last call to this function.
+ unsafe extern "C" fn assume_stable_constant_name(
+ code: *mut VALUE,
+ insn: VALUE,
+ index: u64,
+ data: *mut c_void,
+ ) -> bool {
+ if insn.as_usize() == OP_OPT_SETINLINECACHE {
+ return false;
+ }
+
+ if insn.as_usize() == OP_GETCONSTANT {
+ let jit = &mut *(data as *mut JITState);
+
+ // The first operand to GETCONSTANT is always the ID associated with
+ // the constant lookup. We are grabbing this out in order to
+ // associate this block with the stability of this constant name.
+ let id = code.add(index.as_usize() + 1).read().as_u64() as ID;
+
+ let invariants = Invariants::get_instance();
+ invariants
+ .constant_state_blocks
+ .entry(id)
+ .or_insert(HashSet::new())
+ .insert(jit.get_block());
+ invariants
+ .block_constant_states
+ .entry(jit.get_block())
+ .or_insert(HashSet::new())
+ .insert(id);
+ }
+
+ true
+ }
+
+ jit_ensure_block_entry_exit(jit, ocb);
+
+ unsafe {
+ let iseq = jit.get_iseq();
+ let encoded = get_iseq_body_iseq_encoded(iseq);
+ let start_index = jit.get_pc().offset_from(encoded);
+
+ rb_iseq_each(
+ iseq,
+ start_index.try_into().unwrap(),
+ Some(assume_stable_constant_name),
+ jit as *mut _ as *mut c_void,
+ );
+ };
+}
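+
+// Illustrative example of what gets recorded: for a lookup like `A::B`, the
+// interpreter emits getconstant for :A and then :B between opt_getinlinecache
+// and opt_setinlinecache, so the callback above registers the current block
+// under both IDs in constant_state_blocks and block_constant_states.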
+
+/// Called when a basic operator is redefined. Note that all the blocks assuming
+/// the stability of different operators are invalidated together and we don't
+/// do fine-grained tracking.
+#[no_mangle]
+pub extern "C" fn rb_yjit_bop_redefined(klass: RedefinitionFlag, bop: ruby_basic_operators) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ // Loop through the blocks that are associated with this class and basic
+ // operator and invalidate them.
+ Invariants::get_instance()
+ .basic_operator_blocks
+ .remove(&(klass, bop))
+ .map(|blocks| {
+ for block in blocks.iter() {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_bop_redefined);
+ }
+ });
+ });
+}
+
+/// Callback for when a cme becomes invalid. Invalidate all blocks that depend
+/// on the given cme being valid.
+#[no_mangle]
+pub extern "C" fn rb_yjit_cme_invalidate(callee_cme: *const rb_callable_method_entry_t) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ if let Some(blocks) = Invariants::get_instance().cme_validity.remove(&callee_cme) {
+ for block in blocks.iter() {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_method_lookup);
+ }
+ }
+ });
+}
+
+/// Callback for when rb_callable_method_entry(klass, mid) is going to change.
+/// Invalidate blocks that assume stable method lookup of `mid` in `klass` when this happens.
+/// This needs to be wrapped on the C side with RB_VM_LOCK_ENTER().
+#[no_mangle]
+pub extern "C" fn rb_yjit_method_lookup_change(klass: VALUE, mid: ID) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ Invariants::get_instance()
+ .method_lookup
+ .entry(klass)
+ .and_modify(|deps| {
+ if let Some(deps) = deps.remove(&mid) {
+ for block in &deps {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_method_lookup);
+ }
+ }
+ });
+ });
+}
+
+/// Callback for when Ruby is about to spawn a ractor. In that case we need to
+/// invalidate every block that is assuming single ractor mode.
+#[no_mangle]
+pub extern "C" fn rb_yjit_before_ractor_spawn() {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ // Clear the set of blocks inside Invariants
+ let blocks = mem::take(&mut Invariants::get_instance().single_ractor);
+
+ // Invalidate the blocks
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_ractor_spawn);
+ }
+ });
+}
+
+/// Callback for when the global constant state changes.
+#[no_mangle]
+pub extern "C" fn rb_yjit_constant_state_changed(id: ID) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ if get_option!(global_constant_state) {
+ // If the global-constant-state option is set, then we're going to
+ // invalidate every block that depends on any constant.
+
+ Invariants::get_instance()
+ .constant_state_blocks
+ .keys()
+ .for_each(|id| {
+ if let Some(blocks) =
+ Invariants::get_instance().constant_state_blocks.remove(&id)
+ {
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_constant_state_bump);
+ }
+ }
+ });
+ } else {
+ // If the global-constant-state option is not set, then we're only going
+ // to invalidate the blocks that are associated with the given ID.
+
+ if let Some(blocks) = Invariants::get_instance().constant_state_blocks.remove(&id) {
+ for block in &blocks {
+ invalidate_block_version(block);
+ incr_counter!(invalidate_constant_state_bump);
+ }
+ }
+ }
+ });
+}
+
+/// Callback for marking GC objects inside [Invariants].
+/// See `struct yjit_root_struct` in C.
+#[no_mangle]
+pub extern "C" fn rb_yjit_root_mark() {
+ // Comment from C YJIT:
+ //
+ // Why not let the GC move the cme keys in this table?
+ // Because this is basically a compare_by_identity Hash.
+ // If a key moves, we would need to reinsert it into the table so it is rehashed.
+ // That is tricky to do, especially as it could trigger allocation which could
+ // trigger GC. Not sure if it is okay to trigger GC while the GC is updating
+ // references.
+ //
+ // NOTE(alan): since we are using Rust data structures that don't interact
+ // with the Ruby GC now, it might be feasible to allow movement.
+
+ let invariants = Invariants::get_instance();
+
+ // Mark CME imemos
+ for cme in invariants.cme_validity.keys() {
+ let cme: VALUE = (*cme).into();
+
+ unsafe { rb_gc_mark(cme) };
+ }
+
+ // Mark class and iclass objects
+ for klass in invariants.method_lookup.keys() {
+ // TODO: This is a leak. Unused blocks linger in the table forever, preventing the
+ // callee class they speculate on from being collected.
+ // We could do a bespoke weak reference scheme on classes similar to
+ // the interpreter's call cache. See finalizer for T_CLASS and cc_table_free().
+
+ unsafe { rb_gc_mark(*klass) };
+ }
+}
+
+/// Remove all invariant assumptions made by the block by removing the block
+/// as a key in all of the relevant tables.
+pub fn block_assumptions_free(blockref: &BlockRef) {
+ let invariants = Invariants::get_instance();
+
+ {
+ let block = blockref.borrow();
+
+ // For each method lookup dependency
+ for dep in block.iter_cme_deps() {
+ // Remove tracking for cme validity
+ if let Some(blockset) = invariants.cme_validity.get_mut(&dep.callee_cme) {
+ blockset.remove(blockref);
+ }
+
+ // Remove tracking for lookup stability
+ if let Some(id_to_block_set) = invariants.method_lookup.get_mut(&dep.receiver_klass) {
+ let mid = unsafe { (*dep.callee_cme).called_id };
+ if let Some(block_set) = id_to_block_set.get_mut(&mid) {
+ block_set.remove(&blockref);
+ }
+ }
+ }
+ }
+
+ // Remove tracking for basic operators that the given block assumes have
+ // not been redefined.
+ if let Some(bops) = invariants.block_basic_operators.remove(&blockref) {
+ // Remove tracking for the given block from the list of blocks associated
+ // with the given basic operator.
+ for key in &bops {
+ if let Some(blocks) = invariants.basic_operator_blocks.get_mut(key) {
+ blocks.remove(&blockref);
+ }
+ }
+ }
+
+ invariants.single_ractor.remove(&blockref);
+
+ // Remove tracking for constant state for a given ID.
+ if let Some(ids) = invariants.block_constant_states.remove(&blockref) {
+ for id in ids {
+ if let Some(blocks) = invariants.constant_state_blocks.get_mut(&id) {
+ blocks.remove(&blockref);
+ }
+ }
+ }
+}
+
+/// Callback from the opt_setinlinecache instruction in the interpreter.
+/// Invalidate the block for the matching opt_getinlinecache so it can regenerate code
+/// using the new value in the constant cache.
+#[no_mangle]
+pub extern "C" fn rb_yjit_constant_ic_update(iseq: *const rb_iseq_t, ic: IC) {
+ // If YJIT isn't enabled, do nothing
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ if !unsafe { (*(*ic).entry).ic_cref }.is_null() || unsafe { rb_yjit_multi_ractor_p() } {
+ // We can't generate code in these situations, so no need to invalidate.
+ // See gen_opt_getinlinecache.
+ return;
+ }
+
+ with_vm_lock(src_loc!(), || {
+ let code = unsafe { get_iseq_body_iseq_encoded(iseq) };
+ let get_insn_idx = unsafe { (*ic).get_insn_idx };
+
+ // This should come from a running iseq, so direct threading translation
+ // should have been done
+ assert!(unsafe { FL_TEST(iseq.into(), VALUE(ISEQ_TRANSLATED)) } != VALUE(0));
+ assert!(get_insn_idx < unsafe { get_iseq_encoded_size(iseq) });
+
+ // Ensure that the instruction that get_insn_idx points to is in
+ // fact an opt_getinlinecache instruction.
+ assert_eq!(
+ unsafe {
+ let opcode_pc = code.add(get_insn_idx.as_usize());
+ let translated_opcode: VALUE = opcode_pc.read();
+ rb_vm_insn_decode(translated_opcode)
+ },
+ OP_OPT_GETINLINECACHE.try_into().unwrap()
+ );
+
+ // Find the matching opt_getinlinecache and invalidate all the blocks there
+ // RUBY_ASSERT(insn_op_type(BIN(opt_getinlinecache), 1) == TS_IC);
+
+ let ic_pc = unsafe { code.add(get_insn_idx.as_usize() + 2) };
+ let ic_operand: IC = unsafe { ic_pc.read() }.as_mut_ptr();
+
+ if ic == ic_operand {
+ for block in take_version_list(BlockId {
+ iseq,
+ idx: get_insn_idx,
+ }) {
+ invalidate_block_version(&block);
+ incr_counter!(invalidate_constant_ic_fill);
+ }
+ } else {
+ panic!("ic->get_insn_index not set properly");
+ }
+ });
+}
+
+// Invalidate all generated code and patch C method return code to contain
+// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
+// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
+// means they are inside a C routine. If there is any generated code on-stack,
+// it is waiting for a return from a C routine. For every routine call, we
+// patch in an exit after the body of the containing VM instruction. This makes
+// all the invalidated code exit as soon as execution logically reaches
+// the next VM instruction. The interpreter takes care of firing the tracing
+// event if it so happens that the next VM instruction has one attached.
+//
+// The c_return event needs special handling as our codegen never outputs code
+// that contains tracing logic. If we let the normal output code run until the
+// start of the next VM instruction by relying on the patching scheme above, we
+// would fail to fire the c_return event. The interpreter doesn't fire the
+// event at an instruction boundary, so simply exiting to the interpreter isn't
+// enough. To handle it, we patch in the full logic at the return address. See
+// full_cfunc_return().
+//
+// In addition to patching, we prevent future entries into invalidated code by
+// removing all live blocks from their iseq.
+#[no_mangle]
+pub extern "C" fn rb_yjit_tracing_invalidate_all() {
+ if !yjit_enabled_p() {
+ return;
+ }
+
+ use crate::asm::x86_64::jmp_ptr;
+
+ // Stop other ractors since we are going to patch machine code.
+ with_vm_lock(src_loc!(), || {
+ // Make it so all live block versions are no longer valid branch targets
+ unsafe { rb_yjit_for_each_iseq(Some(invalidate_all_blocks_for_tracing)) };
+
+ extern "C" fn invalidate_all_blocks_for_tracing(iseq: IseqPtr) {
+ if let Some(payload) = unsafe { load_iseq_payload(iseq) } {
+ // C comment:
+ // Leaking the blocks for now since we might have situations where
+ // a different ractor is waiting for the VM lock in branch_stub_hit().
+ // If we free the block, that ractor can wake up with a dangling block.
+ //
+ // Deviation: since we ref count the blocks now, we might deallocate instead
+ // of leaking the block.
+ //
+ // Empty all blocks on the iseq so we don't compile new blocks that jump to the
+ // invalidated region.
+ let blocks = payload.take_all_blocks();
+ for blockref in blocks {
+ block_assumptions_free(&blockref);
+ }
+ }
+
+ // Reset output code entry point
+ unsafe { rb_iseq_reset_jit_func(iseq) };
+ }
+
+ let cb = CodegenGlobals::get_inline_cb();
+
+ // Apply patches
+ let old_pos = cb.get_write_pos();
+ let patches = CodegenGlobals::take_global_inval_patches();
+ for patch in &patches {
+ cb.set_write_ptr(patch.inline_patch_pos);
+ jmp_ptr(cb, patch.outlined_target_pos);
+
+ // FIXME: Can't easily check we actually wrote out the JMP at the moment.
+ // assert!(!cb.has_dropped_bytes(), "patches should have space and jump offsets should fit in JMP rel32");
+ }
+ cb.set_pos(old_pos);
+
+ // Freeze invalidated part of the codepage. We only want to wait for
+ // running instances of the code to exit from now on, so we shouldn't
+ // change the code. There could be other ractors sleeping in
+ // branch_stub_hit(), for example. We could harden this by changing memory
+ // protection on the frozen range.
+ assert!(
+ CodegenGlobals::get_inline_frozen_bytes() <= old_pos,
+ "frozen bytes should increase monotonically"
+ );
+ CodegenGlobals::set_inline_frozen_bytes(old_pos);
+
+ CodegenGlobals::get_outlined_cb()
+ .unwrap()
+ .mark_all_executable();
+ cb.mark_all_executable();
+ });
+}
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
new file mode 100644
index 0000000000..b7355f55e3
--- /dev/null
+++ b/yjit/src/lib.rs
@@ -0,0 +1,17 @@
+// Silence dead code warnings until we are done porting YJIT
+#![allow(unused_imports)]
+#![allow(dead_code)]
+#![allow(unused_assignments)]
+#![allow(unused_macros)]
+#![allow(clippy::style)] // We are laid back about style
+
+mod asm;
+mod codegen;
+mod core;
+mod cruby;
+mod disasm;
+mod invariants;
+mod options;
+mod stats;
+mod utils;
+mod yjit;
diff --git a/yjit/src/options.rs b/yjit/src/options.rs
new file mode 100644
index 0000000000..669ac52dbd
--- /dev/null
+++ b/yjit/src/options.rs
@@ -0,0 +1,121 @@
+use std::ffi::CStr;
+
+// Command-line options
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[repr(C)]
+pub struct Options {
+ // Size of the executable memory block to allocate in MiB
+ pub exec_mem_size: usize,
+
+ // Number of method calls after which to start generating code
+ // Threshold==1 means compile on first execution
+ pub call_threshold: usize,
+
+ // Generate versions greedily until the limit is hit
+ pub greedy_versioning: bool,
+
+ // Disable the propagation of type information
+ pub no_type_prop: bool,
+
+ // Maximum number of versions per block
+ // 1 means always create generic versions
+ pub max_versions: usize,
+
+ // Capture and print out stats
+ pub gen_stats: bool,
+
+ /// Dump compiled and executed instructions for debugging
+ pub dump_insns: bool,
+
+ /// Verify context objects (debug mode only)
+ pub verify_ctx: bool,
+
+ /// Whether or not to assume a global constant state (invalidating code
+ /// whenever any constant changes) versus tracking constant name components
+ /// (invalidating code only when a matching name component changes)
+ pub global_constant_state: bool,
+}
+
+// Initialize the options to default values
+pub static mut OPTIONS: Options = Options {
+ exec_mem_size: 256,
+ call_threshold: 10,
+ greedy_versioning: false,
+ no_type_prop: false,
+ max_versions: 4,
+ gen_stats: false,
+ dump_insns: false,
+ verify_ctx: false,
+ global_constant_state: false,
+};
+
+/// Macro to get an option value by name
+macro_rules! get_option {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($option_name:ident) => {
+ unsafe { OPTIONS.$option_name }
+ };
+}
+pub(crate) use get_option;
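+
+// Usage sketch: the macro expands to an unsafe read of the matching field on
+// the global OPTIONS struct above, e.g.
+//
+//     if get_option!(dump_insns) {
+//         // print compiled instructions for debugging
+//     }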
+
+/// Expected to receive what comes after the third dash in "--yjit-*".
+/// An empty string means the user passed only "--yjit". The C code rejects
+/// the exact string "--yjit-".
+pub fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> {
+ let c_str: &CStr = unsafe { CStr::from_ptr(str_ptr) };
+ let opt_str: &str = c_str.to_str().ok()?;
+ //println!("{}", opt_str);
+
+ // Split the option name and value strings
+ // Note that some options do not contain an assignment
+ let parts = opt_str.split_once("=");
+ let (opt_name, opt_val) = match parts {
+ Some((before_eq, after_eq)) => (before_eq, after_eq),
+ None => (opt_str, ""),
+ };
+
+ // Match on the option name and value strings
+ match (opt_name, opt_val) {
+ ("", "") => (), // Simply --yjit
+
+ ("exec-mem-size", _) => match opt_val.parse() {
+ Ok(n) => unsafe { OPTIONS.exec_mem_size = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("call-threshold", _) => match opt_val.parse() {
+ Ok(n) => unsafe { OPTIONS.call_threshold = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("max-versions", _) => match opt_val.parse() {
+ Ok(n) => unsafe { OPTIONS.max_versions = n },
+ Err(_) => {
+ return None;
+ }
+ },
+
+ ("greedy-versioning", "") => unsafe { OPTIONS.greedy_versioning = true },
+ ("no-type-prop", "") => unsafe { OPTIONS.no_type_prop = true },
+ ("stats", "") => unsafe { OPTIONS.gen_stats = true },
+ ("dump-insns", "") => unsafe { OPTIONS.dump_insns = true },
+ ("verify-ctx", "") => unsafe { OPTIONS.verify_ctx = true },
+ ("global-constant-state", "") => unsafe { OPTIONS.global_constant_state = true },
+
+ // Option name not recognized
+ _ => {
+ return None;
+ }
+ }
+
+ // dbg!(unsafe {OPTIONS});
+
+ // Option successfully parsed
+ return Some(());
+}
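+
+// Usage sketch (hypothetical values): given a NUL-terminated C string, the
+// function mutates OPTIONS and returns Some(()) on success, None for options
+// it does not recognize.
+//
+//     // "--yjit-call-threshold=5" reaches this function as "call-threshold=5"
+//     let arg = std::ffi::CString::new("call-threshold=5").unwrap();
+//     assert!(parse_option(arg.as_ptr()).is_some());
+//     assert_eq!(get_option!(call_threshold), 5);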
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
new file mode 100644
index 0000000000..5e42e4d6f0
--- /dev/null
+++ b/yjit/src/stats.rs
@@ -0,0 +1,271 @@
+//! Everything related to the collection of runtime stats in YJIT
+//! See the stats feature and the --yjit-stats command-line option
+
+use crate::codegen::CodegenGlobals;
+use crate::cruby::*;
+use crate::options::*;
+use crate::yjit::yjit_enabled_p;
+
+// YJIT exit counts for each instruction type
+static mut EXIT_OP_COUNT: [u64; VM_INSTRUCTION_SIZE] = [0; VM_INSTRUCTION_SIZE];
+
+// Macro to declare the stat counters
+macro_rules! make_counters {
+ ($($counter_name:ident,)+) => {
+ // Struct containing the counter values
+ #[derive(Default, Debug)]
+ pub struct Counters { $(pub $counter_name: u64),+ }
+
+ // Global counters instance, initialized to zero
+ pub static mut COUNTERS: Counters = Counters { $($counter_name: 0),+ };
+
+ // Counter names constant
+ const COUNTER_NAMES: &'static [&'static str] = &[ $(stringify!($counter_name)),+ ];
+
+ // Map a counter name string to a counter pointer
+ fn get_counter_ptr(name: &str) -> *mut u64 {
+ match name {
+ $( stringify!($counter_name) => { ptr_to_counter!($counter_name) } ),+
+ _ => panic!()
+ }
+ }
+ }
+}
+
+/// Macro to increment a counter by name
+macro_rules! incr_counter {
+ // Unsafe is ok here because options are initialized
+ // once before any Ruby code executes
+ ($counter_name:ident) => {
+ #[allow(unused_unsafe)]
+ {
+ unsafe { COUNTERS.$counter_name += 1 }
+ }
+ };
+}
+pub(crate) use incr_counter;
+
+/// Macro to get a raw pointer to a given counter
+macro_rules! ptr_to_counter {
+ ($counter_name:ident) => {
+ unsafe {
+ let ctr_ptr = std::ptr::addr_of_mut!(COUNTERS.$counter_name);
+ ctr_ptr
+ }
+ };
+}
+pub(crate) use ptr_to_counter;
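+
+// Usage sketch: make_counters! below generates the Counters struct, the global
+// COUNTERS instance, COUNTER_NAMES, and get_counter_ptr(); individual counters
+// are then bumped or inspected by name:
+//
+//     incr_counter!(binding_allocations);
+//     let ctr: *mut u64 = ptr_to_counter!(binding_allocations);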
+
+// Declare all the counters we track
+make_counters! {
+ exec_instruction,
+
+ send_keywords,
+ send_kw_splat,
+ send_args_splat,
+ send_block_arg,
+ send_ivar_set_method,
+ send_zsuper_method,
+ send_undef_method,
+ send_optimized_method,
+ send_optimized_method_send,
+ send_optimized_method_call,
+ send_optimized_method_block_call,
+ send_missing_method,
+ send_bmethod,
+ send_refined_method,
+ send_cfunc_ruby_array_varg,
+ send_cfunc_argc_mismatch,
+ send_cfunc_toomany_args,
+ send_cfunc_tracing,
+ send_cfunc_kwargs,
+ send_attrset_kwargs,
+ send_iseq_tailcall,
+ send_iseq_arity_error,
+ send_iseq_only_keywords,
+ send_iseq_kwargs_req_and_opt_missing,
+ send_iseq_kwargs_mismatch,
+ send_iseq_complex_callee,
+ send_not_implemented_method,
+ send_getter_arity,
+ send_se_cf_overflow,
+ send_se_protected_check_failed,
+
+ traced_cfunc_return,
+
+ invokesuper_me_changed,
+ invokesuper_block,
+
+ leave_se_interrupt,
+ leave_interp_return,
+ leave_start_pc_non_zero,
+
+ getivar_se_self_not_heap,
+ getivar_idx_out_of_range,
+ getivar_megamorphic,
+
+ setivar_se_self_not_heap,
+ setivar_idx_out_of_range,
+ setivar_val_heapobject,
+ setivar_name_not_mapped,
+ setivar_not_object,
+ setivar_frozen,
+
+ oaref_argc_not_one,
+ oaref_arg_not_fixnum,
+
+ opt_getinlinecache_miss,
+
+ binding_allocations,
+ binding_set,
+
+ vm_insns_count,
+ compiled_iseq_count,
+ compiled_block_count,
+ compilation_failure,
+
+ exit_from_branch_stub,
+
+ invalidation_count,
+ invalidate_method_lookup,
+ invalidate_bop_redefined,
+ invalidate_ractor_spawn,
+ invalidate_constant_state_bump,
+ invalidate_constant_ic_fill,
+
+ constant_state_bumps,
+
+ expandarray_splat,
+ expandarray_postarg,
+ expandarray_not_array,
+ expandarray_rhs_too_small,
+
+ gbpp_block_param_modified,
+ gbpp_block_handler_not_iseq,
+}
+
+//===========================================================================
+
+/// Primitive called in yjit.rb
+/// Check if stats generation is enabled
+#[no_mangle]
+pub extern "C" fn rb_yjit_stats_enabled_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ #[cfg(feature = "stats")]
+ if get_option!(gen_stats) {
+ return Qtrue;
+ }
+
+ return Qfalse;
+}
+
+/// Primitive called in yjit.rb.
+/// Export all YJIT statistics as a Ruby hash.
+#[no_mangle]
+pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict())
+}
+
+/// Export all YJIT statistics as a Ruby hash.
+fn rb_yjit_gen_stats_dict() -> VALUE {
+ // If YJIT is not enabled, return Qnil
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ let hash = unsafe { rb_hash_new() };
+
+ // Inline and outlined code size
+ unsafe {
+ // Get the inline and outlined code blocks
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb();
+
+ // Inline code size
+ let key = rust_str_to_sym("inline_code_size");
+ let value = VALUE::fixnum_from_usize(cb.get_write_pos());
+ rb_hash_aset(hash, key, value);
+
+ // Outlined code size
+ let key = rust_str_to_sym("outlined_code_size");
+ let value = VALUE::fixnum_from_usize(ocb.unwrap().get_write_pos());
+ rb_hash_aset(hash, key, value);
+ }
+
+ // If we're not generating stats, the hash is done
+ if !get_option!(gen_stats) {
+ return hash;
+ }
+
+ // If the stats feature is enabled
+ #[cfg(feature = "stats")]
+ unsafe {
+ // Indicate that the complete set of stats is available
+ rb_hash_aset(hash, rust_str_to_sym("all_stats"), Qtrue);
+
+ // For each counter we track
+ for counter_name in COUNTER_NAMES {
+ // Get the counter value
+ let counter_ptr = get_counter_ptr(counter_name);
+ let counter_val = *counter_ptr;
+
+ // Put counter into hash
+ let key = rust_str_to_sym(counter_name);
+ let value = VALUE::fixnum_from_usize(counter_val as usize);
+ rb_hash_aset(hash, key, value);
+ }
+
+ // For each entry in EXIT_OP_COUNT, add a stats entry with key "exit_INSTRUCTION_NAME"
+ // whose value is the count of side exits for that instruction.
+ for op_idx in 0..VM_INSTRUCTION_SIZE {
+ let op_name = insn_name(op_idx);
+ let key_string = "exit_".to_owned() + &op_name;
+ let key = rust_str_to_sym(&key_string);
+ let value = VALUE::fixnum_from_usize(EXIT_OP_COUNT[op_idx] as usize);
+ rb_hash_aset(hash, key, value);
+ }
+ }
+
+ hash
+}
+
+/// Primitive called in yjit.rb. Zero out all the counters.
+#[no_mangle]
+pub extern "C" fn rb_yjit_reset_stats_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ unsafe {
+ EXIT_OP_COUNT = [0; VM_INSTRUCTION_SIZE];
+ COUNTERS = Counters::default();
+ }
+
+ return Qnil;
+}
+
+/// Increment the number of instructions executed by the interpreter
+#[no_mangle]
+pub extern "C" fn rb_yjit_collect_vm_usage_insn() {
+ incr_counter!(vm_insns_count);
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_collect_binding_alloc() {
+ incr_counter!(binding_allocations);
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_collect_binding_set() {
+ incr_counter!(binding_set);
+}
+
+#[no_mangle]
+pub extern "C" fn rb_yjit_count_side_exit_op(exit_pc: *const VALUE) -> *const VALUE {
+ #[cfg(not(test))]
+ unsafe {
+ // Get the opcode from the encoded insn handler at this PC
+ let opcode = rb_vm_insn_addr2opcode((*exit_pc).as_ptr());
+
+ // Increment the exit op count for this opcode
+ EXIT_OP_COUNT[opcode as usize] += 1;
+ };
+
+ // This function must return exit_pc!
+ return exit_pc;
+}
diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs
new file mode 100644
index 0000000000..227e3e5f32
--- /dev/null
+++ b/yjit/src/utils.rs
@@ -0,0 +1,205 @@
+use crate::asm::x86_64::*;
+use crate::asm::*;
+use crate::cruby::*;
+use std::slice;
+
+/// Trait for casting to [usize] that allows you to say `.as_usize()`.
+/// Implementation conditional on the the cast preserving the numeric value on
+/// all inputs and being inexpensive.
+///
+/// [usize] is only guaranteed to be more than 16-bit wide, so we can't use
+/// `.into()` to cast an `u32` or an `u64` to a `usize` even though in all
+/// the platforms YJIT supports these two casts are pretty much no-ops.
+/// We could say `as usize` or `.try_convert().unwrap()` everywhere
+/// for those casts but they both have undesirable consequences if and when
+/// we decide to support 32-bit platforms. Unfortunately we can't implement
+/// [::core::convert::From] for [usize] since both the trait and the type are
+/// external. Naming the method `into()` also runs into naming conflicts.
+pub(crate) trait IntoUsize {
+ /// Convert to usize. Implementation conditional on width of [usize].
+ fn as_usize(self) -> usize;
+}
+
+#[cfg(target_pointer_width = "64")]
+impl IntoUsize for u64 {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+#[cfg(target_pointer_width = "64")]
+impl IntoUsize for u32 {
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+}
+
+impl IntoUsize for u16 {
+ /// Alias for `.into()`. For convenience so you can use the trait for
+ /// all unsigned types.
+ fn as_usize(self) -> usize {
+ self.into()
+ }
+}
+
+impl IntoUsize for u8 {
+ /// Alias for `.into()`. For convenience so you can use the trait for
+ /// all unsigned types.
+ fn as_usize(self) -> usize {
+ self.into()
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #[test]
+ fn min_max_preserved_after_cast_to_usize() {
+ use crate::utils::IntoUsize;
+
+ let min: usize = u64::MIN.as_usize();
+ assert_eq!(min, u64::MIN.try_into().unwrap());
+ let max: usize = u64::MAX.as_usize();
+ assert_eq!(max, u64::MAX.try_into().unwrap());
+
+ let min: usize = u32::MIN.as_usize();
+ assert_eq!(min, u32::MIN.try_into().unwrap());
+ let max: usize = u32::MAX.as_usize();
+ assert_eq!(max, u32::MAX.try_into().unwrap());
+ }
+}
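+
+// Usage sketch: call sites convert C-sized indices without lossy `as` casts,
+// e.g. when indexing with a u32 instruction index:
+//
+//     let insn_idx: u32 = 3;
+//     let offset: usize = insn_idx.as_usize();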
+
+// TODO: we may want to move this function into yjit.c, maybe add a convenient Rust-side wrapper
+/*
+// For debugging. Print the bytecode for an iseq.
+RBIMPL_ATTR_MAYBE_UNUSED()
+static void
+yjit_print_iseq(const rb_iseq_t *iseq)
+{
+ char *ptr;
+ long len;
+ VALUE disassembly = rb_iseq_disasm(iseq);
+ RSTRING_GETMEM(disassembly, ptr, len);
+ fprintf(stderr, "%.*s\n", (int)len, ptr);
+}
+*/
+
+// Save caller-save registers on the stack before a C call
+fn push_regs(cb: &mut CodeBlock) {
+ push(cb, RAX);
+ push(cb, RCX);
+ push(cb, RDX);
+ push(cb, RSI);
+ push(cb, RDI);
+ push(cb, R8);
+ push(cb, R9);
+ push(cb, R10);
+ push(cb, R11);
+ pushfq(cb);
+}
+
+// Restore caller-save registers from the stack after a C call
+fn pop_regs(cb: &mut CodeBlock) {
+ popfq(cb);
+ pop(cb, R11);
+ pop(cb, R10);
+ pop(cb, R9);
+ pop(cb, R8);
+ pop(cb, RDI);
+ pop(cb, RSI);
+ pop(cb, RDX);
+ pop(cb, RCX);
+ pop(cb, RAX);
+}
+
+pub fn print_int(cb: &mut CodeBlock, opnd: X86Opnd) {
+ extern "sysv64" fn print_int_fn(val: i64) {
+ println!("{}", val);
+ }
+
+ push_regs(cb);
+
+ match opnd {
+ X86Opnd::Mem(_) | X86Opnd::Reg(_) => {
+ // Sign-extend the value if necessary
+ if opnd.num_bits() < 64 {
+ movsx(cb, C_ARG_REGS[0], opnd);
+ } else {
+ mov(cb, C_ARG_REGS[0], opnd);
+ }
+ }
+ X86Opnd::Imm(_) | X86Opnd::UImm(_) => {
+ mov(cb, C_ARG_REGS[0], opnd);
+ }
+ _ => unreachable!(),
+ }
+
+ mov(cb, RAX, const_ptr_opnd(print_int_fn as *const u8));
+ call(cb, RAX);
+ pop_regs(cb);
+}
+
+/// Generate code to print a pointer
+pub fn print_ptr(cb: &mut CodeBlock, opnd: X86Opnd) {
+ extern "sysv64" fn print_ptr_fn(ptr: *const u8) {
+ println!("{:p}", ptr);
+ }
+
+ assert!(opnd.num_bits() == 64);
+
+ push_regs(cb);
+ mov(cb, C_ARG_REGS[0], opnd);
+ mov(cb, RAX, const_ptr_opnd(print_ptr_fn as *const u8));
+ call(cb, RAX);
+ pop_regs(cb);
+}
+
+/// Generate code to print a value
+pub fn print_value(cb: &mut CodeBlock, opnd: X86Opnd) {
+ extern "sysv64" fn print_value_fn(val: VALUE) {
+ unsafe { rb_obj_info_dump(val) }
+ }
+
+ assert!(opnd.num_bits() == 64);
+
+ push_regs(cb);
+
+ mov(cb, RDI, opnd);
+ mov(cb, RAX, const_ptr_opnd(print_value_fn as *const u8));
+ call(cb, RAX);
+
+ pop_regs(cb);
+}
+
+// Generate code to print a constant string to stdout
+pub fn print_str(cb: &mut CodeBlock, str: &str) {
+ extern "sysv64" fn print_str_cfun(ptr: *const u8, num_bytes: usize) {
+ unsafe {
+ let slice = slice::from_raw_parts(ptr, num_bytes);
+ let str = std::str::from_utf8(slice).unwrap();
+ println!("{}", str);
+ }
+ }
+
+ let bytes = str.as_ptr();
+ let num_bytes = str.len();
+
+ push_regs(cb);
+
+ // Load the string address and jump over the string data
+ lea(cb, C_ARG_REGS[0], mem_opnd(8, RIP, 5));
+ jmp32(cb, num_bytes as i32);
+
+ // Write the string chars and a null terminator
+ for i in 0..num_bytes {
+ cb.write_byte(unsafe { *bytes.add(i) });
+ }
+
+ // Pass the string length as an argument
+ mov(cb, C_ARG_REGS[1], uimm_opnd(num_bytes as u64));
+
+ // Call the print function
+ mov(cb, RAX, const_ptr_opnd(print_str_cfun as *const u8));
+ call(cb, RAX);
+
+ pop_regs(cb);
+}
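+
+// Usage sketch (receiver_opnd is an illustrative name for any 64-bit X86Opnd
+// holding a VALUE): these printers are meant for ad-hoc debugging of generated
+// code while building a code block.
+//
+//     print_str(cb, "dumping receiver:");
+//     print_value(cb, receiver_opnd);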
diff --git a/yjit/src/yjit.rs b/yjit/src/yjit.rs
new file mode 100644
index 0000000000..24a6b426bf
--- /dev/null
+++ b/yjit/src/yjit.rs
@@ -0,0 +1,98 @@
+use crate::codegen::*;
+use crate::core::*;
+use crate::cruby::*;
+use crate::invariants::*;
+use crate::options::*;
+
+use std::os::raw;
+use std::sync::atomic::{AtomicBool, Ordering};
+
+/// For tracking whether the user enabled YJIT through command line arguments or environment
+/// variables. AtomicBool to avoid `unsafe`. On x86 it compiles to simple movs.
+/// See <https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html>
+/// See [rb_yjit_enabled_p]
+static YJIT_ENABLED: AtomicBool = AtomicBool::new(false);
+
+/// Parse one command-line option.
+/// This is called from ruby.c
+#[no_mangle]
+pub extern "C" fn rb_yjit_parse_option(str_ptr: *const raw::c_char) -> bool {
+ return parse_option(str_ptr).is_some();
+}
+
+/// Is YJIT on? The interpreter uses this function to decide whether to increment
+/// ISEQ call counters. See mjit_exec().
+/// This is used frequently since it's used on every method call in the interpreter.
+#[no_mangle]
+pub extern "C" fn rb_yjit_enabled_p() -> raw::c_int {
+ // Note that we might want to call this function from signal handlers so
+ // might need to ensure signal-safety(7).
+ YJIT_ENABLED.load(Ordering::Acquire).into()
+}
+
+/// Like rb_yjit_enabled_p, but for Rust code.
+pub fn yjit_enabled_p() -> bool {
+ YJIT_ENABLED.load(Ordering::Acquire)
+}
+
+/// After how many calls YJIT starts compiling a method
+#[no_mangle]
+pub extern "C" fn rb_yjit_call_threshold() -> raw::c_uint {
+ get_option!(call_threshold) as raw::c_uint
+}
+
+/// This function is called from C code
+#[no_mangle]
+pub extern "C" fn rb_yjit_init_rust() {
+ // TODO: need to make sure that command-line options have been
+ // initialized by CRuby
+
+ // Catch panics to avoid UB for unwinding into C frames.
+ // See https://doc.rust-lang.org/nomicon/exception-safety.html
+ // TODO: set a panic handler so that we don't print a message
+ // every time we panic.
+ let result = std::panic::catch_unwind(|| {
+ Invariants::init();
+ CodegenGlobals::init();
+
+ // YJIT enabled and initialized successfully
+ YJIT_ENABLED.store(true, Ordering::Release);
+ });
+
+ if let Err(_) = result {
+ println!("YJIT: rb_yjit_init_rust() panicked. Aborting.");
+ std::process::abort();
+ }
+}
+
+/// Called from C code to begin compiling a function
+/// NOTE: this should be wrapped in RB_VM_LOCK_ENTER(), rb_vm_barrier() on the C side
+#[no_mangle]
+pub extern "C" fn rb_yjit_iseq_gen_entry_point(iseq: IseqPtr, ec: EcPtr) -> *const u8 {
+ let maybe_code_ptr = gen_entry_point(iseq, ec);
+
+ match maybe_code_ptr {
+ Some(ptr) => ptr.raw_ptr(),
+ None => std::ptr::null(),
+ }
+}
+
+/// Simulate a situation where we are out of executable memory
+#[no_mangle]
+pub extern "C" fn rb_yjit_simulate_oom_bang(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ // If YJIT is not enabled, do nothing
+ if !yjit_enabled_p() {
+ return Qnil;
+ }
+
+ // Enabled in debug mode only for security
+ #[cfg(debug_assertions)]
+ {
+ let cb = CodegenGlobals::get_inline_cb();
+ let ocb = CodegenGlobals::get_outlined_cb().unwrap();
+ cb.set_pos(cb.get_mem_size() - 1);
+ ocb.set_pos(ocb.get_mem_size() - 1);
+ }
+
+ return Qnil;
+}
diff --git a/yjit_asm.c b/yjit_asm.c
deleted file mode 100644
index 4cc3a538fa..0000000000
--- a/yjit_asm.c
+++ /dev/null
@@ -1,1834 +0,0 @@
-// This file is a fragment of the yjit.o compilation unit. See yjit.c.
-//
-// Note that the definition for some of these functions don't specify
-// static inline, but their declaration in yjit_asm.h do. The resulting
-// linkage is the same as if they both specify. The relevant sections in
-// N1256 is 6.2.2p4, 6.2.2p5, and 6.7.4p5.
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <stdint.h>
-#include <assert.h>
-#include <errno.h>
-
-// For mmapp(), sysconf()
-#ifndef _WIN32
-#include <unistd.h>
-#include <sys/mman.h>
-#endif
-
-#include "yjit_asm.h"
-
-// Compute the number of bits needed to encode a signed value
-uint32_t sig_imm_size(int64_t imm)
-{
- // Compute the smallest size this immediate fits in
- if (imm >= INT8_MIN && imm <= INT8_MAX)
- return 8;
- if (imm >= INT16_MIN && imm <= INT16_MAX)
- return 16;
- if (imm >= INT32_MIN && imm <= INT32_MAX)
- return 32;
-
- return 64;
-}
-
-// Compute the number of bits needed to encode an unsigned value
-uint32_t unsig_imm_size(uint64_t imm)
-{
- // Compute the smallest size this immediate fits in
- if (imm <= UINT8_MAX)
- return 8;
- else if (imm <= UINT16_MAX)
- return 16;
- else if (imm <= UINT32_MAX)
- return 32;
-
- return 64;
-}
-
-x86opnd_t mem_opnd(uint32_t num_bits, x86opnd_t base_reg, int32_t disp)
-{
- bool is_iprel = base_reg.as.reg.reg_type == REG_IP;
-
- x86opnd_t opnd = {
- OPND_MEM,
- num_bits,
- .as.mem = { base_reg.as.reg.reg_no, 0, 0, false, is_iprel, disp }
- };
-
- return opnd;
-}
-
-x86opnd_t mem_opnd_sib(uint32_t num_bits, x86opnd_t base_reg, x86opnd_t index_reg, int32_t scale, int32_t disp)
-{
- uint8_t scale_exp;
- switch (scale) {
- case 8:
- scale_exp = 3;
- break;
- case 4:
- scale_exp = 2;
- break;
- case 2:
- scale_exp = 1;
- break;
- case 1:
- scale_exp = 0;
- break;
- default:
- rb_bug("yjit: scale not one of 1,2,4,8");
- break;
- }
-
- bool is_iprel = base_reg.as.reg.reg_type == REG_IP;
-
- x86opnd_t opnd = {
- OPND_MEM,
- num_bits,
- .as.mem = {
- .base_reg_no = base_reg.as.reg.reg_no,
- .idx_reg_no = index_reg.as.reg.reg_no,
- .has_idx = 1,
- .scale_exp = scale_exp,
- .is_iprel = is_iprel,
- .disp = disp
- }
- };
-
- return opnd;
-}
-
-static x86opnd_t resize_opnd(x86opnd_t opnd, uint32_t num_bits)
-{
- assert (num_bits % 8 == 0);
- x86opnd_t sub = opnd;
- sub.num_bits = num_bits;
- return sub;
-}
-
-x86opnd_t imm_opnd(int64_t imm)
-{
- x86opnd_t opnd = {
- OPND_IMM,
- sig_imm_size(imm),
- .as.imm = imm
- };
-
- return opnd;
-}
-
-x86opnd_t const_ptr_opnd(const void *ptr)
-{
- x86opnd_t opnd = {
- OPND_IMM,
- 64,
- .as.unsig_imm = (uint64_t)ptr
- };
-
- return opnd;
-}
-
-// Align the current write position to a multiple of bytes
-static uint8_t *align_ptr(uint8_t *ptr, uint32_t multiple)
-{
- // Compute the pointer modulo the given alignment boundary
- uint32_t rem = ((uint32_t)(uintptr_t)ptr) % multiple;
-
- // If the pointer is already aligned, stop
- if (rem == 0)
- return ptr;
-
- // Pad the pointer by the necessary amount to align it
- uint32_t pad = multiple - rem;
-
- return ptr + pad;
-}
-
-// Allocate a block of executable memory
-static uint8_t *alloc_exec_mem(uint32_t mem_size)
-{
- uint8_t *mem_block;
-
- // On Linux
-#if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
- // Align the requested address to page size
- uint32_t page_size = (uint32_t)sysconf(_SC_PAGESIZE);
- uint8_t *req_addr = align_ptr((uint8_t*)&alloc_exec_mem, page_size);
-
- do {
- // Try to map a chunk of memory as executable
- mem_block = (uint8_t*)mmap(
- (void*)req_addr,
- mem_size,
- PROT_READ | PROT_EXEC,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
- -1,
- 0
- );
-
- // If we succeeded, stop
- if (mem_block != MAP_FAILED) {
- break;
- }
-
- // +4MB
- req_addr += 4 * 1024 * 1024;
- } while (req_addr < (uint8_t*)&alloc_exec_mem + INT32_MAX);
-
- // On MacOS and other platforms
-#else
- // Try to map a chunk of memory as executable
- mem_block = (uint8_t*)mmap(
- (void*)alloc_exec_mem,
- mem_size,
- PROT_READ | PROT_EXEC,
- MAP_PRIVATE | MAP_ANONYMOUS,
- -1,
- 0
- );
-#endif
-
- // Fallback
- if (mem_block == MAP_FAILED) {
- // Try again without the address hint (e.g., valgrind)
- mem_block = (uint8_t*)mmap(
- NULL,
- mem_size,
- PROT_READ | PROT_EXEC,
- MAP_PRIVATE | MAP_ANONYMOUS,
- -1,
- 0
- );
- }
-
- // Check that the memory mapping was successful
- if (mem_block == MAP_FAILED) {
- perror("mmap call failed");
- exit(-1);
- }
-
- codeblock_t block;
- codeblock_t *cb = &block;
-
- cb_init(cb, mem_block, mem_size);
-
- // Fill the executable memory with PUSH DS (0x1E) so that
- // executing uninitialized memory will fault with #UD in
- // 64-bit mode.
- cb_mark_all_writeable(cb);
- memset(mem_block, 0x1E, mem_size);
- cb_mark_all_executable(cb);
-
- return mem_block;
-}
-
-// Initialize a code block object
-void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size)
-{
- assert (mem_block);
- cb->mem_block_ = mem_block;
- cb->mem_size = mem_size;
- cb->write_pos = 0;
- cb->num_labels = 0;
- cb->num_refs = 0;
- cb->current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
-}
-
-// Set the current write position
-void cb_set_pos(codeblock_t *cb, uint32_t pos)
-{
- // Assert here since while assembler functions do bounds checking, there is
- // nothing stopping users from taking out an out-of-bounds pointer and
- // doing bad accesses with it.
- assert (pos < cb->mem_size);
- cb->write_pos = pos;
-}
-
-// Align the current write position to a multiple of bytes
-void cb_align_pos(codeblock_t *cb, uint32_t multiple)
-{
- // Compute the pointer modulo the given alignment boundary
- uint8_t *ptr = cb_get_write_ptr(cb);
- uint8_t *aligned_ptr = align_ptr(ptr, multiple);
- const uint32_t write_pos = cb->write_pos;
-
- // Pad the pointer by the necessary amount to align it
- ptrdiff_t pad = aligned_ptr - ptr;
- cb_set_pos(cb, write_pos + (int32_t)pad);
-}
-
-// Set the current write position from a pointer
-void cb_set_write_ptr(codeblock_t *cb, uint8_t *code_ptr)
-{
- intptr_t pos = code_ptr - cb->mem_block_;
- assert (pos < cb->mem_size);
- cb_set_pos(cb, (uint32_t)pos);
-}
-
-// Get a direct pointer into the executable memory block
-uint8_t *cb_get_ptr(const codeblock_t *cb, uint32_t index)
-{
- if (index < cb->mem_size) {
- return &cb->mem_block_[index];
- }
- else {
- return NULL;
- }
-}
-
-// Get a direct pointer to the current write position
-uint8_t *cb_get_write_ptr(const codeblock_t *cb)
-{
- return cb_get_ptr(cb, cb->write_pos);
-}
-
-// Write a byte at the current position
-void cb_write_byte(codeblock_t *cb, uint8_t byte)
-{
- assert (cb->mem_block_);
- if (cb->write_pos < cb->mem_size) {
- cb_mark_position_writeable(cb, cb->write_pos);
- cb->mem_block_[cb->write_pos] = byte;
- cb->write_pos++;
- }
- else {
- cb->dropped_bytes = true;
- }
-}
-
-// Write multiple bytes starting from the current position
-void cb_write_bytes(codeblock_t *cb, uint32_t num_bytes, ...)
-{
- va_list va;
- va_start(va, num_bytes);
-
- for (uint32_t i = 0; i < num_bytes; ++i)
- {
- uint8_t byte = va_arg(va, int);
- cb_write_byte(cb, byte);
- }
-
- va_end(va);
-}
-
-// Write a signed integer over a given number of bits at the current position
-void cb_write_int(codeblock_t *cb, uint64_t val, uint32_t num_bits)
-{
- assert (num_bits > 0);
- assert (num_bits % 8 == 0);
-
- // Switch on the number of bits
- switch (num_bits) {
- case 8:
- cb_write_byte(cb, (uint8_t)val);
- break;
-
- case 16:
- cb_write_bytes(
- cb,
- 2,
- (uint8_t)((val >> 0) & 0xFF),
- (uint8_t)((val >> 8) & 0xFF)
- );
- break;
-
- case 32:
- cb_write_bytes(
- cb,
- 4,
- (uint8_t)((val >> 0) & 0xFF),
- (uint8_t)((val >> 8) & 0xFF),
- (uint8_t)((val >> 16) & 0xFF),
- (uint8_t)((val >> 24) & 0xFF)
- );
- break;
-
- default:
- {
- // Compute the size in bytes
- uint32_t num_bytes = num_bits / 8;
-
- // Write out the bytes
- for (uint32_t i = 0; i < num_bytes; ++i)
- {
- uint8_t byte_val = (uint8_t)(val & 0xFF);
- cb_write_byte(cb, byte_val);
- val >>= 8;
- }
- }
- }
-}
-
-// Allocate a new label with a given name
-uint32_t cb_new_label(codeblock_t *cb, const char *name)
-{
- //if (hasASM)
- // writeString(to!string(label) ~ ":");
-
- assert (cb->num_labels < MAX_LABELS);
-
- // Allocate the new label
- uint32_t label_idx = cb->num_labels++;
-
- // This label doesn't have an address yet
- cb->label_addrs[label_idx] = 0;
- cb->label_names[label_idx] = name;
-
- return label_idx;
-}
-
-// Write a label at the current address
-void cb_write_label(codeblock_t *cb, uint32_t label_idx)
-{
- assert (label_idx < MAX_LABELS);
- cb->label_addrs[label_idx] = cb->write_pos;
-}
-
-// Add a label reference at the current write position
-void cb_label_ref(codeblock_t *cb, uint32_t label_idx)
-{
- assert (label_idx < MAX_LABELS);
- assert (cb->num_refs < MAX_LABEL_REFS);
-
- // Keep track of the reference
- cb->label_refs[cb->num_refs] = (labelref_t){ cb->write_pos, label_idx };
- cb->num_refs++;
-}
-
-// Link internal label references
-void cb_link_labels(codeblock_t *cb)
-{
- uint32_t orig_pos = cb->write_pos;
-
- // For each label reference
- for (uint32_t i = 0; i < cb->num_refs; ++i)
- {
- uint32_t ref_pos = cb->label_refs[i].pos;
- uint32_t label_idx = cb->label_refs[i].label_idx;
- assert (ref_pos < cb->mem_size);
- assert (label_idx < MAX_LABELS);
-
- uint32_t label_addr = cb->label_addrs[label_idx];
- assert (label_addr < cb->mem_size);
-
- // Compute the offset from the reference's end to the label
- int64_t offset = (int64_t)label_addr - (int64_t)(ref_pos + 4);
-
- cb_set_pos(cb, ref_pos);
- cb_write_int(cb, offset, 32);
- }
-
- cb->write_pos = orig_pos;
-
- // Clear the label positions and references
- cb->num_labels = 0;
- cb->num_refs = 0;
-}
-
-// Check if an operand needs a REX byte to be encoded
-static bool rex_needed(x86opnd_t opnd)
-{
- if (opnd.type == OPND_NONE || opnd.type == OPND_IMM)
- {
- return false;
- }
-
- if (opnd.type == OPND_REG)
- {
- return (
- opnd.as.reg.reg_no > 7 ||
- (opnd.num_bits == 8 && opnd.as.reg.reg_no >= 4 && opnd.as.reg.reg_no <= 7)
- );
- }
-
- if (opnd.type == OPND_MEM)
- {
- return (opnd.as.mem.base_reg_no > 7) || (opnd.as.mem.has_idx && opnd.as.mem.idx_reg_no > 7);
- }
-
- rb_bug("unreachable");
-}
-
-// Check if an SIB byte is needed to encode this operand
-static bool sib_needed(x86opnd_t opnd)
-{
- if (opnd.type != OPND_MEM)
- return false;
-
- return (
- opnd.as.mem.has_idx ||
- opnd.as.mem.base_reg_no == RSP.as.reg.reg_no ||
- opnd.as.mem.base_reg_no == R12.as.reg.reg_no
- );
-}
-
-// Compute the size of the displacement field needed for a memory operand
-static uint32_t disp_size(x86opnd_t opnd)
-{
- assert (opnd.type == OPND_MEM);
-
- // If using RIP as the base, use disp32
- if (opnd.as.mem.is_iprel)
- {
- return 32;
- }
-
- // Compute the required displacement size
- if (opnd.as.mem.disp != 0)
- {
- uint32_t num_bits = sig_imm_size(opnd.as.mem.disp);
- assert (num_bits <= 32 && "displacement does not fit in 32 bits");
-
- // x86 can only encode 8-bit and 32-bit displacements
- if (num_bits == 16)
- num_bits = 32;;
-
- return num_bits;
- }
-
- // If EBP or RBP or R13 is used as the base, displacement must be encoded
- if (opnd.as.mem.base_reg_no == RBP.as.reg.reg_no ||
- opnd.as.mem.base_reg_no == R13.as.reg.reg_no)
- {
- return 8;
- }
-
- return 0;
-}
-
-// Write the REX byte
-static void cb_write_rex(
- codeblock_t *cb,
- bool w_flag,
- uint8_t reg_no,
- uint8_t idx_reg_no,
- uint8_t rm_reg_no
-)
-{
- // 0 1 0 0 w r x b
- // w - 64-bit operand size flag
- // r - MODRM.reg extension
- // x - SIB.index extension
- // b - MODRM.rm or SIB.base extension
- uint8_t w = w_flag? 1:0;
- uint8_t r = (reg_no & 8)? 1:0;
- uint8_t x = (idx_reg_no & 8)? 1:0;
- uint8_t b = (rm_reg_no & 8)? 1:0;
-
- // Encode and write the REX byte
- uint8_t rexByte = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b);
- cb_write_byte(cb, rexByte);
-}
-
-// Write an opcode byte with an embedded register operand
-static void cb_write_opcode(codeblock_t *cb, uint8_t opcode, x86opnd_t reg)
-{
- // Write the reg field into the opcode byte
- uint8_t op_byte = opcode | (reg.as.reg.reg_no & 7);
- cb_write_byte(cb, op_byte);
-}
-
-// Encode an RM instruction
-static void cb_write_rm(
- codeblock_t *cb,
- bool szPref,
- bool rexW,
- x86opnd_t r_opnd,
- x86opnd_t rm_opnd,
- uint8_t opExt,
- uint32_t op_len,
- ...)
-{
- assert (op_len > 0 && op_len <= 3);
- assert (r_opnd.type == OPND_REG || r_opnd.type == OPND_NONE);
-
- // Flag to indicate the REX prefix is needed
- bool need_rex = rexW || rex_needed(r_opnd) || rex_needed(rm_opnd);
-
- // Flag to indicate SIB byte is needed
- bool need_sib = sib_needed(r_opnd) || sib_needed(rm_opnd);
-
- // Add the operand-size prefix, if needed
- if (szPref == true)
- cb_write_byte(cb, 0x66);
-
- // Add the REX prefix, if needed
- if (need_rex)
- {
- // 0 1 0 0 w r x b
- // w - 64-bit operand size flag
- // r - MODRM.reg extension
- // x - SIB.index extension
- // b - MODRM.rm or SIB.base extension
-
- uint8_t w = rexW? 1:0;
-
- uint8_t r;
- if (r_opnd.type != OPND_NONE)
- r = (r_opnd.as.reg.reg_no & 8)? 1:0;
- else
- r = 0;
-
- uint8_t x;
- if (need_sib && rm_opnd.as.mem.has_idx)
- x = (rm_opnd.as.mem.idx_reg_no & 8)? 1:0;
- else
- x = 0;
-
- uint8_t b;
- if (rm_opnd.type == OPND_REG)
- b = (rm_opnd.as.reg.reg_no & 8)? 1:0;
- else if (rm_opnd.type == OPND_MEM)
- b = (rm_opnd.as.mem.base_reg_no & 8)? 1:0;
- else
- b = 0;
-
- // Encode and write the REX byte
- uint8_t rex_byte = 0x40 + (w << 3) + (r << 2) + (x << 1) + (b);
- cb_write_byte(cb, rex_byte);
- }
-
- // Write the opcode bytes to the code block
- va_list va;
- va_start(va, op_len);
- for (uint32_t i = 0; i < op_len; ++i)
- {
- uint8_t byte = va_arg(va, int);
- cb_write_byte(cb, byte);
- }
- va_end(va);
-
- // MODRM.mod (2 bits)
- // MODRM.reg (3 bits)
- // MODRM.rm (3 bits)
-
- assert (
- !(opExt != 0xFF && r_opnd.type != OPND_NONE) &&
- "opcode extension and register operand present"
- );
-
- // Encode the mod field
- uint8_t mod;
- if (rm_opnd.type == OPND_REG)
- {
- mod = 3;
- }
- else
- {
- uint32_t dsize = disp_size(rm_opnd);
- if (dsize == 0 || rm_opnd.as.mem.is_iprel)
- mod = 0;
- else if (dsize == 8)
- mod = 1;
- else if (dsize == 32)
- mod = 2;
- else
- rb_bug("unreachable");
- }
-
- // Encode the reg field
- uint8_t reg;
- if (opExt != 0xFF)
- reg = opExt;
- else if (r_opnd.type == OPND_REG)
- reg = r_opnd.as.reg.reg_no & 7;
- else
- reg = 0;
-
- // Encode the rm field
- uint8_t rm;
- if (rm_opnd.type == OPND_REG)
- {
- rm = rm_opnd.as.reg.reg_no & 7;
- }
- else
- {
- if (need_sib)
- rm = 4;
- else
- rm = rm_opnd.as.mem.base_reg_no & 7;
- }
-
- // Encode and write the ModR/M byte
- uint8_t rm_byte = (mod << 6) + (reg << 3) + (rm);
- cb_write_byte(cb, rm_byte);
-
- // Add the SIB byte, if needed
- if (need_sib)
- {
- // SIB.scale (2 bits)
- // SIB.index (3 bits)
- // SIB.base (3 bits)
-
- assert (rm_opnd.type == OPND_MEM);
-
- // Encode the scale value
- uint8_t scale = rm_opnd.as.mem.scale_exp;
-
- // Encode the index value
- uint8_t index;
- if (!rm_opnd.as.mem.has_idx)
- index = 4;
- else
- index = rm_opnd.as.mem.idx_reg_no & 7;
-
- // Encode the base register
- uint8_t base = rm_opnd.as.mem.base_reg_no & 7;
-
- // Encode and write the SIB byte
- uint8_t sib_byte = (scale << 6) + (index << 3) + (base);
- cb_write_byte(cb, sib_byte);
- }
-
- // Add the displacement
- if (rm_opnd.type == OPND_MEM)
- {
- uint32_t dsize = disp_size(rm_opnd);
- if (dsize > 0)
- cb_write_int(cb, rm_opnd.as.mem.disp, dsize);
- }
-}
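// Putting the pieces together, encoding a 64-bit register-to-memory move such as
//
//     mov(cb, mem_opnd(64, RAX, 8), RCX);
//
// routes through cb_write_rm(cb, false, true, RCX, [RAX + 8], 0xFF, 1, 0x89)
// and emits 48 89 48 08: REX.W, the 0x89 opcode, a ModR/M byte with mod = 01,
// reg = rcx, rm = rax, and an 8-bit displacement. (Illustrative walk-through,
// hand-checked against the logic above; only helpers from this file are used.)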
-
-// Encode a mul-like single-operand RM instruction
-static void write_rm_unary(
- codeblock_t *cb,
- const char *mnem,
- uint8_t opMemReg8,
- uint8_t opMemRegPref,
- uint8_t opExt,
- x86opnd_t opnd)
-{
- // Write a disassembly string
- //cb.writeASM(mnem, opnd);
-
- // Check the size of opnd0
- uint32_t opndSize;
- if (opnd.type == OPND_REG || opnd.type == OPND_MEM)
- opndSize = opnd.num_bits;
- else
- rb_bug("yjit: invalid operand");
-
- assert (opndSize == 8 || opndSize == 16 || opndSize == 32 || opndSize == 64);
- bool szPref = opndSize == 16;
- bool rexW = opndSize == 64;
-
- if (opndSize == 8)
- cb_write_rm(cb, false, false, NO_OPND, opnd, opExt, 1, opMemReg8);
- else
- cb_write_rm(cb, szPref, rexW, NO_OPND, opnd, opExt, 1, opMemRegPref);
-}
-
-// Encode an add-like RM instruction with multiple possible encodings
-static void cb_write_rm_multi(
- codeblock_t *cb,
- const char *mnem,
- uint8_t opMemReg8,
- uint8_t opMemRegPref,
- uint8_t opRegMem8,
- uint8_t opRegMemPref,
- uint8_t opMemImm8,
- uint8_t opMemImmSml,
- uint8_t opMemImmLrg,
- uint8_t opExtImm,
- x86opnd_t opnd0,
- x86opnd_t opnd1)
-{
- assert (opnd0.type == OPND_REG || opnd0.type == OPND_MEM);
-
- /*
- // Write disassembly string
- if (!opnd1.isNone)
- cb.writeASM(mnem, opnd0, opnd1);
- else
- cb.writeASM(mnem, opnd0);
- */
-
- // Check the size of opnd0
- uint32_t opndSize = opnd0.num_bits;
-
- // Check the size of opnd1
- if (opnd1.type == OPND_REG || opnd1.type == OPND_MEM)
- {
- assert (opnd1.num_bits == opndSize && "operand size mismatch");
- }
- else if (opnd1.type == OPND_IMM)
- {
- assert (opnd1.num_bits <= opndSize);
- }
-
- assert (opndSize == 8 || opndSize == 16 || opndSize == 32 || opndSize == 64);
- bool szPref = opndSize == 16;
- bool rexW = opndSize == 64;
-
- // R/M + Reg
- if ((opnd0.type == OPND_MEM && opnd1.type == OPND_REG) ||
- (opnd0.type == OPND_REG && opnd1.type == OPND_REG))
- {
- // R/M is opnd0
- if (opndSize == 8)
- cb_write_rm(cb, false, false, opnd1, opnd0, 0xFF, 1, opMemReg8);
- else
- cb_write_rm(cb, szPref, rexW, opnd1, opnd0, 0xFF, 1, opMemRegPref);
- }
-
- // Reg + R/M
- else if (opnd0.type == OPND_REG && opnd1.type == OPND_MEM)
- {
- // R/M is opnd1
- if (opndSize == 8)
- cb_write_rm(cb, false, false, opnd0, opnd1, 0xFF, 1, opRegMem8);
- else
- cb_write_rm(cb, szPref, rexW, opnd0, opnd1, 0xFF, 1, opRegMemPref);
- }
-
- // R/M + Imm
- else if (opnd1.type == OPND_IMM)
- {
- // 8-bit immediate
- if (opnd1.num_bits <= 8)
- {
- if (opndSize == 8)
- cb_write_rm(cb, false, false, NO_OPND, opnd0, opExtImm, 1, opMemImm8);
- else
- cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExtImm, 1, opMemImmSml);
-
- cb_write_int(cb, opnd1.as.imm, 8);
- }
-
- // 32-bit immediate
- else if (opnd1.num_bits <= 32)
- {
- assert (opnd1.num_bits <= opndSize && "immediate too large for dst");
- cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExtImm, 1, opMemImmLrg);
- cb_write_int(cb, opnd1.as.imm, (opndSize > 32)? 32:opndSize);
- }
-
- // Immediate too large
- else
- {
- assert (false && "immediate value too large");
- }
- }
-
- // Invalid operands
- else
- {
- assert (false && "invalid operand combination");
- }
-}
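// For example, add(cb, RAX, imm_opnd(5)) should take the small-immediate path
// (assuming imm_opnd() sizes 5 as an 8-bit immediate) and emit 48 83 C0 05:
// REX.W, opMemImmSml, a ModR/M byte carrying opExtImm in its reg field, and
// the 8-bit immediate. (Illustrative, hand-checked against the logic above.)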
-
-// Encode a single-operand shift instruction
-static void cb_write_shift(
- codeblock_t *cb,
- const char *mnem,
- uint8_t opMemOnePref,
- uint8_t opMemClPref,
- uint8_t opMemImmPref,
- uint8_t opExt,
- x86opnd_t opnd0,
- x86opnd_t opnd1)
-{
- // Write a disassembly string
- //cb.writeASM(mnem, opnd0, opnd1);
-
- // Check the size of opnd0
- uint32_t opndSize;
- if (opnd0.type == OPND_REG || opnd0.type == OPND_MEM)
- opndSize = opnd0.num_bits;
- else
- rb_bug("yjit: shift: invalid first operand");
-
- assert (opndSize == 16 || opndSize == 32 || opndSize == 64);
- bool szPref = opndSize == 16;
- bool rexW = opndSize == 64;
-
- if (opnd1.type == OPND_IMM)
- {
- if (opnd1.as.imm == 1)
- {
- cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExt, 1, opMemOnePref);
- }
- else
- {
- assert (opnd1.num_bits <= 8);
- cb_write_rm(cb, szPref, rexW, NO_OPND, opnd0, opExt, 1, opMemImmPref);
- cb_write_byte(cb, (uint8_t)opnd1.as.imm);
- }
- }
- /*
- else if (opnd1.isReg && opnd1.reg == CL)
- {
- cb.writeRMInstr!('l', opExt, opMemClPref)(szPref, rexW, opnd0, X86Opnd.NONE);
- }
- */
- else
- {
- assert (false);
- }
-}
-
-// Encode a relative jump to a label (direct or conditional)
-// Note: this always encodes a 32-bit offset
-static void cb_write_jcc(codeblock_t *cb, const char *mnem, uint8_t op0, uint8_t op1, uint32_t label_idx)
-{
- //cb.writeASM(mnem, label);
-
- // Write the opcode
- if (op0 != 0xFF)
- cb_write_byte(cb, op0);
- cb_write_byte(cb, op1);
-
- // Add a reference to the label
- cb_label_ref(cb, label_idx);
-
- // Relative 32-bit offset to be patched
- cb_write_int(cb, 0, 32);
-}
-
-// Encode a relative jump to a pointer at a 32-bit offset (direct or conditional)
-static void cb_write_jcc_ptr(codeblock_t *cb, const char *mnem, uint8_t op0, uint8_t op1, uint8_t *dst_ptr)
-{
- //cb.writeASM(mnem, label);
-
- // Write the opcode
- if (op0 != 0xFF)
- cb_write_byte(cb, op0);
- cb_write_byte(cb, op1);
-
- // Pointer to the end of this jump instruction
- uint8_t *end_ptr = cb_get_ptr(cb, cb->write_pos + 4);
-
- // Compute the jump offset
- int64_t rel64 = (int64_t)(dst_ptr - end_ptr);
- if (rel64 >= INT32_MIN && rel64 <= INT32_MAX) {
- // Write the relative 32-bit jump offset
- cb_write_int(cb, (int32_t)rel64, 32);
- }
- else {
- // Offset doesn't fit in 4 bytes. Report error.
- cb->dropped_bytes = true;
- }
-}
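// Note that the offset is relative to the end of the instruction: the opcode
// byte(s) have already been written by the time end_ptr is computed, so
// write_pos + 4 points just past the 32-bit offset field. If dst_ptr happens
// to equal that address, the encoded offset is simply zero.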
-
-// Encode a conditional move instruction
-static void cb_write_cmov(codeblock_t *cb, const char *mnem, uint8_t opcode1, x86opnd_t dst, x86opnd_t src)
-{
- //cb.writeASM(mnem, dst, src);
-
- assert (dst.type == OPND_REG);
- assert (src.type == OPND_REG || src.type == OPND_MEM);
- assert (dst.num_bits >= 16 && "invalid dst reg size in cmov");
-
- bool szPref = dst.num_bits == 16;
- bool rexW = dst.num_bits == 64;
-
- cb_write_rm(cb, szPref, rexW, dst, src, 0xFF, 2, 0x0F, opcode1);
-}
-
-// add - Integer addition
-void add(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_rm_multi(
- cb,
- "add",
- 0x00, // opMemReg8
- 0x01, // opMemRegPref
- 0x02, // opRegMem8
- 0x03, // opRegMemPref
- 0x80, // opMemImm8
- 0x83, // opMemImmSml
- 0x81, // opMemImmLrg
- 0x00, // opExtImm
- opnd0,
- opnd1
- );
-}
-
-/// and - Bitwise AND
-void and(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_rm_multi(
- cb,
- "and",
- 0x20, // opMemReg8
- 0x21, // opMemRegPref
- 0x22, // opRegMem8
- 0x23, // opRegMemPref
- 0x80, // opMemImm8
- 0x83, // opMemImmSml
- 0x81, // opMemImmLrg
- 0x04, // opExtImm
- opnd0,
- opnd1
- );
-}
-
-// call - Call with a relative 32-bit offset
-static void call_rel32(codeblock_t *cb, int32_t rel32)
-{
- //cb.writeASM("call", rel32);
-
- // Write the opcode
- cb_write_byte(cb, 0xE8);
-
- // Write the relative 32-bit jump offset
- cb_write_int(cb, (int32_t)rel32, 32);
-}
-
-// call - Call a pointer, encode with a 32-bit offset if possible
-void call_ptr(codeblock_t *cb, x86opnd_t scratch_reg, uint8_t *dst_ptr)
-{
- assert (scratch_reg.type == OPND_REG);
-
- // Pointer to the end of this call instruction
- uint8_t *end_ptr = cb_get_ptr(cb, cb->write_pos + 5);
-
- // Compute the jump offset
- int64_t rel64 = (int64_t)(dst_ptr - end_ptr);
-
- // If the offset fits in 32-bit
- if (rel64 >= INT32_MIN && rel64 <= INT32_MAX) {
- call_rel32(cb, (int32_t)rel64);
- return;
- }
-
- // Move the pointer into the scratch register and call
- mov(cb, scratch_reg, const_ptr_opnd(dst_ptr));
- call(cb, scratch_reg);
-}
-
-/// call - Call to label with 32-bit offset
-void call_label(codeblock_t *cb, uint32_t label_idx)
-{
- //cb.writeASM("call", label);
-
- // Write the opcode
- cb_write_byte(cb, 0xE8);
-
- // Add a reference to the label
- cb_label_ref(cb, label_idx);
-
- // Relative 32-bit offset to be patched
- cb_write_int(cb, 0, 32);
-}
-
-/// call - Indirect call with an R/M operand
-void call(codeblock_t *cb, x86opnd_t opnd)
-{
- //cb.writeASM("call", opnd);
- cb_write_rm(cb, false, false, NO_OPND, opnd, 2, 1, 0xFF);
-}
-
-/// cmovcc - Conditional move
-void cmova(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmova", 0x47, dst, src); }
-void cmovae(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovae", 0x43, dst, src); }
-void cmovb(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovb", 0x42, dst, src); }
-void cmovbe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovbe", 0x46, dst, src); }
-void cmovc(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovc", 0x42, dst, src); }
-void cmove(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmove", 0x44, dst, src); }
-void cmovg(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovg", 0x4F, dst, src); }
-void cmovge(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovge", 0x4D, dst, src); }
-void cmovl(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovl", 0x4C, dst, src); }
-void cmovle(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovle", 0x4E, dst, src); }
-void cmovna(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovna", 0x46, dst, src); }
-void cmovnae(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnae", 0x42, dst, src); }
-void cmovnb(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnb", 0x43, dst, src); }
-void cmovnbe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnbe", 0x47, dst, src); }
-void cmovnc(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnc", 0x43, dst, src); }
-void cmovne(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovne", 0x45, dst, src); }
-void cmovng(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovng", 0x4E, dst, src); }
-void cmovnge(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnge", 0x4C, dst, src); }
-void cmovnl(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnl" , 0x4D, dst, src); }
-void cmovnle(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnle", 0x4F, dst, src); }
-void cmovno(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovno", 0x41, dst, src); }
-void cmovnp(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnp", 0x4B, dst, src); }
-void cmovns(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovns", 0x49, dst, src); }
-void cmovnz(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovnz", 0x45, dst, src); }
-void cmovo(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovo", 0x40, dst, src); }
-void cmovp(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovp", 0x4A, dst, src); }
-void cmovpe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovpe", 0x4A, dst, src); }
-void cmovpo(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovpo", 0x4B, dst, src); }
-void cmovs(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovs", 0x48, dst, src); }
-void cmovz(codeblock_t *cb, x86opnd_t dst, x86opnd_t src) { cb_write_cmov(cb, "cmovz", 0x44, dst, src); }
-
-/// cmp - Compare and set flags
-void cmp(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_rm_multi(
- cb,
- "cmp",
- 0x38, // opMemReg8
- 0x39, // opMemRegPref
- 0x3A, // opRegMem8
- 0x3B, // opRegMemPref
- 0x80, // opMemImm8
- 0x83, // opMemImmSml
- 0x81, // opMemImmLrg
- 0x07, // opExtImm
- opnd0,
- opnd1
- );
-}
-
-/// cdq - Convert doubleword to quadword
-void cdq(codeblock_t *cb)
-{
- //cb.writeASM("cdq");
- cb_write_byte(cb, 0x99);
-}
-
-/// cqo - Convert quadword to octaword
-void cqo(codeblock_t *cb)
-{
- //cb.writeASM("cqo");
- cb_write_bytes(cb, 2, 0x48, 0x99);
-}
-
-/// Interrupt 3 - trap to debugger
-void int3(codeblock_t *cb)
-{
- //cb.writeASM("INT 3");
- cb_write_byte(cb, 0xCC);
-}
-
-/*
-// div - Unsigned integer division
-alias div = writeRMUnary!(
- "div",
- 0xF6, // opMemReg8
- 0xF7, // opMemRegPref
- 0x06 // opExt
-);
-*/
-
-/*
-/// divsd - Divide scalar double
-alias divsd = writeXMM64!(
- "divsd",
- 0xF2, // prefix
- 0x0F, // opRegMem0
- 0x5E // opRegMem1
-);
-*/
-
-/*
-// idiv - Signed integer division
-alias idiv = writeRMUnary!(
- "idiv",
- 0xF6, // opMemReg8
- 0xF7, // opMemRegPref
- 0x07 // opExt
-);
-*/
-
-/*
-/// imul - Signed integer multiplication with two operands
-void imul(CodeBlock cb, X86Opnd opnd0, X86Opnd opnd1)
-{
- cb.writeASM("imul", opnd0, opnd1);
-
- assert (opnd0.isReg, "invalid first operand");
- auto opndSize = opnd0.reg.size;
-
- // Check the size of opnd1
- if (opnd1.isReg)
- assert (opnd1.reg.size is opndSize, "operand size mismatch");
- else if (opnd1.isMem)
- assert (opnd1.mem.size is opndSize, "operand size mismatch");
-
- assert (opndSize is 16 || opndSize is 32 || opndSize is 64);
- auto szPref = opndSize is 16;
- auto rexW = opndSize is 64;
-
- cb.writeRMInstr!('r', 0xFF, 0x0F, 0xAF)(szPref, rexW, opnd0, opnd1);
-}
-*/
-
-/*
-/// imul - Signed integer multiplication with three operands (one immediate)
-void imul(CodeBlock cb, X86Opnd opnd0, X86Opnd opnd1, X86Opnd opnd2)
-{
- cb.writeASM("imul", opnd0, opnd1, opnd2);
-
- assert (opnd0.isReg, "invalid first operand");
- auto opndSize = opnd0.reg.size;
-
- // Check the size of opnd1
- if (opnd1.isReg)
- assert (opnd1.reg.size is opndSize, "operand size mismatch");
- else if (opnd1.isMem)
- assert (opnd1.mem.size is opndSize, "operand size mismatch");
-
- assert (opndSize is 16 || opndSize is 32 || opndSize is 64);
- auto szPref = opndSize is 16;
- auto rexW = opndSize is 64;
-
- assert (opnd2.isImm, "invalid third operand");
- auto imm = opnd2.imm;
-
- // 8-bit immediate
- if (imm.immSize <= 8)
- {
- cb.writeRMInstr!('r', 0xFF, 0x6B)(szPref, rexW, opnd0, opnd1);
- cb.writeInt(imm.imm, 8);
- }
-
- // 32-bit immediate
- else if (imm.immSize <= 32)
- {
- assert (imm.immSize <= opndSize, "immediate too large for dst");
- cb.writeRMInstr!('r', 0xFF, 0x69)(szPref, rexW, opnd0, opnd1);
- cb.writeInt(imm.imm, min(opndSize, 32));
- }
-
- // Immediate too large
- else
- {
- assert (false, "immediate value too large");
- }
-}
-*/
-
-/// jcc - relative jumps to a label
-void ja_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "ja" , 0x0F, 0x87, label_idx); }
-void jae_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jae" , 0x0F, 0x83, label_idx); }
-void jb_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jb" , 0x0F, 0x82, label_idx); }
-void jbe_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jbe" , 0x0F, 0x86, label_idx); }
-void jc_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jc" , 0x0F, 0x82, label_idx); }
-void je_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "je" , 0x0F, 0x84, label_idx); }
-void jg_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jg" , 0x0F, 0x8F, label_idx); }
-void jge_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jge" , 0x0F, 0x8D, label_idx); }
-void jl_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jl" , 0x0F, 0x8C, label_idx); }
-void jle_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jle" , 0x0F, 0x8E, label_idx); }
-void jna_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jna" , 0x0F, 0x86, label_idx); }
-void jnae_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnae", 0x0F, 0x82, label_idx); }
-void jnb_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnb" , 0x0F, 0x83, label_idx); }
-void jnbe_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnbe", 0x0F, 0x87, label_idx); }
-void jnc_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnc" , 0x0F, 0x83, label_idx); }
-void jne_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jne" , 0x0F, 0x85, label_idx); }
-void jng_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jng" , 0x0F, 0x8E, label_idx); }
-void jnge_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnge", 0x0F, 0x8C, label_idx); }
-void jnl_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnl" , 0x0F, 0x8D, label_idx); }
-void jnle_label(codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnle", 0x0F, 0x8F, label_idx); }
-void jno_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jno" , 0x0F, 0x81, label_idx); }
-void jnp_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnp" , 0x0F, 0x8B, label_idx); }
-void jns_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jns" , 0x0F, 0x89, label_idx); }
-void jnz_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jnz" , 0x0F, 0x85, label_idx); }
-void jo_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jo" , 0x0F, 0x80, label_idx); }
-void jp_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jp" , 0x0F, 0x8A, label_idx); }
-void jpe_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jpe" , 0x0F, 0x8A, label_idx); }
-void jpo_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jpo" , 0x0F, 0x8B, label_idx); }
-void js_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "js" , 0x0F, 0x88, label_idx); }
-void jz_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jz" , 0x0F, 0x84, label_idx); }
-void jmp_label (codeblock_t *cb, uint32_t label_idx) { cb_write_jcc(cb, "jmp" , 0xFF, 0xE9, label_idx); }
-
-/// jcc - relative jumps to a pointer (32-bit offset)
-void ja_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "ja" , 0x0F, 0x87, ptr); }
-void jae_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jae" , 0x0F, 0x83, ptr); }
-void jb_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jb" , 0x0F, 0x82, ptr); }
-void jbe_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jbe" , 0x0F, 0x86, ptr); }
-void jc_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jc" , 0x0F, 0x82, ptr); }
-void je_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "je" , 0x0F, 0x84, ptr); }
-void jg_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jg" , 0x0F, 0x8F, ptr); }
-void jge_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jge" , 0x0F, 0x8D, ptr); }
-void jl_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jl" , 0x0F, 0x8C, ptr); }
-void jle_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jle" , 0x0F, 0x8E, ptr); }
-void jna_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jna" , 0x0F, 0x86, ptr); }
-void jnae_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnae", 0x0F, 0x82, ptr); }
-void jnb_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnb" , 0x0F, 0x83, ptr); }
-void jnbe_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnbe", 0x0F, 0x87, ptr); }
-void jnc_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnc" , 0x0F, 0x83, ptr); }
-void jne_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jne" , 0x0F, 0x85, ptr); }
-void jng_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jng" , 0x0F, 0x8E, ptr); }
-void jnge_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnge", 0x0F, 0x8C, ptr); }
-void jnl_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnl" , 0x0F, 0x8D, ptr); }
-void jnle_ptr(codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnle", 0x0F, 0x8F, ptr); }
-void jno_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jno" , 0x0F, 0x81, ptr); }
-void jnp_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnp" , 0x0F, 0x8B, ptr); }
-void jns_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jns" , 0x0F, 0x89, ptr); }
-void jnz_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jnz" , 0x0F, 0x85, ptr); }
-void jo_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jo" , 0x0F, 0x80, ptr); }
-void jp_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jp" , 0x0F, 0x8A, ptr); }
-void jpe_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jpe" , 0x0F, 0x8A, ptr); }
-void jpo_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jpo" , 0x0F, 0x8B, ptr); }
-void js_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "js" , 0x0F, 0x88, ptr); }
-void jz_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jz" , 0x0F, 0x84, ptr); }
-void jmp_ptr (codeblock_t *cb, uint8_t *ptr) { cb_write_jcc_ptr(cb, "jmp" , 0xFF, 0xE9, ptr); }
-
-/// jmp - Indirect jump near to an R/M operand
-void jmp_rm(codeblock_t *cb, x86opnd_t opnd)
-{
- //cb.writeASM("jmp", opnd);
- cb_write_rm(cb, false, false, NO_OPND, opnd, 4, 1, 0xFF);
-}
-
-// jmp - Jump with relative 32-bit offset
-void jmp32(codeblock_t *cb, int32_t offset)
-{
- //cb.writeASM("jmp", ((offset > 0)? "+":"-") ~ to!string(offset));
- cb_write_byte(cb, 0xE9);
- cb_write_int(cb, offset, 32);
-}
-
-/// lea - Load Effective Address
-void lea(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
-{
- //cb.writeASM("lea", dst, src);
- assert (dst.num_bits == 64);
- cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x8D);
-}
-
-// Does this number fit in 32 bits and stay the same if you zero-extend it to 64 bits?
-// If the sign bit is clear, sign extension and zero extension yield the same
-// result.
-static bool
-zero_extendable_32bit(uint64_t number)
-{
- return number <= UINT32_MAX && (number & (1ull << 31ull)) == 0;
-}
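// Illustrative values (checked against the condition above): 0x34 and
// 0x7FFFFFFF qualify, so a 64-bit mov of those immediates can use the shorter
// B8+rd imm32 form; 0x80000000 has bit 31 set and 0x100000000 does not fit in
// 32 bits, so both fall back to the full-width encoding in mov() below.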
-
-/// mov - Data move operation
-void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
-{
- // R/M + Imm
- if (src.type == OPND_IMM)
- {
- //cb.writeASM("mov", dst, src);
-
- // R + Imm
- if (dst.type == OPND_REG)
- {
- assert (
- src.num_bits <= dst.num_bits ||
- unsig_imm_size(src.as.imm) <= dst.num_bits
- );
-
- // If the source immediate can be zero-extended to 64 bits,
- // we can use the 32-bit operand version of the instruction.
- // For example, we can turn mov(rax, 0x34) into the equivalent
- // mov(eax, 0x34).
- if (dst.num_bits == 64 && zero_extendable_32bit(src.as.unsig_imm)) {
- if (rex_needed(dst))
- cb_write_rex(cb, false, 0, 0, dst.as.reg.reg_no);
- cb_write_opcode(cb, 0xB8, dst);
- cb_write_int(cb, src.as.imm, 32);
- }
- else {
- if (dst.num_bits == 16)
- cb_write_byte(cb, 0x66);
-
- if (rex_needed(dst) || dst.num_bits == 64)
- cb_write_rex(cb, dst.num_bits == 64, 0, 0, dst.as.reg.reg_no);
-
- cb_write_opcode(cb, (dst.num_bits == 8)? 0xB0:0xB8, dst);
-
- cb_write_int(cb, src.as.imm, dst.num_bits);
- }
- }
-
- // M + Imm
- else if (dst.type == OPND_MEM)
- {
- assert (src.num_bits <= dst.num_bits);
-
- if (dst.num_bits == 8)
- cb_write_rm(cb, false, false, NO_OPND, dst, 0xFF, 1, 0xC6);
- else
- cb_write_rm(cb, dst.num_bits == 16, dst.num_bits == 64, NO_OPND, dst, 0, 1, 0xC7);
-
- const uint32_t output_num_bits = (dst.num_bits > 32u) ? 32u : dst.num_bits;
- // assert that we can write whole immediate without loss of information
- assert (sig_imm_size(src.as.imm) <= output_num_bits);
- cb_write_int(cb, src.as.imm, output_num_bits);
- }
-
- else
- {
- assert (false);
- }
- }
- else
- {
- cb_write_rm_multi(
- cb,
- "mov",
- 0x88, // opMemReg8
- 0x89, // opMemRegPref
- 0x8A, // opRegMem8
- 0x8B, // opRegMemPref
- 0xC6, // opMemImm8
- 0xFF, // opMemImmSml (not available)
- 0xFF, // opMemImmLrg
- 0xFF, // opExtImm
- dst,
- src
- );
- }
-}
-
-/// movsx - Move with sign extension (signed integers)
-void movsx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
-{
- assert (dst.type == OPND_REG);
- assert (src.type == OPND_REG || src.type == OPND_MEM);
- assert (src.num_bits < dst.num_bits);
-
- //cb.writeASM("movsx", dst, src);
-
- if (src.num_bits == 8)
- {
- cb_write_rm(cb, dst.num_bits == 16, dst.num_bits == 64, dst, src, 0xFF, 2, 0x0F, 0xBE);
- }
- else if (src.num_bits == 16)
- {
- cb_write_rm(cb, dst.num_bits == 16, dst.num_bits == 64, dst, src, 0xFF, 2, 0x0F, 0xBF);
- }
- else if (src.num_bits == 32)
- {
- cb_write_rm(cb, false, true, dst, src, 0xFF, 1, 0x63);
- }
- else
- {
- assert (false);
- }
-}
-
-/*
-/// movzx - Move with zero extension (unsigned values)
-void movzx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src)
-{
- cb.writeASM("movzx", dst, src);
-
- uint32_t dstSize;
- if (dst.isReg)
- dstSize = dst.reg.size;
- else
- assert (false, "movzx dst must be a register");
-
- uint32_t srcSize;
- if (src.isReg)
- srcSize = src.reg.size;
- else if (src.isMem)
- srcSize = src.mem.size;
- else
- assert (false);
-
- assert (
- srcSize < dstSize,
- "movzx: srcSize >= dstSize"
- );
-
- if (srcSize is 8)
- {
- cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB6)(dstSize is 16, dstSize is 64, dst, src);
- }
- else if (srcSize is 16)
- {
- cb.writeRMInstr!('r', 0xFF, 0x0F, 0xB7)(dstSize is 16, dstSize is 64, dst, src);
- }
- else
- {
- assert (false, "invalid src operand size for movxz");
- }
-}
-*/
-
-// neg - Integer negation (multiplication by -1)
-void neg(codeblock_t *cb, x86opnd_t opnd)
-{
- write_rm_unary(
- cb,
- "neg",
- 0xF6, // opMemReg8
- 0xF7, // opMemRegPref
- 0x03, // opExt
- opnd
- );
-}
-
-// nop - Noop, one or multiple bytes long
-void nop(codeblock_t *cb, uint32_t length)
-{
- switch (length) {
- case 0:
- break;
-
- case 1:
- //cb.writeASM("nop1");
- cb_write_byte(cb, 0x90);
- break;
-
- case 2:
- //cb.writeASM("nop2");
- cb_write_bytes(cb, 2, 0x66,0x90);
- break;
-
- case 3:
- //cb.writeASM("nop3");
- cb_write_bytes(cb, 3, 0x0F,0x1F,0x00);
- break;
-
- case 4:
- //cb.writeASM("nop4");
- cb_write_bytes(cb, 4, 0x0F,0x1F,0x40,0x00);
- break;
-
- case 5:
- //cb.writeASM("nop5");
- cb_write_bytes(cb, 5, 0x0F,0x1F,0x44,0x00,0x00);
- break;
-
- case 6:
- //cb.writeASM("nop6");
- cb_write_bytes(cb, 6, 0x66,0x0F,0x1F,0x44,0x00,0x00);
- break;
-
- case 7:
- //cb.writeASM("nop7");
- cb_write_bytes(cb, 7, 0x0F,0x1F,0x80,0x00,0x00,0x00,0x00);
- break;
-
- case 8:
- //cb.writeASM("nop8");
- cb_write_bytes(cb, 8, 0x0F,0x1F,0x84,0x00,0x00,0x00,0x00,0x00);
- break;
-
- case 9:
- //cb.writeASM("nop9");
- cb_write_bytes(cb, 9, 0x66,0x0F,0x1F,0x84,0x00,0x00,0x00,0x00,0x00);
- break;
-
- default:
- {
- uint32_t written = 0;
- while (written + 9 <= length)
- {
- nop(cb, 9);
- written += 9;
- }
- nop(cb, length - written);
- }
- break;
- }
-}
-
-// not - Bitwise NOT
-void not(codeblock_t *cb, x86opnd_t opnd)
-{
- write_rm_unary(
- cb,
- "not",
- 0xF6, // opMemReg8
- 0xF7, // opMemRegPref
- 0x02, // opExt
- opnd
- );
-}
-
-/// or - Bitwise OR
-void or(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_rm_multi(
- cb,
- "or",
- 0x08, // opMemReg8
- 0x09, // opMemRegPref
- 0x0A, // opRegMem8
- 0x0B, // opRegMemPref
- 0x80, // opMemImm8
- 0x83, // opMemImmSml
- 0x81, // opMemImmLrg
- 0x01, // opExtImm
- opnd0,
- opnd1
- );
-}
-
-/// pop - Pop a register off the stack
-void pop(codeblock_t *cb, x86opnd_t opnd)
-{
- assert (opnd.num_bits == 64);
-
- //cb.writeASM("pop", opnd);
-
- if (opnd.type == OPND_REG) {
- if (rex_needed(opnd))
- cb_write_rex(cb, false, 0, 0, opnd.as.reg.reg_no);
- cb_write_opcode(cb, 0x58, opnd);
- }
- else if (opnd.type == OPND_MEM) {
- cb_write_rm(cb, false, false, NO_OPND, opnd, 0, 1, 0x8F);
- }
- else {
- assert(false && "unexpected operand type");
- }
-}
-
-/// popfq - Pop the flags register (64-bit)
-void popfq(codeblock_t *cb)
-{
- //cb.writeASM("popfq");
-
- // REX.W + 0x9D
- cb_write_bytes(cb, 2, 0x48, 0x9D);
-}
-
-/// push - Push an operand on the stack
-void push(codeblock_t *cb, x86opnd_t opnd)
-{
- assert (opnd.num_bits == 64);
-
- //cb.writeASM("push", opnd);
-
- if (opnd.type == OPND_REG) {
- if (rex_needed(opnd))
- cb_write_rex(cb, false, 0, 0, opnd.as.reg.reg_no);
- cb_write_opcode(cb, 0x50, opnd);
- }
- else if (opnd.type == OPND_MEM) {
- cb_write_rm(cb, false, false, NO_OPND, opnd, 6, 1, 0xFF);
- }
- else {
- assert(false && "unexpected operand type");
- }
-}
-
-/// pushfq - Push the flags register (64-bit)
-void pushfq(codeblock_t *cb)
-{
- //cb.writeASM("pushfq");
- cb_write_byte(cb, 0x9C);
-}
-
-/// ret - Return from call, popping only the return address
-void ret(codeblock_t *cb)
-{
- //cb.writeASM("ret");
- cb_write_byte(cb, 0xC3);
-}
-
-// sal - Shift arithmetic left
-void sal(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_shift(
- cb,
- "sal",
- 0xD1, // opMemOnePref,
- 0xD3, // opMemClPref,
- 0xC1, // opMemImmPref,
- 0x04,
- opnd0,
- opnd1
- );
-}
-
-/// sar - Shift arithmetic right (signed)
-void sar(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_shift(
- cb,
- "sar",
- 0xD1, // opMemOnePref,
- 0xD3, // opMemClPref,
- 0xC1, // opMemImmPref,
- 0x07,
- opnd0,
- opnd1
- );
-}
-
-// shl - Shift logical left
-void shl(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_shift(
- cb,
- "shl",
- 0xD1, // opMemOnePref,
- 0xD3, // opMemClPref,
- 0xC1, // opMemImmPref,
- 0x04,
- opnd0,
- opnd1
- );
-}
-
-/// shr - Shift logical right (unsigned)
-void shr(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_shift(
- cb,
- "shr",
- 0xD1, // opMemOnePref,
- 0xD3, // opMemClPref,
- 0xC1, // opMemImmPref,
- 0x05,
- opnd0,
- opnd1
- );
-}
-
-/// sub - Integer subtraction
-void sub(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_rm_multi(
- cb,
- "sub",
- 0x28, // opMemReg8
- 0x29, // opMemRegPref
- 0x2A, // opRegMem8
- 0x2B, // opRegMemPref
- 0x80, // opMemImm8
- 0x83, // opMemImmSml
- 0x81, // opMemImmLrg
- 0x05, // opExtImm
- opnd0,
- opnd1
- );
-}
-
-/// test - Logical Compare
-void test(codeblock_t *cb, x86opnd_t rm_opnd, x86opnd_t test_opnd)
-{
- assert (rm_opnd.type == OPND_REG || rm_opnd.type == OPND_MEM);
- assert (test_opnd.type == OPND_REG || test_opnd.type == OPND_IMM);
-
- // If the second operand is an immediate
- if (test_opnd.type == OPND_IMM)
- {
- x86opnd_t imm_opnd = test_opnd;
-
- if (imm_opnd.as.imm >= 0)
- {
- assert (unsig_imm_size(imm_opnd.as.unsig_imm) <= 32);
- assert (unsig_imm_size(imm_opnd.as.unsig_imm) <= rm_opnd.num_bits);
-
- // Use the smallest operand size possible
- rm_opnd = resize_opnd(rm_opnd, unsig_imm_size(imm_opnd.as.unsig_imm));
-
- if (rm_opnd.num_bits == 8)
- {
- cb_write_rm(cb, false, false, NO_OPND, rm_opnd, 0x00, 1, 0xF6);
- cb_write_int(cb, imm_opnd.as.imm, rm_opnd.num_bits);
- }
- else
- {
- cb_write_rm(cb, rm_opnd.num_bits == 16, false, NO_OPND, rm_opnd, 0x00, 1, 0xF7);
- cb_write_int(cb, imm_opnd.as.imm, rm_opnd.num_bits);
- }
- }
- else
- {
- // This mode only applies to 64-bit R/M operands with 32-bit signed immediates
- assert (imm_opnd.as.imm < 0);
- assert (sig_imm_size(imm_opnd.as.imm) <= 32);
- assert (rm_opnd.num_bits == 64);
- cb_write_rm(cb, false, true, NO_OPND, rm_opnd, 0x00, 1, 0xF7);
- cb_write_int(cb, imm_opnd.as.imm, 32);
- }
- }
- else
- {
- assert (test_opnd.num_bits == rm_opnd.num_bits);
-
- if (rm_opnd.num_bits == 8)
- {
- cb_write_rm(cb, false, false, test_opnd, rm_opnd, 0xFF, 1, 0x84);
- }
- else
- {
- cb_write_rm(cb, rm_opnd.num_bits == 16, rm_opnd.num_bits == 64, test_opnd, rm_opnd, 0xFF, 1, 0x85);
- }
- }
-}
-
-/// Undefined opcode
-void ud2(codeblock_t *cb)
-{
- cb_write_bytes(cb, 2, 0x0F, 0x0B);
-}
-
-/// xchg - Exchange Register/Memory with Register
-void xchg(codeblock_t *cb, x86opnd_t rm_opnd, x86opnd_t r_opnd)
-{
- assert (rm_opnd.num_bits == 64);
- assert (r_opnd.num_bits == 64);
- assert (rm_opnd.type == OPND_REG);
- assert (r_opnd.type == OPND_REG);
-
- // If we're exchanging with RAX
- if (rm_opnd.type == OPND_REG && rm_opnd.as.reg.reg_no == RAX.as.reg.reg_no)
- {
- // Write the REX byte
- cb_write_rex(cb, rm_opnd.num_bits == 64, 0, 0, r_opnd.as.reg.reg_no);
-
- // Write the opcode and register number
- cb_write_byte(cb, 0x90 + (r_opnd.as.reg.reg_no & 7));
- }
- else
- {
- cb_write_rm(cb, rm_opnd.num_bits == 16, rm_opnd.num_bits == 64, r_opnd, rm_opnd, 0xFF, 1, 0x87);
- }
-}
-
-/// xor - Bitwise exclusive OR
-void xor(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1)
-{
- cb_write_rm_multi(
- cb,
- "xor",
- 0x30, // opMemReg8
- 0x31, // opMemRegPref
- 0x32, // opRegMem8
- 0x33, // opRegMemPref
- 0x80, // opMemImm8
- 0x83, // opMemImmSml
- 0x81, // opMemImmLrg
- 0x06, // opExtImm
- opnd0,
- opnd1
- );
-}
-
-// LOCK - lock prefix for atomic shared memory operations
-void cb_write_lock_prefix(codeblock_t *cb)
-{
- cb_write_byte(cb, 0xF0);
-}
-
-void cb_mark_all_writeable(codeblock_t * cb)
-{
- if (mprotect(cb->mem_block_, cb->mem_size, PROT_READ | PROT_WRITE)) {
- fprintf(stderr, "Couldn't make JIT page (%p) writeable, errno: %s", (void *)cb->mem_block_, strerror(errno));
- abort();
- }
-}
-
-void cb_mark_position_writeable(codeblock_t * cb, uint32_t write_pos)
-{
-#ifdef _WIN32
- uint32_t pagesize = 0x1000; // 4KB
-#else
- uint32_t pagesize = (uint32_t)sysconf(_SC_PAGESIZE);
-#endif
- uint32_t aligned_position = (write_pos / pagesize) * pagesize;
-
- if (cb->current_aligned_write_pos != aligned_position) {
- cb->current_aligned_write_pos = aligned_position;
- void *const page_addr = cb_get_ptr(cb, aligned_position);
- if (mprotect(page_addr, pagesize, PROT_READ | PROT_WRITE)) {
- fprintf(stderr, "Couldn't make JIT page (%p) writeable, errno: %s", page_addr, strerror(errno));
- abort();
- }
- }
-}
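// As a rough example, with 4 KiB pages a write_pos of 0x1234 maps to an
// aligned_position of 0x1000, so only the page containing the current write
// position is flipped to read/write rather than the entire JIT buffer.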
-
-void cb_mark_all_executable(codeblock_t * cb)
-{
- cb->current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE;
- if (mprotect(cb->mem_block_, cb->mem_size, PROT_READ | PROT_EXEC)) {
- fprintf(stderr, "Couldn't make JIT page (%p) executable, errno: %s", (void *)cb->mem_block_, strerror(errno));
- abort();
- }
-}
-
diff --git a/yjit_asm.h b/yjit_asm.h
deleted file mode 100644
index 202f21e796..0000000000
--- a/yjit_asm.h
+++ /dev/null
@@ -1,408 +0,0 @@
-#ifndef YJIT_ASM_H
-#define YJIT_ASM_H 1
-
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-// Maximum number of labels to link
-#define MAX_LABELS 32
-
-// Maximum number of label references
-#define MAX_LABEL_REFS 32
-
-// Reference to an ASM label
-typedef struct LabelRef
-{
- // Position in the code block where the label reference exists
- uint32_t pos;
-
- // Label which this refers to
- uint32_t label_idx;
-
-} labelref_t;
-
-// Block of executable memory into which instructions can be written
-typedef struct CodeBlock
-{
- // Memory block
- // Users are advised not to use this directly.
- uint8_t *mem_block_;
-
- // Memory block size
- uint32_t mem_size;
-
- // Current writing position
- uint32_t write_pos;
-
- // Table of registered label addresses
- uint32_t label_addrs[MAX_LABELS];
-
- // Table of registered label names
- // Note that these should be constant strings only
- const char *label_names[MAX_LABELS];
-
- // References to labels
- labelref_t label_refs[MAX_LABEL_REFS];
-
- // Number of labels registered
- uint32_t num_labels;
-
- // Number of references to labels
- uint32_t num_refs;
-
-
- // Keep track of the current aligned write position.
- // Used for changing protection when writing to the JIT buffer
- uint32_t current_aligned_write_pos;
-
- // Set if the assembler is unable to output some instructions,
- // for example, when there is not enough space or when a jump
- // target is too far away.
- bool dropped_bytes;
-
- // Flag to enable or disable comments
- bool has_asm;
-
-
-} codeblock_t;
-
-// 1 is not page-aligned, so this sentinel never matches an aligned write position
-#define ALIGNED_WRITE_POSITION_NONE 1
-
-enum OpndType
-{
- OPND_NONE,
- OPND_REG,
- OPND_IMM,
- OPND_MEM
-};
-
-enum RegType
-{
- REG_GP,
- REG_FP,
- REG_XMM,
- REG_IP
-};
-
-typedef struct X86Reg
-{
- // Register type
- uint8_t reg_type;
-
- // Register index number
- uint8_t reg_no;
-
-} x86reg_t;
-
-typedef struct X86Mem
-{
- /// Base register number
- uint8_t base_reg_no;
-
- /// Index register number
- uint8_t idx_reg_no;
-
- /// SIB scale exponent value (power of two, two bits)
- uint8_t scale_exp;
-
- /// Has index register flag
- bool has_idx;
-
- // TODO: should this be here, or should we have an extra operand type?
- /// IP-relative addressing flag
- bool is_iprel;
-
- /// Constant displacement from the base, not scaled
- int32_t disp;
-
-} x86mem_t;
-
-typedef struct X86Opnd
-{
- // Operand type
- uint8_t type;
-
- // Size in bits
- uint16_t num_bits;
-
- union
- {
- // Register operand
- x86reg_t reg;
-
- // Memory operand
- x86mem_t mem;
-
- // Signed immediate value
- int64_t imm;
-
- // Unsigned immediate value
- uint64_t unsig_imm;
- } as;
-
-} x86opnd_t;
-
-// Dummy none/null operand
-static const x86opnd_t NO_OPND = { OPND_NONE, 0, .as.imm = 0 };
-
-// Instruction pointer
-static const x86opnd_t RIP = { OPND_REG, 64, .as.reg = { REG_IP, 5 }};
-
-// 64-bit GP registers
-static const x86opnd_t RAX = { OPND_REG, 64, .as.reg = { REG_GP, 0 }};
-static const x86opnd_t RCX = { OPND_REG, 64, .as.reg = { REG_GP, 1 }};
-static const x86opnd_t RDX = { OPND_REG, 64, .as.reg = { REG_GP, 2 }};
-static const x86opnd_t RBX = { OPND_REG, 64, .as.reg = { REG_GP, 3 }};
-static const x86opnd_t RSP = { OPND_REG, 64, .as.reg = { REG_GP, 4 }};
-static const x86opnd_t RBP = { OPND_REG, 64, .as.reg = { REG_GP, 5 }};
-static const x86opnd_t RSI = { OPND_REG, 64, .as.reg = { REG_GP, 6 }};
-static const x86opnd_t RDI = { OPND_REG, 64, .as.reg = { REG_GP, 7 }};
-static const x86opnd_t R8 = { OPND_REG, 64, .as.reg = { REG_GP, 8 }};
-static const x86opnd_t R9 = { OPND_REG, 64, .as.reg = { REG_GP, 9 }};
-static const x86opnd_t R10 = { OPND_REG, 64, .as.reg = { REG_GP, 10 }};
-static const x86opnd_t R11 = { OPND_REG, 64, .as.reg = { REG_GP, 11 }};
-static const x86opnd_t R12 = { OPND_REG, 64, .as.reg = { REG_GP, 12 }};
-static const x86opnd_t R13 = { OPND_REG, 64, .as.reg = { REG_GP, 13 }};
-static const x86opnd_t R14 = { OPND_REG, 64, .as.reg = { REG_GP, 14 }};
-static const x86opnd_t R15 = { OPND_REG, 64, .as.reg = { REG_GP, 15 }};
-
-// 32-bit GP registers
-static const x86opnd_t EAX = { OPND_REG, 32, .as.reg = { REG_GP, 0 }};
-static const x86opnd_t ECX = { OPND_REG, 32, .as.reg = { REG_GP, 1 }};
-static const x86opnd_t EDX = { OPND_REG, 32, .as.reg = { REG_GP, 2 }};
-static const x86opnd_t EBX = { OPND_REG, 32, .as.reg = { REG_GP, 3 }};
-static const x86opnd_t ESP = { OPND_REG, 32, .as.reg = { REG_GP, 4 }};
-static const x86opnd_t EBP = { OPND_REG, 32, .as.reg = { REG_GP, 5 }};
-static const x86opnd_t ESI = { OPND_REG, 32, .as.reg = { REG_GP, 6 }};
-static const x86opnd_t EDI = { OPND_REG, 32, .as.reg = { REG_GP, 7 }};
-static const x86opnd_t R8D = { OPND_REG, 32, .as.reg = { REG_GP, 8 }};
-static const x86opnd_t R9D = { OPND_REG, 32, .as.reg = { REG_GP, 9 }};
-static const x86opnd_t R10D = { OPND_REG, 32, .as.reg = { REG_GP, 10 }};
-static const x86opnd_t R11D = { OPND_REG, 32, .as.reg = { REG_GP, 11 }};
-static const x86opnd_t R12D = { OPND_REG, 32, .as.reg = { REG_GP, 12 }};
-static const x86opnd_t R13D = { OPND_REG, 32, .as.reg = { REG_GP, 13 }};
-static const x86opnd_t R14D = { OPND_REG, 32, .as.reg = { REG_GP, 14 }};
-static const x86opnd_t R15D = { OPND_REG, 32, .as.reg = { REG_GP, 15 }};
-
-// 16-bit GP registers
-static const x86opnd_t AX = { OPND_REG, 16, .as.reg = { REG_GP, 0 }};
-static const x86opnd_t CX = { OPND_REG, 16, .as.reg = { REG_GP, 1 }};
-static const x86opnd_t DX = { OPND_REG, 16, .as.reg = { REG_GP, 2 }};
-static const x86opnd_t BX = { OPND_REG, 16, .as.reg = { REG_GP, 3 }};
-static const x86opnd_t SP = { OPND_REG, 16, .as.reg = { REG_GP, 4 }};
-static const x86opnd_t BP = { OPND_REG, 16, .as.reg = { REG_GP, 5 }};
-static const x86opnd_t SI = { OPND_REG, 16, .as.reg = { REG_GP, 6 }};
-static const x86opnd_t DI = { OPND_REG, 16, .as.reg = { REG_GP, 7 }};
-static const x86opnd_t R8W = { OPND_REG, 16, .as.reg = { REG_GP, 8 }};
-static const x86opnd_t R9W = { OPND_REG, 16, .as.reg = { REG_GP, 9 }};
-static const x86opnd_t R10W = { OPND_REG, 16, .as.reg = { REG_GP, 10 }};
-static const x86opnd_t R11W = { OPND_REG, 16, .as.reg = { REG_GP, 11 }};
-static const x86opnd_t R12W = { OPND_REG, 16, .as.reg = { REG_GP, 12 }};
-static const x86opnd_t R13W = { OPND_REG, 16, .as.reg = { REG_GP, 13 }};
-static const x86opnd_t R14W = { OPND_REG, 16, .as.reg = { REG_GP, 14 }};
-static const x86opnd_t R15W = { OPND_REG, 16, .as.reg = { REG_GP, 15 }};
-
-// 8-bit GP registers
-static const x86opnd_t AL = { OPND_REG, 8, .as.reg = { REG_GP, 0 }};
-static const x86opnd_t CL = { OPND_REG, 8, .as.reg = { REG_GP, 1 }};
-static const x86opnd_t DL = { OPND_REG, 8, .as.reg = { REG_GP, 2 }};
-static const x86opnd_t BL = { OPND_REG, 8, .as.reg = { REG_GP, 3 }};
-static const x86opnd_t SPL = { OPND_REG, 8, .as.reg = { REG_GP, 4 }};
-static const x86opnd_t BPL = { OPND_REG, 8, .as.reg = { REG_GP, 5 }};
-static const x86opnd_t SIL = { OPND_REG, 8, .as.reg = { REG_GP, 6 }};
-static const x86opnd_t DIL = { OPND_REG, 8, .as.reg = { REG_GP, 7 }};
-static const x86opnd_t R8B = { OPND_REG, 8, .as.reg = { REG_GP, 8 }};
-static const x86opnd_t R9B = { OPND_REG, 8, .as.reg = { REG_GP, 9 }};
-static const x86opnd_t R10B = { OPND_REG, 8, .as.reg = { REG_GP, 10 }};
-static const x86opnd_t R11B = { OPND_REG, 8, .as.reg = { REG_GP, 11 }};
-static const x86opnd_t R12B = { OPND_REG, 8, .as.reg = { REG_GP, 12 }};
-static const x86opnd_t R13B = { OPND_REG, 8, .as.reg = { REG_GP, 13 }};
-static const x86opnd_t R14B = { OPND_REG, 8, .as.reg = { REG_GP, 14 }};
-static const x86opnd_t R15B = { OPND_REG, 8, .as.reg = { REG_GP, 15 }};
-
-// C argument registers
-#define NUM_C_ARG_REGS 6
-#define C_ARG_REGS ( (x86opnd_t[]){ RDI, RSI, RDX, RCX, R8, R9 } )
-
-// Compute the number of bits needed to store a signed or unsigned value
-static inline uint32_t sig_imm_size(int64_t imm);
-static inline uint32_t unsig_imm_size(uint64_t imm);
-
-// Memory operand with base register and displacement/offset
-static inline x86opnd_t mem_opnd(uint32_t num_bits, x86opnd_t base_reg, int32_t disp);
-
-// Scale-index-base memory operand
-static inline x86opnd_t mem_opnd_sib(uint32_t num_bits, x86opnd_t base_reg, x86opnd_t index_reg, int32_t scale, int32_t disp);
-
-// Immediate number operand
-static inline x86opnd_t imm_opnd(int64_t val);
-
-// Constant pointer operand
-static inline x86opnd_t const_ptr_opnd(const void *ptr);
-
-// Struct member operand
-#define member_opnd(base_reg, struct_type, member_name) mem_opnd( \
- 8 * sizeof(((struct_type*)0)->member_name), \
- base_reg, \
- offsetof(struct_type, member_name) \
-)
-
-// Struct member operand with an array index
-#define member_opnd_idx(base_reg, struct_type, member_name, idx) mem_opnd( \
- 8 * sizeof(((struct_type*)0)->member_name[0]), \
- base_reg, \
- (offsetof(struct_type, member_name) + \
- sizeof(((struct_type*)0)->member_name[0]) * idx) \
-)
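// Usage sketch (illustrative; REG_CFP and REG_SP are register aliases defined
// elsewhere in the YJIT sources): member_opnd(REG_CFP, rb_control_frame_t, sp)
// builds a memory operand whose width matches sizeof(cfp->sp) and whose
// displacement is offsetof(rb_control_frame_t, sp), which is how jit_save_sp()
// later in this patch writes the stack pointer back to the CFP:
//     mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);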
-
-// Allocate executable memory
-static uint8_t *alloc_exec_mem(uint32_t mem_size);
-
-// Code block functions
-static inline void cb_init(codeblock_t *cb, uint8_t *mem_block, uint32_t mem_size);
-static inline void cb_align_pos(codeblock_t *cb, uint32_t multiple);
-static inline void cb_set_pos(codeblock_t *cb, uint32_t pos);
-static inline void cb_set_write_ptr(codeblock_t *cb, uint8_t *code_ptr);
-static inline uint8_t *cb_get_ptr(const codeblock_t *cb, uint32_t index);
-static inline uint8_t *cb_get_write_ptr(const codeblock_t *cb);
-static inline void cb_write_byte(codeblock_t *cb, uint8_t byte);
-static inline void cb_write_bytes(codeblock_t *cb, uint32_t num_bytes, ...);
-static inline void cb_write_int(codeblock_t *cb, uint64_t val, uint32_t num_bits);
-static inline uint32_t cb_new_label(codeblock_t *cb, const char *name);
-static inline void cb_write_label(codeblock_t *cb, uint32_t label_idx);
-static inline void cb_label_ref(codeblock_t *cb, uint32_t label_idx);
-static inline void cb_link_labels(codeblock_t *cb);
-static inline void cb_mark_all_writeable(codeblock_t *cb);
-static inline void cb_mark_position_writeable(codeblock_t *cb, uint32_t write_pos);
-static inline void cb_mark_all_executable(codeblock_t *cb);
-
-// Encode individual instructions into a code block
-static inline void add(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void and(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void call_ptr(codeblock_t *cb, x86opnd_t scratch_reg, uint8_t *dst_ptr);
-static inline void call_label(codeblock_t *cb, uint32_t label_idx);
-static inline void call(codeblock_t *cb, x86opnd_t opnd);
-static inline void cmova(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovae(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovb(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovbe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovc(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmove(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovg(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovge(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovl(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovle(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovna(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnae(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnb(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnbe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnc(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovne(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovng(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnge(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnl(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnle(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovno(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnp(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovns(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovnz(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovo(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovp(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovpe(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovpo(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovs(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmovz(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void cmp(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void cdq(codeblock_t *cb);
-static inline void cqo(codeblock_t *cb);
-static inline void int3(codeblock_t *cb);
-static inline void ja_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jae_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jb_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jbe_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jc_label(codeblock_t *cb, uint32_t label_idx);
-static inline void je_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jg_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jge_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jl_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jle_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jna_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnae_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnb_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnbe_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnc_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jne_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jng_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnge_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnl_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnle_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jno_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnp_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jns_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jnz_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jo_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jp_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jpe_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jpo_label(codeblock_t *cb, uint32_t label_idx);
-static inline void js_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jz_label(codeblock_t *cb, uint32_t label_idx);
-static inline void ja_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jae_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jb_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jbe_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jc_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void je_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jg_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jge_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jl_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jle_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jna_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnae_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnb_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnbe_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnc_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jne_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jng_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnge_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnl_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnle_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jno_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnp_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jns_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jnz_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jo_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jp_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jpe_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jpo_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void js_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jz_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jmp_label(codeblock_t *cb, uint32_t label_idx);
-static inline void jmp_ptr(codeblock_t *cb, uint8_t *ptr);
-static inline void jmp_rm(codeblock_t *cb, x86opnd_t opnd);
-static inline void jmp32(codeblock_t *cb, int32_t offset);
-static inline void lea(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void mov(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void movsx(codeblock_t *cb, x86opnd_t dst, x86opnd_t src);
-static inline void neg(codeblock_t *cb, x86opnd_t opnd);
-static inline void nop(codeblock_t *cb, uint32_t length);
-static inline void not(codeblock_t *cb, x86opnd_t opnd);
-static inline void or(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void pop(codeblock_t *cb, x86opnd_t reg);
-static inline void popfq(codeblock_t *cb);
-static inline void push(codeblock_t *cb, x86opnd_t opnd);
-static inline void pushfq(codeblock_t *cb);
-static inline void ret(codeblock_t *cb);
-static inline void sal(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void sar(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void shl(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void shr(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void sub(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void test(codeblock_t *cb, x86opnd_t rm_opnd, x86opnd_t test_opnd);
-static inline void ud2(codeblock_t *cb);
-static inline void xchg(codeblock_t *cb, x86opnd_t rm_opnd, x86opnd_t r_opnd);
-static inline void xor(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-static inline void cb_write_lock_prefix(codeblock_t *cb);
-
-#endif
diff --git a/yjit_codegen.c b/yjit_codegen.c
deleted file mode 100644
index cd46685b57..0000000000
--- a/yjit_codegen.c
+++ /dev/null
@@ -1,5121 +0,0 @@
-// This file is a fragment of the yjit.o compilation unit. See yjit.c.
-#include "internal.h"
-#include "gc.h"
-#include "internal/compile.h"
-#include "internal/class.h"
-#include "internal/hash.h"
-#include "internal/object.h"
-#include "internal/sanitizers.h"
-#include "internal/string.h"
-#include "internal/struct.h"
-#include "internal/variable.h"
-#include "internal/re.h"
-#include "probes.h"
-#include "probes_helper.h"
-#include "yjit.h"
-#include "yjit_iface.h"
-#include "yjit_core.h"
-#include "yjit_codegen.h"
-#include "yjit_asm.h"
-
-// Map from YARV opcodes to code generation functions
-static codegen_fn gen_fns[VM_INSTRUCTION_SIZE] = { NULL };
-
-// Map from method entries to code generation functions
-static st_table *yjit_method_codegen_table = NULL;
-
-// Code for exiting back to the interpreter from the leave instruction
-static void *leave_exit_code;
-
-// Code for the full logic of returning from a C method and exiting to the interpreter
-static uint32_t outline_full_cfunc_return_pos;
-
-// For implementing global code invalidation
-struct codepage_patch {
- uint32_t inline_patch_pos;
- uint32_t outlined_target_pos;
-};
-
-typedef rb_darray(struct codepage_patch) patch_array_t;
-
-static patch_array_t global_inval_patches = NULL;
-
-// Print the current source location for debugging purposes
-RBIMPL_ATTR_MAYBE_UNUSED()
-static void
-jit_print_loc(jitstate_t *jit, const char *msg)
-{
- char *ptr;
- long len;
- VALUE path = rb_iseq_path(jit->iseq);
- RSTRING_GETMEM(path, ptr, len);
- fprintf(stderr, "%s %.*s:%u\n", msg, (int)len, ptr, rb_iseq_line_no(jit->iseq, jit->insn_idx));
-}
-
-// Dump an object for debugging purposes
-RBIMPL_ATTR_MAYBE_UNUSED()
-static void
-jit_obj_info_dump(codeblock_t *cb, x86opnd_t opnd) {
- push_regs(cb);
- mov(cb, C_ARG_REGS[0], opnd);
- call_ptr(cb, REG0, (void *)rb_obj_info_dump);
- pop_regs(cb);
-}
-
-// Get the current instruction's opcode
-static int
-jit_get_opcode(jitstate_t *jit)
-{
- return jit->opcode;
-}
-
-// Get the index of the next instruction
-static uint32_t
-jit_next_insn_idx(jitstate_t *jit)
-{
- return jit->insn_idx + insn_len(jit_get_opcode(jit));
-}
-
-// Get an instruction argument by index
-static VALUE
-jit_get_arg(jitstate_t *jit, size_t arg_idx)
-{
- RUBY_ASSERT(arg_idx + 1 < (size_t)insn_len(jit_get_opcode(jit)));
- return *(jit->pc + arg_idx + 1);
-}
-
-// Load a VALUE into a register and keep track of the reference if it is on the GC heap.
-static void
-jit_mov_gc_ptr(jitstate_t *jit, codeblock_t *cb, x86opnd_t reg, VALUE ptr)
-{
- RUBY_ASSERT(reg.type == OPND_REG && reg.num_bits == 64);
-
- // Load the pointer constant into the specified register
- mov(cb, reg, const_ptr_opnd((void*)ptr));
-
- // The pointer immediate is encoded as the last part of the mov written out
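- // (illustrative note: an x86-64 mov of a 64-bit immediate into a register ends
- // with the raw 8-byte immediate, so the VALUE occupies the final sizeof(VALUE)
- // bytes just emitted before write_pos)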
- uint32_t ptr_offset = cb->write_pos - sizeof(VALUE);
-
- if (!SPECIAL_CONST_P(ptr)) {
- rb_darray_append(&jit->block->gc_object_offsets, ptr_offset);
- }
-}
-
-// Check if we are compiling the instruction at the stub PC
-// Meaning we are compiling the instruction that will execute next
-static bool
-jit_at_current_insn(jitstate_t *jit)
-{
- const VALUE *ec_pc = jit->ec->cfp->pc;
- return (ec_pc == jit->pc);
-}
-
-// Peek at the nth topmost value on the Ruby stack.
-// Returns the topmost value when n == 0.
-static VALUE
-jit_peek_at_stack(jitstate_t *jit, ctx_t *ctx, int n)
-{
- RUBY_ASSERT(jit_at_current_insn(jit));
-
- // Note: this does not account for ctx->sp_offset because
- // this is only available when hitting a stub, and while
- // hitting a stub, cfp->sp needs to be up to date in case
- // codegen functions trigger GC. See :stub-sp-flush:.
- VALUE *sp = jit->ec->cfp->sp;
-
- return *(sp - 1 - n);
-}
-
-static VALUE
-jit_peek_at_self(jitstate_t *jit, ctx_t *ctx)
-{
- return jit->ec->cfp->self;
-}
-
-RBIMPL_ATTR_MAYBE_UNUSED()
-static VALUE
-jit_peek_at_local(jitstate_t *jit, ctx_t *ctx, int n)
-{
- RUBY_ASSERT(jit_at_current_insn(jit));
-
- int32_t local_table_size = ISEQ_BODY(jit->iseq)->local_table_size;
- RUBY_ASSERT(n < (int)ISEQ_BODY(jit->iseq)->local_table_size);
-
- const VALUE *ep = jit->ec->cfp->ep;
- return ep[-VM_ENV_DATA_SIZE - local_table_size + n + 1];
-}
-
-// Save the incremented PC on the CFP
-// This is necessary when callees can raise or allocate
-static void
-jit_save_pc(jitstate_t *jit, x86opnd_t scratch_reg)
-{
- codeblock_t *cb = jit->cb;
- mov(cb, scratch_reg, const_ptr_opnd(jit->pc + insn_len(jit->opcode)));
- mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), scratch_reg);
-}
-
-// Save the current SP on the CFP
-// This realigns the interpreter SP with the JIT SP
-// Note: this will change the current value of REG_SP,
-// which could invalidate memory operands
-static void
-jit_save_sp(jitstate_t *jit, ctx_t *ctx)
-{
- if (ctx->sp_offset != 0) {
- x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
- codeblock_t *cb = jit->cb;
- lea(cb, REG_SP, stack_pointer);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
- ctx->sp_offset = 0;
- }
-}
-
-// jit_save_pc() + jit_save_sp(). Should be used before calling a routine that
-// could:
-// - Perform GC allocation
-// - Take the VM lock through RB_VM_LOCK_ENTER()
-// - Perform Ruby method call
-static void
-jit_prepare_routine_call(jitstate_t *jit, ctx_t *ctx, x86opnd_t scratch_reg)
-{
- jit->record_boundary_patch_point = true;
- jit_save_pc(jit, scratch_reg);
- jit_save_sp(jit, ctx);
-
- // In case the routine calls Ruby methods, it can set local variables
- // through Kernel#binding and other means.
- ctx_clear_local_types(ctx);
-}
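-
-// Typical usage (illustrative; see e.g. gen_newarray() below): call
-// jit_prepare_routine_call(jit, ctx, REG0) right before emitting a call_ptr()
-// to any C function that can allocate, raise, or call back into Ruby.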
-
-// Record the current codeblock write position for rewriting into a jump into
-// the outlined block later. Used to implement global code invalidation.
-static void
-record_global_inval_patch(const codeblock_t *cb, uint32_t outline_block_target_pos)
-{
- struct codepage_patch patch_point = { cb->write_pos, outline_block_target_pos };
- rb_darray_append(&global_inval_patches, patch_point);
-}
-
-static bool jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit);
-
-#if YJIT_STATS
-
-// Add a comment at the current position in the code block
-static void
-_add_comment(codeblock_t *cb, const char *comment_str)
-{
- // We can't add comments to the outlined code block
- if (cb == ocb)
- return;
-
- // Avoid adding duplicate comment strings (can happen due to deferred codegen)
- size_t num_comments = rb_darray_size(yjit_code_comments);
- if (num_comments > 0) {
- struct yjit_comment last_comment = rb_darray_get(yjit_code_comments, num_comments - 1);
- if (last_comment.offset == cb->write_pos && strcmp(last_comment.comment, comment_str) == 0) {
- return;
- }
- }
-
- struct yjit_comment new_comment = (struct yjit_comment){ cb->write_pos, comment_str };
- rb_darray_append(&yjit_code_comments, new_comment);
-}
-
-// Comments for generated machine code
-#define ADD_COMMENT(cb, comment) _add_comment((cb), (comment))
-
-// Verify the ctx's types and mappings against the compile-time stack, self,
-// and locals.
-static void
-verify_ctx(jitstate_t *jit, ctx_t *ctx)
-{
- // Only able to check types when at current insn
- RUBY_ASSERT(jit_at_current_insn(jit));
-
- VALUE self_val = jit_peek_at_self(jit, ctx);
- if (type_diff(yjit_type_of_value(self_val), ctx->self_type) == INT_MAX) {
- rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of self: %s", yjit_type_name(ctx->self_type), rb_obj_info(self_val));
- }
-
- for (int i = 0; i < ctx->stack_size && i < MAX_TEMP_TYPES; i++) {
- temp_type_mapping_t learned = ctx_get_opnd_mapping(ctx, OPND_STACK(i));
- VALUE val = jit_peek_at_stack(jit, ctx, i);
- val_type_t detected = yjit_type_of_value(val);
-
- if (learned.mapping.kind == TEMP_SELF) {
- if (self_val != val) {
- rb_bug("verify_ctx: stack value was mapped to self, but values did not match\n"
- " stack: %s\n"
- " self: %s",
- rb_obj_info(val),
- rb_obj_info(self_val));
- }
- }
-
- if (learned.mapping.kind == TEMP_LOCAL) {
- int local_idx = learned.mapping.idx;
- VALUE local_val = jit_peek_at_local(jit, ctx, local_idx);
- if (local_val != val) {
- rb_bug("verify_ctx: stack value was mapped to local, but values did not match\n"
- " stack: %s\n"
- " local %i: %s",
- rb_obj_info(val),
- local_idx,
- rb_obj_info(local_val));
- }
- }
-
- if (type_diff(detected, learned.type) == INT_MAX) {
- rb_bug("verify_ctx: ctx type (%s) incompatible with actual value on stack: %s", yjit_type_name(learned.type), rb_obj_info(val));
- }
- }
-
- int32_t local_table_size = ISEQ_BODY(jit->iseq)->local_table_size;
- for (int i = 0; i < local_table_size && i < MAX_TEMP_TYPES; i++) {
- val_type_t learned = ctx->local_types[i];
- VALUE val = jit_peek_at_local(jit, ctx, i);
- val_type_t detected = yjit_type_of_value(val);
-
- if (type_diff(detected, learned) == INT_MAX) {
- rb_bug("verify_ctx: ctx type (%s) incompatible with actual value of local: %s", yjit_type_name(learned), rb_obj_info(val));
- }
- }
-}
-
-#else
-
-#define ADD_COMMENT(cb, comment) ((void)0)
-#define verify_ctx(jit, ctx) ((void)0)
-
-#endif // if YJIT_STATS
-
-#if YJIT_STATS
-
-// Increment a profiling counter with counter_name
-#define GEN_COUNTER_INC(cb, counter_name) _gen_counter_inc(cb, &(yjit_runtime_counters . counter_name))
-static void
-_gen_counter_inc(codeblock_t *cb, int64_t *counter)
-{
- if (!rb_yjit_opts.gen_stats) return;
-
- // Use REG1 because there might be return value in REG0
- mov(cb, REG1, const_ptr_opnd(counter));
- cb_write_lock_prefix(cb); // for ractors.
- add(cb, mem_opnd(64, REG1, 0), imm_opnd(1));
-}
-
-// Increment a counter then take an existing side exit.
-#define COUNTED_EXIT(jit, side_exit, counter_name) _counted_side_exit(jit, side_exit, &(yjit_runtime_counters . counter_name))
-static uint8_t *
-_counted_side_exit(jitstate_t* jit, uint8_t *existing_side_exit, int64_t *counter)
-{
- if (!rb_yjit_opts.gen_stats) return existing_side_exit;
-
- uint8_t *start = cb_get_ptr(jit->ocb, jit->ocb->write_pos);
- _gen_counter_inc(jit->ocb, counter);
- jmp_ptr(jit->ocb, existing_side_exit);
- return start;
-}
-
-#else
-
-#define GEN_COUNTER_INC(cb, counter_name) ((void)0)
-#define COUNTED_EXIT(jit, side_exit, counter_name) side_exit
-
-#endif // if YJIT_STATS
-
-// Generate an exit to return to the interpreter
-static uint32_t
-yjit_gen_exit(VALUE *exit_pc, ctx_t *ctx, codeblock_t *cb)
-{
- const uint32_t code_pos = cb->write_pos;
-
- ADD_COMMENT(cb, "exit to interpreter");
-
- // Generate the code to exit to the interpreter
- // Write the adjusted SP back into the CFP
- if (ctx->sp_offset != 0) {
- x86opnd_t stack_pointer = ctx_sp_opnd(ctx, 0);
- lea(cb, REG_SP, stack_pointer);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG_SP);
- }
-
- // Update CFP->PC
- mov(cb, RAX, const_ptr_opnd(exit_pc));
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), RAX);
-
- // Accumulate stats about interpreter exits
-#if YJIT_STATS
- if (rb_yjit_opts.gen_stats) {
- mov(cb, RDI, const_ptr_opnd(exit_pc));
- call_ptr(cb, RSI, (void *)&yjit_count_side_exit_op);
- }
-#endif
-
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
-
- mov(cb, RAX, imm_opnd(Qundef));
- ret(cb);
-
- return code_pos;
-}
-
-// Generate a continuation for gen_leave() that exits to the interpreter at REG_CFP->pc.
-static uint8_t *
-yjit_gen_leave_exit(codeblock_t *cb)
-{
- uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
-
- // Note, gen_leave() fully reconstructs interpreter state and leaves the
- // return value in RAX before coming here.
-
- // Every exit to the interpreter should be counted
- GEN_COUNTER_INC(cb, leave_interp_return);
-
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
-
- ret(cb);
-
- return code_ptr;
-}
-
-// Fill code_for_exit_from_stub. This is used by branch_stub_hit() to exit
-// to the interpreter when it cannot service a stub by generating new code.
-// Before coming here, branch_stub_hit() takes care of fully reconstructing
-// interpreter state.
-static void
-gen_code_for_exit_from_stub(void)
-{
- codeblock_t *cb = ocb;
- code_for_exit_from_stub = cb_get_ptr(cb, cb->write_pos);
-
- GEN_COUNTER_INC(cb, exit_from_branch_stub);
-
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
-
- mov(cb, RAX, imm_opnd(Qundef));
- ret(cb);
-}
-
-// :side-exit:
-// Get an exit for the current instruction in the outlined block. The code
-// for each instruction often begins with several guards before proceeding
-// to do work. When guards fail, an option we have is to exit to the
-// interpreter at an instruction boundary. The piece of code that takes
-// care of reconstructing interpreter state and exiting out of generated
-// code is called the side exit.
-//
-// No guards change the logic for reconstructing interpreter state at the
-// moment, so there is one unique side exit for each context. Note that
-// it's incorrect to jump to the side exit after any ctx stack push/pop operations
-// since they change the logic required for reconstructing interpreter state.
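-// For example, gen_expandarray() below requests a side exit up front and jumps
-// to it (via COUNTED_EXIT) when the value on the stack is not actually an array.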
-static uint8_t *
-yjit_side_exit(jitstate_t *jit, ctx_t *ctx)
-{
- if (!jit->side_exit_for_pc) {
- codeblock_t *ocb = jit->ocb;
- uint32_t pos = yjit_gen_exit(jit->pc, ctx, ocb);
- jit->side_exit_for_pc = cb_get_ptr(ocb, pos);
- }
-
- return jit->side_exit_for_pc;
-}
-
-// Ensure that there is an exit for the start of the block being compiled.
-// Block invalidation uses this exit.
-static void
-jit_ensure_block_entry_exit(jitstate_t *jit)
-{
- block_t *block = jit->block;
- if (block->entry_exit) return;
-
- if (jit->insn_idx == block->blockid.idx) {
- // We are compiling the first instruction in the block.
- // Generate the exit with the cache in jitstate.
- block->entry_exit = yjit_side_exit(jit, &block->ctx);
- }
- else {
- VALUE *pc = yjit_iseq_pc_at_idx(block->blockid.iseq, block->blockid.idx);
- uint32_t pos = yjit_gen_exit(pc, &block->ctx, ocb);
- block->entry_exit = cb_get_ptr(ocb, pos);
- }
-}
-
-// Generate a runtime guard that ensures the PC is at the start of the iseq,
-// otherwise take a side exit. This is to handle the situation of optional
-// parameters. When a function with optional parameters is called, the entry
-// PC for the method isn't necessarily 0, but we always generate code that
-// assumes the entry point is 0.
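-// As an illustration, for a method like `def foo(a, b = some_default)` the
-// interpreter enters the iseq past the default-argument code when both
-// arguments are supplied, so the entry PC is non-zero in that case.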
-static void
-yjit_pc_guard(codeblock_t *cb, const rb_iseq_t *iseq)
-{
- RUBY_ASSERT(cb != NULL);
-
- mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, pc));
- mov(cb, REG1, const_ptr_opnd(ISEQ_BODY(iseq)->iseq_encoded));
- xor(cb, REG0, REG1);
-
- // xor should impact ZF, so we can jz here
- uint32_t pc_is_zero = cb_new_label(cb, "pc_is_zero");
- jz_label(cb, pc_is_zero);
-
- // We're not starting at the first PC, so we need to exit.
- GEN_COUNTER_INC(cb, leave_start_pc_non_zero);
-
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
-
- mov(cb, RAX, imm_opnd(Qundef));
- ret(cb);
-
- // PC should be at the beginning
- cb_write_label(cb, pc_is_zero);
- cb_link_labels(cb);
-}
-
-// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
-// like the interpreter. When tracing for c_return is enabled, we patch the code after
-// the C method return to call into this to fire the event.
-static void
-full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
-{
- rb_control_frame_t *cfp = ec->cfp;
- RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
- const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);
-
- RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
- RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);
-
- // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this
-
- // Pop the C func's frame and fire the c_return TracePoint event
- // Note that this is the same order as vm_call_cfunc_with_frame().
- rb_vm_pop_frame(ec);
- EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
- // Note, this deviates from the interpreter in that users need to enable
- // a c_return TracePoint for this DTrace hook to work. A reasonable change
- // since the Ruby return event works this way as well.
- RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);
-
- // Push return value into the caller's stack. We know that it's a frame that
- // uses cfp->sp because we are patching a call done with gen_send_cfunc().
- ec->cfp->sp[0] = return_value;
- ec->cfp->sp++;
-}
-
-// Landing code for when c_return tracing is enabled. See full_cfunc_return().
-static void
-gen_full_cfunc_return(void)
-{
- codeblock_t *cb = ocb;
- outline_full_cfunc_return_pos = ocb->write_pos;
-
- // This chunk of code expects REG_EC to be filled properly and
- // RAX to contain the return value of the C method.
-
- // Call full_cfunc_return()
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], RAX);
- call_ptr(cb, REG0, (void *)full_cfunc_return);
-
- // Count the exit
- GEN_COUNTER_INC(cb, traced_cfunc_return);
-
- // Return to the interpreter
- pop(cb, REG_SP);
- pop(cb, REG_EC);
- pop(cb, REG_CFP);
-
- mov(cb, RAX, imm_opnd(Qundef));
- ret(cb);
-}
-
-/*
-Compile an interpreter entry block to be inserted into an iseq
-Returns `NULL` if compilation fails.
-*/
-static uint8_t *
-yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq)
-{
- RUBY_ASSERT(cb != NULL);
-
- enum { MAX_PROLOGUE_SIZE = 1024 };
-
- // Check if we have enough executable memory
- if (cb->write_pos + MAX_PROLOGUE_SIZE >= cb->mem_size) {
- return NULL;
- }
-
- const uint32_t old_write_pos = cb->write_pos;
-
- // Align the current write position to cache line boundaries
- cb_align_pos(cb, 64);
-
- uint8_t *code_ptr = cb_get_ptr(cb, cb->write_pos);
- ADD_COMMENT(cb, "yjit entry");
-
- push(cb, REG_CFP);
- push(cb, REG_EC);
- push(cb, REG_SP);
-
- // We are passed EC and CFP
- mov(cb, REG_EC, C_ARG_REGS[0]);
- mov(cb, REG_CFP, C_ARG_REGS[1]);
-
- // Load the current SP from the CFP into REG_SP
- mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
-
- // Setup cfp->jit_return
- // TODO: this could use an IP relative LEA instead of an 8 byte immediate
- mov(cb, REG0, const_ptr_opnd(leave_exit_code));
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
-
- // We're compiling iseqs that we *expect* to start at `insn_idx`. But in
- // the case of optional parameters, the interpreter can set the pc to a
- // different location depending on the optional parameters. If an iseq
- // has optional parameters, we'll add a runtime check that the PC we've
- // compiled for is the same PC that the interpreter wants us to run with.
- // If they don't match, then we'll take a side exit.
- if (ISEQ_BODY(iseq)->param.flags.has_opt) {
- yjit_pc_guard(cb, iseq);
- }
-
- // Verify MAX_PROLOGUE_SIZE
- RUBY_ASSERT_ALWAYS(cb->write_pos - old_write_pos <= MAX_PROLOGUE_SIZE);
-
- return code_ptr;
-}
-
-// Generate code to check for interrupts and take a side-exit.
-// Warning: this function clobbers REG0
-static void
-yjit_check_ints(codeblock_t *cb, uint8_t *side_exit)
-{
- // Check for interrupts
- // see RUBY_VM_CHECK_INTS(ec) macro
- ADD_COMMENT(cb, "RUBY_VM_CHECK_INTS(ec)");
- mov(cb, REG0_32, member_opnd(REG_EC, rb_execution_context_t, interrupt_mask));
- not(cb, REG0_32);
- test(cb, member_opnd(REG_EC, rb_execution_context_t, interrupt_flag), REG0_32);
- jnz_ptr(cb, side_exit);
-}
-
-// Generate a stubbed unconditional jump to the next bytecode instruction.
-// Blocks that are part of a guard chain can use this to share the same successor.
-static void
-jit_jump_to_next_insn(jitstate_t *jit, const ctx_t *current_context)
-{
- // Reset the depth since in current usages we only ever jump to
- // chain_depth > 0 from the same instruction.
- ctx_t reset_depth = *current_context;
- reset_depth.chain_depth = 0;
-
- blockid_t jump_block = { jit->iseq, jit_next_insn_idx(jit) };
-
- // We are at the end of the current instruction. Record the boundary.
- if (jit->record_boundary_patch_point) {
- uint32_t exit_pos = yjit_gen_exit(jit->pc + insn_len(jit->opcode), &reset_depth, jit->ocb);
- record_global_inval_patch(jit->cb, exit_pos);
- jit->record_boundary_patch_point = false;
- }
-
- // Generate the jump instruction
- gen_direct_jump(
- jit,
- &reset_depth,
- jump_block
- );
-}
-
-// Compile a sequence of bytecode instructions for a given basic block version.
-// Part of gen_block_version().
-static block_t *
-gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
-{
- RUBY_ASSERT(cb != NULL);
- verify_blockid(blockid);
-
- // Allocate the new block
- block_t *block = calloc(1, sizeof(block_t));
- if (!block) {
- return NULL;
- }
-
- // Copy the starting context to avoid mutating it
- ctx_t ctx_copy = *start_ctx;
- ctx_t *ctx = &ctx_copy;
-
- // Limit the number of specialized versions for this block
- *ctx = limit_block_versions(blockid, ctx);
-
- // Save the starting context on the block.
- block->blockid = blockid;
- block->ctx = *ctx;
-
- RUBY_ASSERT(!(blockid.idx == 0 && start_ctx->stack_size > 0));
-
- const rb_iseq_t *iseq = block->blockid.iseq;
- const unsigned int iseq_size = ISEQ_BODY(iseq)->iseq_size;
- uint32_t insn_idx = block->blockid.idx;
- const uint32_t starting_insn_idx = insn_idx;
-
- // Initialize a JIT state object
- jitstate_t jit = {
- .cb = cb,
- .ocb = ocb,
- .block = block,
- .iseq = iseq,
- .ec = ec
- };
-
- // Mark the start position of the block
- block->start_addr = cb_get_write_ptr(cb);
-
- // For each instruction to compile
- while (insn_idx < iseq_size) {
- // Get the current pc and opcode
- VALUE *pc = yjit_iseq_pc_at_idx(iseq, insn_idx);
- int opcode = yjit_opcode_at_pc(iseq, pc);
- RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
-
- // opt_getinlinecache wants to be in a block all on its own. Cut the block short
- // if we run into it. See gen_opt_getinlinecache() for details.
- if (opcode == BIN(opt_getinlinecache) && insn_idx > starting_insn_idx) {
- jit_jump_to_next_insn(&jit, ctx);
- break;
- }
-
- // Set the current instruction
- jit.insn_idx = insn_idx;
- jit.opcode = opcode;
- jit.pc = pc;
- jit.side_exit_for_pc = NULL;
-
- // If previous instruction requested to record the boundary
- if (jit.record_boundary_patch_point) {
- // Generate an exit to this instruction and record it
- uint32_t exit_pos = yjit_gen_exit(jit.pc, ctx, ocb);
- record_global_inval_patch(cb, exit_pos);
- jit.record_boundary_patch_point = false;
- }
-
- // Verify our existing assumption (DEBUG)
- if (jit_at_current_insn(&jit)) {
- verify_ctx(&jit, ctx);
- }
-
- // Lookup the codegen function for this instruction
- codegen_fn gen_fn = gen_fns[opcode];
- codegen_status_t status = YJIT_CANT_COMPILE;
- if (gen_fn) {
- if (0) {
- fprintf(stderr, "compiling %d: %s\n", insn_idx, insn_name(opcode));
- print_str(cb, insn_name(opcode));
- }
-
- // :count-placement:
- // Count bytecode instructions that execute in generated code.
- // Note that the increment happens even when the generated code takes a side exit.
- GEN_COUNTER_INC(cb, exec_instruction);
-
- // Add a comment for the name of the YARV instruction
- ADD_COMMENT(cb, insn_name(opcode));
-
- // Call the code generation function
- status = gen_fn(&jit, ctx, cb);
- }
-
- // If we can't compile this instruction
- // exit to the interpreter and stop compiling
- if (status == YJIT_CANT_COMPILE) {
- // TODO: if the codegen function makes changes to ctx and then return YJIT_CANT_COMPILE,
- // the exit this generates would be wrong. We could save a copy of the entry context
- // and assert that ctx is the same here.
- uint32_t exit_off = yjit_gen_exit(jit.pc, ctx, cb);
-
- // If this is the first instruction in the block, then we can use
- // the exit for block->entry_exit.
- if (insn_idx == block->blockid.idx) {
- block->entry_exit = cb_get_ptr(cb, exit_off);
- }
- break;
- }
-
- // For now, reset the chain depth after each instruction as only the
- // first instruction in the block can concern itself with the depth.
- ctx->chain_depth = 0;
-
- // Move to the next instruction to compile
- insn_idx += insn_len(opcode);
-
- // If the instruction terminates this block
- if (status == YJIT_END_BLOCK) {
- break;
- }
- }
-
- // Mark the end position of the block
- block->end_addr = cb_get_write_ptr(cb);
-
- // Store the index of the last instruction in the block
- block->end_idx = insn_idx;
-
- // We currently can't handle cases where the request is for a block that
- // doesn't go to the next instruction.
- RUBY_ASSERT(!jit.record_boundary_patch_point);
-
- // If code for the block doesn't fit, free the block and fail.
- if (cb->dropped_bytes || ocb->dropped_bytes) {
- yjit_free_block(block);
- return NULL;
- }
-
- if (YJIT_DUMP_MODE >= 2) {
- // Dump the list of compiled instructions
- fprintf(stderr, "Compiled the following for iseq=%p:\n", (void *)iseq);
- for (uint32_t idx = block->blockid.idx; idx < insn_idx; ) {
- int opcode = yjit_opcode_at_pc(iseq, yjit_iseq_pc_at_idx(iseq, idx));
- fprintf(stderr, " %04d %s\n", idx, insn_name(opcode));
- idx += insn_len(opcode);
- }
- }
-
- return block;
-}
-
-static codegen_status_t gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
-
-static codegen_status_t
-gen_nop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Do nothing
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_dup(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Get the top value and its type
- x86opnd_t dup_val = ctx_stack_pop(ctx, 0);
- temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
-
- // Push the same value on top
- x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
- mov(cb, REG0, dup_val);
- mov(cb, loc0, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// duplicate stack top n elements
-static codegen_status_t
-gen_dupn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
-
- // In practice, this seems to only be used with n == 2
- if (n != 2) {
- return YJIT_CANT_COMPILE;
- }
-
- x86opnd_t opnd1 = ctx_stack_opnd(ctx, 1);
- x86opnd_t opnd0 = ctx_stack_opnd(ctx, 0);
- temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(1));
- temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
-
- x86opnd_t dst1 = ctx_stack_push_mapping(ctx, mapping1);
- mov(cb, REG0, opnd1);
- mov(cb, dst1, REG0);
-
- x86opnd_t dst0 = ctx_stack_push_mapping(ctx, mapping0);
- mov(cb, REG0, opnd0);
- mov(cb, dst0, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static void
-stack_swap(ctx_t *ctx, codeblock_t *cb, int offset0, int offset1, x86opnd_t reg0, x86opnd_t reg1)
-{
- x86opnd_t opnd0 = ctx_stack_opnd(ctx, offset0);
- x86opnd_t opnd1 = ctx_stack_opnd(ctx, offset1);
-
- temp_type_mapping_t mapping0 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset0));
- temp_type_mapping_t mapping1 = ctx_get_opnd_mapping(ctx, OPND_STACK(offset1));
-
- mov(cb, reg0, opnd0);
- mov(cb, reg1, opnd1);
- mov(cb, opnd0, reg1);
- mov(cb, opnd1, reg0);
-
- ctx_set_opnd_mapping(ctx, OPND_STACK(offset0), mapping1);
- ctx_set_opnd_mapping(ctx, OPND_STACK(offset1), mapping0);
-}
-
-// Swap top 2 stack entries
-static codegen_status_t
-gen_swap(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- stack_swap(ctx, cb, 0, 1, REG0, REG1);
- return YJIT_KEEP_COMPILING;
-}
-
-// set Nth stack entry to stack top
-static codegen_status_t
-gen_setn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
-
- // Set the destination
- x86opnd_t top_val = ctx_stack_pop(ctx, 0);
- x86opnd_t dst_opnd = ctx_stack_opnd(ctx, (int32_t)n);
- mov(cb, REG0, top_val);
- mov(cb, dst_opnd, REG0);
-
- temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(0));
- ctx_set_opnd_mapping(ctx, OPND_STACK(n), mapping);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// get nth stack value, then push it
-static codegen_status_t
-gen_topn(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t n = (int32_t)jit_get_arg(jit, 0);
-
- // Get top n type / operand
- x86opnd_t top_n_val = ctx_stack_opnd(ctx, n);
- temp_type_mapping_t mapping = ctx_get_opnd_mapping(ctx, OPND_STACK(n));
-
- x86opnd_t loc0 = ctx_stack_push_mapping(ctx, mapping);
- mov(cb, REG0, top_n_val);
- mov(cb, loc0, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_pop(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Decrement SP
- ctx_stack_pop(ctx, 1);
- return YJIT_KEEP_COMPILING;
-}
-
-// Pop n values off the stack
-static codegen_status_t
-gen_adjuststack(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
- ctx_stack_pop(ctx, n);
- return YJIT_KEEP_COMPILING;
-}
-
-// new array initialized from top N values
-static codegen_status_t
-gen_newarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
-
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, REG0);
-
- x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(int32_t)(sizeof(VALUE) * (uint32_t)n));
-
- // call rb_ec_ary_new_from_values(struct rb_execution_context_struct *ec, long n, const VALUE *elts);
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], imm_opnd(n));
- lea(cb, C_ARG_REGS[2], values_ptr);
- call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
-
- ctx_stack_pop(ctx, n);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// dup array
-static codegen_status_t
-gen_duparray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- VALUE ary = jit_get_arg(jit, 0);
-
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // call rb_ary_resurrect(VALUE ary);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], ary);
- call_ptr(cb, REG0, (void *)rb_ary_resurrect);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// dup hash
-static codegen_status_t
-gen_duphash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- VALUE hash = jit_get_arg(jit, 0);
-
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // call rb_hash_resurrect(VALUE hash);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], hash);
- call_ptr(cb, REG0, (void *)rb_hash_resurrect);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-VALUE rb_vm_splat_array(VALUE flag, VALUE ary);
-
-// call to_a on the array on the stack
-static codegen_status_t
-gen_splatarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- VALUE flag = (VALUE) jit_get_arg(jit, 0);
-
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // Get the operands from the stack
- x86opnd_t ary_opnd = ctx_stack_pop(ctx, 1);
-
- // Call rb_vm_splat_array(flag, ary)
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], flag);
- mov(cb, C_ARG_REGS[1], ary_opnd);
- call_ptr(cb, REG1, (void *) rb_vm_splat_array);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_ARRAY);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// new range initialized from top 2 values
-static codegen_status_t
-gen_newrange(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t flag = (rb_num_t)jit_get_arg(jit, 0);
-
- // rb_range_new() allocates and can raise
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // val = rb_range_new(low, high, (int)flag);
- mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, 1));
- mov(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, 0));
- mov(cb, C_ARG_REGS[2], imm_opnd(flag));
- call_ptr(cb, REG0, (void *)rb_range_new);
-
- ctx_stack_pop(ctx, 2);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static void
-guard_object_is_heap(codeblock_t *cb, x86opnd_t object_opnd, ctx_t *ctx, uint8_t *side_exit)
-{
- ADD_COMMENT(cb, "guard object is heap");
-
- // Test that the object is not an immediate
- test(cb, object_opnd, imm_opnd(RUBY_IMMEDIATE_MASK));
- jnz_ptr(cb, side_exit);
-
- // Test that the object is not false or nil
- cmp(cb, object_opnd, imm_opnd(Qnil));
- RUBY_ASSERT(Qfalse < Qnil);
- jbe_ptr(cb, side_exit);
-}
-
-static inline void
-guard_object_is_array(codeblock_t *cb, x86opnd_t object_opnd, x86opnd_t flags_opnd, ctx_t *ctx, uint8_t *side_exit)
-{
- ADD_COMMENT(cb, "guard object is array");
-
- // Pull out the type mask
- mov(cb, flags_opnd, member_opnd(object_opnd, struct RBasic, flags));
- and(cb, flags_opnd, imm_opnd(RUBY_T_MASK));
-
- // Compare the result with T_ARRAY
- cmp(cb, flags_opnd, imm_opnd(T_ARRAY));
- jne_ptr(cb, side_exit);
-}
-
-// push enough nils onto the stack to fill out an array
-static codegen_status_t
-gen_expandarray(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int flag = (int) jit_get_arg(jit, 1);
-
- // If this instruction has the splat flag, then bail out.
- if (flag & 0x01) {
- GEN_COUNTER_INC(cb, expandarray_splat);
- return YJIT_CANT_COMPILE;
- }
-
- // If this instruction has the postarg flag, then bail out.
- if (flag & 0x02) {
- GEN_COUNTER_INC(cb, expandarray_postarg);
- return YJIT_CANT_COMPILE;
- }
-
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // num is the number of requested values. If there aren't enough in the
- // array, then we're going to push nils.
- int num = (int)jit_get_arg(jit, 0);
- val_type_t array_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
- x86opnd_t array_opnd = ctx_stack_pop(ctx, 1);
-
- if (array_type.type == ETYPE_NIL) {
- // special case for a, b = nil pattern
- // push N nils onto the stack
- for (int i = 0; i < num; i++) {
- x86opnd_t push = ctx_stack_push(ctx, TYPE_NIL);
- mov(cb, push, imm_opnd(Qnil));
- }
- return YJIT_KEEP_COMPILING;
- }
-
- // Move the array from the stack into REG0 and check that it's an array.
- mov(cb, REG0, array_opnd);
- guard_object_is_heap(cb, REG0, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
- guard_object_is_array(cb, REG0, REG1, ctx, COUNTED_EXIT(jit, side_exit, expandarray_not_array));
-
- // If we don't actually want any values, then just return.
- if (num == 0) {
- return YJIT_KEEP_COMPILING;
- }
-
- // Pull out the embed flag to check if it's an embedded array.
- x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
- mov(cb, REG1, flags_opnd);
-
- // Move the length of the embedded array into REG1.
- and(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_MASK));
- shr(cb, REG1, imm_opnd(RARRAY_EMBED_LEN_SHIFT));
-
- // Conditionally move the length of the heap array into REG1.
- test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
- cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.len));
-
- // Only handle the case where the number of values in the array is greater
- // than or equal to the number of values requested.
- cmp(cb, REG1, imm_opnd(num));
- jl_ptr(cb, COUNTED_EXIT(jit, side_exit, expandarray_rhs_too_small));
-
- // Load the address of the embedded array into REG1.
- // (struct RArray *)(obj)->as.ary
- lea(cb, REG1, member_opnd(REG0, struct RArray, as.ary));
-
- // Conditionally load the address of the heap array into REG1.
- // (struct RArray *)(obj)->as.heap.ptr
- test(cb, flags_opnd, imm_opnd(RARRAY_EMBED_FLAG));
- cmovz(cb, REG1, member_opnd(REG0, struct RArray, as.heap.ptr));
-
- // Loop backward through the array and push each element onto the stack.
- for (int32_t i = (int32_t) num - 1; i >= 0; i--) {
- x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, REG0, mem_opnd(64, REG1, i * SIZEOF_VALUE));
- mov(cb, top, REG0);
- }
-
- return YJIT_KEEP_COMPILING;
-}
-
-// new hash initialized from top N values
-static codegen_status_t
-gen_newhash(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t num = (int32_t)jit_get_arg(jit, 0);
-
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, REG0);
-
- if (num) {
- // val = rb_hash_new_with_size(num / 2);
- mov(cb, C_ARG_REGS[0], imm_opnd(num / 2));
- call_ptr(cb, REG0, (void *)rb_hash_new_with_size);
-
- // save the allocated hash as we want to push it after insertion
- push(cb, RAX);
- push(cb, RAX); // alignment
-
- // rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
- mov(cb, C_ARG_REGS[0], imm_opnd(num));
- lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, num - 1));
- mov(cb, C_ARG_REGS[2], RAX);
- call_ptr(cb, REG0, (void *)rb_hash_bulk_insert);
-
- pop(cb, RAX); // alignment
- pop(cb, RAX);
-
- ctx_stack_pop(ctx, num);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
- mov(cb, stack_ret, RAX);
- }
- else {
- // val = rb_hash_new();
- call_ptr(cb, REG0, (void *)rb_hash_new);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HASH);
- mov(cb, stack_ret, RAX);
- }
-
- return YJIT_KEEP_COMPILING;
-}
-
-// Push a constant value to the stack, including type information.
-// The constant may be a heap object or a special constant.
-static void
-jit_putobject(jitstate_t *jit, ctx_t *ctx, VALUE arg)
-{
- val_type_t val_type = yjit_type_of_value(arg);
- x86opnd_t stack_top = ctx_stack_push(ctx, val_type);
-
- if (SPECIAL_CONST_P(arg)) {
- // Immediates will not move and do not need to be tracked for GC
- // Thanks to this we can mov directly to memory when possible.
-
- // NOTE: VALUE -> int64_t cast below is implementation defined.
- // Hopefully it preserves the bit pattern or raises a signal.
- // See N1256 section 6.3.1.3.
- x86opnd_t imm = imm_opnd((int64_t)arg);
-
- // 64-bit immediates can't be directly written to memory
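- // (x86-64 has no mov-to-memory encoding with a 64-bit immediate; a mov to
- // memory sign-extends a 32-bit immediate, hence the REG0 round trip below)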
- if (imm.num_bits <= 32) {
- mov(cb, stack_top, imm);
- }
- else {
- mov(cb, REG0, imm);
- mov(cb, stack_top, REG0);
- }
- }
- else {
- // Load the value to push into REG0
- // Note that this value may get moved by the GC
- jit_mov_gc_ptr(jit, cb, REG0, arg);
-
- // Write argument at SP
- mov(cb, stack_top, REG0);
- }
-}
-
-static codegen_status_t
-gen_putnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- jit_putobject(jit, ctx, Qnil);
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_putobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- VALUE arg = jit_get_arg(jit, 0);
-
- jit_putobject(jit, ctx, arg);
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_putstring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- VALUE put_val = jit_get_arg(jit, 0);
-
- // Save the PC and SP because the callee will allocate
- jit_prepare_routine_call(jit, ctx, REG0);
-
- mov(cb, C_ARG_REGS[0], REG_EC);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], put_val);
- call_ptr(cb, REG0, (void *)rb_ec_str_resurrect);
-
- x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_STRING);
- mov(cb, stack_top, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_putobject_int2fix(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int opcode = jit_get_opcode(jit);
- int cst_val = (opcode == BIN(putobject_INT2FIX_0_))? 0:1;
-
- jit_putobject(jit, ctx, INT2FIX(cst_val));
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_putself(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Load self from CFP
- mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
-
- // Write it on the stack
- x86opnd_t stack_top = ctx_stack_push_self(ctx);
- mov(cb, stack_top, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_putspecialobject(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- enum vm_special_object_type type = (enum vm_special_object_type)jit_get_arg(jit, 0);
-
- if (type == VM_SPECIAL_OBJECT_VMCORE) {
- x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_HEAP);
- jit_mov_gc_ptr(jit, cb, REG0, rb_mRubyVMFrozenCore);
- mov(cb, stack_top, REG0);
- return YJIT_KEEP_COMPILING;
- }
- else {
- // TODO: implement for VM_SPECIAL_OBJECT_CBASE and
- // VM_SPECIAL_OBJECT_CONST_BASE
- return YJIT_CANT_COMPILE;
- }
-}
-
-// Get EP at level from CFP
-static void
-gen_get_ep(codeblock_t *cb, x86opnd_t reg, uint32_t level)
-{
- // Load environment pointer EP from CFP
- mov(cb, reg, member_opnd(REG_CFP, rb_control_frame_t, ep));
-
- while (level--) {
- // Get the previous EP from the current EP
- // See GET_PREV_EP(ep) macro
- // VALUE *prev_ep = ((VALUE *)((ep)[VM_ENV_DATA_INDEX_SPECVAL] & ~0x03))
- mov(cb, reg, mem_opnd(64, reg, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
- and(cb, reg, imm_opnd(~0x03));
- }
-}
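-
-// For example, gen_getlocal_wc1() below reads a local from the enclosing scope,
-// so it calls gen_get_ep() with level == 1 to follow one specval link from the EP.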
-
-// Compute the local table index of a variable from its index relative to the
-// environment object.
-static uint32_t
-slot_to_local_idx(const rb_iseq_t *iseq, int32_t slot_idx)
-{
- // Layout illustration
- // This is an array of VALUE
- // | VM_ENV_DATA_SIZE |
- // v v
- // low addr <+-------+-------+-------+-------+------------------+
- // |local 0|local 1| ... |local n| .... |
- // +-------+-------+-------+-------+------------------+
- // ^ ^ ^ ^
- // +-------+---local_table_size----+ cfp->ep--+
- // | |
- // +------------------slot_idx----------------+
- //
- // See usages of local_var_name() from iseq.c for similar calculation.
-
- // FIXME: unsigned to signed cast below can truncate
- int32_t local_table_size = ISEQ_BODY(iseq)->local_table_size;
- int32_t op = slot_idx - VM_ENV_DATA_SIZE;
- int32_t local_idx = local_table_size - op - 1;
- RUBY_ASSERT(local_idx >= 0 && local_idx < local_table_size);
- return (uint32_t)local_idx;
-}
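-
-// Worked example (assuming VM_ENV_DATA_SIZE == 3): with local_table_size == 2,
-// slot_idx == 3 gives local_idx == 2 - (3 - 3) - 1 == 1 (the last local), and
-// slot_idx == 4 gives local_idx == 0 (the first local).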
-
-static codegen_status_t
-gen_getlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Compute the offset from BP to the local
- // TODO: Type is lindex_t in the interpreter. The following cast can truncate.
- // Not in the mood to dance around signed multiplication UB at the moment...
- int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
- const int32_t offs = -(SIZEOF_VALUE * slot_idx);
- uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
-
- // Load environment pointer EP (level 0) from CFP
- gen_get_ep(cb, REG0, 0);
-
- // Load the local from the EP
- mov(cb, REG0, mem_opnd(64, REG0, offs));
-
- // Write the local at SP
- x86opnd_t stack_top = ctx_stack_push_local(ctx, local_idx);
- mov(cb, stack_top, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_getlocal_generic(ctx_t *ctx, uint32_t local_idx, uint32_t level)
-{
- gen_get_ep(cb, REG0, level);
-
- // Load the local from the block
- // val = *(vm_get_ep(GET_EP(), level) - idx);
- const int32_t offs = -(int32_t)(SIZEOF_VALUE * local_idx);
- mov(cb, REG0, mem_opnd(64, REG0, offs));
-
- // Write the local at SP
- x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_top, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_getlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t idx = (int32_t)jit_get_arg(jit, 0);
- int32_t level = (int32_t)jit_get_arg(jit, 1);
- return gen_getlocal_generic(ctx, idx, level);
-}
-
-static codegen_status_t
-gen_getlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t idx = (int32_t)jit_get_arg(jit, 0);
- return gen_getlocal_generic(ctx, idx, 1);
-}
-
-static codegen_status_t
-gen_setlocal_wc0(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- /*
- vm_env_write(const VALUE *ep, int index, VALUE v)
- {
- VALUE flags = ep[VM_ENV_DATA_INDEX_FLAGS];
- if (LIKELY((flags & VM_ENV_FLAG_WB_REQUIRED) == 0)) {
- VM_STACK_ENV_WRITE(ep, index, v);
- }
- else {
- vm_env_write_slowpath(ep, index, v);
- }
- }
- */
-
- int32_t slot_idx = (int32_t)jit_get_arg(jit, 0);
- uint32_t local_idx = slot_to_local_idx(jit->iseq, slot_idx);
-
- // Load environment pointer EP (level 0) from CFP
- gen_get_ep(cb, REG0, 0);
-
- // flags & VM_ENV_FLAG_WB_REQUIRED
- x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
- test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
- jnz_ptr(cb, side_exit);
-
- // Set the type of the local variable in the context
- val_type_t temp_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
- ctx_set_local_type(ctx, local_idx, temp_type);
-
- // Pop the value to write from the stack
- x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
- mov(cb, REG1, stack_top);
-
- // Write the value at the environment pointer
- const int32_t offs = -(SIZEOF_VALUE * slot_idx);
- mov(cb, mem_opnd(64, REG0, offs), REG1);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// Push Qtrue or Qfalse depending on whether the given keyword was supplied by
-// the caller
-static codegen_status_t
-gen_checkkeyword(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // When a keyword is unspecified past index 32, a hash will be used
- // instead. This can only happen in iseqs taking more than 32 keywords.
- if (ISEQ_BODY(jit->iseq)->param.keyword->num >= 32) {
- return YJIT_CANT_COMPILE;
- }
-
- // The EP offset to the undefined bits local
- int32_t bits_offset = (int32_t)jit_get_arg(jit, 0);
-
- // The index of the keyword we want to check
- int32_t index = (int32_t)jit_get_arg(jit, 1);
-
- // Load environment pointer EP
- gen_get_ep(cb, REG0, 0);
-
- // VALUE kw_bits = *(ep - bits);
- x86opnd_t bits_opnd = mem_opnd(64, REG0, sizeof(VALUE) * -bits_offset);
-
- // unsigned int b = (unsigned int)FIX2ULONG(kw_bits);
- // if ((b & (0x01 << idx))) {
- //
- // We can skip the FIX2ULONG conversion by shifting the bit we test
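- // (a Fixnum boxes integer b as (b << 1) | RUBY_FIXNUM_FLAG, so bit idx of the
- // unboxed value sits at bit idx + 1 of the boxed VALUE; e.g. idx == 0 tests bit 1)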
- int64_t bit_test = 0x01 << (index + 1);
- test(cb, bits_opnd, imm_opnd(bit_test));
- mov(cb, REG0, imm_opnd(Qfalse));
- mov(cb, REG1, imm_opnd(Qtrue));
- cmovz(cb, REG0, REG1);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
- mov(cb, stack_ret, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_setlocal_generic(jitstate_t *jit, ctx_t *ctx, uint32_t local_idx, uint32_t level)
-{
- // Load environment pointer EP at level
- gen_get_ep(cb, REG0, level);
-
- // flags & VM_ENV_FLAG_WB_REQUIRED
- x86opnd_t flags_opnd = mem_opnd(64, REG0, sizeof(VALUE) * VM_ENV_DATA_INDEX_FLAGS);
- test(cb, flags_opnd, imm_opnd(VM_ENV_FLAG_WB_REQUIRED));
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // if (flags & VM_ENV_FLAG_WB_REQUIRED) != 0
- jnz_ptr(cb, side_exit);
-
- // Pop the value to write from the stack
- x86opnd_t stack_top = ctx_stack_pop(ctx, 1);
- mov(cb, REG1, stack_top);
-
- // Write the value at the environment pointer
- const int32_t offs = -(int32_t)(SIZEOF_VALUE * local_idx);
- mov(cb, mem_opnd(64, REG0, offs), REG1);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_setlocal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t idx = (int32_t)jit_get_arg(jit, 0);
- int32_t level = (int32_t)jit_get_arg(jit, 1);
- return gen_setlocal_generic(jit, ctx, idx, level);
-}
-
-static codegen_status_t
-gen_setlocal_wc1(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t idx = (int32_t)jit_get_arg(jit, 0);
- return gen_setlocal_generic(jit, ctx, idx, 1);
-}
-
-static void
-gen_jnz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- case SHAPE_NEXT1:
- RUBY_ASSERT(false);
- break;
-
- case SHAPE_DEFAULT:
- jnz_ptr(cb, target0);
- break;
- }
-}
-
-static void
-gen_jz_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- case SHAPE_NEXT1:
- RUBY_ASSERT(false);
- break;
-
- case SHAPE_DEFAULT:
- jz_ptr(cb, target0);
- break;
- }
-}
-
-static void
-gen_jbe_to_target0(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- case SHAPE_NEXT1:
- RUBY_ASSERT(false);
- break;
-
- case SHAPE_DEFAULT:
- jbe_ptr(cb, target0);
- break;
- }
-}
-
-enum jcc_kinds {
- JCC_JNE,
- JCC_JNZ,
- JCC_JZ,
- JCC_JE,
- JCC_JBE,
- JCC_JNA,
-};
-
-// Generate a jump to a stub that recompiles the current YARV instruction on failure.
-// When depth_limit is exceeded, generate a jump to a side exit.
-static void
-jit_chain_guard(enum jcc_kinds jcc, jitstate_t *jit, const ctx_t *ctx, uint8_t depth_limit, uint8_t *side_exit)
-{
- branchgen_fn target0_gen_fn;
-
- switch (jcc) {
- case JCC_JNE:
- case JCC_JNZ:
- target0_gen_fn = gen_jnz_to_target0;
- break;
- case JCC_JZ:
- case JCC_JE:
- target0_gen_fn = gen_jz_to_target0;
- break;
- case JCC_JBE:
- case JCC_JNA:
- target0_gen_fn = gen_jbe_to_target0;
- break;
- default:
- rb_bug("yjit: unimplemented jump kind");
- break;
- }
-
- if (ctx->chain_depth < depth_limit) {
- ctx_t deeper = *ctx;
- deeper.chain_depth++;
-
- gen_branch(
- jit,
- ctx,
- (blockid_t) { jit->iseq, jit->insn_idx },
- &deeper,
- BLOCKID_NULL,
- NULL,
- target0_gen_fn
- );
- }
- else {
- target0_gen_fn(cb, side_exit, NULL, SHAPE_DEFAULT);
- }
-}
-
-enum {
- GETIVAR_MAX_DEPTH = 10, // up to 5 different classes, and embedded or not for each
- OPT_AREF_MAX_CHAIN_DEPTH = 2, // hashes and arrays
- SEND_MAX_DEPTH = 5, // up to 5 different classes
-};
-
-VALUE rb_vm_set_ivar_idx(VALUE obj, uint32_t idx, VALUE val);
-
-// Codegen for setting an instance variable.
-// Preconditions:
-// - receiver is in REG0
-// - receiver has the same class as CLASS_OF(comptime_receiver)
-// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
-static codegen_status_t
-gen_set_ivar(jitstate_t *jit, ctx_t *ctx, VALUE recv, VALUE klass, ID ivar_name)
-{
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // Get the operands from the stack
- x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
- x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
-
- uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(recv, ivar_name);
-
- // Call rb_vm_set_ivar_idx with the receiver, the index of the ivar, and the value
- mov(cb, C_ARG_REGS[0], recv_opnd);
- mov(cb, C_ARG_REGS[1], imm_opnd(ivar_index));
- mov(cb, C_ARG_REGS[2], val_opnd);
- call_ptr(cb, REG0, (void *)rb_vm_set_ivar_idx);
-
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, out_opnd, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// Codegen for getting an instance variable.
-// Preconditions:
-// - receiver is in REG0
-// - receiver has the same class as CLASS_OF(comptime_receiver)
-// - no stack push or pops to ctx since the entry to the codegen of the instruction being compiled
-static codegen_status_t
-gen_get_ivar(jitstate_t *jit, ctx_t *ctx, const int max_chain_depth, VALUE comptime_receiver, ID ivar_name, insn_opnd_t reg0_opnd, uint8_t *side_exit)
-{
- VALUE comptime_val_klass = CLASS_OF(comptime_receiver);
- const ctx_t starting_context = *ctx; // make a copy for use with jit_chain_guard
-
- // If the class uses the default allocator, instances should all be T_OBJECT
- // NOTE: This assumes nobody changes the allocator of the class after allocation.
- // Eventually, we can encode whether an object is T_OBJECT or not
- // inside object shapes.
- if (!RB_TYPE_P(comptime_receiver, T_OBJECT) ||
- rb_get_alloc_func(comptime_val_klass) != rb_class_allocate_instance) {
- // General case. Call rb_ivar_get().
- // VALUE rb_ivar_get(VALUE obj, ID id)
- ADD_COMMENT(cb, "call rb_ivar_get()");
-
- // The function could raise exceptions.
- jit_prepare_routine_call(jit, ctx, REG1);
-
- mov(cb, C_ARG_REGS[0], REG0);
- mov(cb, C_ARG_REGS[1], imm_opnd((int64_t)ivar_name));
- call_ptr(cb, REG1, (void *)rb_ivar_get);
-
- if (!reg0_opnd.is_self) {
- (void)ctx_stack_pop(ctx, 1);
- }
- // Push the ivar on the stack
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, out_opnd, RAX);
-
- // Jump to next instruction. This allows guard chains to share the same successor.
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- /*
- // FIXME:
- // This check was added because of a failure in a test involving the
- // Nokogiri Document class where we see a T_DATA that still has the default
- // allocator.
- // Aaron Patterson argues that this is a bug in the C extension, because
- // people could call .allocate() on the class and still get a T_OBJECT
- // For now I added an extra dynamic check that the receiver is T_OBJECT
- // so we can safely pass all the tests in Shopify Core.
- //
- // Guard that the receiver is T_OBJECT
- // #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK)
- ADD_COMMENT(cb, "guard receiver is T_OBJECT");
- mov(cb, REG1, member_opnd(REG0, struct RBasic, flags));
- and(cb, REG1, imm_opnd(RUBY_T_MASK));
- cmp(cb, REG1, imm_opnd(T_OBJECT));
- jit_chain_guard(JCC_JNE, jit, &starting_context, max_chain_depth, side_exit);
- */
-
- // FIXME: Mapping the index could fail when there are too many ivar names. If we're
- // compiling for a branch stub that can cause the exception to be thrown from the
- // wrong PC.
- uint32_t ivar_index = rb_obj_ensure_iv_index_mapping(comptime_receiver, ivar_name);
-
- // Pop receiver if it's on the temp stack
- if (!reg0_opnd.is_self) {
- (void)ctx_stack_pop(ctx, 1);
- }
-
- // Compile time self is embedded and the ivar index lands within the object
- if (RB_FL_TEST_RAW(comptime_receiver, ROBJECT_EMBED) && ivar_index < ROBJECT_EMBED_LEN_MAX) {
- // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
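- // (an embedded T_OBJECT keeps up to ROBJECT_EMBED_LEN_MAX ivars inline in
- // as.ary; objects with more ivars spill to the heap-allocated as.heap.ivptr
- // table handled by the else branch below)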
-
- // Guard that self is embedded
- // TODO: BT and JC would be shorter
- ADD_COMMENT(cb, "guard embedded getivar");
- x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
- test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
- jit_chain_guard(JCC_JZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
-
- // Load the variable
- x86opnd_t ivar_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.ary) + ivar_index * SIZEOF_VALUE);
- mov(cb, REG1, ivar_opnd);
-
- // Guard that the variable is not Qundef
- cmp(cb, REG1, imm_opnd(Qundef));
- mov(cb, REG0, imm_opnd(Qnil));
- cmove(cb, REG1, REG0);
-
- // Push the ivar on the stack
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, out_opnd, REG1);
- }
- else {
- // Compile time value is *not* embedded.
-
- // Guard that value is *not* embedded
- // See ROBJECT_IVPTR() from include/ruby/internal/core/robject.h
- ADD_COMMENT(cb, "guard extended getivar");
- x86opnd_t flags_opnd = member_opnd(REG0, struct RBasic, flags);
- test(cb, flags_opnd, imm_opnd(ROBJECT_EMBED));
- jit_chain_guard(JCC_JNZ, jit, &starting_context, max_chain_depth, COUNTED_EXIT(jit, side_exit, getivar_megamorphic));
-
- // check that the extended table is big enough
- if (ivar_index >= ROBJECT_EMBED_LEN_MAX + 1) {
- // Check that the slot is inside the extended table (num_slots > index)
- x86opnd_t num_slots = mem_opnd(32, REG0, offsetof(struct RObject, as.heap.numiv));
- cmp(cb, num_slots, imm_opnd(ivar_index));
- jle_ptr(cb, COUNTED_EXIT(jit, side_exit, getivar_idx_out_of_range));
- }
-
- // Get a pointer to the extended table
- x86opnd_t tbl_opnd = mem_opnd(64, REG0, offsetof(struct RObject, as.heap.ivptr));
- mov(cb, REG0, tbl_opnd);
-
- // Read the ivar from the extended table
- x86opnd_t ivar_opnd = mem_opnd(64, REG0, sizeof(VALUE) * ivar_index);
- mov(cb, REG0, ivar_opnd);
-
- // Check that the ivar is not Qundef
- cmp(cb, REG0, imm_opnd(Qundef));
- mov(cb, REG1, imm_opnd(Qnil));
- cmove(cb, REG0, REG1);
-
- // Push the ivar on the stack
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, out_opnd, REG0);
- }
-
- // Jump to next instruction. This allows guard chains to share the same successor.
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
-}
-
-static codegen_status_t
-gen_getinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize on a runtime `self`
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- ID ivar_name = (ID)jit_get_arg(jit, 0);
-
- VALUE comptime_val = jit_peek_at_self(jit, ctx);
- VALUE comptime_val_klass = CLASS_OF(comptime_val);
-
- // Generate a side exit
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Guard that the receiver has the same class as the one from compile time.
- mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, self));
-
- jit_guard_known_klass(jit, ctx, comptime_val_klass, OPND_SELF, comptime_val, GETIVAR_MAX_DEPTH, side_exit);
-
- return gen_get_ivar(jit, ctx, GETIVAR_MAX_DEPTH, comptime_val, ivar_name, OPND_SELF, side_exit);
-}
-
-void rb_vm_setinstancevariable(const rb_iseq_t *iseq, VALUE obj, ID id, VALUE val, IVC ic);
-
-static codegen_status_t
-gen_setinstancevariable(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- ID id = (ID)jit_get_arg(jit, 0);
- IVC ic = (IVC)jit_get_arg(jit, 1);
-
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // Get the operands from the stack
- x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
-
- // Call rb_vm_setinstancevariable(iseq, obj, id, val, ic);
- mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
- mov(cb, C_ARG_REGS[3], val_opnd);
- mov(cb, C_ARG_REGS[2], imm_opnd(id));
- mov(cb, C_ARG_REGS[4], const_ptr_opnd(ic));
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)jit->iseq);
- call_ptr(cb, REG0, (void *)rb_vm_setinstancevariable);
-
- return YJIT_KEEP_COMPILING;
-}
-
-bool rb_vm_defined(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, rb_num_t op_type, VALUE obj, VALUE v);
-
-static codegen_status_t
-gen_defined(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t op_type = (rb_num_t)jit_get_arg(jit, 0);
- VALUE obj = (VALUE)jit_get_arg(jit, 1);
- VALUE pushval = (VALUE)jit_get_arg(jit, 2);
-
- // Save the PC and SP because the callee may allocate
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // Get the operands from the stack
- x86opnd_t v_opnd = ctx_stack_pop(ctx, 1);
-
- // Call vm_defined(ec, reg_cfp, op_type, obj, v)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], REG_CFP);
- mov(cb, C_ARG_REGS[2], imm_opnd(op_type));
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)obj);
- mov(cb, C_ARG_REGS[4], v_opnd);
- call_ptr(cb, REG0, (void *)rb_vm_defined);
-
- // if (vm_defined(ec, GET_CFP(), op_type, obj, v)) {
- // val = pushval;
- // }
- jit_mov_gc_ptr(jit, cb, REG1, (VALUE)pushval);
- cmp(cb, AL, imm_opnd(0));
- mov(cb, RAX, imm_opnd(Qnil));
- cmovnz(cb, RAX, REG1);
-
- // Push the return value onto the stack
- val_type_t out_type = SPECIAL_CONST_P(pushval)? TYPE_IMM:TYPE_UNKNOWN;
- x86opnd_t stack_ret = ctx_stack_push(ctx, out_type);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_checktype(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- enum ruby_value_type type_val = (enum ruby_value_type)jit_get_arg(jit, 0);
- // Only three types are emitted by compile.c
- if (type_val == T_STRING || type_val == T_ARRAY || type_val == T_HASH) {
- val_type_t val_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
- x86opnd_t val = ctx_stack_pop(ctx, 1);
-
- x86opnd_t stack_ret;
-
- // Check if we know from type information
- if ((type_val == T_STRING && val_type.type == ETYPE_STRING) ||
- (type_val == T_ARRAY && val_type.type == ETYPE_ARRAY) ||
- (type_val == T_HASH && val_type.type == ETYPE_HASH)) {
- // guaranteed type match
- stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
- mov(cb, stack_ret, imm_opnd(Qtrue));
- return YJIT_KEEP_COMPILING;
- }
- else if (val_type.is_imm || val_type.type != ETYPE_UNKNOWN) {
- // guaranteed not to match T_STRING/T_ARRAY/T_HASH
- stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
- mov(cb, stack_ret, imm_opnd(Qfalse));
- return YJIT_KEEP_COMPILING;
- }
-
- mov(cb, REG0, val);
- mov(cb, REG1, imm_opnd(Qfalse));
-
- uint32_t ret = cb_new_label(cb, "ret");
-
- if (!val_type.is_heap) {
- // if (SPECIAL_CONST_P(val)) {
- // Return Qfalse via REG1 if not on heap
- test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
- jnz_label(cb, ret);
- cmp(cb, REG0, imm_opnd(Qnil));
- jbe_label(cb, ret);
- }
-
- // Check type on object
- mov(cb, REG0, mem_opnd(64, REG0, offsetof(struct RBasic, flags)));
- and(cb, REG0, imm_opnd(RUBY_T_MASK));
- cmp(cb, REG0, imm_opnd(type_val));
- mov(cb, REG0, imm_opnd(Qtrue));
- // REG1 contains Qfalse from above
- cmove(cb, REG1, REG0);
-
- cb_write_label(cb, ret);
- stack_ret = ctx_stack_push(ctx, TYPE_IMM);
- mov(cb, stack_ret, REG1);
- cb_link_labels(cb);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- return YJIT_CANT_COMPILE;
- }
-}
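For readers unfamiliar with CRuby's value encoding, the checktype sequence above is easier to follow as plain C. The sketch below models what the emitted code computes; the constants and the `sketch_`-prefixed names are illustrative assumptions mirroring the 64-bit encoding this file targets (`Qfalse == 0`, `Qnil == 8`, `RUBY_IMMEDIATE_MASK == 0x07`, `RUBY_T_MASK == 0x1f`), not identifiers from the deleted file.

```c
#include <stdbool.h>
#include <stdint.h>

/* Assumed constants for 64-bit CRuby of this era (illustrative only). */
#define SKETCH_Qnil            0x08u
#define SKETCH_IMMEDIATE_MASK  0x07u
#define SKETCH_T_MASK          0x1fu

/* Roughly what gen_checktype() emits for the general case: special
 * constants can never match T_STRING/T_ARRAY/T_HASH, otherwise the
 * answer comes from the low bits of RBasic::flags. */
static bool
sketch_checktype(uintptr_t val, uintptr_t rbasic_flags, unsigned type)
{
    if (val & SKETCH_IMMEDIATE_MASK) return false; /* fixnum/flonum/static symbol/true */
    if (val <= SKETCH_Qnil)          return false; /* Qfalse (0) or Qnil (8) */
    return (rbasic_flags & SKETCH_T_MASK) == type; /* heap object: compare its type field */
}
```

The `jbe` against `Qnil` works because, once the low three bits are known to be clear, the only special constants left are `Qfalse` (0) and `Qnil` (8).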
-
-static codegen_status_t
-gen_concatstrings(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t n = (rb_num_t)jit_get_arg(jit, 0);
-
- // Save the PC and SP because we are allocating
- jit_prepare_routine_call(jit, ctx, REG0);
-
- x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(int32_t)(sizeof(VALUE) * (uint32_t)n));
-
- // call rb_str_concat_literals(long n, const VALUE *strings);
- mov(cb, C_ARG_REGS[0], imm_opnd(n));
- lea(cb, C_ARG_REGS[1], values_ptr);
- call_ptr(cb, REG0, (void *)rb_str_concat_literals);
-
- ctx_stack_pop(ctx, n);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static void
-guard_two_fixnums(ctx_t *ctx, uint8_t *side_exit)
-{
- // Get the stack operand types
- val_type_t arg1_type = ctx_get_opnd_type(ctx, OPND_STACK(0));
- val_type_t arg0_type = ctx_get_opnd_type(ctx, OPND_STACK(1));
-
- if (arg0_type.is_heap || arg1_type.is_heap) {
- jmp_ptr(cb, side_exit);
- return;
- }
-
- if (arg0_type.type != ETYPE_FIXNUM && arg0_type.type != ETYPE_UNKNOWN) {
- jmp_ptr(cb, side_exit);
- return;
- }
-
- if (arg1_type.type != ETYPE_FIXNUM && arg1_type.type != ETYPE_UNKNOWN) {
- jmp_ptr(cb, side_exit);
- return;
- }
-
- RUBY_ASSERT(!arg0_type.is_heap);
- RUBY_ASSERT(!arg1_type.is_heap);
- RUBY_ASSERT(arg0_type.type == ETYPE_FIXNUM || arg0_type.type == ETYPE_UNKNOWN);
- RUBY_ASSERT(arg1_type.type == ETYPE_FIXNUM || arg1_type.type == ETYPE_UNKNOWN);
-
- // Get stack operands without popping them
- x86opnd_t arg1 = ctx_stack_opnd(ctx, 0);
- x86opnd_t arg0 = ctx_stack_opnd(ctx, 1);
-
- // If not fixnums, fall back
- if (arg0_type.type != ETYPE_FIXNUM) {
- ADD_COMMENT(cb, "guard arg0 fixnum");
- test(cb, arg0, imm_opnd(RUBY_FIXNUM_FLAG));
- jz_ptr(cb, side_exit);
- }
- if (arg1_type.type != ETYPE_FIXNUM) {
- ADD_COMMENT(cb, "guard arg1 fixnum");
- test(cb, arg1, imm_opnd(RUBY_FIXNUM_FLAG));
- jz_ptr(cb, side_exit);
- }
-
- // Set stack types in context
- ctx_upgrade_opnd_type(ctx, OPND_STACK(0), TYPE_FIXNUM);
- ctx_upgrade_opnd_type(ctx, OPND_STACK(1), TYPE_FIXNUM);
-}
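The single `test` against `RUBY_FIXNUM_FLAG` is all it takes because of how CRuby tags fixnums. A minimal self-contained model of that encoding, assuming the usual 64-bit tagging scheme (the `sketch_` helpers are illustrative, not CRuby API):

```c
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_FIXNUM_FLAG 0x01u

/* Fixnums are stored as (n << 1) | 1, so the low bit doubles as the tag. */
static inline uintptr_t sketch_int2fix(intptr_t n)   { return ((uintptr_t)n << 1) | SKETCH_FIXNUM_FLAG; }
static inline bool      sketch_fixnum_p(uintptr_t v) { return (v & SKETCH_FIXNUM_FLAG) != 0; }
static inline intptr_t  sketch_fix2int(uintptr_t v)  { return (intptr_t)v >> 1; }
```

Note that the guard is only emitted for operands whose context type is still unknown; operands already proven to be fixnums skip the runtime test, and the context is upgraded afterwards so later instructions in the block can skip it too.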
-
-// Conditional move operation used by comparison operators
-typedef void (*cmov_fn)(codeblock_t *cb, x86opnd_t opnd0, x86opnd_t opnd1);
-
-static codegen_status_t
-gen_fixnum_cmp(jitstate_t *jit, ctx_t *ctx, cmov_fn cmov_op)
-{
- // Defer compilation so we can specialize based on a runtime receiver
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
- VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_LT)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Check that both operands are fixnums
- guard_two_fixnums(ctx, side_exit);
-
- // Get the operands from the stack
- x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
- x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
-
- // Compare the arguments
- xor(cb, REG0_32, REG0_32); // REG0 = Qfalse
- mov(cb, REG1, arg0);
- cmp(cb, REG1, arg1);
- mov(cb, REG1, imm_opnd(Qtrue));
- cmov_op(cb, REG0, REG1);
-
- // Push the output on the stack
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, dst, REG0);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t
-gen_opt_lt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_fixnum_cmp(jit, ctx, cmovl);
-}
-
-static codegen_status_t
-gen_opt_le(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_fixnum_cmp(jit, ctx, cmovle);
-}
-
-static codegen_status_t
-gen_opt_ge(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_fixnum_cmp(jit, ctx, cmovge);
-}
-
-static codegen_status_t
-gen_opt_gt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_fixnum_cmp(jit, ctx, cmovg);
-}
-
-// Implements specialized equality for either two fixnums or two strings
-// Returns true if code was generated, otherwise false
-static bool
-gen_equality_specialized(jitstate_t *jit, ctx_t *ctx, uint8_t *side_exit)
-{
- VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
- VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- x86opnd_t a_opnd = ctx_stack_opnd(ctx, 1);
- x86opnd_t b_opnd = ctx_stack_opnd(ctx, 0);
-
- if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_EQ)) {
- // if overridden, emit the generic version
- return false;
- }
-
- guard_two_fixnums(ctx, side_exit);
-
- mov(cb, REG0, a_opnd);
- cmp(cb, REG0, b_opnd);
-
- mov(cb, REG0, imm_opnd(Qfalse));
- mov(cb, REG1, imm_opnd(Qtrue));
- cmove(cb, REG0, REG1);
-
- // Push the output on the stack
- ctx_stack_pop(ctx, 2);
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
- mov(cb, dst, REG0);
-
- return true;
- }
- else if (CLASS_OF(comptime_a) == rb_cString &&
- CLASS_OF(comptime_b) == rb_cString) {
- if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_EQ)) {
- // if overridden, emit the generic version
- return false;
- }
-
- // Load a and b in preparation for call later
- mov(cb, C_ARG_REGS[0], a_opnd);
- mov(cb, C_ARG_REGS[1], b_opnd);
-
- // Guard that a is a String
- mov(cb, REG0, C_ARG_REGS[0]);
- jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(1), comptime_a, SEND_MAX_DEPTH, side_exit);
-
- uint32_t ret = cb_new_label(cb, "ret");
-
- // If they are equal by identity, return true
- cmp(cb, C_ARG_REGS[0], C_ARG_REGS[1]);
- mov(cb, RAX, imm_opnd(Qtrue));
- je_label(cb, ret);
-
- // Otherwise guard that b is a T_STRING (from type info) or String (from runtime guard)
- if (ctx_get_opnd_type(ctx, OPND_STACK(0)).type != ETYPE_STRING) {
- mov(cb, REG0, C_ARG_REGS[1]);
- // Note: any T_STRING is valid here, but we check for a ::String for simplicity
- jit_guard_known_klass(jit, ctx, rb_cString, OPND_STACK(0), comptime_b, SEND_MAX_DEPTH, side_exit);
- }
-
- // Call rb_str_eql_internal(a, b)
- call_ptr(cb, REG0, (void *)rb_str_eql_internal);
-
- // Push the output on the stack
- cb_write_label(cb, ret);
- ctx_stack_pop(ctx, 2);
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_IMM);
- mov(cb, dst, RAX);
- cb_link_labels(cb);
-
- return true;
- }
- else {
- return false;
- }
-}
-
-static codegen_status_t
-gen_opt_eq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize based on a runtime receiver
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (gen_equality_specialized(jit, ctx, side_exit)) {
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
- else {
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block);
-
-static codegen_status_t
-gen_opt_neq(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // opt_neq is passed two rb_call_data as arguments:
- // first for ==, second for !=
- struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 1);
- return gen_send_general(jit, ctx, cd, NULL);
-}
-
-static codegen_status_t
-gen_opt_aref(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- struct rb_call_data * cd = (struct rb_call_data *)jit_get_arg(jit, 0);
- int32_t argc = (int32_t)vm_ci_argc(cd->ci);
-
- // Only JIT one-arg calls like `ary[6]`
- if (argc != 1) {
- GEN_COUNTER_INC(cb, oaref_argc_not_one);
- return YJIT_CANT_COMPILE;
- }
-
- // Defer compilation so we can specialize based on a runtime receiver
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- // Remember the context on entry for adding guard chains
- const ctx_t starting_context = *ctx;
-
- // Specialize based on compile-time values
- VALUE comptime_idx = jit_peek_at_stack(jit, ctx, 0);
- VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 1);
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (CLASS_OF(comptime_recv) == rb_cArray && RB_FIXNUM_P(comptime_idx)) {
- if (!assume_bop_not_redefined(jit, ARRAY_REDEFINED_OP_FLAG, BOP_AREF)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Pop the stack operands
- x86opnd_t idx_opnd = ctx_stack_pop(ctx, 1);
- x86opnd_t recv_opnd = ctx_stack_pop(ctx, 1);
- mov(cb, REG0, recv_opnd);
-
- // if (SPECIAL_CONST_P(recv)) {
- // Bail if receiver is not a heap object
- test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
- jnz_ptr(cb, side_exit);
- cmp(cb, REG0, imm_opnd(Qfalse));
- je_ptr(cb, side_exit);
- cmp(cb, REG0, imm_opnd(Qnil));
- je_ptr(cb, side_exit);
-
- // Bail if recv has a class other than ::Array.
- // BOP_AREF check above is only good for ::Array.
- mov(cb, REG1, mem_opnd(64, REG0, offsetof(struct RBasic, klass)));
- mov(cb, REG0, const_ptr_opnd((void *)rb_cArray));
- cmp(cb, REG0, REG1);
- jit_chain_guard(JCC_JNE, jit, &starting_context, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
-
- // Bail if idx is not a FIXNUM
- mov(cb, REG1, idx_opnd);
- test(cb, REG1, imm_opnd(RUBY_FIXNUM_FLAG));
- jz_ptr(cb, COUNTED_EXIT(jit, side_exit, oaref_arg_not_fixnum));
-
- // Call VALUE rb_ary_entry_internal(VALUE ary, long offset).
- // It never raises or allocates, so we don't need to write to cfp->pc.
- {
- mov(cb, RDI, recv_opnd);
- sar(cb, REG1, imm_opnd(1)); // Convert fixnum to int
- mov(cb, RSI, REG1);
- call_ptr(cb, REG0, (void *)rb_ary_entry_internal);
-
- // Push the return value onto the stack
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
- }
-
- // Jump to next instruction. This allows guard chains to share the same successor.
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
- else if (CLASS_OF(comptime_recv) == rb_cHash) {
- if (!assume_bop_not_redefined(jit, HASH_REDEFINED_OP_FLAG, BOP_AREF)) {
- return YJIT_CANT_COMPILE;
- }
-
- x86opnd_t key_opnd = ctx_stack_opnd(ctx, 0);
- x86opnd_t recv_opnd = ctx_stack_opnd(ctx, 1);
-
- // Guard that the receiver is a hash
- mov(cb, REG0, recv_opnd);
- jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(1), comptime_recv, OPT_AREF_MAX_CHAIN_DEPTH, side_exit);
-
- // Setup arguments for rb_hash_aref().
- mov(cb, C_ARG_REGS[0], REG0);
- mov(cb, C_ARG_REGS[1], key_opnd);
-
- // Prepare to call rb_hash_aref(). It might call #hash on the key.
- jit_prepare_routine_call(jit, ctx, REG0);
-
- call_ptr(cb, REG0, (void *)rb_hash_aref);
-
- // Pop the key and the receiver
- (void)ctx_stack_pop(ctx, 2);
-
- // Push the return value onto the stack
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- // Jump to next instruction. This allows guard chains to share the same successor.
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
- else {
- // General case. Call the [] method.
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t
-gen_opt_aset(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize on a runtime `self`
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 2);
- VALUE comptime_key = jit_peek_at_stack(jit, ctx, 1);
-
- // Get the operands from the stack
- x86opnd_t recv = ctx_stack_opnd(ctx, 2);
- x86opnd_t key = ctx_stack_opnd(ctx, 1);
- x86opnd_t val = ctx_stack_opnd(ctx, 0);
-
- if (CLASS_OF(comptime_recv) == rb_cArray && FIXNUM_P(comptime_key)) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Guard receiver is an Array
- mov(cb, REG0, recv);
- jit_guard_known_klass(jit, ctx, rb_cArray, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
-
- // Guard key is a fixnum
- mov(cb, REG0, key);
- jit_guard_known_klass(jit, ctx, rb_cInteger, OPND_STACK(1), comptime_key, SEND_MAX_DEPTH, side_exit);
-
- // Call rb_ary_store
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], key);
- sar(cb, C_ARG_REGS[1], imm_opnd(1)); // FIX2LONG(key)
- mov(cb, C_ARG_REGS[2], val);
-
- // We might allocate or raise
- jit_prepare_routine_call(jit, ctx, REG0);
-
- call_ptr(cb, REG0, (void *)rb_ary_store);
-
- // rb_ary_store returns void
- // stored value should still be on stack
- mov(cb, REG0, ctx_stack_opnd(ctx, 0));
-
- // Push the return value onto the stack
- ctx_stack_pop(ctx, 3);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, REG0);
-
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
- else if (CLASS_OF(comptime_recv) == rb_cHash) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Guard receiver is a Hash
- mov(cb, REG0, recv);
- jit_guard_known_klass(jit, ctx, rb_cHash, OPND_STACK(2), comptime_recv, SEND_MAX_DEPTH, side_exit);
-
- // Call rb_hash_aset
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], key);
- mov(cb, C_ARG_REGS[2], val);
-
- // We might allocate or raise
- jit_prepare_routine_call(jit, ctx, REG0);
-
- call_ptr(cb, REG0, (void *)rb_hash_aset);
-
- // Push the return value onto the stack
- ctx_stack_pop(ctx, 3);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
- else {
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t
-gen_opt_and(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize on a runtime `self`
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
- VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_AND)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Check that both operands are fixnums
- guard_two_fixnums(ctx, side_exit);
-
- // Get the operands and destination from the stack
- x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
- x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
-
- // Do the bitwise and arg0 & arg1
- mov(cb, REG0, arg0);
- and(cb, REG0, arg1);
-
- // Push the output on the stack
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
- mov(cb, dst, REG0);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t
-gen_opt_or(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize on a runtime `self`
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
- VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_OR)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Check that both operands are fixnums
- guard_two_fixnums(ctx, side_exit);
-
- // Get the operands and destination from the stack
- x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
- x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
-
- // Do the bitwise or arg0 | arg1
- mov(cb, REG0, arg0);
- or(cb, REG0, arg1);
-
- // Push the output on the stack
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
- mov(cb, dst, REG0);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t
-gen_opt_minus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize on a runtime `self`
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
- VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_MINUS)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Check that both operands are fixnums
- guard_two_fixnums(ctx, side_exit);
-
- // Get the operands and destination from the stack
- x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
- x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
-
- // Subtract arg0 - arg1 and test for overflow
- mov(cb, REG0, arg0);
- sub(cb, REG0, arg1);
- jo_ptr(cb, side_exit);
- add(cb, REG0, imm_opnd(1));
-
- // Push the output on the stack
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
- mov(cb, dst, REG0);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
-
-static codegen_status_t
-gen_opt_plus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Defer compilation so we can specialize on a runtime `self`
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_a = jit_peek_at_stack(jit, ctx, 1);
- VALUE comptime_b = jit_peek_at_stack(jit, ctx, 0);
-
- if (FIXNUM_P(comptime_a) && FIXNUM_P(comptime_b)) {
- // Create a side-exit to fall back to the interpreter
- // Note: we generate the side-exit before popping operands from the stack
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (!assume_bop_not_redefined(jit, INTEGER_REDEFINED_OP_FLAG, BOP_PLUS)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Check that both operands are fixnums
- guard_two_fixnums(ctx, side_exit);
-
- // Get the operands and destination from the stack
- x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
- x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
-
- // Add arg0 + arg1 and test for overflow
- mov(cb, REG0, arg0);
- sub(cb, REG0, imm_opnd(1));
- add(cb, REG0, arg1);
- jo_ptr(cb, side_exit);
-
- // Push the output on the stack
- x86opnd_t dst = ctx_stack_push(ctx, TYPE_FIXNUM);
- mov(cb, dst, REG0);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
- }
-}
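The `sub(cb, REG0, imm_opnd(1))` before the add here, and the matching `add(cb, REG0, imm_opnd(1))` after the subtract in `gen_opt_minus` above, fold the fixnum tag bit into the arithmetic itself. A short self-contained sketch of why that works (the `sketch_` names are illustrative):

```c
#include <stdint.h>

/* With fixnums encoded as (n << 1) | 1:
 *   (2x+1) + (2y+1) - 1 == 2(x+y) + 1   -> untag one operand, then add
 *   (2x+1) - (2y+1) + 1 == 2(x-y) + 1   -> subtract, then re-tag
 */
static inline uintptr_t sketch_fixnum_add(uintptr_t a, uintptr_t b) { return (a - 1) + b; }
static inline uintptr_t sketch_fixnum_sub(uintptr_t a, uintptr_t b) { return (a - b) + 1; }
```

The `jo_ptr` after each operation side-exits on signed overflow of the tagged computation, which is the case where the result no longer fits in a fixnum and the interpreter needs to take over (promoting to Bignum if necessary).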
-
-static codegen_status_t
-gen_opt_mult(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_div(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-VALUE rb_vm_opt_mod(VALUE recv, VALUE obj);
-
-static codegen_status_t
-gen_opt_mod(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Save the PC and SP because the callee may allocate bignums
- // Note that this modifies REG_SP, which is why we do it first
- jit_prepare_routine_call(jit, ctx, REG0);
-
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Get the operands from the stack
- x86opnd_t arg1 = ctx_stack_pop(ctx, 1);
- x86opnd_t arg0 = ctx_stack_pop(ctx, 1);
-
- // Call rb_vm_opt_mod(VALUE recv, VALUE obj)
- mov(cb, C_ARG_REGS[0], arg0);
- mov(cb, C_ARG_REGS[1], arg1);
- call_ptr(cb, REG0, (void *)rb_vm_opt_mod);
-
- // If val == Qundef, bail to do a method call
- cmp(cb, RAX, imm_opnd(Qundef));
- je_ptr(cb, side_exit);
-
- // Push the return value onto the stack
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_opt_ltlt(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_nil_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_empty_p(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Delegate to send, call the method on the recv
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_str_freeze(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_FREEZE)) {
- return YJIT_CANT_COMPILE;
- }
-
- VALUE str = jit_get_arg(jit, 0);
- jit_mov_gc_ptr(jit, cb, REG0, str);
-
- // Push the return value onto the stack
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
- mov(cb, stack_ret, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_opt_str_uminus(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- if (!assume_bop_not_redefined(jit, STRING_REDEFINED_OP_FLAG, BOP_UMINUS)) {
- return YJIT_CANT_COMPILE;
- }
-
- VALUE str = jit_get_arg(jit, 0);
- jit_mov_gc_ptr(jit, cb, REG0, str);
-
- // Push the return value onto the stack
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
- mov(cb, stack_ret, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_opt_not(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_size(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_length(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_regexpmatch2(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- return gen_opt_send_without_block(jit, ctx, cb);
-}
-
-static codegen_status_t
-gen_opt_case_dispatch(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Normally this instruction would look up the key in a hash and jump to an
- // offset based on that.
- // Instead we can take the fallback case and continue with the next
- // instruction.
- // We'd hope that our jitted code will be sufficiently fast without the
- // hash lookup, at least for small hashes, but it's worth revisiting this
- // assumption in the future.
-
- ctx_stack_pop(ctx, 1);
-
- return YJIT_KEEP_COMPILING; // continue with the next instruction
-}
-
-static void
-gen_branchif_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- jz_ptr(cb, target1);
- break;
-
- case SHAPE_NEXT1:
- jnz_ptr(cb, target0);
- break;
-
- case SHAPE_DEFAULT:
- jnz_ptr(cb, target0);
- jmp_ptr(cb, target1);
- break;
- }
-}
-
-static codegen_status_t
-gen_branchif(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
-
- // Check for interrupts, but only on backward branches that may create loops
- if (jump_offset < 0) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
- yjit_check_ints(cb, side_exit);
- }
-
- // Test if any bit (outside of the Qnil bit) is on
- // RUBY_Qfalse /* ...0000 0000 */
- // RUBY_Qnil /* ...0000 1000 */
- x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
- test(cb, val_opnd, imm_opnd(~Qnil));
-
- // Get the branch target instruction offsets
- uint32_t next_idx = jit_next_insn_idx(jit);
- uint32_t jump_idx = next_idx + jump_offset;
- blockid_t next_block = { jit->iseq, next_idx };
- blockid_t jump_block = { jit->iseq, jump_idx };
-
- // Generate the branch instructions
- gen_branch(
- jit,
- ctx,
- jump_block,
- ctx,
- next_block,
- ctx,
- gen_branchif_branch
- );
-
- return YJIT_END_BLOCK;
-}
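The `test(cb, val_opnd, imm_opnd(~Qnil))` trick relies on the two falsy values sharing no bits outside `Qnil`'s single bit. A self-contained model, assuming the usual encoding for this era of CRuby (`Qfalse == 0`, `Qnil == 8`); the `sketch_` names are illustrative:

```c
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_Qfalse 0x00u
#define SKETCH_Qnil   0x08u

/* Truthiness as the interpreter defines it: everything except Qfalse and
 * Qnil is truthy, so masking out Qnil's bit and testing for zero decides
 * the branch in one instruction. */
static inline bool
sketch_rtest(uintptr_t val)
{
    return (val & ~(uintptr_t)SKETCH_Qnil) != 0;
}
```

`branchunless`, just below, uses the same test with the jump senses swapped in its branch shape helper.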
-
-static void
-gen_branchunless_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- jnz_ptr(cb, target1);
- break;
-
- case SHAPE_NEXT1:
- jz_ptr(cb, target0);
- break;
-
- case SHAPE_DEFAULT:
- jz_ptr(cb, target0);
- jmp_ptr(cb, target1);
- break;
- }
-}
-
-static codegen_status_t
-gen_branchunless(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
-
- // Check for interrupts, but only on backward branches that may create loops
- if (jump_offset < 0) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
- yjit_check_ints(cb, side_exit);
- }
-
- // Test if any bit (outside of the Qnil bit) is on
- // RUBY_Qfalse /* ...0000 0000 */
- // RUBY_Qnil /* ...0000 1000 */
- x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
- test(cb, val_opnd, imm_opnd(~Qnil));
-
- // Get the branch target instruction offsets
- uint32_t next_idx = jit_next_insn_idx(jit);
- uint32_t jump_idx = next_idx + jump_offset;
- blockid_t next_block = { jit->iseq, next_idx };
- blockid_t jump_block = { jit->iseq, jump_idx };
-
- // Generate the branch instructions
- gen_branch(
- jit,
- ctx,
- jump_block,
- ctx,
- next_block,
- ctx,
- gen_branchunless_branch
- );
-
- return YJIT_END_BLOCK;
-}
-
-static void
-gen_branchnil_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- jne_ptr(cb, target1);
- break;
-
- case SHAPE_NEXT1:
- je_ptr(cb, target0);
- break;
-
- case SHAPE_DEFAULT:
- je_ptr(cb, target0);
- jmp_ptr(cb, target1);
- break;
- }
-}
-
-static codegen_status_t
-gen_branchnil(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
-
- // Check for interrupts, but only on backward branches that may create loops
- if (jump_offset < 0) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
- yjit_check_ints(cb, side_exit);
- }
-
- // Test if the value is Qnil
- // RUBY_Qnil /* ...0000 1000 */
- x86opnd_t val_opnd = ctx_stack_pop(ctx, 1);
- cmp(cb, val_opnd, imm_opnd(Qnil));
-
- // Get the branch target instruction offsets
- uint32_t next_idx = jit_next_insn_idx(jit);
- uint32_t jump_idx = next_idx + jump_offset;
- blockid_t next_block = { jit->iseq, next_idx };
- blockid_t jump_block = { jit->iseq, jump_idx };
-
- // Generate the branch instructions
- gen_branch(
- jit,
- ctx,
- jump_block,
- ctx,
- next_block,
- ctx,
- gen_branchnil_branch
- );
-
- return YJIT_END_BLOCK;
-}
-
-static codegen_status_t
-gen_jump(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- int32_t jump_offset = (int32_t)jit_get_arg(jit, 0);
-
- // Check for interrupts, but only on backward branches that may create loops
- if (jump_offset < 0) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
- yjit_check_ints(cb, side_exit);
- }
-
- // Get the branch target instruction offsets
- uint32_t jump_idx = jit_next_insn_idx(jit) + jump_offset;
- blockid_t jump_block = { jit->iseq, jump_idx };
-
- // Generate the jump instruction
- gen_direct_jump(
- jit,
- ctx,
- jump_block
- );
-
- return YJIT_END_BLOCK;
-}
-
-/*
-Guard that self or a stack operand has the same class as `known_klass`, using
-`sample_instance` to speculate about the shape of the runtime value.
-FIXNUM and on-heap integers are treated as if they have distinct classes, and
-the guard generated for one will fail for the other.
-
-Recompile as a contingency if possible, or take a side exit as a last resort.
-*/
-static bool
-jit_guard_known_klass(jitstate_t *jit, ctx_t *ctx, VALUE known_klass, insn_opnd_t insn_opnd, VALUE sample_instance, const int max_chain_depth, uint8_t *side_exit)
-{
- val_type_t val_type = ctx_get_opnd_type(ctx, insn_opnd);
-
- if (known_klass == rb_cNilClass) {
- RUBY_ASSERT(!val_type.is_heap);
- if (val_type.type != ETYPE_NIL) {
- RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
-
- ADD_COMMENT(cb, "guard object is nil");
- cmp(cb, REG0, imm_opnd(Qnil));
- jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
-
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_NIL);
- }
- }
- else if (known_klass == rb_cTrueClass) {
- RUBY_ASSERT(!val_type.is_heap);
- if (val_type.type != ETYPE_TRUE) {
- RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
-
- ADD_COMMENT(cb, "guard object is true");
- cmp(cb, REG0, imm_opnd(Qtrue));
- jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
-
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_TRUE);
- }
- }
- else if (known_klass == rb_cFalseClass) {
- RUBY_ASSERT(!val_type.is_heap);
- if (val_type.type != ETYPE_FALSE) {
- RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
-
- ADD_COMMENT(cb, "guard object is false");
- STATIC_ASSERT(qfalse_is_zero, Qfalse == 0);
- test(cb, REG0, REG0);
- jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
-
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FALSE);
- }
- }
- else if (known_klass == rb_cInteger && FIXNUM_P(sample_instance)) {
- RUBY_ASSERT(!val_type.is_heap);
- // We will guard fixnum and bignum as though they were separate classes
- // BIGNUM can be handled by the general else case below
- if (val_type.type != ETYPE_FIXNUM || !val_type.is_imm) {
- RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
-
- ADD_COMMENT(cb, "guard object is fixnum");
- test(cb, REG0, imm_opnd(RUBY_FIXNUM_FLAG));
- jit_chain_guard(JCC_JZ, jit, ctx, max_chain_depth, side_exit);
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FIXNUM);
- }
- }
- else if (known_klass == rb_cSymbol && STATIC_SYM_P(sample_instance)) {
- RUBY_ASSERT(!val_type.is_heap);
- // We will guard STATIC vs DYNAMIC as though they were separate classes
- // DYNAMIC symbols can be handled by the general else case below
- if (val_type.type != ETYPE_SYMBOL || !val_type.is_imm) {
- RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
-
- ADD_COMMENT(cb, "guard object is static symbol");
- STATIC_ASSERT(special_shift_is_8, RUBY_SPECIAL_SHIFT == 8);
- cmp(cb, REG0_8, imm_opnd(RUBY_SYMBOL_FLAG));
- jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_STATIC_SYMBOL);
- }
- }
- else if (known_klass == rb_cFloat && FLONUM_P(sample_instance)) {
- RUBY_ASSERT(!val_type.is_heap);
- if (val_type.type != ETYPE_FLONUM || !val_type.is_imm) {
- RUBY_ASSERT(val_type.type == ETYPE_UNKNOWN);
-
- // We will guard flonum vs heap float as though they were separate classes
- ADD_COMMENT(cb, "guard object is flonum");
- mov(cb, REG1, REG0);
- and(cb, REG1, imm_opnd(RUBY_FLONUM_MASK));
- cmp(cb, REG1, imm_opnd(RUBY_FLONUM_FLAG));
- jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_FLONUM);
- }
- }
- else if (FL_TEST(known_klass, FL_SINGLETON) && sample_instance == rb_attr_get(known_klass, id__attached__)) {
- // Singleton classes are attached to one specific object, so we can
- // avoid one memory access (and potentially the is_heap check) by
- // looking for the expected object directly.
- // Note that in case the sample instance has a singleton class that
- // doesn't attach to the sample instance, it means the sample instance
- // has an empty singleton class that hasn't been materialized yet. In
- // this case, comparing against the sample instance doesn't guarantee
- // that its singleton class is empty, so we can't avoid the memory
- // access. As an example, `Object.new.singleton_class` is an object in
- // this situation.
- ADD_COMMENT(cb, "guard known object with singleton class");
- // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the object.
- jit_mov_gc_ptr(jit, cb, REG1, sample_instance);
- cmp(cb, REG0, REG1);
- jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
- }
- else {
- RUBY_ASSERT(!val_type.is_imm);
-
- // Check that the receiver is a heap object
- // Note: if we get here, the class doesn't have immediate instances.
- if (!val_type.is_heap) {
- ADD_COMMENT(cb, "guard not immediate");
- RUBY_ASSERT(Qfalse < Qnil);
- test(cb, REG0, imm_opnd(RUBY_IMMEDIATE_MASK));
- jit_chain_guard(JCC_JNZ, jit, ctx, max_chain_depth, side_exit);
- cmp(cb, REG0, imm_opnd(Qnil));
- jit_chain_guard(JCC_JBE, jit, ctx, max_chain_depth, side_exit);
-
- ctx_upgrade_opnd_type(ctx, insn_opnd, TYPE_HEAP);
- }
-
- x86opnd_t klass_opnd = mem_opnd(64, REG0, offsetof(struct RBasic, klass));
-
- // Bail if receiver class is different from known_klass
- // TODO: jit_mov_gc_ptr keeps a strong reference, which leaks the class.
- ADD_COMMENT(cb, "guard known class");
- jit_mov_gc_ptr(jit, cb, REG1, known_klass);
- cmp(cb, klass_opnd, REG1);
- jit_chain_guard(JCC_JNE, jit, ctx, max_chain_depth, side_exit);
- }
-
- return true;
-}
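The flonum and static-symbol branches above emit the same bit tests the interpreter's predicates perform. A compact model of those two encodings, with assumed constants for 64-bit CRuby of this era (`RUBY_FLONUM_MASK == 0x03`, `RUBY_FLONUM_FLAG == 0x02`, `RUBY_SYMBOL_FLAG == 0x0c`); the `sketch_` names are illustrative:

```c
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_FLONUM_MASK 0x03u
#define SKETCH_FLONUM_FLAG 0x02u
#define SKETCH_SYMBOL_FLAG 0x0cu

/* Flonums end in binary ...10; static symbols keep their tag in the low
 * byte (the ID lives above RUBY_SPECIAL_SHIFT == 8), which is why the
 * guard above compares only the 8-bit REG0_8 against RUBY_SYMBOL_FLAG. */
static inline bool sketch_flonum_p(uintptr_t v)     { return (v & SKETCH_FLONUM_MASK) == SKETCH_FLONUM_FLAG; }
static inline bool sketch_static_sym_p(uintptr_t v) { return (v & 0xffu) == SKETCH_SYMBOL_FLAG; }
```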
-
-// Generate ancestry guard for protected callee.
-// Calls to protected callees only go through when self.is_a?(klass_that_defines_the_callee).
-static void
-jit_protected_callee_ancestry_guard(jitstate_t *jit, codeblock_t *cb, const rb_callable_method_entry_t *cme, uint8_t *side_exit)
-{
- // See vm_call_method().
- mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, self));
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], cme->defined_class);
- // Note: PC isn't written to current control frame as rb_is_kind_of() shouldn't raise.
- // VALUE rb_obj_is_kind_of(VALUE obj, VALUE klass);
- call_ptr(cb, REG0, (void *)&rb_obj_is_kind_of);
- test(cb, RAX, RAX);
- jz_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_protected_check_failed));
-}
-
-// Return true when the codegen function generates code.
-// known_recv_klass is non-NULL when the caller has used jit_guard_known_klass().
-// See yjit_reg_method().
-typedef bool (*method_codegen_t)(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass);
-
-// Register a specialized codegen function for a particular method. Note that
-// if the function returns true, the code it generates runs without a
-// control frame and without interrupt checks. To avoid creating observable
-// behavior changes, the codegen function should only target simple code paths
-// that do not allocate and do not make method calls.
-static void
-yjit_reg_method(VALUE klass, const char *mid_str, method_codegen_t gen_fn)
-{
- ID mid = rb_intern(mid_str);
- const rb_method_entry_t *me = rb_method_entry_at(klass, mid);
-
- if (!me) {
- rb_bug("undefined optimized method: %s", rb_id2name(mid));
- }
-
- // For now, only cfuncs are supported
- RUBY_ASSERT(me && me->def);
- RUBY_ASSERT(me->def->type == VM_METHOD_TYPE_CFUNC);
-
- st_insert(yjit_method_codegen_table, (st_data_t)me->def->method_serial, (st_data_t)gen_fn);
-}
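To show how this registration path is meant to be used: the `jit_rb_*` callbacks defined below get wired up through calls of the following shape. This is a hypothetical sketch assuming the init code elsewhere in this file follows this pattern; the class/method pairs are examples, not a transcription of the real registration list.

```c
// Sketch only: registers specialized codegen for BasicObject#! and
// String#to_s, relying on yjit_reg_method() and the callbacks defined
// in this file.
static void
sketch_reg_optimized_methods(void)
{
    yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
    yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
}
```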
-
-// Codegen for rb_obj_not().
-// Note, caller is responsible for generating all the right guards, including
-// arity guards.
-static bool
-jit_rb_obj_not(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
-{
- const val_type_t recv_opnd = ctx_get_opnd_type(ctx, OPND_STACK(0));
-
- if (recv_opnd.type == ETYPE_NIL || recv_opnd.type == ETYPE_FALSE) {
- ADD_COMMENT(cb, "rb_obj_not(nil_or_false)");
- ctx_stack_pop(ctx, 1);
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_TRUE);
- mov(cb, out_opnd, imm_opnd(Qtrue));
- }
- else if (recv_opnd.is_heap || recv_opnd.type != ETYPE_UNKNOWN) {
- // Note: recv_opnd.type != ETYPE_NIL && recv_opnd.type != ETYPE_FALSE.
- ADD_COMMENT(cb, "rb_obj_not(truthy)");
- ctx_stack_pop(ctx, 1);
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FALSE);
- mov(cb, out_opnd, imm_opnd(Qfalse));
- }
- else {
- // jit_guard_known_klass() already ran on the receiver, which should
- // have deduced the type of the receiver. This case should be
- // rare if not unreachable.
- return false;
- }
- return true;
-}
-
-// Codegen for rb_true()
-static bool
-jit_rb_true(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
-{
- ADD_COMMENT(cb, "nil? == true");
- ctx_stack_pop(ctx, 1);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_TRUE);
- mov(cb, stack_ret, imm_opnd(Qtrue));
- return true;
-}
-
-// Codegen for rb_false()
-static bool
-jit_rb_false(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
-{
- ADD_COMMENT(cb, "nil? == false");
- ctx_stack_pop(ctx, 1);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_FALSE);
- mov(cb, stack_ret, imm_opnd(Qfalse));
- return true;
-}
-
-// Codegen for rb_obj_equal()
-// object identity comparison
-static bool
-jit_rb_obj_equal(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
-{
- ADD_COMMENT(cb, "equal?");
- x86opnd_t obj1 = ctx_stack_pop(ctx, 1);
- x86opnd_t obj2 = ctx_stack_pop(ctx, 1);
-
- mov(cb, REG0, obj1);
- cmp(cb, REG0, obj2);
- mov(cb, REG0, imm_opnd(Qtrue));
- mov(cb, REG1, imm_opnd(Qfalse));
- cmovne(cb, REG0, REG1);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_IMM);
- mov(cb, stack_ret, REG0);
- return true;
-}
-
-static VALUE
-yjit_str_bytesize(VALUE str)
-{
- return LONG2NUM(RSTRING_LEN(str));
-}
-
-static bool
-jit_rb_str_bytesize(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *known_recv_klass)
-{
- ADD_COMMENT(cb, "String#bytesize");
-
- x86opnd_t recv = ctx_stack_pop(ctx, 1);
- mov(cb, C_ARG_REGS[0], recv);
- call_ptr(cb, REG0, (void *)&yjit_str_bytesize);
-
- x86opnd_t out_opnd = ctx_stack_push(ctx, TYPE_FIXNUM);
- mov(cb, out_opnd, RAX);
-
- return true;
-}
-
-// Codegen for rb_str_to_s()
-// When String#to_s is called on a String instance, the method returns self and
-// most of the overhead comes from setting up the method call. We observed that
-// this situation happens a lot in some workloads.
-static bool
-jit_rb_str_to_s(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
-{
- if (recv_known_klass && *recv_known_klass == rb_cString) {
- ADD_COMMENT(cb, "to_s on plain string");
- // The method returns the receiver, which is already on the stack.
- // No stack movement.
- return true;
- }
- return false;
-}
-
-static bool
-jit_thread_s_current(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
-{
- ADD_COMMENT(cb, "Thread.current");
- ctx_stack_pop(ctx, 1);
-
- // ec->thread_ptr
- mov(cb, REG0, member_opnd(REG_EC, rb_execution_context_t, thread_ptr));
-
- // thread->self
- mov(cb, REG0, member_opnd(REG0, rb_thread_t, self));
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_HEAP);
- mov(cb, stack_ret, REG0);
- return true;
-}
-
-// Check if we know how to codegen for a particular cfunc method
-static method_codegen_t
-lookup_cfunc_codegen(const rb_method_definition_t *def)
-{
- st_data_t gen_fn;
- if (st_lookup(yjit_method_codegen_table, def->method_serial, &gen_fn)) {
- return (method_codegen_t)gen_fn;
- }
- return NULL;
-}
-
-// Is anyone listening for :c_call and :c_return events currently?
-static bool
-c_method_tracing_currently_enabled(const jitstate_t *jit)
-{
- rb_event_flag_t tracing_events;
- if (rb_multi_ractor_p()) {
- tracing_events = ruby_vm_event_enabled_global_flags;
- }
- else {
- // At the time of writing, events are never removed from
- // ruby_vm_event_enabled_global_flags so always checking using it would
- // mean we don't compile even after tracing is disabled.
- tracing_events = rb_ec_ractor_hooks(jit->ec)->events;
- }
-
- return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
-}
-
-// Called at runtime to build hashes of passed kwargs
-static VALUE
-yjit_runtime_build_kwhash(const struct rb_callinfo *ci, const VALUE *sp)
-{
- // similar to args_kw_argv_to_hash
- const VALUE *const passed_keywords = vm_ci_kwarg(ci)->keywords;
- const int kw_len = vm_ci_kwarg(ci)->keyword_len;
- const VALUE h = rb_hash_new_with_size(kw_len);
-
- for (int i = 0; i < kw_len; i++) {
- rb_hash_aset(h, passed_keywords[i], (sp - kw_len)[i]);
- }
- return h;
-}
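The `(sp - kw_len)[i]` indexing assumes the keyword values are the topmost `kw_len` stack slots when this helper runs (the caller hands it `ctx_sp_opnd(ctx, 0)` as `sp`). A small worked model of that layout; the call shape and the `sketch_` name are illustrative assumptions:

```c
#include <assert.h>
#include <stdint.h>

static void
sketch_kwhash_indexing(void)
{
    /* Stack slice for a call like obj.foo(1, a: 2, b: 3), with sp
     * pointing just past the last slot. */
    uintptr_t stack[] = {
        0xC0FFEE, /* receiver (some heap pointer)       */
        3,        /* positional 1, tagged as (1 << 1)|1 */
        5,        /* keyword a: 2, tagged as (2 << 1)|1 */
        7,        /* keyword b: 3, tagged as (3 << 1)|1 */
    };
    const uintptr_t *sp = stack + 4;
    const int kw_len = 2;

    assert((sp - kw_len)[0] == 5); /* value paired with the first keyword  (:a) */
    assert((sp - kw_len)[1] == 7); /* value paired with the second keyword (:b) */
}
```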
-
-static codegen_status_t
-gen_send_cfunc(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, const int32_t argc, VALUE *recv_known_klass)
-{
- const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
-
- const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
- const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
-
- // Number of args which will be passed through to the callee
- // This is adjusted by the kwargs being combined into a hash.
- const int passed_argc = kw_arg ? argc - kw_arg_num + 1 : argc;
-
- // If the argument count doesn't match
- if (cfunc->argc >= 0 && cfunc->argc != passed_argc) {
- GEN_COUNTER_INC(cb, send_cfunc_argc_mismatch);
- return YJIT_CANT_COMPILE;
- }
-
- // Don't JIT functions that need C stack arguments for now
- if (cfunc->argc >= 0 && passed_argc + 1 > NUM_C_ARG_REGS) {
- GEN_COUNTER_INC(cb, send_cfunc_toomany_args);
- return YJIT_CANT_COMPILE;
- }
-
- if (c_method_tracing_currently_enabled(jit)) {
- // Don't JIT if tracing c_call or c_return
- GEN_COUNTER_INC(cb, send_cfunc_tracing);
- return YJIT_CANT_COMPILE;
- }
-
- // Delegate to codegen for C methods if we have it.
- {
- method_codegen_t known_cfunc_codegen;
- if (!kw_arg && (known_cfunc_codegen = lookup_cfunc_codegen(cme->def))) {
- if (known_cfunc_codegen(jit, ctx, ci, cme, block, argc, recv_known_klass)) {
- // cfunc codegen generated code. Terminate the block so
- // there aren't multiple calls in the same block.
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
- }
- }
- }
-
- // Callee method ID
- //ID mid = vm_ci_mid(ci);
- //printf("JITting call to C function \"%s\", argc: %lu\n", rb_id2name(mid), argc);
- //print_str(cb, "");
- //print_str(cb, "calling CFUNC:");
- //print_str(cb, rb_id2name(mid));
- //print_str(cb, "recv");
- //print_ptr(cb, recv);
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Check for interrupts
- yjit_check_ints(cb, side_exit);
-
- // Stack overflow check
- // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
- // REG_CFP <= REG_SP + 4 * sizeof(VALUE) + sizeof(rb_control_frame_t)
- lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 4 + 2 * sizeof(rb_control_frame_t)));
- cmp(cb, REG_CFP, REG0);
- jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
-
- // Points to the receiver operand on the stack
- x86opnd_t recv = ctx_stack_opnd(ctx, argc);
-
- // Store incremented PC into current control frame in case callee raises.
- jit_save_pc(jit, REG0);
-
- if (block) {
- // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
- // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
- // with cfp->block_code.
- jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
- }
-
- // Increment the stack pointer by 3 (in the callee)
- // sp += 3
- lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * 3));
-
- // Write method entry at sp[-3]
- // sp[-3] = me;
- // Put compile time cme into REG1. It's assumed to be valid because we are notified when
- // any cme we depend on become outdated. See rb_yjit_method_lookup_change().
- jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
- mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
-
- // Write block handler at sp[-2]
- // sp[-2] = block_handler;
- if (block) {
- // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
- lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
- or(cb, REG1, imm_opnd(1));
- mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
- }
- else {
- mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
- }
-
- // Write env flags at sp[-1]
- // sp[-1] = frame_type;
- uint64_t frame_type = VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL;
- if (kw_arg) {
- frame_type |= VM_FRAME_FLAG_CFRAME_KW;
- }
- mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
-
- // Allocate a new CFP (ec->cfp--)
- sub(
- cb,
- member_opnd(REG_EC, rb_execution_context_t, cfp),
- imm_opnd(sizeof(rb_control_frame_t))
- );
-
- // Setup the new frame
- // *cfp = (const struct rb_control_frame_struct) {
- // .pc = 0,
- // .sp = sp,
- // .iseq = 0,
- // .self = recv,
- // .ep = sp - 1,
- // .block_code = 0,
- // .__bp__ = sp,
- // };
- mov(cb, REG1, member_opnd(REG_EC, rb_execution_context_t, cfp));
- mov(cb, member_opnd(REG1, rb_control_frame_t, pc), imm_opnd(0));
- mov(cb, member_opnd(REG1, rb_control_frame_t, sp), REG0);
- mov(cb, member_opnd(REG1, rb_control_frame_t, iseq), imm_opnd(0));
- mov(cb, member_opnd(REG1, rb_control_frame_t, block_code), imm_opnd(0));
- mov(cb, member_opnd(REG1, rb_control_frame_t, __bp__), REG0);
- sub(cb, REG0, imm_opnd(sizeof(VALUE)));
- mov(cb, member_opnd(REG1, rb_control_frame_t, ep), REG0);
- mov(cb, REG0, recv);
- mov(cb, member_opnd(REG1, rb_control_frame_t, self), REG0);
-
- // Verify that we are calling the right function
- if (YJIT_CHECK_MODE > 0) {
- // Call check_cfunc_dispatch
- mov(cb, C_ARG_REGS[0], recv);
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[1], (VALUE)ci);
- mov(cb, C_ARG_REGS[2], const_ptr_opnd((void *)cfunc->func));
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[3], (VALUE)cme);
- call_ptr(cb, REG0, (void *)&check_cfunc_dispatch);
- }
-
- if (kw_arg) {
- // Build a hash from all kwargs passed
- jit_mov_gc_ptr(jit, cb, C_ARG_REGS[0], (VALUE)ci);
- lea(cb, C_ARG_REGS[1], ctx_sp_opnd(ctx, 0));
- call_ptr(cb, REG0, (void *)&yjit_runtime_build_kwhash);
-
- // Replace the stack location at the start of kwargs with the new hash
- x86opnd_t stack_opnd = ctx_stack_opnd(ctx, argc - passed_argc);
- mov(cb, stack_opnd, RAX);
- }
-
- // Non-variadic method
- if (cfunc->argc >= 0) {
- // Copy the arguments from the stack to the C argument registers
- // self is the 0th argument and is at index argc from the stack top
- for (int32_t i = 0; i < passed_argc + 1; ++i)
- {
- x86opnd_t stack_opnd = ctx_stack_opnd(ctx, argc - i);
- x86opnd_t c_arg_reg = C_ARG_REGS[i];
- mov(cb, c_arg_reg, stack_opnd);
- }
- }
- // Variadic method
- if (cfunc->argc == -1) {
- // The method gets a pointer to the first argument
- // rb_f_puts(int argc, VALUE *argv, VALUE recv)
- mov(cb, C_ARG_REGS[0], imm_opnd(passed_argc));
- lea(cb, C_ARG_REGS[1], ctx_stack_opnd(ctx, argc - 1));
- mov(cb, C_ARG_REGS[2], ctx_stack_opnd(ctx, argc));
- }
- // Variadic method with Ruby array
- if (cfunc->argc == -2) {
- // Create a Ruby array from the arguments.
- //
- // This follows similar behaviour to vm_call_cfunc_with_frame() and
- // call_cfunc_m2(). We use rb_ec_ary_new_from_values() instead of
- // rb_ary_new4() since we have REG_EC available.
- //
- // Before getting here we will have set the new CFP in the EC, and the
- // stack at CFP's SP will contain the values we are inserting into the
- // Array, so they will be properly marked if we hit a GC.
-
- // rb_ec_ary_new_from_values(rb_execution_context_t *ec, long n, const VALUE *elts)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], imm_opnd(passed_argc));
- lea(cb, C_ARG_REGS[2], ctx_stack_opnd(ctx, argc - 1));
- call_ptr(cb, REG0, (void *)rb_ec_ary_new_from_values);
-
- // rb_file_s_join(VALUE recv, VALUE args)
- mov(cb, C_ARG_REGS[0], ctx_stack_opnd(ctx, argc));
- mov(cb, C_ARG_REGS[1], RAX);
- }
-
- // Pop the C function arguments from the stack (in the caller)
- ctx_stack_pop(ctx, argc + 1);
-
- // Write interpreter SP into CFP.
- // Needed in case the callee yields to the block.
- jit_save_sp(jit, ctx);
-
- // Call the C function
- // VALUE ret = (cfunc->func)(recv, argv[0], argv[1]);
- // cfunc comes from compile-time cme->def, which we assume to be stable.
- // Invalidation logic is in rb_yjit_method_lookup_change()
- call_ptr(cb, REG0, (void*)cfunc->func);
-
- // Record code position for TracePoint patching. See full_cfunc_return().
- record_global_inval_patch(cb, outline_full_cfunc_return_pos);
-
- // Push the return value on the Ruby stack
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- // Pop the stack frame (ec->cfp++)
- add(
- cb,
- member_opnd(REG_EC, rb_execution_context_t, cfp),
- imm_opnd(sizeof(rb_control_frame_t))
- );
-
- // cfunc calls may corrupt types
- ctx_clear_local_types(ctx);
-
- // Note: the return block of gen_send_iseq() has ctx->sp_offset == 1
- // which allows for sharing the same successor.
-
- // Jump (fall through) to the call continuation block
- // We do this to end the current block after the call
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
-}
-
-static void
-gen_return_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- case SHAPE_NEXT1:
- RUBY_ASSERT(false);
- break;
-
- case SHAPE_DEFAULT:
- mov(cb, REG0, const_ptr_opnd(target0));
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
- break;
- }
-}
-
-// If true, the iseq is leaf and it can be replaced by a single C call.
-static bool
-rb_leaf_invokebuiltin_iseq_p(const rb_iseq_t *iseq)
-{
- unsigned int invokebuiltin_len = insn_len(BIN(opt_invokebuiltin_delegate_leave));
- unsigned int leave_len = insn_len(BIN(leave));
-
- return (ISEQ_BODY(iseq)->iseq_size == (invokebuiltin_len + leave_len) &&
- rb_vm_insn_addr2opcode((void *)ISEQ_BODY(iseq)->iseq_encoded[0]) == BIN(opt_invokebuiltin_delegate_leave) &&
- rb_vm_insn_addr2opcode((void *)ISEQ_BODY(iseq)->iseq_encoded[invokebuiltin_len]) == BIN(leave) &&
- ISEQ_BODY(iseq)->builtin_inline_p
- );
-}
-
-// Return an rb_builtin_function if the iseq contains only that leaf builtin function.
-static const struct rb_builtin_function*
-rb_leaf_builtin_function(const rb_iseq_t *iseq)
-{
- if (!rb_leaf_invokebuiltin_iseq_p(iseq))
- return NULL;
- return (const struct rb_builtin_function *)ISEQ_BODY(iseq)->iseq_encoded[1];
-}
-
-static codegen_status_t
-gen_send_iseq(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, rb_iseq_t *block, int32_t argc)
-{
- const rb_iseq_t *iseq = def_iseq_ptr(cme->def);
-
- // When you have keyword arguments, there is an extra object that gets
- // placed on the stack that represents a bitmap of the keywords that were not
- // specified at the call site. We need to keep track of the fact that this
- // value is present on the stack in order to properly set up the callee's
- // stack pointer.
- const bool doing_kw_call = ISEQ_BODY(iseq)->param.flags.has_kw;
- const bool supplying_kws = vm_ci_flag(ci) & VM_CALL_KWARG;
-
- if (vm_ci_flag(ci) & VM_CALL_TAILCALL) {
- // We can't handle tailcalls
- GEN_COUNTER_INC(cb, send_iseq_tailcall);
- return YJIT_CANT_COMPILE;
- }
-
- // No support for callees with these parameters yet as they require allocation
- // or complex handling.
- if (ISEQ_BODY(iseq)->param.flags.has_rest ||
- ISEQ_BODY(iseq)->param.flags.has_post ||
- ISEQ_BODY(iseq)->param.flags.has_kwrest) {
- GEN_COUNTER_INC(cb, send_iseq_complex_callee);
- return YJIT_CANT_COMPILE;
- }
-
- // If we have keyword arguments being passed to a callee that only takes
- // positionals, then we need to allocate a hash. For now we're going to
- // call that too complex and bail.
- if (supplying_kws && !ISEQ_BODY(iseq)->param.flags.has_kw) {
- GEN_COUNTER_INC(cb, send_iseq_complex_callee);
- return YJIT_CANT_COMPILE;
- }
-
- // If we have a method accepting no kwargs (**nil), exit if we have passed
- // it any kwargs.
- if (supplying_kws && ISEQ_BODY(iseq)->param.flags.accepts_no_kwarg) {
- GEN_COUNTER_INC(cb, send_iseq_complex_callee);
- return YJIT_CANT_COMPILE;
- }
-
- // For computing the number of locals to set up for the callee
- int num_params = ISEQ_BODY(iseq)->param.size;
-
- // Block parameter handling. This mirrors setup_parameters_complex().
- if (ISEQ_BODY(iseq)->param.flags.has_block) {
- if (ISEQ_BODY(iseq)->local_iseq == iseq) {
- // Block argument is passed through EP and not set up as a local in
- // the callee.
- num_params--;
- }
- else {
- // In this case (param.flags.has_block && local_iseq != iseq),
- // the block argument is set up as a local variable and requires
- // materialization (allocation). Bail.
- GEN_COUNTER_INC(cb, send_iseq_complex_callee);
- return YJIT_CANT_COMPILE;
- }
- }
-
- uint32_t start_pc_offset = 0;
-
- const int required_num = ISEQ_BODY(iseq)->param.lead_num;
-
- // This struct represents the metadata about the caller-specified
- // keyword arguments.
- const struct rb_callinfo_kwarg *kw_arg = vm_ci_kwarg(ci);
- const int kw_arg_num = kw_arg ? kw_arg->keyword_len : 0;
-
- // Arity handling and optional parameter setup
- const int opts_filled = argc - required_num - kw_arg_num;
- const int opt_num = ISEQ_BODY(iseq)->param.opt_num;
- const int opts_missing = opt_num - opts_filled;
-
- if (opts_filled < 0 || opts_filled > opt_num) {
- GEN_COUNTER_INC(cb, send_iseq_arity_error);
- return YJIT_CANT_COMPILE;
- }
-
- // If we have unfilled optional arguments and keyword arguments then we
- // would need to adjust the argument locations to account for that.
- // For now we aren't handling this case.
- if (doing_kw_call && opts_missing > 0) {
- GEN_COUNTER_INC(cb, send_iseq_complex_callee);
- return YJIT_CANT_COMPILE;
- }
-
- if (opt_num > 0) {
- num_params -= opt_num - opts_filled;
- start_pc_offset = (uint32_t)ISEQ_BODY(iseq)->param.opt_table[opts_filled];
- }
-
- if (doing_kw_call) {
- // Here we're calling a method with keyword arguments and specifying
- // keyword arguments at this call site.
-
- // This struct represents the metadata about the callee-specified
- // keyword parameters.
- const struct rb_iseq_param_keyword *keyword = ISEQ_BODY(iseq)->param.keyword;
-
- int required_kwargs_filled = 0;
-
- if (keyword->num > 30) {
- // We have so many keywords that (1 << num) encoded as a FIXNUM
- // (which shifts it left one more) no longer fits inside a 32-bit
- // immediate.
- GEN_COUNTER_INC(cb, send_iseq_complex_callee);
- return YJIT_CANT_COMPILE;
- }
-
- // Check that the kwargs being passed are valid
- if (supplying_kws) {
- // This is the list of keyword arguments that the callee specified
- // in its initial declaration.
- const ID *callee_kwargs = keyword->table;
-
- // Here we're going to build up a list of the IDs that correspond to
- // the caller-specified keyword arguments. If they're not in the
- // same order as the order specified in the callee declaration, then
- // we're going to need to generate some code to swap values around
- // on the stack.
- ID *caller_kwargs = ALLOCA_N(VALUE, kw_arg->keyword_len);
- for (int kwarg_idx = 0; kwarg_idx < kw_arg->keyword_len; kwarg_idx++)
- caller_kwargs[kwarg_idx] = SYM2ID(kw_arg->keywords[kwarg_idx]);
-
- // First, we're going to be sure that the names of every
- // caller-specified keyword argument correspond to a name in the
- // list of callee-specified keyword parameters.
- for (int caller_idx = 0; caller_idx < kw_arg->keyword_len; caller_idx++) {
- int callee_idx;
-
- for (callee_idx = 0; callee_idx < keyword->num; callee_idx++) {
- if (caller_kwargs[caller_idx] == callee_kwargs[callee_idx]) {
- break;
- }
- }
-
- // If the keyword was never found, then we know we have a
- // mismatch in the names of the keyword arguments, so we need to
- // bail.
- if (callee_idx == keyword->num) {
- GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
- return YJIT_CANT_COMPILE;
- }
-
- // Keep a count to ensure all required kwargs are specified
- if (callee_idx < keyword->required_num) {
- required_kwargs_filled++;
- }
- }
- }
-
- RUBY_ASSERT(required_kwargs_filled <= keyword->required_num);
- if (required_kwargs_filled != keyword->required_num) {
- GEN_COUNTER_INC(cb, send_iseq_kwargs_mismatch);
- return YJIT_CANT_COMPILE;
- }
- }
-
- // Number of locals that are not parameters
- const int num_locals = ISEQ_BODY(iseq)->local_table_size - num_params;
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Check for interrupts
- yjit_check_ints(cb, side_exit);
-
- const struct rb_builtin_function *leaf_builtin = rb_leaf_builtin_function(iseq);
-
- if (leaf_builtin && !block && leaf_builtin->argc + 1 /* for self */ + 1 /* for ec */ <= NUM_C_ARG_REGS) {
- ADD_COMMENT(cb, "inlined leaf builtin");
-
- // Call the builtin func (ec, recv, arg1, arg2, ...)
- mov(cb, C_ARG_REGS[0], REG_EC);
-
- // Copy self and arguments
- for (int32_t i = 0; i < leaf_builtin->argc + 1; i++) {
- x86opnd_t stack_opnd = ctx_stack_opnd(ctx, leaf_builtin->argc - i);
- x86opnd_t c_arg_reg = C_ARG_REGS[i + 1];
- mov(cb, c_arg_reg, stack_opnd);
- }
- ctx_stack_pop(ctx, leaf_builtin->argc + 1);
- call_ptr(cb, REG0, (void *)leaf_builtin->func_ptr);
-
- // Push the return value
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- // Note: assuming that the leaf builtin doesn't change local variables here.
- // Seems like a safe assumption.
-
- return YJIT_KEEP_COMPILING;
- }
-
- // Stack overflow check
- // Note that vm_push_frame checks it against a decremented cfp, hence the multiply by 2.
- // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
- ADD_COMMENT(cb, "stack overflow check");
- lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (num_locals + ISEQ_BODY(iseq)->stack_max) + 2 * sizeof(rb_control_frame_t)));
- cmp(cb, REG_CFP, REG0);
- jle_ptr(cb, COUNTED_EXIT(jit, side_exit, send_se_cf_overflow));
-
- if (doing_kw_call) {
- // Here we're calling a method with keyword arguments and specifying
- // keyword arguments at this call site.
-
- // Number of positional arguments the callee expects before the first
- // keyword argument
- const int args_before_kw = required_num + opt_num;
-
- // This struct represents the metadata about the caller-specified
- // keyword arguments.
- int caller_keyword_len = 0;
- const VALUE *caller_keywords = NULL;
- if (vm_ci_kwarg(ci)) {
- caller_keyword_len = vm_ci_kwarg(ci)->keyword_len;
- caller_keywords = &vm_ci_kwarg(ci)->keywords[0];
- }
-
- // This struct represents the metadata about the callee-specified
- // keyword parameters.
- const struct rb_iseq_param_keyword *const keyword = ISEQ_BODY(iseq)->param.keyword;
-
- ADD_COMMENT(cb, "keyword args");
-
- // This is the list of keyword arguments that the callee specified
- // in its initial declaration.
- const ID *callee_kwargs = keyword->table;
-
- int total_kwargs = keyword->num;
-
- // Here we're going to build up a list of the IDs that correspond to
- // the caller-specified keyword arguments. If they're not in the
- // same order as the order specified in the callee declaration, then
- // we're going to need to generate some code to swap values around
- // on the stack.
- ID *caller_kwargs = ALLOCA_N(VALUE, total_kwargs);
- int kwarg_idx;
- for (kwarg_idx = 0; kwarg_idx < caller_keyword_len; kwarg_idx++) {
- caller_kwargs[kwarg_idx] = SYM2ID(caller_keywords[kwarg_idx]);
- }
-
- int unspecified_bits = 0;
-
- for (int callee_idx = keyword->required_num; callee_idx < total_kwargs; callee_idx++) {
- bool already_passed = false;
- ID callee_kwarg = callee_kwargs[callee_idx];
-
- for (int caller_idx = 0; caller_idx < caller_keyword_len; caller_idx++) {
- if (caller_kwargs[caller_idx] == callee_kwarg) {
- already_passed = true;
- break;
- }
- }
-
- if (!already_passed) {
- // Reserve space on the stack for each default value we'll be
- // filling in (which is done in the next loop). Also increments
- // argc so that the callee's SP is recorded correctly.
- argc++;
- x86opnd_t default_arg = ctx_stack_push(ctx, TYPE_UNKNOWN);
- VALUE default_value = keyword->default_values[callee_idx - keyword->required_num];
-
- if (default_value == Qundef) {
- // Qundef means that this value is not constant and must be
- // recalculated at runtime, so we record it in unspecified_bits
- // (Qnil is then used as a placeholder instead of Qundef).
- unspecified_bits |= 0x01 << (callee_idx - keyword->required_num);
- default_value = Qnil;
- }
-
- // GC might move default_value.
- jit_mov_gc_ptr(jit, cb, REG0, default_value);
- mov(cb, default_arg, REG0);
-
- caller_kwargs[kwarg_idx++] = callee_kwarg;
- }
- }
- RUBY_ASSERT(kwarg_idx == total_kwargs);
-
- // Next, we're going to loop through every keyword that was
- // specified by the caller and make sure that it's in the correct
- // place. If it's not we're going to swap it around with another one.
- for (kwarg_idx = 0; kwarg_idx < total_kwargs; kwarg_idx++) {
- ID callee_kwarg = callee_kwargs[kwarg_idx];
-
- // If the argument is already in the right order, then we don't
- // need to generate any code since the expected value is already
- // in the right place on the stack.
- if (callee_kwarg == caller_kwargs[kwarg_idx]) continue;
-
- // In this case the argument is not in the right place, so we
- // need to find its position where it _should_ be and swap with
- // that location.
- for (int swap_idx = kwarg_idx + 1; swap_idx < total_kwargs; swap_idx++) {
- if (callee_kwarg == caller_kwargs[swap_idx]) {
- // First we're going to generate the code that is going
- // to perform the actual swapping at runtime.
- stack_swap(ctx, cb, argc - 1 - swap_idx - args_before_kw, argc - 1 - kwarg_idx - args_before_kw, REG1, REG0);
-
- // Next we're going to do some bookkeeping on our end so
- // that we know the order that the arguments are
- // actually in now.
- ID tmp = caller_kwargs[kwarg_idx];
- caller_kwargs[kwarg_idx] = caller_kwargs[swap_idx];
- caller_kwargs[swap_idx] = tmp;
-
- break;
- }
- }
- }
-
- // Keyword arguments cause a special extra local variable to be
- // pushed onto the stack that represents the parameters that weren't
- // explicitly given a value and have a non-constant default.
- mov(cb, ctx_stack_opnd(ctx, -1), imm_opnd(INT2FIX(unspecified_bits)));
- }
- // Points to the receiver operand on the stack
- x86opnd_t recv = ctx_stack_opnd(ctx, argc);
-
- // Store the updated SP on the current frame (pop arguments and receiver)
- ADD_COMMENT(cb, "store caller sp");
- lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
-
- // Store the next PC in the current frame
- jit_save_pc(jit, REG0);
-
- if (block) {
- // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block().
-        // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases
- // with cfp->block_code.
- jit_mov_gc_ptr(jit, cb, REG0, (VALUE)block);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), REG0);
- }
-
- // Adjust the callee's stack pointer
- lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * (3 + num_locals + doing_kw_call)));
-
- // Initialize local variables to Qnil
- for (int i = 0; i < num_locals; i++) {
- mov(cb, mem_opnd(64, REG0, sizeof(VALUE) * (i - num_locals - 3)), imm_opnd(Qnil));
- }
-
- ADD_COMMENT(cb, "push env");
- // Put compile time cme into REG1. It's assumed to be valid because we are notified when
-    // any cme we depend on becomes outdated. See rb_yjit_method_lookup_change().
- jit_mov_gc_ptr(jit, cb, REG1, (VALUE)cme);
- // Write method entry at sp[-3]
- // sp[-3] = me;
- mov(cb, mem_opnd(64, REG0, 8 * -3), REG1);
-
- // Write block handler at sp[-2]
- // sp[-2] = block_handler;
- if (block) {
- // reg1 = VM_BH_FROM_ISEQ_BLOCK(VM_CFP_TO_CAPTURED_BLOCK(reg_cfp));
- lea(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, self));
- or(cb, REG1, imm_opnd(1));
- mov(cb, mem_opnd(64, REG0, 8 * -2), REG1);
- }
- else {
- mov(cb, mem_opnd(64, REG0, 8 * -2), imm_opnd(VM_BLOCK_HANDLER_NONE));
- }
-
- // Write env flags at sp[-1]
- // sp[-1] = frame_type;
- uint64_t frame_type = VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL;
- mov(cb, mem_opnd(64, REG0, 8 * -1), imm_opnd(frame_type));
-
- ADD_COMMENT(cb, "push callee CFP");
- // Allocate a new CFP (ec->cfp--)
- sub(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
- mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
-
- // Setup the new frame
- // *cfp = (const struct rb_control_frame_struct) {
- // .pc = pc,
- // .sp = sp,
- // .iseq = iseq,
- // .self = recv,
- // .ep = sp - 1,
- // .block_code = 0,
- // .__bp__ = sp,
- // };
- mov(cb, REG1, recv);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, self), REG1);
- mov(cb, REG_SP, REG0); // Switch to the callee's REG_SP
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, __bp__), REG0);
- sub(cb, REG0, imm_opnd(sizeof(VALUE)));
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, ep), REG0);
- jit_mov_gc_ptr(jit, cb, REG0, (VALUE)iseq);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, iseq), REG0);
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, block_code), imm_opnd(0));
-
- // No need to set cfp->pc since the callee sets it whenever calling into routines
- // that could look at it through jit_save_pc().
- // mov(cb, REG0, const_ptr_opnd(start_pc));
- // mov(cb, member_opnd(REG_CFP, rb_control_frame_t, pc), REG0);
-
- // Stub so we can return to JITted code
- blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
-
- // Create a context for the callee
- ctx_t callee_ctx = DEFAULT_CTX;
-
- // Set the argument types in the callee's context
- for (int32_t arg_idx = 0; arg_idx < argc; ++arg_idx) {
- val_type_t arg_type = ctx_get_opnd_type(ctx, OPND_STACK(argc - arg_idx - 1));
- ctx_set_local_type(&callee_ctx, arg_idx, arg_type);
- }
- val_type_t recv_type = ctx_get_opnd_type(ctx, OPND_STACK(argc));
- ctx_upgrade_opnd_type(&callee_ctx, OPND_SELF, recv_type);
-
- // The callee might change locals through Kernel#binding and other means.
- ctx_clear_local_types(ctx);
-
- // Pop arguments and receiver in return context, push the return value
- // After the return, sp_offset will be 1. The codegen for leave writes
- // the return value in case of JIT-to-JIT return.
- ctx_t return_ctx = *ctx;
- ctx_stack_pop(&return_ctx, argc + 1);
- ctx_stack_push(&return_ctx, TYPE_UNKNOWN);
- return_ctx.sp_offset = 1;
- return_ctx.chain_depth = 0;
-
- // Write the JIT return address on the callee frame
- gen_branch(
- jit,
- ctx,
- return_block,
- &return_ctx,
- return_block,
- &return_ctx,
- gen_return_branch
- );
-
- //print_str(cb, "calling Ruby func:");
- //print_str(cb, rb_id2name(vm_ci_mid(ci)));
-
- // Directly jump to the entry point of the callee
- gen_direct_jump(
- jit,
- &callee_ctx,
- (blockid_t){ iseq, start_pc_offset }
- );
-
- return YJIT_END_BLOCK;
-}
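
The keyword-argument handling above reorders the caller-supplied keywords on the stack so they match the callee's declared order, emitting a `stack_swap` for each out-of-place entry. A minimal standalone sketch of the same reordering idea, using made-up `ID` values and a `printf` where the generated code would perform the swap:

```c
#include <stdio.h>

typedef unsigned long ID; /* stand-in for Ruby's ID type */

/* Reorder caller_kwargs in place so it matches the callee's declared order,
 * reporting each swap the JIT would emit as a stack_swap. */
static void
reorder_kwargs(ID *caller_kwargs, const ID *callee_kwargs, int total)
{
    for (int i = 0; i < total; i++) {
        if (caller_kwargs[i] == callee_kwargs[i]) continue;
        for (int j = i + 1; j < total; j++) {
            if (caller_kwargs[j] == callee_kwargs[i]) {
                ID tmp = caller_kwargs[i];
                caller_kwargs[i] = caller_kwargs[j];
                caller_kwargs[j] = tmp;
                printf("swap stack slots %d and %d\n", i, j);
                break;
            }
        }
    }
}

int
main(void)
{
    const ID callee[] = { 1, 2, 3 }; /* declared order: a:, b:, c: */
    ID caller[]       = { 3, 1, 2 }; /* call-site order: c:, a:, b: */
    reorder_kwargs(caller, callee, 3);
    return 0;
}
```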
-
-static codegen_status_t
-gen_struct_aref(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
- if (vm_ci_argc(ci) != 0) {
- return YJIT_CANT_COMPILE;
- }
-
- const unsigned int off = cme->def->body.optimized.index;
-
- // Confidence checks
- RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
- RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
-
- // We are going to use an encoding that takes a 4-byte immediate which
- // limits the offset to INT32_MAX.
- {
- uint64_t native_off = (uint64_t)off * (uint64_t)SIZEOF_VALUE;
- if (native_off > (uint64_t)INT32_MAX) {
- return YJIT_CANT_COMPILE;
- }
- }
-
- // All structs from the same Struct class should have the same
-    // length. So if our comptime_recv is embedded, all runtime
-    // structs of the same class should be as well, and vice versa.
- bool embedded = FL_TEST_RAW(comptime_recv, RSTRUCT_EMBED_LEN_MASK);
-
- ADD_COMMENT(cb, "struct aref");
-
- x86opnd_t recv = ctx_stack_pop(ctx, 1);
-
- mov(cb, REG0, recv);
-
- if (embedded) {
- mov(cb, REG0, member_opnd_idx(REG0, struct RStruct, as.ary, off));
- }
- else {
- mov(cb, REG0, member_opnd(REG0, struct RStruct, as.heap.ptr));
- mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * off));
- }
-
- x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, ret, REG0);
-
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
-}
-
-static codegen_status_t
-gen_struct_aset(jitstate_t *jit, ctx_t *ctx, const struct rb_callinfo *ci, const rb_callable_method_entry_t *cme, VALUE comptime_recv, VALUE comptime_recv_klass) {
- if (vm_ci_argc(ci) != 1) {
- return YJIT_CANT_COMPILE;
- }
-
- const unsigned int off = cme->def->body.optimized.index;
-
- // Confidence checks
- RUBY_ASSERT_ALWAYS(RB_TYPE_P(comptime_recv, T_STRUCT));
- RUBY_ASSERT_ALWAYS((long)off < RSTRUCT_LEN(comptime_recv));
-
- ADD_COMMENT(cb, "struct aset");
-
- x86opnd_t val = ctx_stack_pop(ctx, 1);
- x86opnd_t recv = ctx_stack_pop(ctx, 1);
-
- mov(cb, C_ARG_REGS[0], recv);
- mov(cb, C_ARG_REGS[1], imm_opnd(off));
- mov(cb, C_ARG_REGS[2], val);
- call_ptr(cb, REG0, (void *)RSTRUCT_SET);
-
- x86opnd_t ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, ret, RAX);
-
- jit_jump_to_next_insn(jit, ctx);
- return YJIT_END_BLOCK;
-}
-
-const rb_callable_method_entry_t *
-rb_aliased_callable_method_entry(const rb_callable_method_entry_t *me);
-
-static codegen_status_t
-gen_send_general(jitstate_t *jit, ctx_t *ctx, struct rb_call_data *cd, rb_iseq_t *block)
-{
- // Relevant definitions:
- // rb_execution_context_t : vm_core.h
- // invoker, cfunc logic : method.h, vm_method.c
- // rb_callinfo : vm_callinfo.h
- // rb_callable_method_entry_t : method.h
- // vm_call_cfunc_with_frame : vm_insnhelper.c
- //
- // For a general overview for how the interpreter calls methods,
- // see vm_call_method().
-
- const struct rb_callinfo *ci = cd->ci; // info about the call site
-
- int32_t argc = (int32_t)vm_ci_argc(ci);
- ID mid = vm_ci_mid(ci);
-
- // Don't JIT calls with keyword splat
- if (vm_ci_flag(ci) & VM_CALL_KW_SPLAT) {
- GEN_COUNTER_INC(cb, send_kw_splat);
- return YJIT_CANT_COMPILE;
- }
-
- // Don't JIT calls that aren't simple
- // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
- if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
- GEN_COUNTER_INC(cb, send_args_splat);
- return YJIT_CANT_COMPILE;
- }
- if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
- GEN_COUNTER_INC(cb, send_block_arg);
- return YJIT_CANT_COMPILE;
- }
-
- // Defer compilation so we can specialize on class of receiver
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
- VALUE comptime_recv_klass = CLASS_OF(comptime_recv);
-
- // Guard that the receiver has the same class as the one from compile time
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Points to the receiver operand on the stack
- x86opnd_t recv = ctx_stack_opnd(ctx, argc);
- insn_opnd_t recv_opnd = OPND_STACK(argc);
- mov(cb, REG0, recv);
- if (!jit_guard_known_klass(jit, ctx, comptime_recv_klass, recv_opnd, comptime_recv, SEND_MAX_DEPTH, side_exit)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Do method lookup
- const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_recv_klass, mid);
- if (!cme) {
- // TODO: counter
- return YJIT_CANT_COMPILE;
- }
-
- switch (METHOD_ENTRY_VISI(cme)) {
- case METHOD_VISI_PUBLIC:
- // Can always call public methods
- break;
- case METHOD_VISI_PRIVATE:
- if (!(vm_ci_flag(ci) & VM_CALL_FCALL)) {
- // Can only call private methods with FCALL callsites.
- // (at the moment they are callsites without a receiver or an explicit `self` receiver)
- return YJIT_CANT_COMPILE;
- }
- break;
- case METHOD_VISI_PROTECTED:
- jit_protected_callee_ancestry_guard(jit, cb, cme, side_exit);
- break;
- case METHOD_VISI_UNDEF:
- RUBY_ASSERT(false && "cmes should always have a visibility");
- break;
- }
-
- // Register block for invalidation
- RUBY_ASSERT(cme->called_id == mid);
- assume_method_lookup_stable(comptime_recv_klass, cme, jit);
-
- // To handle the aliased method case (VM_METHOD_TYPE_ALIAS)
- while (true) {
- // switch on the method type
- switch (cme->def->type) {
- case VM_METHOD_TYPE_ISEQ:
- return gen_send_iseq(jit, ctx, ci, cme, block, argc);
- case VM_METHOD_TYPE_CFUNC:
- return gen_send_cfunc(jit, ctx, ci, cme, block, argc, &comptime_recv_klass);
- case VM_METHOD_TYPE_IVAR:
- if (argc != 0) {
- // Argument count mismatch. Getters take no arguments.
- GEN_COUNTER_INC(cb, send_getter_arity);
- return YJIT_CANT_COMPILE;
- }
- if (c_method_tracing_currently_enabled(jit)) {
- // Can't generate code for firing c_call and c_return events
- // :attr-tracing:
- // Handling the C method tracing events for attr_accessor
- // methods is easier than regular C methods as we know the
- // "method" we are calling into never enables those tracing
- // events. Once global invalidation runs, the code for the
- // attr_accessor is invalidated and we exit at the closest
- // instruction boundary which is always outside of the body of
- // the attr_accessor code.
- GEN_COUNTER_INC(cb, send_cfunc_tracing);
- return YJIT_CANT_COMPILE;
- }
-
- mov(cb, REG0, recv);
-
- ID ivar_name = cme->def->body.attr.id;
- return gen_get_ivar(jit, ctx, SEND_MAX_DEPTH, comptime_recv, ivar_name, recv_opnd, side_exit);
- case VM_METHOD_TYPE_ATTRSET:
- if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
- GEN_COUNTER_INC(cb, send_attrset_kwargs);
- return YJIT_CANT_COMPILE;
- }
- else if (argc != 1 || !RB_TYPE_P(comptime_recv, T_OBJECT)) {
- GEN_COUNTER_INC(cb, send_ivar_set_method);
- return YJIT_CANT_COMPILE;
- }
- else if (c_method_tracing_currently_enabled(jit)) {
- // Can't generate code for firing c_call and c_return events
- // See :attr-tracing:
- GEN_COUNTER_INC(cb, send_cfunc_tracing);
- return YJIT_CANT_COMPILE;
- }
- else {
- ID ivar_name = cme->def->body.attr.id;
- return gen_set_ivar(jit, ctx, comptime_recv, comptime_recv_klass, ivar_name);
- }
- // Block method, e.g. define_method(:foo) { :my_block }
- case VM_METHOD_TYPE_BMETHOD:
- GEN_COUNTER_INC(cb, send_bmethod);
- return YJIT_CANT_COMPILE;
- case VM_METHOD_TYPE_ZSUPER:
- GEN_COUNTER_INC(cb, send_zsuper_method);
- return YJIT_CANT_COMPILE;
- case VM_METHOD_TYPE_ALIAS: {
-            // Retrieve the aliased method and re-enter the switch
- cme = rb_aliased_callable_method_entry(cme);
- continue;
- }
- case VM_METHOD_TYPE_UNDEF:
- GEN_COUNTER_INC(cb, send_undef_method);
- return YJIT_CANT_COMPILE;
- case VM_METHOD_TYPE_NOTIMPLEMENTED:
- GEN_COUNTER_INC(cb, send_not_implemented_method);
- return YJIT_CANT_COMPILE;
- // Send family of methods, e.g. call/apply
- case VM_METHOD_TYPE_OPTIMIZED:
- switch (cme->def->body.optimized.type) {
- case OPTIMIZED_METHOD_TYPE_SEND:
- GEN_COUNTER_INC(cb, send_optimized_method_send);
- return YJIT_CANT_COMPILE;
- case OPTIMIZED_METHOD_TYPE_CALL:
- GEN_COUNTER_INC(cb, send_optimized_method_call);
- return YJIT_CANT_COMPILE;
- case OPTIMIZED_METHOD_TYPE_BLOCK_CALL:
- GEN_COUNTER_INC(cb, send_optimized_method_block_call);
- return YJIT_CANT_COMPILE;
- case OPTIMIZED_METHOD_TYPE_STRUCT_AREF:
- return gen_struct_aref(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
- case OPTIMIZED_METHOD_TYPE_STRUCT_ASET:
- return gen_struct_aset(jit, ctx, ci, cme, comptime_recv, comptime_recv_klass);
- default:
- rb_bug("unknown optimized method type (%d)", cme->def->body.optimized.type);
- UNREACHABLE_RETURN(YJIT_CANT_COMPILE);
- }
- case VM_METHOD_TYPE_MISSING:
- GEN_COUNTER_INC(cb, send_missing_method);
- return YJIT_CANT_COMPILE;
- case VM_METHOD_TYPE_REFINED:
- GEN_COUNTER_INC(cb, send_refined_method);
- return YJIT_CANT_COMPILE;
- // no default case so compiler issues a warning if this is not exhaustive
- }
-
- // Unreachable
- RUBY_ASSERT(false);
- }
-}
-
-static codegen_status_t
-gen_opt_send_without_block(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
- return gen_send_general(jit, ctx, cd, NULL);
-}
-
-static codegen_status_t
-gen_send(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
- rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
- return gen_send_general(jit, ctx, cd, block);
-}
-
-static codegen_status_t
-gen_invokesuper(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
- rb_iseq_t *block = (rb_iseq_t *)jit_get_arg(jit, 1);
-
- // Defer compilation so we can specialize on class of receiver
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(jit->ec->cfp);
- if (!me) {
- return YJIT_CANT_COMPILE;
- }
-
- // FIXME: We should track and invalidate this block when this cme is invalidated
- VALUE current_defined_class = me->defined_class;
- ID mid = me->def->original_id;
-
- if (me != rb_callable_method_entry(current_defined_class, me->called_id)) {
- // Though we likely could generate this call, as we are only concerned
- // with the method entry remaining valid, assume_method_lookup_stable
- // below requires that the method lookup matches as well
- return YJIT_CANT_COMPILE;
- }
-
- // vm_search_normal_superclass
- if (BUILTIN_TYPE(current_defined_class) == T_ICLASS && FL_TEST_RAW(RBASIC(current_defined_class)->klass, RMODULE_IS_REFINEMENT)) {
- return YJIT_CANT_COMPILE;
- }
- VALUE comptime_superclass = RCLASS_SUPER(RCLASS_ORIGIN(current_defined_class));
-
- const struct rb_callinfo *ci = cd->ci;
- int32_t argc = (int32_t)vm_ci_argc(ci);
-
- // Don't JIT calls that aren't simple
- // Note, not using VM_CALL_ARGS_SIMPLE because sometimes we pass a block.
- if ((vm_ci_flag(ci) & VM_CALL_ARGS_SPLAT) != 0) {
- GEN_COUNTER_INC(cb, send_args_splat);
- return YJIT_CANT_COMPILE;
- }
- if ((vm_ci_flag(ci) & VM_CALL_KWARG) != 0) {
- GEN_COUNTER_INC(cb, send_keywords);
- return YJIT_CANT_COMPILE;
- }
- if ((vm_ci_flag(ci) & VM_CALL_KW_SPLAT) != 0) {
- GEN_COUNTER_INC(cb, send_kw_splat);
- return YJIT_CANT_COMPILE;
- }
- if ((vm_ci_flag(ci) & VM_CALL_ARGS_BLOCKARG) != 0) {
- GEN_COUNTER_INC(cb, send_block_arg);
- return YJIT_CANT_COMPILE;
- }
-
- // Ensure we haven't rebound this method onto an incompatible class.
- // In the interpreter we try to avoid making this check by performing some
-    // cheaper calculations first, but since we specialize on the method entry
-    // and only have to do this once at compile time, it is fine to always
-    // check and side exit.
- VALUE comptime_recv = jit_peek_at_stack(jit, ctx, argc);
- if (!rb_obj_is_kind_of(comptime_recv, current_defined_class)) {
- return YJIT_CANT_COMPILE;
- }
-
- // Do method lookup
- const rb_callable_method_entry_t *cme = rb_callable_method_entry(comptime_superclass, mid);
-
- if (!cme) {
- return YJIT_CANT_COMPILE;
- }
-
- // Check that we'll be able to write this method dispatch before generating checks
- switch (cme->def->type) {
- case VM_METHOD_TYPE_ISEQ:
- case VM_METHOD_TYPE_CFUNC:
- break;
- default:
- // others unimplemented
- return YJIT_CANT_COMPILE;
- }
-
- // Guard that the receiver has the same class as the one from compile time
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- if (jit->ec->cfp->ep[VM_ENV_DATA_INDEX_ME_CREF] != (VALUE)me) {
- // This will be the case for super within a block
- return YJIT_CANT_COMPILE;
- }
-
- ADD_COMMENT(cb, "guard known me");
- mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
- x86opnd_t ep_me_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_ME_CREF);
- jit_mov_gc_ptr(jit, cb, REG1, (VALUE)me);
- cmp(cb, ep_me_opnd, REG1);
- jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_me_changed));
-
- if (!block) {
- // Guard no block passed
- // rb_vm_frame_block_handler(GET_EC()->cfp) == VM_BLOCK_HANDLER_NONE
- // note, we assume VM_ASSERT(VM_ENV_LOCAL_P(ep))
- //
- // TODO: this could properly forward the current block handler, but
- // would require changes to gen_send_*
- ADD_COMMENT(cb, "guard no block given");
- // EP is in REG0 from above
- x86opnd_t ep_specval_opnd = mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL);
- cmp(cb, ep_specval_opnd, imm_opnd(VM_BLOCK_HANDLER_NONE));
- jne_ptr(cb, COUNTED_EXIT(jit, side_exit, invokesuper_block));
- }
-
- // Points to the receiver operand on the stack
- x86opnd_t recv = ctx_stack_opnd(ctx, argc);
- mov(cb, REG0, recv);
-
- // We need to assume that both our current method entry and the super
- // method entry we invoke remain stable
- assume_method_lookup_stable(current_defined_class, me, jit);
- assume_method_lookup_stable(comptime_superclass, cme, jit);
-
- // Method calls may corrupt types
- ctx_clear_local_types(ctx);
-
- switch (cme->def->type) {
- case VM_METHOD_TYPE_ISEQ:
- return gen_send_iseq(jit, ctx, ci, cme, block, argc);
- case VM_METHOD_TYPE_CFUNC:
- return gen_send_cfunc(jit, ctx, ci, cme, block, argc, NULL);
- default:
- break;
- }
-
- RUBY_ASSERT_ALWAYS(false);
-}
-
-static codegen_status_t
-gen_leave(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Only the return value should be on the stack
- RUBY_ASSERT(ctx->stack_size == 1);
-
- // Create a side-exit to fall back to the interpreter
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Load environment pointer EP from CFP
- mov(cb, REG1, member_opnd(REG_CFP, rb_control_frame_t, ep));
-
- // Check for interrupts
- ADD_COMMENT(cb, "check for interrupts");
- yjit_check_ints(cb, COUNTED_EXIT(jit, side_exit, leave_se_interrupt));
-
- // Load the return value
- mov(cb, REG0, ctx_stack_pop(ctx, 1));
-
- // Pop the current frame (ec->cfp++)
- // Note: the return PC is already in the previous CFP
- add(cb, REG_CFP, imm_opnd(sizeof(rb_control_frame_t)));
- mov(cb, member_opnd(REG_EC, rb_execution_context_t, cfp), REG_CFP);
-
- // Reload REG_SP for the caller and write the return value.
- // Top of the stack is REG_SP[0] since the caller has sp_offset=1.
- mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
- mov(cb, mem_opnd(64, REG_SP, 0), REG0);
-
- // Jump to the JIT return address on the frame that was just popped
- const int32_t offset_to_jit_return = -((int32_t)sizeof(rb_control_frame_t)) + (int32_t)offsetof(rb_control_frame_t, jit_return);
- jmp_rm(cb, mem_opnd(64, REG_CFP, offset_to_jit_return));
-
- return YJIT_END_BLOCK;
-}
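
The final jump above reads the `jit_return` slot of the frame that was just popped: after the `add`, REG_CFP points at the caller's frame, so the popped frame sits exactly one `rb_control_frame_t` lower in memory and the negative offset reaches back into it. A small sketch of that offset arithmetic with a hypothetical, simplified frame struct (the real `rb_control_frame_t` layout differs):

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical frame layout, just to make the arithmetic concrete. */
typedef struct frame {
    void *pc;
    void *sp;
    void *iseq;
    void *self;
    void *ep;
    void *block_code;
    void *jit_return;
} frame_t;

int
main(void)
{
    /* Read the popped frame's jit_return relative to the caller's CFP:
     * go one full frame back, then forward to the field. */
    int32_t off = -(int32_t)sizeof(frame_t) + (int32_t)offsetof(frame_t, jit_return);
    printf("offset_to_jit_return = %d\n", (int)off); /* -56 + 48 = -8 on LP64 */
    return 0;
}
```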
-
-static codegen_status_t
-gen_getglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- ID gid = jit_get_arg(jit, 0);
-
- // Save the PC and SP because we might make a Ruby call for warning
- jit_prepare_routine_call(jit, ctx, REG0);
-
- mov(cb, C_ARG_REGS[0], imm_opnd(gid));
-
- call_ptr(cb, REG0, (void *)&rb_gvar_get);
-
- x86opnd_t top = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, top, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_setglobal(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- ID gid = jit_get_arg(jit, 0);
-
- // Save the PC and SP because we might make a Ruby call for
- // Kernel#trace_var
- jit_prepare_routine_call(jit, ctx, REG0);
-
- mov(cb, C_ARG_REGS[0], imm_opnd(gid));
-
- x86opnd_t val = ctx_stack_pop(ctx, 1);
-
- mov(cb, C_ARG_REGS[1], val);
-
- call_ptr(cb, REG0, (void *)&rb_gvar_set);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_anytostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Might allocate in rb_obj_as_string_result().
- jit_prepare_routine_call(jit, ctx, REG0);
-
- x86opnd_t str = ctx_stack_pop(ctx, 1);
- x86opnd_t val = ctx_stack_pop(ctx, 1);
-
- mov(cb, C_ARG_REGS[0], str);
- mov(cb, C_ARG_REGS[1], val);
-
- call_ptr(cb, REG0, (void *)&rb_obj_as_string_result);
-
- // Push the return value
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_STRING);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_objtostring(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- if (!jit_at_current_insn(jit)) {
- defer_compilation(jit, ctx);
- return YJIT_END_BLOCK;
- }
-
- x86opnd_t recv = ctx_stack_opnd(ctx, 0);
- VALUE comptime_recv = jit_peek_at_stack(jit, ctx, 0);
-
- if (RB_TYPE_P(comptime_recv, T_STRING)) {
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- mov(cb, REG0, recv);
- jit_guard_known_klass(jit, ctx, CLASS_OF(comptime_recv), OPND_STACK(0), comptime_recv, SEND_MAX_DEPTH, side_exit);
- // No work needed. The string value is already on the top of the stack.
- return YJIT_KEEP_COMPILING;
- }
- else {
- struct rb_call_data *cd = (struct rb_call_data *)jit_get_arg(jit, 0);
- return gen_send_general(jit, ctx, cd, NULL);
- }
-}
-
-static codegen_status_t
-gen_toregexp(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- rb_num_t opt = jit_get_arg(jit, 0);
- rb_num_t cnt = jit_get_arg(jit, 1);
-
- // Save the PC and SP because this allocates an object and could
- // raise an exception.
- jit_prepare_routine_call(jit, ctx, REG0);
-
- x86opnd_t values_ptr = ctx_sp_opnd(ctx, -(int32_t)(sizeof(VALUE) * (uint32_t)cnt));
- ctx_stack_pop(ctx, cnt);
-
- mov(cb, C_ARG_REGS[0], imm_opnd(0));
- mov(cb, C_ARG_REGS[1], imm_opnd(cnt));
- lea(cb, C_ARG_REGS[2], values_ptr);
- call_ptr(cb, REG0, (void *)&rb_ary_tmp_new_from_values);
-
- // Save the array so we can clear it later
- push(cb, RAX);
- push(cb, RAX); // Alignment
- mov(cb, C_ARG_REGS[0], RAX);
- mov(cb, C_ARG_REGS[1], imm_opnd(opt));
- call_ptr(cb, REG0, (void *)&rb_reg_new_ary);
-
- // The actual regex is in RAX now. Pop the temp array from
- // rb_ary_tmp_new_from_values into C arg regs so we can clear it
- pop(cb, REG1); // Alignment
- pop(cb, C_ARG_REGS[0]);
-
- // The value we want to push on the stack is in RAX right now
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- // Clear the temp array.
- call_ptr(cb, REG0, (void *)&rb_ary_clear);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_intern(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // Save the PC and SP because we might allocate
- jit_prepare_routine_call(jit, ctx, REG0);
-
- x86opnd_t str = ctx_stack_pop(ctx, 1);
-
- mov(cb, C_ARG_REGS[0], str);
-
- call_ptr(cb, REG0, (void *)&rb_str_intern);
-
- // Push the return value
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_getspecial(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // This takes two arguments, key and type
- // key is only used when type == 0
- // A non-zero type determines which type of backref to fetch
- //rb_num_t key = jit_get_arg(jit, 0);
- rb_num_t type = jit_get_arg(jit, 1);
-
- if (type == 0) {
- // not yet implemented
- return YJIT_CANT_COMPILE;
- }
- else if (type & 0x01) {
- // Fetch a "special" backref based on a char encoded by shifting by 1
-
- // Can raise if matchdata uninitialized
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // call rb_backref_get()
- ADD_COMMENT(cb, "rb_backref_get");
- call_ptr(cb, REG0, (void *)rb_backref_get);
- mov(cb, C_ARG_REGS[0], RAX);
-
- switch (type >> 1) {
- case '&':
- ADD_COMMENT(cb, "rb_reg_last_match");
- call_ptr(cb, REG0, (void *)rb_reg_last_match);
- break;
- case '`':
- ADD_COMMENT(cb, "rb_reg_match_pre");
- call_ptr(cb, REG0, (void *)rb_reg_match_pre);
- break;
- case '\'':
- ADD_COMMENT(cb, "rb_reg_match_post");
- call_ptr(cb, REG0, (void *)rb_reg_match_post);
- break;
- case '+':
- ADD_COMMENT(cb, "rb_reg_match_last");
- call_ptr(cb, REG0, (void *)rb_reg_match_last);
- break;
- default:
- rb_bug("invalid back-ref");
- }
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
- }
- else {
- // Fetch the N-th match from the last backref based on type shifted by 1
-
- // Can raise if matchdata uninitialized
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // call rb_backref_get()
- ADD_COMMENT(cb, "rb_backref_get");
- call_ptr(cb, REG0, (void *)rb_backref_get);
-
- // rb_reg_nth_match((int)(type >> 1), backref);
- ADD_COMMENT(cb, "rb_reg_nth_match");
- mov(cb, C_ARG_REGS[0], imm_opnd(type >> 1));
- mov(cb, C_ARG_REGS[1], RAX);
- call_ptr(cb, REG0, (void *)rb_reg_nth_match);
-
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
- }
-}
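
For a non-zero `type` operand, bit 0 selects between the two paths above and the payload is recovered with `type >> 1`: either a special backref character such as `&`, or an N-th match index. A tiny decoding sketch with made-up encoded values:

```c
#include <stdio.h>

int
main(void)
{
    unsigned long types[] = {
        ('&' << 1) | 1, /* special backref, $& */
        (3 << 1),       /* third match group, $3 */
    };

    for (int i = 0; i < 2; i++) {
        unsigned long type = types[i];
        if (type & 0x01)
            printf("special backref '%c'\n", (char)(type >> 1));
        else
            printf("nth match %lu\n", type >> 1);
    }
    return 0;
}
```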
-
-VALUE
-rb_vm_getclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, ICVARC ic);
-
-static codegen_status_t
-gen_getclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
-{
- // rb_vm_getclassvariable can raise exceptions.
- jit_prepare_routine_call(jit, ctx, REG0);
-
- mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
- mov(cb, C_ARG_REGS[1], REG_CFP);
- mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
- mov(cb, C_ARG_REGS[3], imm_opnd(jit_get_arg(jit, 1)));
-
- call_ptr(cb, REG0, (void *)rb_vm_getclassvariable);
-
- x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_top, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-VALUE
-rb_vm_setclassvariable(const rb_iseq_t *iseq, const rb_control_frame_t *cfp, ID id, VALUE val, ICVARC ic);
-
-static codegen_status_t
-gen_setclassvariable(jitstate_t* jit, ctx_t* ctx, codeblock_t* cb)
-{
- // rb_vm_setclassvariable can raise exceptions.
- jit_prepare_routine_call(jit, ctx, REG0);
-
- mov(cb, C_ARG_REGS[0], member_opnd(REG_CFP, rb_control_frame_t, iseq));
- mov(cb, C_ARG_REGS[1], REG_CFP);
- mov(cb, C_ARG_REGS[2], imm_opnd(jit_get_arg(jit, 0)));
- mov(cb, C_ARG_REGS[3], ctx_stack_pop(ctx, 1));
- mov(cb, C_ARG_REGS[4], imm_opnd(jit_get_arg(jit, 1)));
-
- call_ptr(cb, REG0, (void *)rb_vm_setclassvariable);
-
- return YJIT_KEEP_COMPILING;
-}
-
-static codegen_status_t
-gen_opt_getinlinecache(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- VALUE jump_offset = jit_get_arg(jit, 0);
- VALUE const_cache_as_value = jit_get_arg(jit, 1);
- IC ic = (IC)const_cache_as_value;
-
- // See vm_ic_hit_p(). The same conditions are checked in yjit_constant_ic_update().
- struct iseq_inline_constant_cache_entry *ice = ic->entry;
- if (!ice) {
- // In these cases, leave a block that unconditionally side exits
- // for the interpreter to invalidate.
- return YJIT_CANT_COMPILE;
- }
-
- // Make sure there is an exit for this block as the interpreter might want
- // to invalidate this block from yjit_constant_ic_update().
- jit_ensure_block_entry_exit(jit);
-
- if (ice->ic_cref) {
- // Cache is keyed on a certain lexical scope. Use the interpreter's cache.
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // Call function to verify the cache. It doesn't allocate or call methods.
- bool rb_vm_ic_hit_p(IC ic, const VALUE *reg_ep);
- mov(cb, C_ARG_REGS[0], const_ptr_opnd((void *)ic));
- mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, ep));
- call_ptr(cb, REG0, (void *)rb_vm_ic_hit_p);
-
- // Check the result. _Bool is one byte in SysV.
- test(cb, AL, AL);
- jz_ptr(cb, COUNTED_EXIT(jit, side_exit, opt_getinlinecache_miss));
-
- // Push ic->entry->value
- mov(cb, REG0, const_ptr_opnd((void *)ic));
- mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache, entry));
- x86opnd_t stack_top = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, REG0, member_opnd(REG0, struct iseq_inline_constant_cache_entry, value));
- mov(cb, stack_top, REG0);
- }
- else {
- // Optimize for single ractor mode.
- // FIXME: This leaks when st_insert raises NoMemoryError
- if (!assume_single_ractor_mode(jit)) return YJIT_CANT_COMPILE;
-
- // Invalidate output code on any and all constant writes
- // FIXME: This leaks when st_insert raises NoMemoryError
- assume_stable_global_constant_state(jit);
-
- jit_putobject(jit, ctx, ice->value);
- }
-
- // Jump over the code for filling the cache
- uint32_t jump_idx = jit_next_insn_idx(jit) + (int32_t)jump_offset;
- gen_direct_jump(
- jit,
- ctx,
- (blockid_t){ .iseq = jit->iseq, .idx = jump_idx }
- );
-
- return YJIT_END_BLOCK;
-}
-
-// Push the explicit block parameter onto the temporary stack. Part of the
-// interpreter's scheme for avoiding Proc allocations when delegating
-// explicit block parameters.
-static codegen_status_t
-gen_getblockparamproxy(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- // A mirror of the interpreter code. Checking for the case
- // where it's pushing rb_block_param_proxy.
- uint8_t *side_exit = yjit_side_exit(jit, ctx);
-
- // EP level
- uint32_t level = (uint32_t)jit_get_arg(jit, 1);
-
- // Load environment pointer EP from CFP
- gen_get_ep(cb, REG0, level);
-
- // Bail when VM_ENV_FLAGS(ep, VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM) is non zero
- test(cb, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_FLAGS), imm_opnd(VM_FRAME_FLAG_MODIFIED_BLOCK_PARAM));
- jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_param_modified));
-
- // Load the block handler for the current frame
- // note, VM_ASSERT(VM_ENV_LOCAL_P(ep))
- mov(cb, REG0, mem_opnd(64, REG0, SIZEOF_VALUE * VM_ENV_DATA_INDEX_SPECVAL));
-
- // Block handler is a tagged pointer. Look at the tag. 0x03 is from VM_BH_ISEQ_BLOCK_P().
- and(cb, REG0_8, imm_opnd(0x3));
-
- // Bail unless VM_BH_ISEQ_BLOCK_P(bh). This also checks for null.
- cmp(cb, REG0_8, imm_opnd(0x1));
- jnz_ptr(cb, COUNTED_EXIT(jit, side_exit, gbpp_block_handler_not_iseq));
-
- // Push rb_block_param_proxy. It's a root, so no need to use jit_mov_gc_ptr.
- mov(cb, REG0, const_ptr_opnd((void *)rb_block_param_proxy));
- RUBY_ASSERT(!SPECIAL_CONST_P(rb_block_param_proxy));
- x86opnd_t top = ctx_stack_push(ctx, TYPE_HEAP);
- mov(cb, top, REG0);
-
- return YJIT_KEEP_COMPILING;
-}
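
The mask-and-compare on REG0_8 above relies on block handlers being tagged pointers: the low two bits are a tag, and a tag of 0x1 marks an iseq block (mirroring VM_BH_ISEQ_BLOCK_P, which also rules out a null handler). A plain-C sketch of that check on a hypothetical handler value:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The low two bits of a block handler act as a tag; 0x1 marks an iseq
 * block handler. A zero (no block) value fails the check as well. */
static bool
is_iseq_block_handler(uintptr_t block_handler)
{
    return (block_handler & 0x3) == 0x1;
}

int
main(void)
{
    printf("%d\n", (int)is_iseq_block_handler(0));          /* 0: no handler */
    printf("%d\n", (int)is_iseq_block_handler(0x1000 | 1)); /* 1: iseq block */
    return 0;
}
```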
-
-static codegen_status_t
-gen_invokebuiltin(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
-
- // ec, self, and arguments
- if (bf->argc + 2 > NUM_C_ARG_REGS) {
- return YJIT_CANT_COMPILE;
- }
-
- // If the calls don't allocate, do they need up to date PC, SP?
- jit_prepare_routine_call(jit, ctx, REG0);
-
- // Call the builtin func (ec, recv, arg1, arg2, ...)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
-
- // Copy arguments from locals
- for (int32_t i = 0; i < bf->argc; i++) {
- x86opnd_t stack_opnd = ctx_stack_opnd(ctx, bf->argc - i - 1);
- x86opnd_t c_arg_reg = C_ARG_REGS[2 + i];
- mov(cb, c_arg_reg, stack_opnd);
- }
-
- call_ptr(cb, REG0, (void *)bf->func_ptr);
-
- // Push the return value
- ctx_stack_pop(ctx, bf->argc);
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
-
-// opt_invokebuiltin_delegate calls a builtin function, like
-// invokebuiltin does, but instead of taking arguments from the top of the
-// stack uses the argument locals (and self) from the current method.
-static codegen_status_t
-gen_opt_invokebuiltin_delegate(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb)
-{
- const struct rb_builtin_function *bf = (struct rb_builtin_function *)jit_get_arg(jit, 0);
- int32_t start_index = (int32_t)jit_get_arg(jit, 1);
-
- // ec, self, and arguments
- if (bf->argc + 2 > NUM_C_ARG_REGS) {
- return YJIT_CANT_COMPILE;
- }
-
- // If the calls don't allocate, do they need up to date PC, SP?
- jit_prepare_routine_call(jit, ctx, REG0);
-
- if (bf->argc > 0) {
- // Load environment pointer EP from CFP
- mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, ep));
- }
-
- // Call the builtin func (ec, recv, arg1, arg2, ...)
- mov(cb, C_ARG_REGS[0], REG_EC);
- mov(cb, C_ARG_REGS[1], member_opnd(REG_CFP, rb_control_frame_t, self));
-
- // Copy arguments from locals
- for (int32_t i = 0; i < bf->argc; i++) {
- const int32_t offs = start_index + i - ISEQ_BODY(jit->iseq)->local_table_size - VM_ENV_DATA_SIZE + 1;
- x86opnd_t local_opnd = mem_opnd(64, REG0, offs * SIZEOF_VALUE);
- x86opnd_t c_arg_reg = C_ARG_REGS[i + 2];
- mov(cb, c_arg_reg, local_opnd);
- }
- call_ptr(cb, REG0, (void *)bf->func_ptr);
-
- // Push the return value
- x86opnd_t stack_ret = ctx_stack_push(ctx, TYPE_UNKNOWN);
- mov(cb, stack_ret, RAX);
-
- return YJIT_KEEP_COMPILING;
-}
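
The offset formula above addresses the argument locals at negative offsets from EP. Plugging hypothetical numbers into the same formula (and assuming CRuby's VM_ENV_DATA_SIZE of 3) shows which EP-relative slots the generated moves would read:

```c
#include <stdio.h>

int
main(void)
{
    /* Hypothetical method: 4 locals, delegated builtin args start at local 1 */
    int local_table_size = 4;
    int VM_ENV_DATA_SIZE = 3; /* env data slots that sit next to EP */
    int start_index = 1;

    for (int i = 0; i < 2; i++) {
        int offs = start_index + i - local_table_size - VM_ENV_DATA_SIZE + 1;
        printf("arg %d -> EP[%d]\n", i, offs); /* EP[-5], then EP[-4] */
    }
    return 0;
}
```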
-
-static int tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data);
-static void invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq);
-
-// Invalidate all generated code and patch C method return code to contain
-// logic for firing the c_return TracePoint event. Once rb_vm_barrier()
-// returns, all other ractors are pausing inside RB_VM_LOCK_ENTER(), which
-// means they are inside a C routine. If there is any generated code on-stack,
-// it is waiting for a return from a C routine. For every routine call, we
-// patch in an exit after the body of the containing VM instruction. This makes
-// all the invalidated code exit as soon as execution logically reaches
-// the next VM instruction. The interpreter takes care of firing the tracing
-// event if it so happens that the next VM instruction has one attached.
-//
-// The c_return event needs special handling as our codegen never outputs code
-// that contains tracing logic. If we let the normal output code run until the
-// start of the next VM instruction by relying on the patching scheme above, we
-// would fail to fire the c_return event. The interpreter doesn't fire the
-// event at an instruction boundary, so simply exiting to the interpreter isn't
-// enough. To handle it, we patch in the full logic at the return address. See
-// full_cfunc_return().
-//
-// In addition to patching, we prevent future entries into invalidated code by
-// removing all live blocks from their iseq.
-void
-rb_yjit_tracing_invalidate_all(void)
-{
- if (!rb_yjit_enabled_p()) return;
-
- // Stop other ractors since we are going to patch machine code.
- RB_VM_LOCK_ENTER();
- rb_vm_barrier();
-
- // Make it so all live block versions are no longer valid branch targets
- rb_objspace_each_objects(tracing_invalidate_all_i, NULL);
-
- // Apply patches
- const uint32_t old_pos = cb->write_pos;
- rb_darray_for(global_inval_patches, patch_idx) {
- struct codepage_patch patch = rb_darray_get(global_inval_patches, patch_idx);
- cb_set_pos(cb, patch.inline_patch_pos);
- uint8_t *jump_target = cb_get_ptr(ocb, patch.outlined_target_pos);
- jmp_ptr(cb, jump_target);
- }
- cb_set_pos(cb, old_pos);
-
- // Freeze invalidated part of the codepage. We only want to wait for
- // running instances of the code to exit from now on, so we shouldn't
- // change the code. There could be other ractors sleeping in
- // branch_stub_hit(), for example. We could harden this by changing memory
- // protection on the frozen range.
- RUBY_ASSERT_ALWAYS(yjit_codepage_frozen_bytes <= old_pos && "frozen bytes should increase monotonically");
- yjit_codepage_frozen_bytes = old_pos;
-
- cb_mark_all_executable(ocb);
- cb_mark_all_executable(cb);
- RB_VM_LOCK_LEAVE();
-}
-
-static int
-tracing_invalidate_all_i(void *vstart, void *vend, size_t stride, void *data)
-{
- VALUE v = (VALUE)vstart;
- for (; v != (VALUE)vend; v += stride) {
- void *ptr = asan_poisoned_object_p(v);
- asan_unpoison_object(v, false);
-
- if (rb_obj_is_iseq(v)) {
- rb_iseq_t *iseq = (rb_iseq_t *)v;
- invalidate_all_blocks_for_tracing(iseq);
- }
-
- asan_poison_object_if(ptr, v);
- }
- return 0;
-}
-
-static void
-invalidate_all_blocks_for_tracing(const rb_iseq_t *iseq)
-{
- struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
- if (!body) return; // iseq yet to be initialized
-
- ASSERT_vm_locking();
-
-    // Empty all blocks on the iseq so we don't compile new blocks that jump to the
-    // invalidated region.
-    // TODO: Leaking the blocks for now since we might have situations where
-    // a different ractor is waiting in branch_stub_hit(). If we free the block,
-    // that ractor can wake up with a dangling block.
- rb_darray_for(body->yjit_blocks, version_array_idx) {
- rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
- rb_darray_for(version_array, version_idx) {
- // Stop listening for invalidation events like basic operation redefinition.
- block_t *block = rb_darray_get(version_array, version_idx);
- yjit_unlink_method_lookup_dependency(block);
- yjit_block_assumptions_free(block);
- }
- rb_darray_free(version_array);
- }
- rb_darray_free(body->yjit_blocks);
- body->yjit_blocks = NULL;
-
-#if USE_MJIT
- // Reset output code entry point
- body->jit_func = NULL;
-#endif
-}
-
-static void
-yjit_reg_op(int opcode, codegen_fn gen_fn)
-{
- RUBY_ASSERT(opcode >= 0 && opcode < VM_INSTRUCTION_SIZE);
- // Check that the op wasn't previously registered
- RUBY_ASSERT(gen_fns[opcode] == NULL);
-
- gen_fns[opcode] = gen_fn;
-}
-
-void
-yjit_init_codegen(void)
-{
- // Initialize the code blocks
- uint32_t mem_size = rb_yjit_opts.exec_mem_size * 1024 * 1024;
- uint8_t *mem_block = alloc_exec_mem(mem_size);
-
- cb = &block;
- cb_init(cb, mem_block, mem_size/2);
-
- ocb = &outline_block;
- cb_init(ocb, mem_block + mem_size/2, mem_size/2);
-
- // Generate the interpreter exit code for leave
- leave_exit_code = yjit_gen_leave_exit(cb);
-
- // Generate full exit code for C func
- gen_full_cfunc_return();
- cb_mark_all_executable(cb);
-
- // Map YARV opcodes to the corresponding codegen functions
- yjit_reg_op(BIN(nop), gen_nop);
- yjit_reg_op(BIN(dup), gen_dup);
- yjit_reg_op(BIN(dupn), gen_dupn);
- yjit_reg_op(BIN(swap), gen_swap);
- yjit_reg_op(BIN(setn), gen_setn);
- yjit_reg_op(BIN(topn), gen_topn);
- yjit_reg_op(BIN(pop), gen_pop);
- yjit_reg_op(BIN(adjuststack), gen_adjuststack);
- yjit_reg_op(BIN(newarray), gen_newarray);
- yjit_reg_op(BIN(duparray), gen_duparray);
- yjit_reg_op(BIN(duphash), gen_duphash);
- yjit_reg_op(BIN(splatarray), gen_splatarray);
- yjit_reg_op(BIN(expandarray), gen_expandarray);
- yjit_reg_op(BIN(newhash), gen_newhash);
- yjit_reg_op(BIN(newrange), gen_newrange);
- yjit_reg_op(BIN(concatstrings), gen_concatstrings);
- yjit_reg_op(BIN(putnil), gen_putnil);
- yjit_reg_op(BIN(putobject), gen_putobject);
- yjit_reg_op(BIN(putstring), gen_putstring);
- yjit_reg_op(BIN(putobject_INT2FIX_0_), gen_putobject_int2fix);
- yjit_reg_op(BIN(putobject_INT2FIX_1_), gen_putobject_int2fix);
- yjit_reg_op(BIN(putself), gen_putself);
- yjit_reg_op(BIN(putspecialobject), gen_putspecialobject);
- yjit_reg_op(BIN(getlocal), gen_getlocal);
- yjit_reg_op(BIN(getlocal_WC_0), gen_getlocal_wc0);
- yjit_reg_op(BIN(getlocal_WC_1), gen_getlocal_wc1);
- yjit_reg_op(BIN(setlocal), gen_setlocal);
- yjit_reg_op(BIN(setlocal_WC_0), gen_setlocal_wc0);
- yjit_reg_op(BIN(setlocal_WC_1), gen_setlocal_wc1);
- yjit_reg_op(BIN(getinstancevariable), gen_getinstancevariable);
- yjit_reg_op(BIN(setinstancevariable), gen_setinstancevariable);
- yjit_reg_op(BIN(defined), gen_defined);
- yjit_reg_op(BIN(checktype), gen_checktype);
- yjit_reg_op(BIN(checkkeyword), gen_checkkeyword);
- yjit_reg_op(BIN(opt_lt), gen_opt_lt);
- yjit_reg_op(BIN(opt_le), gen_opt_le);
- yjit_reg_op(BIN(opt_ge), gen_opt_ge);
- yjit_reg_op(BIN(opt_gt), gen_opt_gt);
- yjit_reg_op(BIN(opt_eq), gen_opt_eq);
- yjit_reg_op(BIN(opt_neq), gen_opt_neq);
- yjit_reg_op(BIN(opt_aref), gen_opt_aref);
- yjit_reg_op(BIN(opt_aset), gen_opt_aset);
- yjit_reg_op(BIN(opt_and), gen_opt_and);
- yjit_reg_op(BIN(opt_or), gen_opt_or);
- yjit_reg_op(BIN(opt_minus), gen_opt_minus);
- yjit_reg_op(BIN(opt_plus), gen_opt_plus);
- yjit_reg_op(BIN(opt_mult), gen_opt_mult);
- yjit_reg_op(BIN(opt_div), gen_opt_div);
- yjit_reg_op(BIN(opt_mod), gen_opt_mod);
- yjit_reg_op(BIN(opt_ltlt), gen_opt_ltlt);
- yjit_reg_op(BIN(opt_nil_p), gen_opt_nil_p);
- yjit_reg_op(BIN(opt_empty_p), gen_opt_empty_p);
- yjit_reg_op(BIN(opt_str_freeze), gen_opt_str_freeze);
- yjit_reg_op(BIN(opt_str_uminus), gen_opt_str_uminus);
- yjit_reg_op(BIN(opt_not), gen_opt_not);
- yjit_reg_op(BIN(opt_size), gen_opt_size);
- yjit_reg_op(BIN(opt_length), gen_opt_length);
- yjit_reg_op(BIN(opt_regexpmatch2), gen_opt_regexpmatch2);
- yjit_reg_op(BIN(opt_getinlinecache), gen_opt_getinlinecache);
- yjit_reg_op(BIN(invokebuiltin), gen_invokebuiltin);
- yjit_reg_op(BIN(opt_invokebuiltin_delegate), gen_opt_invokebuiltin_delegate);
- yjit_reg_op(BIN(opt_invokebuiltin_delegate_leave), gen_opt_invokebuiltin_delegate);
- yjit_reg_op(BIN(opt_case_dispatch), gen_opt_case_dispatch);
- yjit_reg_op(BIN(branchif), gen_branchif);
- yjit_reg_op(BIN(branchunless), gen_branchunless);
- yjit_reg_op(BIN(branchnil), gen_branchnil);
- yjit_reg_op(BIN(jump), gen_jump);
- yjit_reg_op(BIN(getblockparamproxy), gen_getblockparamproxy);
- yjit_reg_op(BIN(opt_send_without_block), gen_opt_send_without_block);
- yjit_reg_op(BIN(send), gen_send);
- yjit_reg_op(BIN(invokesuper), gen_invokesuper);
- yjit_reg_op(BIN(leave), gen_leave);
- yjit_reg_op(BIN(getglobal), gen_getglobal);
- yjit_reg_op(BIN(setglobal), gen_setglobal);
- yjit_reg_op(BIN(anytostring), gen_anytostring);
- yjit_reg_op(BIN(objtostring), gen_objtostring);
- yjit_reg_op(BIN(toregexp), gen_toregexp);
- yjit_reg_op(BIN(intern), gen_intern);
- yjit_reg_op(BIN(getspecial), gen_getspecial);
- yjit_reg_op(BIN(getclassvariable), gen_getclassvariable);
- yjit_reg_op(BIN(setclassvariable), gen_setclassvariable);
-
- yjit_method_codegen_table = st_init_numtable();
-
- // Specialization for C methods. See yjit_reg_method() for details.
- yjit_reg_method(rb_cBasicObject, "!", jit_rb_obj_not);
-
- yjit_reg_method(rb_cNilClass, "nil?", jit_rb_true);
- yjit_reg_method(rb_mKernel, "nil?", jit_rb_false);
-
- yjit_reg_method(rb_cBasicObject, "==", jit_rb_obj_equal);
- yjit_reg_method(rb_cBasicObject, "equal?", jit_rb_obj_equal);
- yjit_reg_method(rb_mKernel, "eql?", jit_rb_obj_equal);
- yjit_reg_method(rb_cModule, "==", jit_rb_obj_equal);
- yjit_reg_method(rb_cSymbol, "==", jit_rb_obj_equal);
- yjit_reg_method(rb_cSymbol, "===", jit_rb_obj_equal);
-
- // rb_str_to_s() methods in string.c
- yjit_reg_method(rb_cString, "to_s", jit_rb_str_to_s);
- yjit_reg_method(rb_cString, "to_str", jit_rb_str_to_s);
- yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
-
- // Thread.current
- yjit_reg_method(rb_singleton_class(rb_cThread), "current", jit_thread_s_current);
-}
diff --git a/yjit_codegen.h b/yjit_codegen.h
deleted file mode 100644
index e3b971af3f..0000000000
--- a/yjit_codegen.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef YJIT_CODEGEN_H
-#define YJIT_CODEGEN_H 1
-
-typedef enum codegen_status {
- YJIT_END_BLOCK,
- YJIT_KEEP_COMPILING,
- YJIT_CANT_COMPILE
-} codegen_status_t;
-
-// Code generation function signature
-typedef codegen_status_t (*codegen_fn)(jitstate_t *jit, ctx_t *ctx, codeblock_t *cb);
-
-static void jit_ensure_block_entry_exit(jitstate_t *jit);
-
-static uint8_t *yjit_entry_prologue(codeblock_t *cb, const rb_iseq_t *iseq);
-
-static block_t *gen_single_block(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec);
-
-static void gen_code_for_exit_from_stub(void);
-
-static void yjit_init_codegen(void);
-
-#endif // #ifndef YJIT_CODEGEN_H
diff --git a/yjit_core.c b/yjit_core.c
deleted file mode 100644
index eee2c52d7f..0000000000
--- a/yjit_core.c
+++ /dev/null
@@ -1,1366 +0,0 @@
-// This file is a fragment of the yjit.o compilation unit. See yjit.c.
-#include "internal.h"
-#include "vm_sync.h"
-#include "builtin.h"
-
-#include "yjit.h"
-#include "yjit_asm.h"
-#include "yjit_iface.h"
-#include "yjit_core.h"
-#include "yjit_codegen.h"
-
-// For exiting from YJIT frame from branch_stub_hit().
-// Filled by gen_code_for_exit_from_stub().
-static uint8_t *code_for_exit_from_stub = NULL;
-
-/*
-Get an operand for the adjusted stack pointer address
-*/
-static x86opnd_t
-ctx_sp_opnd(ctx_t *ctx, int32_t offset_bytes)
-{
- int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
- return mem_opnd(64, REG_SP, offset);
-}
-
-/*
-Push one new value on the temp stack with an explicit mapping
-Return a pointer to the new stack top
-*/
-static x86opnd_t
-ctx_stack_push_mapping(ctx_t *ctx, temp_type_mapping_t mapping)
-{
- // If type propagation is disabled, store no types
- if (rb_yjit_opts.no_type_prop) {
- mapping.type = TYPE_UNKNOWN;
- }
-
- // Keep track of the type and mapping of the value
- if (ctx->stack_size < MAX_TEMP_TYPES) {
- ctx->temp_mapping[ctx->stack_size] = mapping.mapping;
- ctx->temp_types[ctx->stack_size] = mapping.type;
-
- RUBY_ASSERT(mapping.mapping.kind != TEMP_LOCAL || mapping.mapping.idx < MAX_LOCAL_TYPES);
- RUBY_ASSERT(mapping.mapping.kind != TEMP_STACK || mapping.mapping.idx == 0);
- RUBY_ASSERT(mapping.mapping.kind != TEMP_SELF || mapping.mapping.idx == 0);
- }
-
- ctx->stack_size += 1;
- ctx->sp_offset += 1;
-
- // SP points just above the topmost value
- int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
- return mem_opnd(64, REG_SP, offset);
-}
-
-
-/*
-Push one new value on the temp stack
-Return a pointer to the new stack top
-*/
-static x86opnd_t
-ctx_stack_push(ctx_t *ctx, val_type_t type)
-{
- temp_type_mapping_t mapping = { MAP_STACK, type };
- return ctx_stack_push_mapping(ctx, mapping);
-}
-
-/*
-Push the self value on the stack
-*/
-static x86opnd_t
-ctx_stack_push_self(ctx_t *ctx)
-{
- temp_type_mapping_t mapping = { MAP_SELF, TYPE_UNKNOWN };
- return ctx_stack_push_mapping(ctx, mapping);
-}
-
-/*
-Push a local variable on the stack
-*/
-static x86opnd_t
-ctx_stack_push_local(ctx_t *ctx, size_t local_idx)
-{
- if (local_idx >= MAX_LOCAL_TYPES) {
- return ctx_stack_push(ctx, TYPE_UNKNOWN);
- }
-
- temp_type_mapping_t mapping = {
- (temp_mapping_t){ .kind = TEMP_LOCAL, .idx = local_idx },
- TYPE_UNKNOWN
- };
-
- return ctx_stack_push_mapping(ctx, mapping);
-}
-
-/*
-Pop N values off the stack
-Return a pointer to the stack top before the pop operation
-*/
-static x86opnd_t
-ctx_stack_pop(ctx_t *ctx, size_t n)
-{
- RUBY_ASSERT(n <= ctx->stack_size);
-
- // SP points just above the topmost value
- int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
- x86opnd_t top = mem_opnd(64, REG_SP, offset);
-
- // Clear the types of the popped values
- for (size_t i = 0; i < n; ++i)
- {
- size_t idx = ctx->stack_size - i - 1;
- if (idx < MAX_TEMP_TYPES) {
- ctx->temp_types[idx] = TYPE_UNKNOWN;
- ctx->temp_mapping[idx] = MAP_STACK;
- }
- }
-
- ctx->stack_size -= n;
- ctx->sp_offset -= n;
-
- return top;
-}
-
-/**
-Get an operand pointing to a slot on the temp stack
-*/
-static x86opnd_t
-ctx_stack_opnd(ctx_t *ctx, int32_t idx)
-{
- // SP points just above the topmost value
- int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
- x86opnd_t opnd = mem_opnd(64, REG_SP, offset);
-
- return opnd;
-}
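
These helpers compute operands relative to REG_SP using ctx->sp_offset: slot `idx` (0 being the top of the temp stack) lands at byte offset `(sp_offset - 1 - idx) * sizeof(VALUE)`. A quick arithmetic sketch with a hypothetical sp_offset of 3:

```c
#include <stdio.h>

#define SIZEOF_VALUE 8 /* 64-bit VALUE slots */

int
main(void)
{
    int sp_offset = 3; /* three values currently on the temp stack */

    /* ctx_stack_opnd(ctx, idx): idx 0 is the topmost value */
    for (int idx = 0; idx < sp_offset; idx++) {
        int offset = (sp_offset - 1 - idx) * SIZEOF_VALUE;
        printf("stack slot %d -> [REG_SP + %d]\n", idx, offset);
    }
    /* prints +16, +8, +0 for slots 0, 1, 2 */
    return 0;
}
```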
-
-/**
-Get the type of an instruction operand
-*/
-static val_type_t
-ctx_get_opnd_type(const ctx_t *ctx, insn_opnd_t opnd)
-{
- if (opnd.is_self)
- return ctx->self_type;
-
- RUBY_ASSERT(opnd.idx < ctx->stack_size);
- int stack_idx = ctx->stack_size - 1 - opnd.idx;
-
- // If outside of tracked range, do nothing
- if (stack_idx >= MAX_TEMP_TYPES)
- return TYPE_UNKNOWN;
-
- temp_mapping_t mapping = ctx->temp_mapping[stack_idx];
-
- switch (mapping.kind) {
- case TEMP_SELF:
- return ctx->self_type;
-
- case TEMP_STACK:
- return ctx->temp_types[ctx->stack_size - 1 - opnd.idx];
-
- case TEMP_LOCAL:
- RUBY_ASSERT(mapping.idx < MAX_LOCAL_TYPES);
- return ctx->local_types[mapping.idx];
- }
-
- rb_bug("unreachable");
-}
-
-static int type_diff(val_type_t src, val_type_t dst);
-
-#define UPGRADE_TYPE(dest, src) do { \
- RUBY_ASSERT(type_diff((src), (dest)) != INT_MAX); \
- (dest) = (src); \
-} while (false)
-
-/**
-Upgrade (or "learn") the type of an instruction operand
-This value must be compatible and at least as specific as the previously known type.
-If this value originated from self, or an lvar, the learned type will be
-propagated back to its source.
-*/
-static void
-ctx_upgrade_opnd_type(ctx_t *ctx, insn_opnd_t opnd, val_type_t type)
-{
- // If type propagation is disabled, store no types
- if (rb_yjit_opts.no_type_prop)
- return;
-
- if (opnd.is_self) {
- UPGRADE_TYPE(ctx->self_type, type);
- return;
- }
-
- RUBY_ASSERT(opnd.idx < ctx->stack_size);
- int stack_idx = ctx->stack_size - 1 - opnd.idx;
-
- // If outside of tracked range, do nothing
- if (stack_idx >= MAX_TEMP_TYPES)
- return;
-
- temp_mapping_t mapping = ctx->temp_mapping[stack_idx];
-
- switch (mapping.kind) {
- case TEMP_SELF:
- UPGRADE_TYPE(ctx->self_type, type);
- break;
-
- case TEMP_STACK:
- UPGRADE_TYPE(ctx->temp_types[stack_idx], type);
- break;
-
- case TEMP_LOCAL:
- RUBY_ASSERT(mapping.idx < MAX_LOCAL_TYPES);
- UPGRADE_TYPE(ctx->local_types[mapping.idx], type);
- break;
- }
-}
-
-/*
-Get both the type and mapping (where the value originates) of an operand.
-This can be used with ctx_stack_push_mapping or ctx_set_opnd_mapping to copy
-a stack value's type while maintaining the mapping.
-*/
-static temp_type_mapping_t
-ctx_get_opnd_mapping(const ctx_t *ctx, insn_opnd_t opnd)
-{
- temp_type_mapping_t type_mapping;
- type_mapping.type = ctx_get_opnd_type(ctx, opnd);
-
- if (opnd.is_self) {
- type_mapping.mapping = MAP_SELF;
- return type_mapping;
- }
-
- RUBY_ASSERT(opnd.idx < ctx->stack_size);
- int stack_idx = ctx->stack_size - 1 - opnd.idx;
-
- if (stack_idx < MAX_TEMP_TYPES) {
- type_mapping.mapping = ctx->temp_mapping[stack_idx];
- }
- else {
- // We can't know the source of this stack operand, so we assume it is
-        // a stack-only temporary. Its type will be UNKNOWN.
- RUBY_ASSERT(type_mapping.type.type == ETYPE_UNKNOWN);
- type_mapping.mapping = MAP_STACK;
- }
-
- return type_mapping;
-}
-
-/*
-Overwrite both the type and mapping of a stack operand.
-*/
-static void
-ctx_set_opnd_mapping(ctx_t *ctx, insn_opnd_t opnd, temp_type_mapping_t type_mapping)
-{
- // self is always MAP_SELF
- RUBY_ASSERT(!opnd.is_self);
-
- RUBY_ASSERT(opnd.idx < ctx->stack_size);
- int stack_idx = ctx->stack_size - 1 - opnd.idx;
-
- // If type propagation is disabled, store no types
- if (rb_yjit_opts.no_type_prop)
- return;
-
- // If outside of tracked range, do nothing
- if (stack_idx >= MAX_TEMP_TYPES)
- return;
-
- ctx->temp_mapping[stack_idx] = type_mapping.mapping;
-
- // Only used when mapping == MAP_STACK
- ctx->temp_types[stack_idx] = type_mapping.type;
-}
-
-/**
-Set the type of a local variable
-*/
-static void
-ctx_set_local_type(ctx_t *ctx, size_t idx, val_type_t type)
-{
- // If type propagation is disabled, store no types
- if (rb_yjit_opts.no_type_prop)
- return;
-
- if (idx >= MAX_LOCAL_TYPES)
- return;
-
- // If any values on the stack map to this local we must detach them
- for (int i = 0; i < MAX_TEMP_TYPES; i++) {
- temp_mapping_t *mapping = &ctx->temp_mapping[i];
- if (mapping->kind == TEMP_LOCAL && mapping->idx == idx) {
- ctx->temp_types[i] = ctx->local_types[mapping->idx];
- *mapping = MAP_STACK;
- }
- }
-
- ctx->local_types[idx] = type;
-}
-
-// Erase local variable type information
-// eg: because of a call we can't track
-static void
-ctx_clear_local_types(ctx_t *ctx)
-{
- // When clearing local types we must detach any stack mappings to those
-    // locals. Even if the locals change, the values already on the stack will not.
- for (int i = 0; i < MAX_TEMP_TYPES; i++) {
- temp_mapping_t *mapping = &ctx->temp_mapping[i];
- if (mapping->kind == TEMP_LOCAL) {
- RUBY_ASSERT(mapping->idx < MAX_LOCAL_TYPES);
- ctx->temp_types[i] = ctx->local_types[mapping->idx];
- *mapping = MAP_STACK;
- }
- RUBY_ASSERT(mapping->kind == TEMP_STACK || mapping->kind == TEMP_SELF);
- }
- memset(&ctx->local_types, 0, sizeof(ctx->local_types));
-}
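
Both ctx_set_local_type() and ctx_clear_local_types() rely on the same detach rule: a stack slot whose type is tracked through a local keeps the type that was known when it was pushed, even though the local itself is about to change or become unknown. A standalone sketch of that rule, with simplified stand-in types rather than the real `ctx_t`:

```c
// Sketch of the detach-on-write rule used by ctx_set_local_type() and
// ctx_clear_local_types(). Simplified stand-in types, not the real ctx_t.
#include <stdio.h>

#define MAX_TEMPS  8
#define MAX_LOCALS 8

enum kind { KIND_STACK, KIND_LOCAL };
enum type { T_UNKNOWN, T_FIXNUM, T_STRING };

typedef struct {
    enum kind kind;      // where the stack slot's type comes from
    int       local_idx; // meaningful only when kind == KIND_LOCAL
} mapping_t;

typedef struct {
    enum type local_types[MAX_LOCALS];
    enum type temp_types[MAX_TEMPS]; // used when kind == KIND_STACK
    mapping_t temp_mapping[MAX_TEMPS];
} mini_ctx_t;

static enum type
temp_type(const mini_ctx_t *ctx, int slot)
{
    mapping_t m = ctx->temp_mapping[slot];
    return (m.kind == KIND_LOCAL) ? ctx->local_types[m.local_idx]
                                  : ctx->temp_types[slot];
}

// Writing a local: detach stack slots that alias it, then retype the local.
static void
set_local_type(mini_ctx_t *ctx, int local_idx, enum type t)
{
    for (int i = 0; i < MAX_TEMPS; i++) {
        mapping_t *m = &ctx->temp_mapping[i];
        if (m->kind == KIND_LOCAL && m->local_idx == local_idx) {
            ctx->temp_types[i] = ctx->local_types[local_idx]; // keep old type
            m->kind = KIND_STACK;                             // detach
        }
    }
    ctx->local_types[local_idx] = t;
}

int
main(void)
{
    mini_ctx_t ctx = {0};
    ctx.local_types[0] = T_FIXNUM;
    // Slot 0 holds a copy of local 0, tracked through the mapping.
    ctx.temp_mapping[0] = (mapping_t){ KIND_LOCAL, 0 };

    printf("before: slot 0 type = %d\n", temp_type(&ctx, 0));   // T_FIXNUM
    set_local_type(&ctx, 0, T_STRING);
    printf("after:  slot 0 type = %d, local 0 type = %d\n",
           temp_type(&ctx, 0), (int)ctx.local_types[0]);        // T_FIXNUM, T_STRING
    return 0;
}
```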
-
-
-/* This returns an appropriate val_type_t based on a known value */
-static val_type_t
-yjit_type_of_value(VALUE val)
-{
- if (SPECIAL_CONST_P(val)) {
- if (FIXNUM_P(val)) {
- return TYPE_FIXNUM;
- }
- else if (NIL_P(val)) {
- return TYPE_NIL;
- }
- else if (val == Qtrue) {
- return TYPE_TRUE;
- }
- else if (val == Qfalse) {
- return TYPE_FALSE;
- }
- else if (STATIC_SYM_P(val)) {
- return TYPE_STATIC_SYMBOL;
- }
- else if (FLONUM_P(val)) {
- return TYPE_FLONUM;
- }
- else {
- RUBY_ASSERT(false);
- UNREACHABLE_RETURN(TYPE_IMM);
- }
- }
- else {
- switch (BUILTIN_TYPE(val)) {
- case T_ARRAY:
- return TYPE_ARRAY;
- case T_HASH:
- return TYPE_HASH;
- case T_STRING:
- return TYPE_STRING;
- default:
- // generic heap object
- return TYPE_HEAP;
- }
- }
-}
-
-/* The name of a type, for debugging */
-RBIMPL_ATTR_MAYBE_UNUSED()
-static const char *
-yjit_type_name(val_type_t type)
-{
- RUBY_ASSERT(!(type.is_imm && type.is_heap));
-
- switch (type.type) {
- case ETYPE_UNKNOWN:
- if (type.is_imm) {
- return "unknown immediate";
- }
- else if (type.is_heap) {
- return "unknown heap";
- }
- else {
- return "unknown";
- }
- case ETYPE_NIL:
- return "nil";
- case ETYPE_TRUE:
- return "true";
- case ETYPE_FALSE:
- return "false";
- case ETYPE_FIXNUM:
- return "fixnum";
- case ETYPE_FLONUM:
- return "flonum";
- case ETYPE_ARRAY:
- return "array";
- case ETYPE_HASH:
- return "hash";
- case ETYPE_SYMBOL:
- return "symbol";
- case ETYPE_STRING:
- return "string";
- }
-
- UNREACHABLE_RETURN("");
-}
-
-/*
-Compute a difference between two value types
-Returns 0 if the two are the same
-Returns > 0 if different but compatible
-Returns INT_MAX if incompatible
-*/
-static int
-type_diff(val_type_t src, val_type_t dst)
-{
- RUBY_ASSERT(!src.is_heap || !src.is_imm);
- RUBY_ASSERT(!dst.is_heap || !dst.is_imm);
-
- // If dst assumes heap but src doesn't
- if (dst.is_heap && !src.is_heap)
- return INT_MAX;
-
- // If dst assumes imm but src doesn't
- if (dst.is_imm && !src.is_imm)
- return INT_MAX;
-
- // If dst assumes known type different from src
- if (dst.type != ETYPE_UNKNOWN && dst.type != src.type)
- return INT_MAX;
-
- if (dst.is_heap != src.is_heap)
- return 1;
-
- if (dst.is_imm != src.is_imm)
- return 1;
-
- if (dst.type != src.type)
- return 1;
-
- return 0;
-}
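
type_diff() implements a small compatibility lattice: dropping specificity (for example fixnum to unknown immediate) costs one point per dropped attribute, while claiming anything the source does not guarantee is rejected with INT_MAX. A standalone sketch of the same scoring, with the types re-declared locally so it compiles on its own:

```c
// Standalone sketch of the type_diff() lattice, with simplified stand-in
// types mirroring (but re-declared independently of) the deleted headers.
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint8_t is_heap : 1; // definitely a heap object
    uint8_t is_imm  : 1; // definitely an immediate
    uint8_t type    : 4; // specific type tag, 0 == unknown
} val_type_t;

enum { ETYPE_UNKNOWN = 0, ETYPE_FIXNUM, ETYPE_ARRAY };

// 0: same, >0: different but compatible, INT_MAX: incompatible
static int
type_diff(val_type_t src, val_type_t dst)
{
    if (dst.is_heap && !src.is_heap) return INT_MAX;
    if (dst.is_imm && !src.is_imm) return INT_MAX;
    if (dst.type != ETYPE_UNKNOWN && dst.type != src.type) return INT_MAX;

    int diff = 0;
    if (dst.is_heap != src.is_heap) diff++;
    if (dst.is_imm != src.is_imm) diff++;
    if (dst.type != src.type) diff++;
    return diff;
}

int
main(void)
{
    val_type_t unknown = {0};
    val_type_t imm    = { .is_imm = 1 };
    val_type_t fixnum = { .is_imm = 1, .type = ETYPE_FIXNUM };
    val_type_t array  = { .is_heap = 1, .type = ETYPE_ARRAY };

    printf("fixnum -> fixnum:  %d\n", type_diff(fixnum, fixnum));   // 0
    printf("fixnum -> imm:     %d\n", type_diff(fixnum, imm));      // 1
    printf("fixnum -> unknown: %d\n", type_diff(fixnum, unknown));  // 2
    printf("imm -> fixnum:     %d\n", type_diff(imm, fixnum));      // INT_MAX
    printf("fixnum -> array:   %d\n", type_diff(fixnum, array));    // INT_MAX
    return 0;
}
```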
-
-/**
-Compute a difference score for two context objects
-Returns 0 if the two contexts are the same
-Returns > 0 if different but compatible
-Returns INT_MAX if incompatible
-*/
-static int
-ctx_diff(const ctx_t *src, const ctx_t *dst)
-{
- // Can only lookup the first version in the chain
- if (dst->chain_depth != 0)
- return INT_MAX;
-
- // Blocks with depth > 0 always produce new versions
- // Sidechains cannot overlap
- if (src->chain_depth != 0)
- return INT_MAX;
-
- if (dst->stack_size != src->stack_size)
- return INT_MAX;
-
- if (dst->sp_offset != src->sp_offset)
- return INT_MAX;
-
- // Difference sum
- int diff = 0;
-
- // Check the type of self
- int self_diff = type_diff(src->self_type, dst->self_type);
-
- if (self_diff == INT_MAX)
- return INT_MAX;
-
- diff += self_diff;
-
- // For each local type we track
- for (size_t i = 0; i < MAX_LOCAL_TYPES; ++i)
- {
- val_type_t t_src = src->local_types[i];
- val_type_t t_dst = dst->local_types[i];
- int temp_diff = type_diff(t_src, t_dst);
-
- if (temp_diff == INT_MAX)
- return INT_MAX;
-
- diff += temp_diff;
- }
-
- // For each value on the temp stack
- for (size_t i = 0; i < src->stack_size; ++i)
- {
- temp_type_mapping_t m_src = ctx_get_opnd_mapping(src, OPND_STACK(i));
- temp_type_mapping_t m_dst = ctx_get_opnd_mapping(dst, OPND_STACK(i));
-
- if (m_dst.mapping.kind != m_src.mapping.kind) {
- if (m_dst.mapping.kind == TEMP_STACK) {
- // We can safely drop information about the source of the temp
- // stack operand.
- diff += 1;
- }
- else {
- return INT_MAX;
- }
- }
- else if (m_dst.mapping.idx != m_src.mapping.idx) {
- return INT_MAX;
- }
-
- int temp_diff = type_diff(m_src.type, m_dst.type);
-
- if (temp_diff == INT_MAX)
- return INT_MAX;
-
- diff += temp_diff;
- }
-
- return diff;
-}
-
-// Get all blocks for a particular place in an iseq.
-static rb_yjit_block_array_t
-yjit_get_version_array(const rb_iseq_t *iseq, unsigned idx)
-{
- struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
-
- if (rb_darray_size(body->yjit_blocks) == 0) {
- return NULL;
- }
-
- RUBY_ASSERT((unsigned)rb_darray_size(body->yjit_blocks) == body->iseq_size);
- return rb_darray_get(body->yjit_blocks, idx);
-}
-
-// Count the number of block versions matching a given blockid
-static size_t
-get_num_versions(blockid_t blockid)
-{
- return rb_darray_size(yjit_get_version_array(blockid.iseq, blockid.idx));
-}
-
-// Keep track of a block version. Block should be fully constructed.
-static void
-add_block_version(block_t *block)
-{
- const blockid_t blockid = block->blockid;
- const rb_iseq_t *iseq = blockid.iseq;
- struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
-
- // Function entry blocks must have stack size 0
- RUBY_ASSERT(!(block->blockid.idx == 0 && block->ctx.stack_size > 0));
-
- // Ensure yjit_blocks is initialized for this iseq
- if (rb_darray_size(body->yjit_blocks) == 0) {
- // Initialize yjit_blocks to be as wide as body->iseq_encoded
- int32_t casted = (int32_t)body->iseq_size;
- if ((unsigned)casted != body->iseq_size) {
- rb_bug("iseq too large");
- }
-
- rb_darray_make(&body->yjit_blocks, casted);
-
-#if YJIT_STATS
- // First block compiled for this iseq
- yjit_runtime_counters.compiled_iseq_count++;
-#endif
- }
-
- RUBY_ASSERT(blockid.idx < rb_darray_size(body->yjit_blocks));
- rb_yjit_block_array_t *block_array_ref = rb_darray_ref(body->yjit_blocks, blockid.idx);
-
- // Add the new block
- rb_darray_append(block_array_ref, block);
-
- {
- // By writing the new block to the iseq, the iseq now
- // contains new references to Ruby objects. Run write barriers.
- cme_dependency_t *cme_dep;
- rb_darray_foreach(block->cme_dependencies, cme_dependency_idx, cme_dep) {
- RB_OBJ_WRITTEN(iseq, Qundef, cme_dep->receiver_klass);
- RB_OBJ_WRITTEN(iseq, Qundef, cme_dep->callee_cme);
- }
-
- // Run write barriers for all objects in generated code.
- uint32_t *offset_element;
- rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
- uint32_t offset_to_value = *offset_element;
- uint8_t *value_address = cb_get_ptr(cb, offset_to_value);
-
- VALUE object;
- memcpy(&object, value_address, SIZEOF_VALUE);
- RB_OBJ_WRITTEN(iseq, Qundef, object);
- }
- }
-
-#if YJIT_STATS
- yjit_runtime_counters.compiled_block_count++;
-#endif
-}
-
-static ptrdiff_t
-branch_code_size(const branch_t *branch)
-{
- return branch->end_addr - branch->start_addr;
-}
-
-// Generate code for a branch, possibly rewriting and changing the size of it
-static void
-regenerate_branch(codeblock_t *cb, branch_t *branch)
-{
- if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
- // Generating this branch would modify frozen bytes. Do nothing.
- return;
- }
-
- const uint32_t old_write_pos = cb->write_pos;
- const bool branch_terminates_block = branch->end_addr == branch->block->end_addr;
-
- RUBY_ASSERT(branch->dst_addrs[0] != NULL);
-
- cb_set_write_ptr(cb, branch->start_addr);
- branch->gen_fn(cb, branch->dst_addrs[0], branch->dst_addrs[1], branch->shape);
- branch->end_addr = cb_get_write_ptr(cb);
-
- if (branch_terminates_block) {
- // Adjust block size
- branch->block->end_addr = branch->end_addr;
- }
-
- // cb->write_pos is both a write cursor and a marker for the end of
- // everything written out so far. Leave cb->write_pos at the end of the
-    // block before returning. This function only ever bumps or retains the
-    // end-of-block marker, since that's what the majority of callers want.
-    // When the branch sits at the very end of the codeblock and it shrinks
-    // after regeneration, it's up to the caller to drop bytes off the end so
-    // as not to leave a gap, and to implement branch->shape.
- if (old_write_pos > cb->write_pos) {
- // We rewound cb->write_pos to generate the branch, now restore it.
- cb_set_pos(cb, old_write_pos);
- }
- else {
- // The branch sits at the end of cb and consumed some memory.
- // Keep cb->write_pos.
- }
-}
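
The write-cursor protocol described in the comment above can be shown with a plain byte buffer standing in for `codeblock_t`: rewind to the branch start, re-emit, then restore the cursor unless the branch was the last thing written. A hypothetical sketch, not the real code generator:

```c
// Sketch of the write-cursor protocol used by regenerate_branch().
// The "codeblock" here is a plain byte buffer, not the real codeblock_t.
#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct {
    uint8_t buf[64];
    uint32_t write_pos; // cursor and end-of-written-code marker
} mini_cb_t;

static void
emit(mini_cb_t *cb, const uint8_t *bytes, uint32_t len)
{
    memcpy(cb->buf + cb->write_pos, bytes, len);
    cb->write_pos += len;
}

// Rewrite bytes at `start`, then leave write_pos at the end-of-code marker.
static void
rewrite_at(mini_cb_t *cb, uint32_t start, const uint8_t *bytes, uint32_t len_new)
{
    uint32_t old_write_pos = cb->write_pos;
    cb->write_pos = start;
    emit(cb, bytes, len_new);
    if (old_write_pos > cb->write_pos) {
        // The rewritten span was in the middle; restore the end marker.
        cb->write_pos = old_write_pos;
    }
    // Otherwise it was the last thing written and may have grown; keep it.
}

int
main(void)
{
    mini_cb_t cb = {0};
    const uint8_t before[]  = {0x01, 0x02};
    const uint8_t branch[]  = {0xAA, 0xBB, 0xCC};
    const uint8_t shorter[] = {0xDD};

    emit(&cb, before, (uint32_t)sizeof(before));
    uint32_t branch_start = cb.write_pos;          // branch starts at offset 2
    emit(&cb, branch, (uint32_t)sizeof(branch));
    emit(&cb, (const uint8_t[]){0x03}, 1);         // code following the branch

    rewrite_at(&cb, branch_start, shorter, (uint32_t)sizeof(shorter));
    // The cursor stays at the end of all written code (6), not at the end
    // of the shrunken branch (3).
    printf("write_pos = %u\n", (unsigned)cb.write_pos);
    return 0;
}
```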
-
-// Create a new outgoing branch entry for a block
-static branch_t*
-make_branch_entry(block_t *block, const ctx_t *src_ctx, branchgen_fn gen_fn)
-{
- RUBY_ASSERT(block != NULL);
-
- // Allocate and zero-initialize
- branch_t *branch = calloc(1, sizeof(branch_t));
-
- branch->block = block;
- (void)src_ctx; // Unused for now
- branch->gen_fn = gen_fn;
- branch->shape = SHAPE_DEFAULT;
-
- // Add to the list of outgoing branches for the block
- rb_darray_append(&block->outgoing, branch);
-
- return branch;
-}
-
-// Retrieve a basic block version for an (iseq, idx) tuple
-static block_t *
-find_block_version(blockid_t blockid, const ctx_t *ctx)
-{
- rb_yjit_block_array_t versions = yjit_get_version_array(blockid.iseq, blockid.idx);
-
- // Best match found
- block_t *best_version = NULL;
- int best_diff = INT_MAX;
-
- // For each version matching the blockid
- rb_darray_for(versions, idx) {
- block_t *version = rb_darray_get(versions, idx);
- int diff = ctx_diff(ctx, &version->ctx);
-
- // Note that we always prefer the first matching
- // version because of inline-cache chains
- if (diff < best_diff) {
- best_version = version;
- best_diff = diff;
- }
- }
-
- // If greedy versioning is enabled
- if (rb_yjit_opts.greedy_versioning)
- {
- // If we're below the version limit, don't settle for an imperfect match
- if ((uint32_t)rb_darray_size(versions) + 1 < rb_yjit_opts.max_versions && best_diff > 0) {
- return NULL;
- }
- }
-
- return best_version;
-}
-
-// Produce a generic context when the block version limit is hit for a blockid
-// Note that this returns a copy of the context and does not mutate the ctx argument
-static ctx_t
-limit_block_versions(blockid_t blockid, const ctx_t *ctx)
-{
- // Guard chains implement limits separately, do nothing
- if (ctx->chain_depth > 0)
- return *ctx;
-
- // If this block version we're about to add will hit the version limit
- if (get_num_versions(blockid) + 1 >= rb_yjit_opts.max_versions) {
- // Produce a generic context that stores no type information,
- // but still respects the stack_size and sp_offset constraints.
- // This new context will then match all future requests.
- ctx_t generic_ctx = DEFAULT_CTX;
- generic_ctx.stack_size = ctx->stack_size;
- generic_ctx.sp_offset = ctx->sp_offset;
-
-        // Return the generic context in place of the incoming one
- return generic_ctx;
- }
-
- return *ctx;
-}
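
In other words, once a location is about to hit the version limit, the context it is compiled with keeps only the structural fields (`stack_size`, `sp_offset`) so that it matches every future request there. A standalone sketch of that fallback with a simplified stand-in context:

```c
// Sketch of the version-limit fallback in limit_block_versions():
// keep the structural fields, drop the type information.
// Simplified stand-in context, not the real ctx_t.
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint16_t stack_size;
    int16_t  sp_offset;
    uint8_t  chain_depth;
    uint8_t  self_type; // stand-in for all the tracked type fields
} mini_ctx_t;

static mini_ctx_t
limit_versions(const mini_ctx_t *ctx, size_t num_versions, size_t max_versions)
{
    // Guard chains implement their own limit
    if (ctx->chain_depth > 0) return *ctx;

    if (num_versions + 1 >= max_versions) {
        // Generic context: matches any future request with the same
        // stack_size/sp_offset because it makes no type claims.
        mini_ctx_t generic = {0};
        generic.stack_size = ctx->stack_size;
        generic.sp_offset = ctx->sp_offset;
        return generic;
    }
    return *ctx;
}

int
main(void)
{
    mini_ctx_t ctx = { .stack_size = 2, .sp_offset = 2, .self_type = 42 };
    mini_ctx_t kept = limit_versions(&ctx, 1, 4); // below the limit
    mini_ctx_t gen  = limit_versions(&ctx, 3, 4); // would hit the limit
    printf("kept self_type=%u, generic self_type=%u (stack_size=%u)\n",
           kept.self_type, gen.self_type, gen.stack_size);
    return 0;
}
```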
-
-static void yjit_free_block(block_t *block);
-static void block_array_remove(rb_yjit_block_array_t block_array, block_t *block);
-
-// Immediately compile a series of block versions at a starting point and
-// return the starting block.
-static block_t *
-gen_block_version(blockid_t blockid, const ctx_t *start_ctx, rb_execution_context_t *ec)
-{
- // Small array to keep track of all the blocks compiled per invocation. We
- // tend to have small batches since we often break up compilation with lazy
- // stubs. Compilation is successful only if the whole batch is successful.
- enum { MAX_PER_BATCH = 64 };
- block_t *batch[MAX_PER_BATCH];
- int compiled_count = 0;
- bool batch_success = true;
- block_t *block;
-
- // Generate code for the first block
- block = gen_single_block(blockid, start_ctx, ec);
- if (block) {
- // Track the block
- add_block_version(block);
-
- batch[compiled_count] = block;
- compiled_count++;
- }
- batch_success = block;
-
- // For each successor block to compile
- while (batch_success) {
- // If the previous block compiled doesn't have outgoing branches, stop
- if (rb_darray_size(block->outgoing) == 0) {
- break;
- }
-
- // Get the last outgoing branch from the previous block. Blocks can use
- // gen_direct_jump() to request a block to be placed immediately after.
- branch_t *last_branch = rb_darray_back(block->outgoing);
-
- // If there is no next block to compile, stop
- if (last_branch->dst_addrs[0] || last_branch->dst_addrs[1]) {
- break;
- }
-
- if (last_branch->targets[0].iseq == NULL) {
- rb_bug("invalid target for last branch");
- }
-
- // Generate code for the current block using context from the last branch.
- blockid_t requested_id = last_branch->targets[0];
- const ctx_t *requested_ctx = &last_branch->target_ctxs[0];
-
- batch_success = compiled_count < MAX_PER_BATCH;
- if (batch_success) {
- block = gen_single_block(requested_id, requested_ctx, ec);
- batch_success = block;
- }
-
- // If the batch failed, stop
- if (!batch_success) {
- break;
- }
-
- // Connect the last branch and the new block
- last_branch->dst_addrs[0] = block->start_addr;
- rb_darray_append(&block->incoming, last_branch);
- last_branch->blocks[0] = block;
-
- // This block should immediately follow the last branch
- RUBY_ASSERT(block->start_addr == last_branch->end_addr);
-
- // Track the block
- add_block_version(block);
-
- batch[compiled_count] = block;
- compiled_count++;
- }
-
- if (batch_success) {
- // Success. Return first block in the batch.
- RUBY_ASSERT(compiled_count > 0);
- return batch[0];
- }
- else {
- // The batch failed. Free everything in the batch
- for (int block_idx = 0; block_idx < compiled_count; block_idx++) {
- block_t *const to_free = batch[block_idx];
-
- // Undo add_block_version()
- rb_yjit_block_array_t versions = yjit_get_version_array(to_free->blockid.iseq, to_free->blockid.idx);
- block_array_remove(versions, to_free);
-
- // Deallocate
- yjit_free_block(to_free);
- }
-
-#if YJIT_STATS
- yjit_runtime_counters.compilation_failure++;
-#endif
- return NULL;
- }
-}
-
-// Generate a block version that is an entry point inserted into an iseq
-static uint8_t *
-gen_entry_point(const rb_iseq_t *iseq, uint32_t insn_idx, rb_execution_context_t *ec)
-{
- // If we aren't at PC 0, don't generate code
- // See yjit_pc_guard
- if (ISEQ_BODY(iseq)->iseq_encoded != ec->cfp->pc) {
- return NULL;
- }
-
- // The entry context makes no assumptions about types
- blockid_t blockid = { iseq, insn_idx };
-
- rb_vm_barrier();
- // Write the interpreter entry prologue. Might be NULL when out of memory.
- uint8_t *code_ptr = yjit_entry_prologue(cb, iseq);
-
- // Try to generate code for the entry block
- block_t *block = gen_block_version(blockid, &DEFAULT_CTX, ec);
-
- cb_mark_all_executable(ocb);
- cb_mark_all_executable(cb);
-
- // If we couldn't generate any code
- if (!block || block->end_idx == insn_idx) {
- return NULL;
- }
-
- return code_ptr;
-}
-
-// Called by the generated code when a branch stub is executed
-// Triggers compilation of branches and code patching
-static uint8_t *
-branch_stub_hit(branch_t *branch, const uint32_t target_idx, rb_execution_context_t *ec)
-{
- uint8_t *dst_addr = NULL;
-
- // Stop other ractors since we are going to patch machine code.
- // This is how the GC does it.
- RB_VM_LOCK_ENTER();
- rb_vm_barrier();
-
- const ptrdiff_t branch_size_on_entry = branch_code_size(branch);
-
- RUBY_ASSERT(branch != NULL);
- RUBY_ASSERT(target_idx < 2);
- blockid_t target = branch->targets[target_idx];
- const ctx_t *target_ctx = &branch->target_ctxs[target_idx];
-
- // If this branch has already been patched, return the dst address
- // Note: ractors can cause the same stub to be hit multiple times
- if (branch->blocks[target_idx]) {
- dst_addr = branch->dst_addrs[target_idx];
- }
- else {
- rb_vm_barrier();
-
- // :stub-sp-flush:
-        // Generated code does stack operations without modifying cfp->sp, while
-        // cfp->sp tells the GC which values on the stack to root. Generated code
- // generally takes care of updating cfp->sp when it calls runtime routines that
- // could trigger GC, but it's inconvenient to do it before calling this function.
- // So we do it here instead.
- VALUE *const original_interp_sp = ec->cfp->sp;
- ec->cfp->sp += target_ctx->sp_offset;
-
- // Update the PC in the current CFP, because it
- // may be out of sync in JITted code
- ec->cfp->pc = yjit_iseq_pc_at_idx(target.iseq, target.idx);
-
- // Try to find an existing compiled version of this block
- block_t *p_block = find_block_version(target, target_ctx);
-
- // If this block hasn't yet been compiled
- if (!p_block) {
- const uint8_t branch_old_shape = branch->shape;
- bool branch_modified = false;
-
- // If the new block can be generated right after the branch (at cb->write_pos)
- if (cb_get_write_ptr(cb) == branch->end_addr) {
- // This branch should be terminating its block
- RUBY_ASSERT(branch->end_addr == branch->block->end_addr);
-
- // Change the branch shape to indicate the target block will be placed next
- branch->shape = (uint8_t)target_idx;
-
- // Rewrite the branch with the new, potentially more compact shape
- regenerate_branch(cb, branch);
- branch_modified = true;
-
- // Ensure that the branch terminates the codeblock just like
- // before entering this if block. This drops bytes off the end
- // in case we shrank the branch when regenerating.
- cb_set_write_ptr(cb, branch->end_addr);
- }
-
- // Compile the new block version
- p_block = gen_block_version(target, target_ctx, ec);
-
- if (!p_block && branch_modified) {
- // We couldn't generate a new block for the branch, but we modified the branch.
- // Restore the branch by regenerating it.
- branch->shape = branch_old_shape;
- regenerate_branch(cb, branch);
- }
- }
-
- if (p_block) {
- // Branch shape should reflect layout
- RUBY_ASSERT(!(branch->shape == (uint8_t)target_idx && p_block->start_addr != branch->end_addr));
-
- // Add this branch to the list of incoming branches for the target
- rb_darray_append(&p_block->incoming, branch);
-
- // Update the branch target address
- dst_addr = p_block->start_addr;
- branch->dst_addrs[target_idx] = dst_addr;
-
- // Mark this branch target as patched (no longer a stub)
- branch->blocks[target_idx] = p_block;
-
- // Rewrite the branch with the new jump target address
- regenerate_branch(cb, branch);
-
- // Restore interpreter sp, since the code hitting the stub expects the original.
- ec->cfp->sp = original_interp_sp;
- }
- else {
- // Failed to service the stub by generating a new block so now we
- // need to exit to the interpreter at the stubbed location. We are
- // intentionally *not* restoring original_interp_sp. At the time of
- // writing, reconstructing interpreter state only involves setting
- // cfp->sp and cfp->pc. We set both before trying to generate the
- // block. All there is left to do to exit is to pop the native
- // frame. We do that in code_for_exit_from_stub.
- dst_addr = code_for_exit_from_stub;
- }
-
- cb_mark_all_executable(ocb);
- cb_mark_all_executable(cb);
- }
-
- const ptrdiff_t new_branch_size = branch_code_size(branch);
- RUBY_ASSERT_ALWAYS(new_branch_size >= 0);
- RUBY_ASSERT_ALWAYS(new_branch_size <= branch_size_on_entry && "branch stubs should not enlarge branches");
-
- RB_VM_LOCK_LEAVE();
-
- // Return a pointer to the compiled block version
- return dst_addr;
-}
-
-// Get a version or stub corresponding to a branch target
-static uint8_t *
-get_branch_target(
- blockid_t target,
- const ctx_t *ctx,
- branch_t *branch,
- uint32_t target_idx
-)
-{
- //fprintf(stderr, "get_branch_target, block (%p, %d)\n", target.iseq, target.idx);
-
- block_t *p_block = find_block_version(target, ctx);
-
- // If the block already exists
- if (p_block) {
- // Add an incoming branch for this version
- rb_darray_append(&p_block->incoming, branch);
- branch->blocks[target_idx] = p_block;
-
- // Return a pointer to the compiled code
- return p_block->start_addr;
- }
-
- // Do we have enough memory for a stub?
- const long MAX_CODE_SIZE = 64;
- if (ocb->write_pos + MAX_CODE_SIZE >= cb->mem_size) {
- return NULL;
- }
-
- // Generate an outlined stub that will call branch_stub_hit()
- uint8_t *stub_addr = cb_get_ptr(ocb, ocb->write_pos);
-
- // Call branch_stub_hit(branch_idx, target_idx, ec)
- mov(ocb, C_ARG_REGS[2], REG_EC);
- mov(ocb, C_ARG_REGS[1], imm_opnd(target_idx));
- mov(ocb, C_ARG_REGS[0], const_ptr_opnd(branch));
- call_ptr(ocb, REG0, (void *)&branch_stub_hit);
-
- // Jump to the address returned by the
- // branch_stub_hit call
- jmp_rm(ocb, RAX);
-
- RUBY_ASSERT(cb_get_ptr(ocb, ocb->write_pos) - stub_addr <= MAX_CODE_SIZE);
-
- return stub_addr;
-}
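
The stub emitted here encodes a simple protocol: an uncompiled target is reached through outlined code that calls branch_stub_hit(), which compiles the target, patches the branch, and returns the address to jump to, so later executions never touch the stub again. A plain-C simulation of that protocol (function pointers stand in for generated code; the names are hypothetical):

```c
// Simulation of the stub protocol behind get_branch_target() and
// branch_stub_hit(). Not the real code generator.
#include <stdio.h>

typedef void (*code_fn)(void);

static void compiled_target(void) { puts("running compiled target"); }

typedef struct {
    code_fn dst;      // stand-in for branch->dst_addrs[i]
    int     compiled; // stand-in for branch->blocks[i] != NULL
} mini_branch_t;

// Stand-in for branch_stub_hit(): "compile" the target, patch the branch,
// and return the address to jump to.
static code_fn
stub_hit(mini_branch_t *branch)
{
    puts("stub hit: compiling target and patching branch");
    branch->dst = compiled_target;
    branch->compiled = 1;
    return branch->dst;
}

// Stand-in for taking the branch: jump through dst, or fall back to the stub.
static void
take_branch(mini_branch_t *branch)
{
    code_fn target = branch->compiled ? branch->dst : stub_hit(branch);
    target();
}

int
main(void)
{
    mini_branch_t branch = { .dst = NULL, .compiled = 0 };
    take_branch(&branch); // first hit goes through the stub
    take_branch(&branch); // later hits jump straight to the code
    return 0;
}
```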
-
-static void
-gen_branch(
- jitstate_t *jit,
- const ctx_t *src_ctx,
- blockid_t target0,
- const ctx_t *ctx0,
- blockid_t target1,
- const ctx_t *ctx1,
- branchgen_fn gen_fn
-)
-{
- RUBY_ASSERT(target0.iseq != NULL);
-
- branch_t *branch = make_branch_entry(jit->block, src_ctx, gen_fn);
- branch->targets[0] = target0;
- branch->targets[1] = target1;
- branch->target_ctxs[0] = *ctx0;
- branch->target_ctxs[1] = ctx1? *ctx1:DEFAULT_CTX;
-
- // Get the branch targets or stubs
- branch->dst_addrs[0] = get_branch_target(target0, ctx0, branch, 0);
- branch->dst_addrs[1] = ctx1? get_branch_target(target1, ctx1, branch, 1):NULL;
-
- // Call the branch generation function
- branch->start_addr = cb_get_write_ptr(cb);
- regenerate_branch(cb, branch);
-}
-
-static void
-gen_jump_branch(codeblock_t *cb, uint8_t *target0, uint8_t *target1, uint8_t shape)
-{
- switch (shape) {
- case SHAPE_NEXT0:
- break;
-
- case SHAPE_NEXT1:
- RUBY_ASSERT(false);
- break;
-
- case SHAPE_DEFAULT:
- jmp_ptr(cb, target0);
- break;
- }
-}
-
-static void
-gen_direct_jump(
- jitstate_t *jit,
- const ctx_t *ctx,
- blockid_t target0
-)
-{
- RUBY_ASSERT(target0.iseq != NULL);
-
- branch_t *branch = make_branch_entry(jit->block, ctx, gen_jump_branch);
- branch->targets[0] = target0;
- branch->target_ctxs[0] = *ctx;
-
- block_t *p_block = find_block_version(target0, ctx);
-
- // If the version already exists
- if (p_block) {
- rb_darray_append(&p_block->incoming, branch);
-
- branch->dst_addrs[0] = p_block->start_addr;
- branch->blocks[0] = p_block;
- branch->shape = SHAPE_DEFAULT;
-
- // Call the branch generation function
- branch->start_addr = cb_get_write_ptr(cb);
- gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
- branch->end_addr = cb_get_write_ptr(cb);
- }
- else {
- // This NULL target address signals gen_block_version() to compile the
- // target block right after this one (fallthrough).
- branch->dst_addrs[0] = NULL;
- branch->shape = SHAPE_NEXT0;
- branch->start_addr = cb_get_write_ptr(cb);
- branch->end_addr = cb_get_write_ptr(cb);
- }
-}
-
-// Create a stub to force the code up to this point to be executed
-static void
-defer_compilation(
- jitstate_t *jit,
- ctx_t *cur_ctx
-)
-{
- //fprintf(stderr, "defer compilation at (%p, %d) depth=%d\n", block->blockid.iseq, insn_idx, cur_ctx->chain_depth);
-
- if (cur_ctx->chain_depth != 0) {
- rb_bug("double defer");
- }
-
- ctx_t next_ctx = *cur_ctx;
-
- if (next_ctx.chain_depth >= UINT8_MAX) {
- rb_bug("max block version chain depth reached");
- }
-
- next_ctx.chain_depth += 1;
-
- branch_t *branch = make_branch_entry(jit->block, cur_ctx, gen_jump_branch);
-
- // Get the branch targets or stubs
- branch->target_ctxs[0] = next_ctx;
- branch->targets[0] = (blockid_t){ jit->block->blockid.iseq, jit->insn_idx };
- branch->dst_addrs[0] = get_branch_target(branch->targets[0], &next_ctx, branch, 0);
-
- // Call the branch generation function
- codeblock_t *cb = jit->cb;
- branch->start_addr = cb_get_write_ptr(cb);
- gen_jump_branch(cb, branch->dst_addrs[0], NULL, SHAPE_DEFAULT);
- branch->end_addr = cb_get_write_ptr(cb);
-}
-
-// Remove all references to a block then free it.
-static void
-yjit_free_block(block_t *block)
-{
- yjit_unlink_method_lookup_dependency(block);
- yjit_block_assumptions_free(block);
-
- // Remove this block from the predecessor's targets
- rb_darray_for(block->incoming, incoming_idx) {
- // Branch from the predecessor to us
- branch_t *pred_branch = rb_darray_get(block->incoming, incoming_idx);
-
- // If this is us, nullify the target block
- for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
- if (pred_branch->blocks[succ_idx] == block) {
- pred_branch->blocks[succ_idx] = NULL;
- }
- }
- }
-
- // For each outgoing branch
- rb_darray_for(block->outgoing, branch_idx) {
- branch_t *out_branch = rb_darray_get(block->outgoing, branch_idx);
-
- // For each successor block
- for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
- block_t *succ = out_branch->blocks[succ_idx];
-
- if (succ == NULL)
- continue;
-
- // Remove this block from the successor's incoming list
- rb_darray_for(succ->incoming, incoming_idx) {
- branch_t *pred_branch = rb_darray_get(succ->incoming, incoming_idx);
- if (pred_branch == out_branch) {
- rb_darray_remove_unordered(succ->incoming, incoming_idx);
- break;
- }
- }
- }
-
- // Free the outgoing branch entry
- free(out_branch);
- }
-
- rb_darray_free(block->incoming);
- rb_darray_free(block->outgoing);
- rb_darray_free(block->gc_object_offsets);
-
- free(block);
-}
-
-// Remove a block version
-static void
-block_array_remove(rb_yjit_block_array_t block_array, block_t *block)
-{
- block_t **element;
- rb_darray_foreach(block_array, idx, element) {
- if (*element == block) {
- rb_darray_remove_unordered(block_array, idx);
- return;
- }
- }
-
- RUBY_ASSERT(false);
-}
-
-// Some runtime checks for integrity of a program location
-static void
-verify_blockid(const blockid_t blockid)
-{
- const rb_iseq_t *const iseq = blockid.iseq;
- RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
- RUBY_ASSERT_ALWAYS(blockid.idx < ISEQ_BODY(iseq)->iseq_size);
-}
-
-// Invalidate one specific block version
-static void
-invalidate_block_version(block_t *block)
-{
- ASSERT_vm_locking();
-
- // TODO: want to assert that all other ractors are stopped here. Can't patch
- // machine code that some other thread is running.
-
- verify_blockid(block->blockid);
-
- const rb_iseq_t *iseq = block->blockid.iseq;
-
- //fprintf(stderr, "invalidating block (%p, %d)\n", block->blockid.iseq, block->blockid.idx);
- //fprintf(stderr, "block=%p\n", block);
-
- // Remove this block from the version array
- rb_yjit_block_array_t versions = yjit_get_version_array(iseq, block->blockid.idx);
- block_array_remove(versions, block);
-
- // Get a pointer to the generated code for this block
- uint8_t *code_ptr = block->start_addr;
-
-    // Make the start of the block do an exit. This handles OOM situations
-    // and some cases where we can't efficiently patch incoming branches.
-    // Do this first, since if there is a fallthrough branch into this
-    // block, the patching loop below can overwrite the start of the block.
-    // In those situations, there are hopefully no jumps to the start of the block
-    // after patching, as the start of the block would be in the middle of something
- // generated by branch_t::gen_fn.
- {
- RUBY_ASSERT_ALWAYS(block->entry_exit && "block invalidation requires an exit");
- if (block->entry_exit == block->start_addr) {
- // Some blocks exit on entry. Patching a jump to the entry at the
- // entry makes an infinite loop.
- }
- else if (block->start_addr >= cb_get_ptr(cb, yjit_codepage_frozen_bytes)) { // Don't patch frozen code region
- // Patch in a jump to block->entry_exit.
- uint32_t cur_pos = cb->write_pos;
- cb_set_write_ptr(cb, block->start_addr);
- jmp_ptr(cb, block->entry_exit);
- RUBY_ASSERT_ALWAYS(cb_get_ptr(cb, cb->write_pos) < block->end_addr && "invalidation wrote past end of block");
- cb_set_pos(cb, cur_pos);
- }
- }
-
- // For each incoming branch
- rb_darray_for(block->incoming, incoming_idx) {
- branch_t *branch = rb_darray_get(block->incoming, incoming_idx);
- uint32_t target_idx = (branch->dst_addrs[0] == code_ptr)? 0:1;
- RUBY_ASSERT(branch->dst_addrs[target_idx] == code_ptr);
- RUBY_ASSERT(branch->blocks[target_idx] == block);
-
- // Mark this target as being a stub
- branch->blocks[target_idx] = NULL;
-
- // Don't patch frozen code region
- if (branch->start_addr < cb_get_ptr(cb, yjit_codepage_frozen_bytes)) {
- continue;
- }
-
- // Create a stub for this branch target
- uint8_t *branch_target = get_branch_target(
- block->blockid,
- &block->ctx,
- branch,
- target_idx
- );
-
- if (!branch_target) {
- // We were unable to generate a stub (e.g. OOM). Use the block's
- // exit instead of a stub for the block. It's important that we
- // still patch the branch in this situation so stubs are unique
- // to branches. Think about what could go wrong if we run out of
- // memory in the middle of this loop.
- branch_target = block->entry_exit;
- }
-
- branch->dst_addrs[target_idx] = branch_target;
-
- // Check if the invalidated block immediately follows
- bool target_next = (block->start_addr == branch->end_addr);
-
- if (target_next) {
-            // The new target will no longer be adjacent.
- // Note that we could be enlarging the branch and writing into the
- // start of the block being invalidated.
- branch->shape = SHAPE_DEFAULT;
- }
-
- // Rewrite the branch with the new jump target address
- regenerate_branch(cb, branch);
-
- if (target_next && branch->end_addr > block->end_addr) {
- fprintf(stderr, "branch_block_idx=%u block_idx=%u over=%td block_size=%td\n",
- branch->block->blockid.idx,
- block->blockid.idx,
- branch->end_addr - block->end_addr,
- block->end_addr - block->start_addr);
- yjit_print_iseq(branch->block->blockid.iseq);
- rb_bug("yjit invalidate rewrote branch past end of invalidated block");
- }
- }
-
- // Clear out the JIT func so that we can recompile later and so the
- // interpreter will run the iseq
-
-#if JIT_ENABLED
- // Only clear the jit_func when we're invalidating the JIT entry block.
- // We only support compiling iseqs from index 0 right now. So entry
- // points will always have an instruction index of 0. We'll need to
- // change this in the future when we support optional parameters because
- // they enter the function with a non-zero PC
- if (block->blockid.idx == 0) {
- ISEQ_BODY(iseq)->jit_func = 0;
- }
-#endif
-
- // TODO:
- // May want to recompile a new entry point (for interpreter entry blocks)
- // This isn't necessary for correctness
-
- // FIXME:
- // Call continuation addresses on the stack can also be atomically replaced by jumps going to the stub.
-
- yjit_free_block(block);
-
-#if YJIT_STATS
- yjit_runtime_counters.invalidation_count++;
-#endif
-
- cb_mark_all_executable(ocb);
- cb_mark_all_executable(cb);
-
- // fprintf(stderr, "invalidation done\n");
-}
-
-static void
-yjit_init_core(void)
-{
- gen_code_for_exit_from_stub();
-}
diff --git a/yjit_core.h b/yjit_core.h
deleted file mode 100644
index e38dd790ac..0000000000
--- a/yjit_core.h
+++ /dev/null
@@ -1,307 +0,0 @@
-#ifndef YJIT_CORE_H
-#define YJIT_CORE_H 1
-
-#include <stddef.h>
-#include <stdint.h>
-#include "yjit_asm.h"
-
-// Callee-saved regs
-#define REG_CFP R13
-#define REG_EC R12
-#define REG_SP RBX
-
-// Scratch registers used by YJIT
-#define REG0 RAX
-#define REG0_32 EAX
-#define REG0_8 AL
-#define REG1 RCX
-#define REG1_32 ECX
-
-// Maximum number of temp value types we keep track of
-#define MAX_TEMP_TYPES 8
-
-// Maximum number of local variable types we keep track of
-#define MAX_LOCAL_TYPES 8
-
-// Default versioning context (no type information)
-#define DEFAULT_CTX ( (ctx_t){ 0 } )
-
-enum yjit_type_enum
-{
- ETYPE_UNKNOWN = 0,
- ETYPE_NIL,
- ETYPE_TRUE,
- ETYPE_FALSE,
- ETYPE_FIXNUM,
- ETYPE_FLONUM,
- ETYPE_ARRAY,
- ETYPE_HASH,
- ETYPE_SYMBOL,
- ETYPE_STRING
-};
-
-// Represent the type of a value (local/stack/self) in YJIT
-typedef struct yjit_type_struct
-{
- // Value is definitely a heap object
- uint8_t is_heap : 1;
-
- // Value is definitely an immediate
- uint8_t is_imm : 1;
-
- // Specific value type, if known
- uint8_t type : 4;
-
-} val_type_t;
-STATIC_ASSERT(val_type_size, sizeof(val_type_t) == 1);
-
-// Unknown type, could be anything, all zeroes
-#define TYPE_UNKNOWN ( (val_type_t){ 0 } )
-
-// Could be any heap object
-#define TYPE_HEAP ( (val_type_t){ .is_heap = 1 } )
-
-// Could be any immediate
-#define TYPE_IMM ( (val_type_t){ .is_imm = 1 } )
-
-#define TYPE_NIL ( (val_type_t){ .is_imm = 1, .type = ETYPE_NIL } )
-#define TYPE_TRUE ( (val_type_t){ .is_imm = 1, .type = ETYPE_TRUE } )
-#define TYPE_FALSE ( (val_type_t){ .is_imm = 1, .type = ETYPE_FALSE } )
-#define TYPE_FIXNUM ( (val_type_t){ .is_imm = 1, .type = ETYPE_FIXNUM } )
-#define TYPE_FLONUM ( (val_type_t){ .is_imm = 1, .type = ETYPE_FLONUM } )
-#define TYPE_STATIC_SYMBOL ( (val_type_t){ .is_imm = 1, .type = ETYPE_SYMBOL } )
-#define TYPE_ARRAY ( (val_type_t){ .is_heap = 1, .type = ETYPE_ARRAY } )
-#define TYPE_HASH ( (val_type_t){ .is_heap = 1, .type = ETYPE_HASH } )
-#define TYPE_STRING ( (val_type_t){ .is_heap = 1, .type = ETYPE_STRING } )
-
-enum yjit_temp_loc
-{
- TEMP_STACK = 0,
- TEMP_SELF,
- TEMP_LOCAL, // Local with index
- //TEMP_CONST, // Small constant (0, 1, 2, Qnil, Qfalse, Qtrue)
-};
-
-// Potential mapping of a value on the temporary stack to
-// self, a local variable or constant so that we can track its type
-typedef struct yjit_temp_mapping
-{
- // Where/how is the value stored?
- uint8_t kind: 2;
-
-    // Index of the local variable,
- // or small non-negative constant in [0, 63]
- uint8_t idx : 6;
-
-} temp_mapping_t;
-STATIC_ASSERT(temp_mapping_size, sizeof(temp_mapping_t) == 1);
-
-// By default, temps are just temps on the stack.
-// The MAP_STACK name conflicts with an mmap flag, hence the #undef below. This is a struct instance,
-// so the compiler will check for wrong usage.
-#undef MAP_STACK
-#define MAP_STACK ( (temp_mapping_t) { 0 } )
-
-// Temp value is actually self
-#define MAP_SELF ( (temp_mapping_t) { .kind = TEMP_SELF } )
-
-// Represents both the type and mapping
-typedef struct {
- temp_mapping_t mapping;
- val_type_t type;
-} temp_type_mapping_t;
-STATIC_ASSERT(temp_type_mapping_size, sizeof(temp_type_mapping_t) == 2);
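
These one- and two-byte layouts are what keep `ctx_t` within the 32-byte budget asserted further down. A standalone check of the same bit-field packing, with the structs re-declared locally so the example compiles on its own:

```c
// Standalone check of the bit-field packing asserted above.
// The structs are re-declared here so the example compiles on its own (C11).
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint8_t is_heap : 1;
    uint8_t is_imm  : 1;
    uint8_t type    : 4;
} val_type_t;

typedef struct {
    uint8_t kind : 2; // stack / self / local
    uint8_t idx  : 6; // local index, 0..63
} temp_mapping_t;

typedef struct {
    temp_mapping_t mapping;
    val_type_t type;
} temp_type_mapping_t;

int
main(void)
{
    // Same invariants as the STATIC_ASSERTs in the header.
    static_assert(sizeof(val_type_t) == 1, "val_type_t fits in one byte");
    static_assert(sizeof(temp_mapping_t) == 1, "temp_mapping_t fits in one byte");
    static_assert(sizeof(temp_type_mapping_t) == 2, "pair fits in two bytes");

    temp_mapping_t m = { .kind = 2, .idx = 63 };
    printf("kind=%u idx=%u sizeof=%zu\n",
           (unsigned)m.kind, (unsigned)m.idx, sizeof m);
    return 0;
}
```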
-
-// Operand to a bytecode instruction
-typedef struct yjit_insn_opnd
-{
- // Indicates if the value is self
- bool is_self;
-
- // Index on the temporary stack (for stack operands only)
- uint16_t idx;
-
-} insn_opnd_t;
-
-#define OPND_SELF ( (insn_opnd_t){ .is_self = true } )
-#define OPND_STACK(stack_idx) ( (insn_opnd_t){ .is_self = false, .idx = stack_idx } )
-
-/**
-Code generation context
-Contains information we can use to optimize code
-*/
-typedef struct yjit_context
-{
- // Number of values currently on the temporary stack
- uint16_t stack_size;
-
- // Offset of the JIT SP relative to the interpreter SP
- // This represents how far the JIT's SP is from the "real" SP
- int16_t sp_offset;
-
- // Depth of this block in the sidechain (eg: inline-cache chain)
- uint8_t chain_depth;
-
-    // Local variable types we keep track of
- val_type_t local_types[MAX_LOCAL_TYPES];
-
- // Temporary variable types we keep track of
- val_type_t temp_types[MAX_TEMP_TYPES];
-
- // Type we track for self
- val_type_t self_type;
-
- // Mapping of temp stack entries to types we track
- temp_mapping_t temp_mapping[MAX_TEMP_TYPES];
-
-} ctx_t;
-STATIC_ASSERT(yjit_ctx_size, sizeof(ctx_t) <= 32);
-
-// Tuple of (iseq, idx) used to identify basic blocks
-typedef struct BlockId
-{
- // Instruction sequence
- const rb_iseq_t *iseq;
-
- // Index in the iseq where the block starts
- uint32_t idx;
-
-} blockid_t;
-
-// Null block id constant
-static const blockid_t BLOCKID_NULL = { 0, 0 };
-
-/// Branch code shape enumeration
-typedef enum branch_shape
-{
- SHAPE_NEXT0, // Target 0 is next
- SHAPE_NEXT1, // Target 1 is next
- SHAPE_DEFAULT // Neither target is next
-} branch_shape_t;
-
-// Branch code generation function signature
-typedef void (*branchgen_fn)(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape);
-
-/**
-Store info about an outgoing branch in a code segment
-Note: care must be taken to minimize the size of branch_t objects
-*/
-typedef struct yjit_branch_entry
-{
- // Block this is attached to
- struct yjit_block_version *block;
-
- // Positions where the generated code starts and ends
- uint8_t *start_addr;
- uint8_t *end_addr;
-
- // Context right after the branch instruction
- // Unused for now.
- // ctx_t src_ctx;
-
- // Branch target blocks and their contexts
- blockid_t targets[2];
- ctx_t target_ctxs[2];
- struct yjit_block_version *blocks[2];
-
- // Jump target addresses
- uint8_t *dst_addrs[2];
-
- // Branch code generation function
- branchgen_fn gen_fn;
-
- // Shape of the branch
- branch_shape_t shape : 2;
-
-} branch_t;
-
-// In case this block is invalidated, these two pieces of info
-// help to remove all pointers to this block in the system.
-typedef struct {
- VALUE receiver_klass;
- VALUE callee_cme;
-} cme_dependency_t;
-
-typedef rb_darray(cme_dependency_t) cme_dependency_array_t;
-
-typedef rb_darray(branch_t*) branch_array_t;
-
-typedef rb_darray(uint32_t) int32_array_t;
-
-/**
-Basic block version
-Represents a portion of an iseq compiled with a given context
-Note: care must be taken to minimize the size of block_t objects
-*/
-typedef struct yjit_block_version
-{
- // Bytecode sequence (iseq, idx) this is a version of
- blockid_t blockid;
-
- // Context at the start of the block
- ctx_t ctx;
-
- // Positions where the generated code starts and ends
- uint8_t *start_addr;
- uint8_t *end_addr;
-
- // List of incoming branches (from predecessors)
- branch_array_t incoming;
-
- // List of outgoing branches (to successors)
- // Note: these are owned by this block version
- branch_array_t outgoing;
-
- // Offsets for GC managed objects in the mainline code block
- int32_array_t gc_object_offsets;
-
- // CME dependencies of this block, to help to remove all pointers to this
- // block in the system.
- cme_dependency_array_t cme_dependencies;
-
- // Code address of an exit for `ctx` and `blockid`. Used for block
- // invalidation.
- uint8_t *entry_exit;
-
- // Index one past the last instruction in the iseq
- uint32_t end_idx;
-
-} block_t;
-
-// Code generation state
-typedef struct JITState
-{
- // Inline and outlined code blocks we are
- // currently generating code into
- codeblock_t* cb;
- codeblock_t* ocb;
-
- // Block version being compiled
- block_t *block;
-
- // Instruction sequence this is associated with
- const rb_iseq_t *iseq;
-
- // Index of the current instruction being compiled
- uint32_t insn_idx;
-
- // Opcode for the instruction being compiled
- int opcode;
-
- // PC of the instruction being compiled
- VALUE *pc;
-
- // Side exit to the instruction being compiled. See :side-exit:.
- uint8_t *side_exit_for_pc;
-
- // Execution context when compilation started
- // This allows us to peek at run-time values
- rb_execution_context_t *ec;
-
- // Whether we need to record the code address at
- // the end of this bytecode instruction for global invalidation
- bool record_boundary_patch_point;
-
-} jitstate_t;
-
-#endif // #ifndef YJIT_CORE_H
diff --git a/yjit_iface.c b/yjit_iface.c
deleted file mode 100644
index 636a39abdf..0000000000
--- a/yjit_iface.c
+++ /dev/null
@@ -1,1311 +0,0 @@
-// This file is a fragment of the yjit.o compilation unit. See yjit.c.
-#include "internal.h"
-#include "vm_sync.h"
-#include "vm_callinfo.h"
-#include "builtin.h"
-#include "gc.h"
-#include "iseq.h"
-#include "internal/compile.h"
-#include "internal/class.h"
-#include "yjit.h"
-#include "yjit_iface.h"
-#include "yjit_codegen.h"
-#include "yjit_core.h"
-#include "darray.h"
-
-#ifdef HAVE_LIBCAPSTONE
-#include <capstone/capstone.h>
-static VALUE cYjitDisasm;
-static VALUE cYjitDisasmInsn;
-#endif
-
-static VALUE mYjit;
-static VALUE cYjitBlock;
-
-#if YJIT_STATS
-static VALUE cYjitCodeComment;
-#endif
-
-#if YJIT_STATS
-extern const int rb_vm_max_insn_name_size;
-static int64_t exit_op_count[VM_INSTRUCTION_SIZE] = { 0 };
-#endif
-
-// Hash table of encoded instructions
-extern st_table *rb_encoded_insn_data;
-
-struct rb_yjit_options rb_yjit_opts;
-
-// Size of code pages to allocate
-#define CODE_PAGE_SIZE (16 * 1024)
-
-// How many code pages to allocate at once
-#define PAGES_PER_ALLOC 512
-
-static const rb_data_type_t yjit_block_type = {
- "YJIT/Block",
- {0, 0, 0, },
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
-};
-
-// Get the PC for a given index in an iseq
-static VALUE *
-yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx)
-{
- RUBY_ASSERT(iseq != NULL);
- RUBY_ASSERT(insn_idx < ISEQ_BODY(iseq)->iseq_size);
- VALUE *encoded = ISEQ_BODY(iseq)->iseq_encoded;
- VALUE *pc = &encoded[insn_idx];
- return pc;
-}
-
-// For debugging. Print the disassembly of an iseq.
-RBIMPL_ATTR_MAYBE_UNUSED()
-static void
-yjit_print_iseq(const rb_iseq_t *iseq)
-{
- char *ptr;
- long len;
- VALUE disassembly = rb_iseq_disasm(iseq);
- RSTRING_GETMEM(disassembly, ptr, len);
- fprintf(stderr, "%.*s\n", (int)len, ptr);
-}
-
-static int
-yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc)
-{
- const VALUE at_pc = *pc;
- if (FL_TEST_RAW((VALUE)iseq, ISEQ_TRANSLATED)) {
- return rb_vm_insn_addr2opcode((const void *)at_pc);
- }
- else {
- return (int)at_pc;
- }
-}
-
-// Verify that calling with cd on receiver goes to callee
-static void
-check_cfunc_dispatch(VALUE receiver, struct rb_callinfo *ci, void *callee, rb_callable_method_entry_t *compile_time_cme)
-{
- if (METHOD_ENTRY_INVALIDATED(compile_time_cme)) {
- rb_bug("yjit: output code uses invalidated cme %p", (void *)compile_time_cme);
- }
-
- bool callee_correct = false;
- const rb_callable_method_entry_t *cme = rb_callable_method_entry(CLASS_OF(receiver), vm_ci_mid(ci));
- if (cme->def->type == VM_METHOD_TYPE_CFUNC) {
- const rb_method_cfunc_t *cfunc = UNALIGNED_MEMBER_PTR(cme->def, body.cfunc);
- if ((void *)cfunc->func == callee) {
- callee_correct = true;
- }
- }
- if (!callee_correct) {
- rb_bug("yjit: output code calls wrong method");
- }
-}
-
-MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key);
-
-// GC root for interacting with the GC
-struct yjit_root_struct {
- int unused; // empty structs are not legal in C99
-};
-
-// Hash table of BOP blocks
-static st_table *blocks_assuming_bops;
-
-static bool
-assume_bop_not_redefined(jitstate_t *jit, int redefined_flag, enum ruby_basic_operators bop)
-{
- if (BASIC_OP_UNREDEFINED_P(bop, redefined_flag)) {
- RUBY_ASSERT(blocks_assuming_bops);
-
- jit_ensure_block_entry_exit(jit);
- st_insert(blocks_assuming_bops, (st_data_t)jit->block, 0);
- return true;
- }
- else {
- return false;
- }
-}
-
-// Map klass => id_table[mid, set of blocks]
-// While a block `b` is in the table, b->callee_cme == rb_callable_method_entry(klass, mid).
-// See assume_method_lookup_stable()
-static st_table *method_lookup_dependency;
-
-// For adding to method_lookup_dependency data with st_update
-struct lookup_dependency_insertion {
- block_t *block;
- ID mid;
-};
-
-// Map cme => set of blocks
-// See assume_method_lookup_stable()
-static st_table *cme_validity_dependency;
-
-static int
-add_cme_validity_dependency_i(st_data_t *key, st_data_t *value, st_data_t new_block, int existing)
-{
- st_table *block_set;
- if (existing) {
- block_set = (st_table *)*value;
- }
- else {
- // Make the set and put it into cme_validity_dependency
- block_set = st_init_numtable();
- *value = (st_data_t)block_set;
- }
-
- // Put block into set
- st_insert(block_set, new_block, 1);
-
- return ST_CONTINUE;
-}
-
-static int
-add_lookup_dependency_i(st_data_t *key, st_data_t *value, st_data_t data, int existing)
-{
- struct lookup_dependency_insertion *info = (void *)data;
-
- // Find or make an id table
- struct rb_id_table *id2blocks;
- if (existing) {
- id2blocks = (void *)*value;
- }
- else {
- // Make an id table and put it into the st_table
- id2blocks = rb_id_table_create(1);
- *value = (st_data_t)id2blocks;
- }
-
- // Find or make a block set
- st_table *block_set;
- {
- VALUE blocks;
- if (rb_id_table_lookup(id2blocks, info->mid, &blocks)) {
- // Take existing set
- block_set = (st_table *)blocks;
- }
- else {
- // Make new block set and put it into the id table
- block_set = st_init_numtable();
- rb_id_table_insert(id2blocks, info->mid, (VALUE)block_set);
- }
- }
-
- st_insert(block_set, (st_data_t)info->block, 1);
-
- return ST_CONTINUE;
-}
-
-// Remember that a block assumes that
-// `rb_callable_method_entry(receiver_klass, cme->called_id) == cme` and that
-// `cme` is valid.
-// When either of these assumptions becomes invalid, rb_yjit_method_lookup_change() or
-// rb_yjit_cme_invalidate() invalidates the block.
-//
-// @raise NoMemoryError
-static void
-assume_method_lookup_stable(VALUE receiver_klass, const rb_callable_method_entry_t *cme, jitstate_t *jit)
-{
- RUBY_ASSERT(cme_validity_dependency);
- RUBY_ASSERT(method_lookup_dependency);
- RUBY_ASSERT(rb_callable_method_entry(receiver_klass, cme->called_id) == cme);
- RUBY_ASSERT_ALWAYS(RB_TYPE_P(receiver_klass, T_CLASS) || RB_TYPE_P(receiver_klass, T_ICLASS));
- RUBY_ASSERT_ALWAYS(!rb_objspace_garbage_object_p(receiver_klass));
-
- jit_ensure_block_entry_exit(jit);
-
- block_t *block = jit->block;
-
- cme_dependency_t cme_dep = { receiver_klass, (VALUE)cme };
- rb_darray_append(&block->cme_dependencies, cme_dep);
-
- st_update(cme_validity_dependency, (st_data_t)cme, add_cme_validity_dependency_i, (st_data_t)block);
-
- struct lookup_dependency_insertion info = { block, cme->called_id };
- st_update(method_lookup_dependency, (st_data_t)receiver_klass, add_lookup_dependency_i, (st_data_t)&info);
-}
-
-static st_table *blocks_assuming_single_ractor_mode;
-
-// Can raise NoMemoryError.
-RBIMPL_ATTR_NODISCARD()
-static bool
-assume_single_ractor_mode(jitstate_t *jit)
-{
- if (rb_multi_ractor_p()) return false;
-
- jit_ensure_block_entry_exit(jit);
-
- st_insert(blocks_assuming_single_ractor_mode, (st_data_t)jit->block, 1);
- return true;
-}
-
-static st_table *blocks_assuming_stable_global_constant_state;
-
-// Assume that the global constant state has not changed since call to this function.
-// Can raise NoMemoryError.
-static void
-assume_stable_global_constant_state(jitstate_t *jit)
-{
- jit_ensure_block_entry_exit(jit);
- st_insert(blocks_assuming_stable_global_constant_state, (st_data_t)jit->block, 1);
-}
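
All of the `blocks_assuming_*` tables follow the same pattern: a block registers itself against an assumption at compile time, and every registered block is invalidated when that assumption is broken (see rb_yjit_constant_state_changed() and friends below). A standalone sketch of the pattern, with a fixed array standing in for the st_table set:

```c
// Sketch of the assumption-tracking pattern used by the blocks_assuming_*
// tables. The "set" is a fixed array here, not an st_table.
#include <stddef.h>
#include <stdio.h>

typedef struct { int id; int valid; } mini_block_t;

#define MAX_DEPS 16
static mini_block_t *constant_state_deps[MAX_DEPS];
static size_t num_deps;

// Stand-in for assume_stable_global_constant_state(): register the block.
static void
assume_stable_constants(mini_block_t *block)
{
    if (num_deps < MAX_DEPS) constant_state_deps[num_deps++] = block;
}

// Stand-in for rb_yjit_constant_state_changed(): invalidate every
// registered block, then empty the set.
static void
constant_state_changed(void)
{
    for (size_t i = 0; i < num_deps; i++) {
        constant_state_deps[i]->valid = 0; // invalidate_block_version()
    }
    num_deps = 0;
}

int
main(void)
{
    mini_block_t a = { 1, 1 }, b = { 2, 1 };
    assume_stable_constants(&a);
    assume_stable_constants(&b);

    constant_state_changed();
    printf("block 1 valid=%d, block 2 valid=%d\n", a.valid, b.valid); // 0, 0
    return 0;
}
```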
-
-static int
-mark_and_pin_keys_i(st_data_t k, st_data_t v, st_data_t ignore)
-{
- rb_gc_mark((VALUE)k);
-
- return ST_CONTINUE;
-}
-
-// GC callback during mark phase
-static void
-yjit_root_mark(void *ptr)
-{
- if (method_lookup_dependency) {
- // TODO: This is a leak. Unused blocks linger in the table forever, preventing the
- // callee class they speculate on from being collected.
- // We could do a bespoke weak reference scheme on classes similar to
- // the interpreter's call cache. See finalizer for T_CLASS and cc_table_free().
- st_foreach(method_lookup_dependency, mark_and_pin_keys_i, 0);
- }
-
- if (cme_validity_dependency) {
- // Why not let the GC move the cme keys in this table?
- // Because this is basically a compare_by_identity Hash.
- // If a key moves, we would need to reinsert it into the table so it is rehashed.
-        // That is tricky to do, especially as it could trigger allocation which could
- // trigger GC. Not sure if it is okay to trigger GC while the GC is updating
- // references.
- st_foreach(cme_validity_dependency, mark_and_pin_keys_i, 0);
- }
-}
-
-static void
-yjit_root_free(void *ptr)
-{
- // Do nothing. The root lives as long as the process.
-}
-
-static size_t
-yjit_root_memsize(const void *ptr)
-{
- // Count off-gc-heap allocation size of the dependency table
- return st_memsize(method_lookup_dependency); // TODO: more accurate accounting
-}
-
-// GC callback during compaction
-static void
-yjit_root_update_references(void *ptr)
-{
-}
-
-// Custom type for interacting with the GC
-// TODO: make this write barrier protected
-static const rb_data_type_t yjit_root_type = {
- "yjit_root",
- {yjit_root_mark, yjit_root_free, yjit_root_memsize, yjit_root_update_references},
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
-};
-
-// st_table iterator for invalidating blocks that are keys to the table.
-static int
-block_set_invalidate_i(st_data_t key, st_data_t v, st_data_t ignore)
-{
- block_t *version = (block_t *)key;
-
- // Thankfully, st_table supports deleting while iterating.
- invalidate_block_version(version);
-
- return ST_CONTINUE;
-}
-
-// Callback for when rb_callable_method_entry(klass, mid) is going to change.
-// Invalidate blocks that assume stable method lookup of `mid` in `klass` when this happens.
-void
-rb_yjit_method_lookup_change(VALUE klass, ID mid)
-{
- if (!method_lookup_dependency) return;
-
- RB_VM_LOCK_ENTER();
-
- st_data_t image;
- st_data_t key = (st_data_t)klass;
- if (st_lookup(method_lookup_dependency, key, &image)) {
- struct rb_id_table *id2blocks = (void *)image;
- VALUE blocks;
-
- // Invalidate all blocks in method_lookup_dependency[klass][mid]
- if (rb_id_table_lookup(id2blocks, mid, &blocks)) {
- rb_id_table_delete(id2blocks, mid);
-
- st_table *block_set = (st_table *)blocks;
-
-#if YJIT_STATS
- yjit_runtime_counters.invalidate_method_lookup += block_set->num_entries;
-#endif
-
- st_foreach(block_set, block_set_invalidate_i, 0);
-
- st_free_table(block_set);
- }
- }
-
- RB_VM_LOCK_LEAVE();
-}
-
-// Callback for when a cme becomes invalid.
-// Invalidate all blocks that depend on cme being valid.
-void
-rb_yjit_cme_invalidate(VALUE cme)
-{
- if (!cme_validity_dependency) return;
-
- RUBY_ASSERT(IMEMO_TYPE_P(cme, imemo_ment));
-
- RB_VM_LOCK_ENTER();
-
- // Delete the block set from the table
- st_data_t cme_as_st_data = (st_data_t)cme;
- st_data_t blocks;
- if (st_delete(cme_validity_dependency, &cme_as_st_data, &blocks)) {
- st_table *block_set = (st_table *)blocks;
-
-#if YJIT_STATS
- yjit_runtime_counters.invalidate_method_lookup += block_set->num_entries;
-#endif
-
- // Invalidate each block
- st_foreach(block_set, block_set_invalidate_i, 0);
-
- st_free_table(block_set);
- }
-
- RB_VM_LOCK_LEAVE();
-}
-
-// For dealing with refinements
-void
-rb_yjit_invalidate_all_method_lookup_assumptions(void)
-{
- // It looks like Module#using actually doesn't need to invalidate all the
- // method caches, so we do nothing here for now.
-}
-
-// Remove a block from the method lookup dependency table
-static void
-remove_method_lookup_dependency(block_t *block, VALUE receiver_klass, const rb_callable_method_entry_t *callee_cme)
-{
- RUBY_ASSERT(receiver_klass);
- RUBY_ASSERT(callee_cme); // callee_cme should be set when receiver_klass is set
-
- st_data_t image;
- st_data_t key = (st_data_t)receiver_klass;
- if (st_lookup(method_lookup_dependency, key, &image)) {
- struct rb_id_table *id2blocks = (void *)image;
- ID mid = callee_cme->called_id;
-
- // Find block set
- VALUE blocks;
- if (rb_id_table_lookup(id2blocks, mid, &blocks)) {
- st_table *block_set = (st_table *)blocks;
-
- // Remove block from block set
- st_data_t block_as_st_data = (st_data_t)block;
- (void)st_delete(block_set, &block_as_st_data, NULL);
-
- if (block_set->num_entries == 0) {
- // Block set now empty. Remove from id table.
- rb_id_table_delete(id2blocks, mid);
- st_free_table(block_set);
- }
- }
- }
-}
-
-// Remove a block from cme_validity_dependency
-static void
-remove_cme_validity_dependency(block_t *block, const rb_callable_method_entry_t *callee_cme)
-{
- RUBY_ASSERT(callee_cme);
-
- st_data_t blocks;
- if (st_lookup(cme_validity_dependency, (st_data_t)callee_cme, &blocks)) {
- st_table *block_set = (st_table *)blocks;
-
- st_data_t block_as_st_data = (st_data_t)block;
- (void)st_delete(block_set, &block_as_st_data, NULL);
- }
-}
-
-static void
-yjit_unlink_method_lookup_dependency(block_t *block)
-{
- cme_dependency_t *cme_dep;
- rb_darray_foreach(block->cme_dependencies, cme_dependency_idx, cme_dep) {
- remove_method_lookup_dependency(block, cme_dep->receiver_klass, (const rb_callable_method_entry_t *)cme_dep->callee_cme);
- remove_cme_validity_dependency(block, (const rb_callable_method_entry_t *)cme_dep->callee_cme);
- }
- rb_darray_free(block->cme_dependencies);
-}
-
-static void
-yjit_block_assumptions_free(block_t *block)
-{
- st_data_t as_st_data = (st_data_t)block;
- if (blocks_assuming_stable_global_constant_state) {
- st_delete(blocks_assuming_stable_global_constant_state, &as_st_data, NULL);
- }
-
- if (blocks_assuming_single_ractor_mode) {
- st_delete(blocks_assuming_single_ractor_mode, &as_st_data, NULL);
- }
-
- if (blocks_assuming_bops) {
- st_delete(blocks_assuming_bops, &as_st_data, NULL);
- }
-}
-
-typedef VALUE (*yjit_func_t)(rb_execution_context_t *, rb_control_frame_t *);
-
-bool
-rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec)
-{
-#if (OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE) && JIT_ENABLED
- bool success = true;
- RB_VM_LOCK_ENTER();
- rb_vm_barrier();
-
- // Compile a block version starting at the first instruction
- uint8_t *code_ptr = gen_entry_point(iseq, 0, ec);
-
- if (code_ptr) {
- ISEQ_BODY(iseq)->jit_func = (yjit_func_t)code_ptr;
- }
- else {
- ISEQ_BODY(iseq)->jit_func = 0;
- success = false;
- }
-
- RB_VM_LOCK_LEAVE();
- return success;
-#else
- return false;
-#endif
-}
-
-struct yjit_block_itr {
- const rb_iseq_t *iseq;
- VALUE list;
-};
-
-/* Get a list of the YJIT blocks associated with `rb_iseq` */
-static VALUE
-yjit_blocks_for(VALUE mod, VALUE rb_iseq)
-{
- if (CLASS_OF(rb_iseq) != rb_cISeq) {
- return rb_ary_new();
- }
-
- const rb_iseq_t *iseq = rb_iseqw_to_iseq(rb_iseq);
-
- VALUE all_versions = rb_ary_new();
- rb_darray_for(ISEQ_BODY(iseq)->yjit_blocks, version_array_idx) {
- rb_yjit_block_array_t versions = rb_darray_get(ISEQ_BODY(iseq)->yjit_blocks, version_array_idx);
-
- rb_darray_for(versions, block_idx) {
- block_t *block = rb_darray_get(versions, block_idx);
-
-            // FIXME: The object created here can outlive the block itself
- VALUE rb_block = TypedData_Wrap_Struct(cYjitBlock, &yjit_block_type, block);
- rb_ary_push(all_versions, rb_block);
- }
- }
-
- return all_versions;
-}
-
-/* Get the address of the code associated with a YJIT::Block */
-static VALUE
-block_address(VALUE self)
-{
- block_t * block;
- TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
- return LONG2NUM((intptr_t)block->start_addr);
-}
-
-/* Get the machine code for YJIT::Block as a binary string */
-static VALUE
-block_code(VALUE self)
-{
- block_t * block;
- TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
-
- return (VALUE)rb_str_new(
- (const char*)block->start_addr,
- block->end_addr - block->start_addr
- );
-}
-
-/* Get the start index in the Instruction Sequence that corresponds to this
- * YJIT::Block */
-static VALUE
-iseq_start_index(VALUE self)
-{
- block_t * block;
- TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
-
- return INT2NUM(block->blockid.idx);
-}
-
-/* Get the end index in the Instruction Sequence that corresponds to this
- * YJIT::Block */
-static VALUE
-iseq_end_index(VALUE self)
-{
- block_t * block;
- TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
-
- return INT2NUM(block->end_idx);
-}
-
-/* Called when a basic operation is redefined */
-void
-rb_yjit_bop_redefined(VALUE klass, const rb_method_entry_t *me, enum ruby_basic_operators bop)
-{
- if (blocks_assuming_bops) {
-#if YJIT_STATS
- yjit_runtime_counters.invalidate_bop_redefined += blocks_assuming_bops->num_entries;
-#endif
-
- st_foreach(blocks_assuming_bops, block_set_invalidate_i, 0);
- }
-}
-
-/* Called when the constant state changes */
-void
-rb_yjit_constant_state_changed(void)
-{
- if (blocks_assuming_stable_global_constant_state) {
-#if YJIT_STATS
- yjit_runtime_counters.constant_state_bumps++;
- yjit_runtime_counters.invalidate_constant_state_bump += blocks_assuming_stable_global_constant_state->num_entries;
-#endif
-
- st_foreach(blocks_assuming_stable_global_constant_state, block_set_invalidate_i, 0);
- }
-}
-
-// Callback from the opt_setinlinecache instruction in the interpreter.
-// Invalidate the block for the matching opt_getinlinecache so it can regenerate code
-// using the new value in the constant cache.
-void
-rb_yjit_constant_ic_update(const rb_iseq_t *const iseq, IC ic)
-{
- if (!rb_yjit_enabled_p()) return;
-
- // We can't generate code in these situations, so no need to invalidate.
- // See gen_opt_getinlinecache.
- if (ic->entry->ic_cref || rb_multi_ractor_p()) {
- return;
- }
-
- RB_VM_LOCK_ENTER();
- rb_vm_barrier(); // Stop other ractors since we are going to patch machine code.
- {
- const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
- VALUE *code = body->iseq_encoded;
- const unsigned get_insn_idx = ic->get_insn_idx;
-
- // This should come from a running iseq, so direct threading translation
- // should have been done
- RUBY_ASSERT(FL_TEST((VALUE)iseq, ISEQ_TRANSLATED));
- RUBY_ASSERT(get_insn_idx < body->iseq_size);
- RUBY_ASSERT(rb_vm_insn_addr2insn((const void *)code[get_insn_idx]) == BIN(opt_getinlinecache));
-
- // Find the matching opt_getinlinecache and invalidate all the blocks there
- RUBY_ASSERT(insn_op_type(BIN(opt_getinlinecache), 1) == TS_IC);
- if (ic == (IC)code[get_insn_idx + 1 + 1]) {
- rb_yjit_block_array_t getinlinecache_blocks = yjit_get_version_array(iseq, get_insn_idx);
-
- // Bound the loop below to be defensive
- const size_t initial_version_count = rb_darray_size(getinlinecache_blocks);
- for (size_t iteration=0; iteration<initial_version_count; ++iteration) {
- getinlinecache_blocks = yjit_get_version_array(iseq, get_insn_idx);
-
- if (rb_darray_size(getinlinecache_blocks) > 0) {
- block_t *block = rb_darray_get(getinlinecache_blocks, 0);
- invalidate_block_version(block);
-#if YJIT_STATS
- yjit_runtime_counters.invalidate_constant_ic_fill++;
-#endif
- }
- else {
- break;
- }
- }
-
- // All versions at get_insn_idx should now be gone
- RUBY_ASSERT(0 == rb_darray_size(yjit_get_version_array(iseq, get_insn_idx)));
- }
- else {
- RUBY_ASSERT(false && "ic->get_insn_idx not set properly");
- }
- }
- RB_VM_LOCK_LEAVE();
-}
-
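The invalidation loop above re-fetches the version array on every pass (invalidation mutates it) and caps the iteration count at the initial number of versions as a defensive bound. A minimal standalone sketch of the same bounded-drain pattern, with a toy `block_list` in place of the darray:

```c
#include <stddef.h>
#include <stdio.h>

#define MAX_BLOCKS 8

struct block_list {
    int blocks[MAX_BLOCKS];
    size_t len;
};

static void
invalidate_first(struct block_list *list)
{
    /* Remove element 0, mirroring how invalidating a block removes it
     * from its version array. */
    for (size_t i = 1; i < list->len; i++) list->blocks[i - 1] = list->blocks[i];
    list->len--;
}

int
main(void)
{
    struct block_list list = { {10, 11, 12}, 3 };

    /* Bound the loop by the initial count so a misbehaving callee
     * cannot make it spin forever. */
    const size_t initial_count = list.len;
    for (size_t iteration = 0; iteration < initial_count; ++iteration) {
        if (list.len > 0) {
            invalidate_first(&list);
        }
        else {
            break;
        }
    }
    printf("versions left: %zu\n", list.len); /* 0 */
    return 0;
}
```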
-void
-rb_yjit_before_ractor_spawn(void)
-{
- if (blocks_assuming_single_ractor_mode) {
-#if YJIT_STATS
- yjit_runtime_counters.invalidate_ractor_spawn += blocks_assuming_single_ractor_mode->num_entries;
-#endif
-
- st_foreach(blocks_assuming_single_ractor_mode, block_set_invalidate_i, 0);
- }
-}
-
-#ifdef HAVE_LIBCAPSTONE
-static const rb_data_type_t yjit_disasm_type = {
- "YJIT/Disasm",
- {0, (void(*)(void *))cs_close, 0, },
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
-};
-
-static VALUE
-yjit_disasm_init(VALUE klass)
-{
- csh * handle;
- VALUE disasm = TypedData_Make_Struct(klass, csh, &yjit_disasm_type, handle);
- if (cs_open(CS_ARCH_X86, CS_MODE_64, handle) != CS_ERR_OK) {
- rb_raise(rb_eRuntimeError, "failed to make Capstone handle");
- }
- return disasm;
-}
-
-static VALUE
-yjit_disasm(VALUE self, VALUE code, VALUE from)
-{
- size_t count;
- csh * handle;
- cs_insn *insns;
-
- TypedData_Get_Struct(self, csh, &yjit_disasm_type, handle);
- count = cs_disasm(*handle, (uint8_t*)StringValuePtr(code), RSTRING_LEN(code), NUM2ULL(from), 0, &insns);
- VALUE insn_list = rb_ary_new_capa(count);
-
- for (size_t i = 0; i < count; i++) {
- VALUE vals = rb_ary_new_from_args(3, LONG2NUM(insns[i].address),
- rb_str_new2(insns[i].mnemonic),
- rb_str_new2(insns[i].op_str));
- rb_ary_push(insn_list, rb_struct_alloc(cYjitDisasmInsn, vals));
- }
- cs_free(insns, count);
- return insn_list;
-}
-#endif
-
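The disassembler wrapper above is a thin layer over libcapstone: open a handle for x86-64, call `cs_disasm`, and read back the address, mnemonic, and operand string of each instruction. A standalone sketch of that Capstone usage (link with `-lcapstone`; the input bytes are an arbitrary example):

```c
#include <stdio.h>
#include <inttypes.h>
#include <capstone/capstone.h>

int
main(void)
{
    /* mov eax, 1; ret */
    const uint8_t code[] = { 0xb8, 0x01, 0x00, 0x00, 0x00, 0xc3 };

    csh handle;
    if (cs_open(CS_ARCH_X86, CS_MODE_64, &handle) != CS_ERR_OK) {
        fprintf(stderr, "failed to make Capstone handle\n");
        return 1;
    }

    cs_insn *insns;
    /* 0x1000 is a made-up load address used only for display purposes. */
    size_t count = cs_disasm(handle, code, sizeof(code), 0x1000, 0, &insns);
    for (size_t i = 0; i < count; i++) {
        printf("0x%" PRIx64 ": %s %s\n",
               insns[i].address, insns[i].mnemonic, insns[i].op_str);
    }

    if (count > 0) cs_free(insns, count);
    cs_close(&handle);
    return 0;
}
```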
-// Primitive called in yjit.rb. Export all machine code comments as a Ruby array.
-static VALUE
-comments_for(rb_execution_context_t *ec, VALUE self, VALUE start_address, VALUE end_address)
-{
- VALUE comment_array = rb_ary_new();
-#if RUBY_DEBUG
- uint8_t *start = (void *)NUM2ULL(start_address);
- uint8_t *end = (void *)NUM2ULL(end_address);
-
- rb_darray_for(yjit_code_comments, i) {
- struct yjit_comment comment = rb_darray_get(yjit_code_comments, i);
- uint8_t *comment_pos = cb_get_ptr(cb, comment.offset);
-
- if (comment_pos >= end) {
- break;
- }
- if (comment_pos >= start) {
- VALUE vals = rb_ary_new_from_args(
- 2,
- LL2NUM((long long) comment_pos),
- rb_str_new_cstr(comment.comment)
- );
- rb_ary_push(comment_array, rb_struct_alloc(cYjitCodeComment, vals));
- }
- }
-
-#endif // if RUBY_DEBUG
-
- return comment_array;
-}
-
-static VALUE
-yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self)
-{
- return RBOOL(YJIT_STATS && rb_yjit_opts.gen_stats);
-}
-
-// Primitive called in yjit.rb. Export all YJIT statistics as a Ruby hash.
-static VALUE
-get_yjit_stats(rb_execution_context_t *ec, VALUE self)
-{
- // Return Qnil if YJIT isn't enabled
- if (cb == NULL) {
- return Qnil;
- }
-
- VALUE hash = rb_hash_new();
-
- RB_VM_LOCK_ENTER();
-
- {
- VALUE key = ID2SYM(rb_intern("inline_code_size"));
- VALUE value = LL2NUM((long long)cb->write_pos);
- rb_hash_aset(hash, key, value);
-
- key = ID2SYM(rb_intern("outlined_code_size"));
- value = LL2NUM((long long)ocb->write_pos);
- rb_hash_aset(hash, key, value);
- }
-
-#if YJIT_STATS
- if (rb_yjit_opts.gen_stats) {
- // Indicate that the complete set of stats is available
- rb_hash_aset(hash, ID2SYM(rb_intern("all_stats")), Qtrue);
-
- int64_t *counter_reader = (int64_t *)&yjit_runtime_counters;
- int64_t *counter_reader_end = &yjit_runtime_counters.last_member;
-
- // For each counter in yjit_counter_names, add that counter as
- // a key/value pair.
-
- // Iterate through comma separated counter name list
- char *name_reader = yjit_counter_names;
- char *counter_name_end = yjit_counter_names + sizeof(yjit_counter_names);
- while (name_reader < counter_name_end && counter_reader < counter_reader_end) {
- if (*name_reader == ',' || *name_reader == ' ') {
- name_reader++;
- continue;
- }
-
- // Compute length of counter name
- int name_len;
- char *name_end;
- {
- name_end = strchr(name_reader, ',');
- if (name_end == NULL) break;
- name_len = (int)(name_end - name_reader);
- }
-
- // Put counter into hash
- VALUE key = ID2SYM(rb_intern2(name_reader, name_len));
- VALUE value = LL2NUM((long long)*counter_reader);
- rb_hash_aset(hash, key, value);
-
- counter_reader++;
- name_reader = name_end;
- }
-
- // For each entry in exit_op_count, add a stats entry with key "exit_INSTRUCTION_NAME"
- // and the value is the count of side exits for that instruction.
-
- char key_string[rb_vm_max_insn_name_size + 6]; // Leave room for "exit_" and a final NUL
- for (int i = 0; i < VM_INSTRUCTION_SIZE; i++) {
- const char *i_name = insn_name(i); // Look up Ruby's NUL-terminated insn name string
- snprintf(key_string, rb_vm_max_insn_name_size + 6, "%s%s", "exit_", i_name);
-
- VALUE key = ID2SYM(rb_intern(key_string));
- VALUE value = LL2NUM((long long)exit_op_count[i]);
- rb_hash_aset(hash, key, value);
- }
- }
-#endif
-
- RB_VM_LOCK_LEAVE();
-
- return hash;
-}
-
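The stats export above walks a comma-separated list of counter names in lockstep with a struct of `int64_t` counters, so names and values stay paired without a separate lookup table. A standalone sketch of that parsing loop, with made-up counter names and values:

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main(void)
{
    /* Same layout idea as the counter-name string and counter struct above. */
    static char counter_names[] = "counter_a, counter_b, counter_c,";
    int64_t counters[] = { 3, 14, 159 };

    int64_t *counter_reader = counters;
    int64_t *counter_end = counters + sizeof(counters) / sizeof(counters[0]);

    char *name_reader = counter_names;
    char *list_end = counter_names + sizeof(counter_names);
    while (name_reader < list_end && counter_reader < counter_end) {
        /* Skip separators between names. */
        if (*name_reader == ',' || *name_reader == ' ') {
            name_reader++;
            continue;
        }

        /* The name runs up to the next comma. */
        char *name_end = strchr(name_reader, ',');
        if (name_end == NULL) break;

        /* Print "name => value" in place of inserting into a Ruby hash. */
        printf("%.*s => %lld\n",
               (int)(name_end - name_reader), name_reader, (long long)*counter_reader);

        counter_reader++;
        name_reader = name_end;
    }
    return 0;
}
```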
-// Primitive called in yjit.rb. Zero out all the counters.
-static VALUE
-reset_stats_bang(rb_execution_context_t *ec, VALUE self)
-{
-#if YJIT_STATS
- memset(&exit_op_count, 0, sizeof(exit_op_count));
- memset(&yjit_runtime_counters, 0, sizeof(yjit_runtime_counters));
-#endif // if YJIT_STATS
- return Qnil;
-}
-
-// Primitive for yjit.rb. For testing running out of executable memory
-static VALUE
-simulate_oom_bang(rb_execution_context_t *ec, VALUE self)
-{
- if (RUBY_DEBUG && cb && ocb) {
- // Only simulate in debug builds for paranoia.
- cb_set_pos(cb, cb->mem_size-1);
- cb_set_pos(ocb, ocb->mem_size-1);
- }
- return Qnil;
-}
-
-#include "yjit.rbinc"
-
-#if YJIT_STATS
-void
-rb_yjit_collect_vm_usage_insn(int insn)
-{
- yjit_runtime_counters.vm_insns_count++;
-}
-
-void
-rb_yjit_collect_binding_alloc(void)
-{
- yjit_runtime_counters.binding_allocations++;
-}
-
-void
-rb_yjit_collect_binding_set(void)
-{
- yjit_runtime_counters.binding_set++;
-}
-
-static const VALUE *
-yjit_count_side_exit_op(const VALUE *exit_pc)
-{
- int insn = rb_vm_insn_addr2opcode((const void *)*exit_pc);
- exit_op_count[insn]++;
- return exit_pc; // This function must return exit_pc!
-}
-#endif
-
-void
-rb_yjit_iseq_mark(const struct rb_iseq_constant_body *body)
-{
- rb_darray_for(body->yjit_blocks, version_array_idx) {
- rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
-
- rb_darray_for(version_array, block_idx) {
- block_t *block = rb_darray_get(version_array, block_idx);
-
- rb_gc_mark_movable((VALUE)block->blockid.iseq);
-
- cme_dependency_t *cme_dep;
- rb_darray_foreach(block->cme_dependencies, cme_dependency_idx, cme_dep) {
- rb_gc_mark_movable(cme_dep->receiver_klass);
- rb_gc_mark_movable(cme_dep->callee_cme);
- }
-
- // Mark outgoing branch entries
- rb_darray_for(block->outgoing, branch_idx) {
- branch_t *branch = rb_darray_get(block->outgoing, branch_idx);
- for (int i = 0; i < 2; ++i) {
- rb_gc_mark_movable((VALUE)branch->targets[i].iseq);
- }
- }
-
- // Walk over references to objects in generated code.
- uint32_t *offset_element;
- rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
- uint32_t offset_to_value = *offset_element;
- uint8_t *value_address = cb_get_ptr(cb, offset_to_value);
-
- VALUE object;
- memcpy(&object, value_address, SIZEOF_VALUE);
- rb_gc_mark_movable(object);
- }
-
- // Mark the machine code page this block lives on
- //rb_gc_mark_movable(block->code_page);
- }
- }
-}
-
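Object references live as immediates inside generated machine code, so the marking code above reads them with `memcpy` rather than a typed load, which sidesteps alignment and strict-aliasing issues. A standalone sketch of that pattern, with `uintptr_t` standing in for `VALUE`:

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main(void)
{
    uint8_t code[32] = {0};
    uintptr_t object = 0xdeadbeef;

    /* Write the "object reference" at an unaligned offset, the way an
     * immediate embedded in an instruction would sit. */
    uint32_t offset_to_value = 3;
    memcpy(code + offset_to_value, &object, sizeof(object));

    /* Read it back the same way the GC mark/update hooks do. */
    uintptr_t loaded;
    memcpy(&loaded, code + offset_to_value, sizeof(loaded));
    printf("0x%lx\n", (unsigned long)loaded);
    return 0;
}
```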
-void
-rb_yjit_iseq_update_references(const struct rb_iseq_constant_body *body)
-{
- rb_vm_barrier();
-
- rb_darray_for(body->yjit_blocks, version_array_idx) {
- rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
-
- rb_darray_for(version_array, block_idx) {
- block_t *block = rb_darray_get(version_array, block_idx);
-
- block->blockid.iseq = (const rb_iseq_t *)rb_gc_location((VALUE)block->blockid.iseq);
-
- cme_dependency_t *cme_dep;
- rb_darray_foreach(block->cme_dependencies, cme_dependency_idx, cme_dep) {
- cme_dep->receiver_klass = rb_gc_location(cme_dep->receiver_klass);
- cme_dep->callee_cme = rb_gc_location(cme_dep->callee_cme);
- }
-
- // Update outgoing branch entries
- rb_darray_for(block->outgoing, branch_idx) {
- branch_t *branch = rb_darray_get(block->outgoing, branch_idx);
- for (int i = 0; i < 2; ++i) {
- branch->targets[i].iseq = (const void *)rb_gc_location((VALUE)branch->targets[i].iseq);
- }
- }
-
- // Walk over references to objects in generated code.
- uint32_t *offset_element;
- rb_darray_foreach(block->gc_object_offsets, offset_idx, offset_element) {
- uint32_t offset_to_value = *offset_element;
- uint8_t *value_address = cb_get_ptr(cb, offset_to_value);
-
- VALUE object;
- memcpy(&object, value_address, SIZEOF_VALUE);
- VALUE possibly_moved = rb_gc_location(object);
- // Only write when the VALUE moves, to be CoW friendly.
- if (possibly_moved != object) {
- // Possibly unlock the page we need to update
- cb_mark_position_writeable(cb, offset_to_value);
-
- // Object could cross a page boundary, so unlock there as well
- cb_mark_position_writeable(cb, offset_to_value + SIZEOF_VALUE - 1);
- memcpy(value_address, &possibly_moved, SIZEOF_VALUE);
- }
- }
-
- // Update the machine code page this block lives on
- //block->code_page = rb_gc_location(block->code_page);
- }
- }
-
- /* If YJIT isn't initialized, then cb or ocb could be NULL. */
- if (cb) {
- cb_mark_all_executable(cb);
- }
-
- if (ocb) {
- cb_mark_all_executable(ocb);
- }
-}
-
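Updating references in place requires temporarily making the affected code writable and then restoring execute permission, which is what the `cb_mark_position_writeable`/`cb_mark_all_executable` calls above do. A standalone sketch of that W^X dance using plain `mmap`/`mprotect` (Linux/POSIX assumptions, illustrative bytes):

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
    size_t page_size = (size_t)sysconf(_SC_PAGESIZE);
    uint8_t *page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (page == MAP_FAILED) return 1;

    /* Pretend code was written here, then lock the page execute-only-ish. */
    memset(page, 0x90, 16); /* NOPs */
    mprotect(page, page_size, PROT_READ | PROT_EXEC);

    /* Unlock before patching, as the update hook does per position. */
    mprotect(page, page_size, PROT_READ | PROT_WRITE);
    page[8] = 0xc3; /* patch in a RET */

    /* Re-lock everything at the end. */
    mprotect(page, page_size, PROT_READ | PROT_EXEC);

    printf("patched byte: 0x%02x\n", page[8]);
    munmap(page, page_size);
    return 0;
}
```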
-// Free the yjit resources associated with an iseq
-void
-rb_yjit_iseq_free(const struct rb_iseq_constant_body *body)
-{
- rb_darray_for(body->yjit_blocks, version_array_idx) {
- rb_yjit_block_array_t version_array = rb_darray_get(body->yjit_blocks, version_array_idx);
-
- rb_darray_for(version_array, block_idx) {
- block_t *block = rb_darray_get(version_array, block_idx);
- yjit_free_block(block);
- }
-
- rb_darray_free(version_array);
- }
-
- rb_darray_free(body->yjit_blocks);
-}
-
-// Struct representing a code page
-typedef struct code_page_struct
-{
- // Chunk of executable memory
- uint8_t* mem_block;
-
- // Size of the executable memory chunk
- uint32_t page_size;
-
- // Inline code block
- codeblock_t cb;
-
- // Outlined code block
- codeblock_t ocb;
-
- // Next node in the free list (private)
- struct code_page_struct* _next;
-
-} code_page_t;
-
-// Current code page we are writing machine code into
-static VALUE yjit_cur_code_page = Qfalse;
-
-// Head of the list of free code pages
-static code_page_t *code_page_freelist = NULL;
-
-// Free a code page, add it to the free list
-static void
-yjit_code_page_free(void *voidp)
-{
- code_page_t* code_page = (code_page_t*)voidp;
- code_page->_next = code_page_freelist;
- code_page_freelist = code_page;
-}
-
-// Custom type for interacting with the GC
-static const rb_data_type_t yjit_code_page_type = {
- "yjit_code_page",
- {NULL, yjit_code_page_free, NULL, NULL},
- 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
-};
-
-// Allocate a code page and wrap it into a Ruby object owned by the GC
-static VALUE
-rb_yjit_code_page_alloc(void)
-{
- // If the free list is empty
- if (!code_page_freelist) {
- // Allocate many pages at once
- uint8_t* code_chunk = alloc_exec_mem(PAGES_PER_ALLOC * CODE_PAGE_SIZE);
-
- // Push pages onto the free list in reverse order so they are handed out in ascending address order
- for (int i = PAGES_PER_ALLOC - 1; i >= 0; --i) {
- code_page_t* code_page = malloc(sizeof(code_page_t));
- code_page->mem_block = code_chunk + i * CODE_PAGE_SIZE;
- assert ((intptr_t)code_page->mem_block % CODE_PAGE_SIZE == 0);
- code_page->page_size = CODE_PAGE_SIZE;
- code_page->_next = code_page_freelist;
- code_page_freelist = code_page;
- }
- }
-
- code_page_t* code_page = code_page_freelist;
- code_page_freelist = code_page_freelist->_next;
-
- // Create a Ruby wrapper struct for the code page object
- VALUE wrapper = TypedData_Wrap_Struct(0, &yjit_code_page_type, code_page);
-
- // Write a pointer to the wrapper object on the page
- *((VALUE*)code_page->mem_block) = wrapper;
-
- // Initialize the code blocks
- uint8_t* page_start = code_page->mem_block + sizeof(VALUE);
- uint8_t* page_end = code_page->mem_block + CODE_PAGE_SIZE;
- uint32_t halfsize = (uint32_t)(page_end - page_start) / 2;
- cb_init(&code_page->cb, page_start, halfsize);
- cb_init(&code_page->ocb, page_start + halfsize, halfsize);
-
- return wrapper;
-}
-
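The allocator above grabs several code pages per executable-memory allocation and chains the spares through an intrusive free list, so most allocations are just a pointer pop. A standalone sketch of that free-list scheme, with `malloc` standing in for executable memory and made-up sizes:

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define PAGE_SIZE_BYTES 4096
#define PAGES_PER_CHUNK 4

typedef struct page_node {
    uint8_t *mem;
    struct page_node *next;
} page_node_t;

static page_node_t *freelist = NULL;

static void
refill_freelist(void)
{
    /* One big chunk, carved into fixed-size pages. */
    uint8_t *chunk = malloc(PAGES_PER_CHUNK * PAGE_SIZE_BYTES);

    /* Push in reverse so pages pop off in ascending address order. */
    for (int i = PAGES_PER_CHUNK - 1; i >= 0; --i) {
        page_node_t *node = malloc(sizeof(page_node_t));
        node->mem = chunk + i * PAGE_SIZE_BYTES;
        node->next = freelist;
        freelist = node;
    }
}

static page_node_t *
page_alloc(void)
{
    if (!freelist) refill_freelist();
    page_node_t *node = freelist;
    freelist = node->next;
    return node;
}

static void
page_free(page_node_t *node)
{
    node->next = freelist;
    freelist = node;
}

int
main(void)
{
    page_node_t *a = page_alloc();
    page_node_t *b = page_alloc();
    printf("consecutive pages: %d\n", b->mem - a->mem == PAGE_SIZE_BYTES);
    page_free(a);
    page_free(b);
    return 0;
}
```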
-// Unwrap the Ruby object representing a code page
-static code_page_t *
-rb_yjit_code_page_unwrap(VALUE cp_obj)
-{
- code_page_t * code_page;
- TypedData_Get_Struct(cp_obj, code_page_t, &yjit_code_page_type, code_page);
- return code_page;
-}
-
-// Get the code page wrapper object for a code pointer
-static VALUE
-rb_yjit_code_page_from_ptr(uint8_t* code_ptr)
-{
- VALUE* page_start = (VALUE*)((intptr_t)code_ptr & ~(CODE_PAGE_SIZE - 1));
- VALUE wrapper = *page_start;
- return wrapper;
-}
-
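Because code pages are aligned to their power-of-two size and store the wrapper `VALUE` in their first word, any code pointer can be mapped back to its page by masking off the low bits, as the function above does. A tiny sketch of that masking arithmetic with an arbitrary example address:

```c
#include <stdio.h>
#include <stdint.h>

#define CODE_PAGE_SIZE 4096 /* must be a power of two */

int
main(void)
{
    uintptr_t some_code_ptr = 0x7f0000003a90;
    /* Clear the low bits to land on the page start. */
    uintptr_t page_start = some_code_ptr & ~((uintptr_t)CODE_PAGE_SIZE - 1);
    printf("0x%lx -> 0x%lx\n",
           (unsigned long)some_code_ptr, (unsigned long)page_start);
    return 0;
}
```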
-// Get the inline code block corresponding to a code pointer
-static void
-yjit_get_cb(codeblock_t* cb, uint8_t* code_ptr)
-{
- VALUE page_wrapper = rb_yjit_code_page_from_ptr(code_ptr);
- code_page_t *code_page = rb_yjit_code_page_unwrap(page_wrapper);
-
- // A pointer to the page wrapper object is written at the start of the code page
- uint8_t* mem_block = code_page->mem_block + sizeof(VALUE);
- uint32_t mem_size = (code_page->page_size/2) - sizeof(VALUE);
- RUBY_ASSERT(mem_block);
-
- // Map the code block to this memory region
- cb_init(cb, mem_block, mem_size);
-}
-
-// Get the outlined code block corresponding to a code pointer
-static void
-yjit_get_ocb(codeblock_t* cb, uint8_t* code_ptr)
-{
- VALUE page_wrapper = rb_yjit_code_page_from_ptr(code_ptr);
- code_page_t *code_page = rb_yjit_code_page_unwrap(page_wrapper);
-
- // A pointer to the page wrapper object is written at the start of the code page
- uint8_t* mem_block = code_page->mem_block + (code_page->page_size/2);
- uint32_t mem_size = code_page->page_size/2;
- RUBY_ASSERT(mem_block);
-
- // Map the code block to this memory region
- cb_init(cb, mem_block, mem_size);
-}
-
-// Get the current code page or allocate a new one
-static VALUE
-yjit_get_code_page(uint32_t cb_bytes_needed, uint32_t ocb_bytes_needed)
-{
- // If this is the first code page
- if (yjit_cur_code_page == Qfalse) {
- yjit_cur_code_page = rb_yjit_code_page_alloc();
- }
-
- // Get the current code page
- code_page_t *code_page = rb_yjit_code_page_unwrap(yjit_cur_code_page);
-
- // Compute how many bytes are left in the code blocks
- uint32_t cb_bytes_left = code_page->cb.mem_size - code_page->cb.write_pos;
- uint32_t ocb_bytes_left = code_page->ocb.mem_size - code_page->ocb.write_pos;
- RUBY_ASSERT_ALWAYS(cb_bytes_needed <= code_page->cb.mem_size);
- RUBY_ASSERT_ALWAYS(ocb_bytes_needed <= code_page->ocb.mem_size);
-
- // If there's enough space left in the current code page
- if (cb_bytes_needed <= cb_bytes_left && ocb_bytes_needed <= ocb_bytes_left) {
- return yjit_cur_code_page;
- }
-
- // Allocate a new code page
- yjit_cur_code_page = rb_yjit_code_page_alloc();
- code_page_t *new_code_page = rb_yjit_code_page_unwrap(yjit_cur_code_page);
-
- // Jump to the new code page
- jmp_ptr(&code_page->cb, cb_get_ptr(&new_code_page->cb, 0));
-
- return yjit_cur_code_page;
-}
-
-bool
-rb_yjit_enabled_p(void)
-{
- return rb_yjit_opts.yjit_enabled;
-}
-
-unsigned
-rb_yjit_call_threshold(void)
-{
- return rb_yjit_opts.call_threshold;
-}
-
-# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
-
-/**
- * call-seq: block.id -> unique_id
- *
- * Returns a unique integer ID for the block. For example:
- *
- * blocks = blocks_for(iseq)
- * blocks.group_by(&:id)
- */
-static VALUE
-block_id(VALUE self)
-{
- block_t * block;
- TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
- return PTR2NUM(block);
-}
-
-/**
- * call-seq: block.outgoing_ids -> list
- *
- * Returns a list of outgoing ids for the current block. This list can be used
- * in conjunction with Block#id to construct a graph of block objects.
- */
-static VALUE
-outgoing_ids(VALUE self)
-{
- block_t * block;
- TypedData_Get_Struct(self, block_t, &yjit_block_type, block);
-
- VALUE ids = rb_ary_new();
-
- rb_darray_for(block->outgoing, branch_idx) {
- branch_t *out_branch = rb_darray_get(block->outgoing, branch_idx);
-
- for (size_t succ_idx = 0; succ_idx < 2; succ_idx++) {
- block_t *succ = out_branch->blocks[succ_idx];
-
- if (succ == NULL)
- continue;
-
- rb_ary_push(ids, PTR2NUM(succ));
- }
-
- }
-
- return ids;
-}
-
-// Can raise RuntimeError
-void
-rb_yjit_init(struct rb_yjit_options *options)
-{
- if (!YJIT_SUPPORTED_P || !JIT_ENABLED) {
- return;
- }
-
- rb_yjit_opts = *options;
- rb_yjit_opts.yjit_enabled = true;
-
- rb_yjit_opts.gen_stats = rb_yjit_opts.gen_stats || getenv("RUBY_YJIT_STATS");
-
-#if !YJIT_STATS
- if (rb_yjit_opts.gen_stats) {
- rb_warning("--yjit-stats requires that Ruby is compiled with CPPFLAGS='-DYJIT_STATS=1' or CPPFLAGS='-DRUBY_DEBUG=1'");
- }
-#endif
-
- // Normalize command-line options to default values
- if (rb_yjit_opts.exec_mem_size < 1) {
- rb_yjit_opts.exec_mem_size = 256;
- }
- if (rb_yjit_opts.call_threshold < 1) {
- rb_yjit_opts.call_threshold = YJIT_DEFAULT_CALL_THRESHOLD;
- }
- if (rb_yjit_opts.max_versions < 1) {
- rb_yjit_opts.max_versions = 4;
- }
-
- // If type propagation is disabled, max 1 version per block
- if (rb_yjit_opts.no_type_prop) {
- rb_yjit_opts.max_versions = 1;
- }
-
- blocks_assuming_stable_global_constant_state = st_init_numtable();
- blocks_assuming_single_ractor_mode = st_init_numtable();
- blocks_assuming_bops = st_init_numtable();
-
- yjit_init_codegen();
- yjit_init_core();
-
- // YJIT Ruby module
- mYjit = rb_define_module_under(rb_cRubyVM, "YJIT");
- rb_define_module_function(mYjit, "blocks_for", yjit_blocks_for, 1);
-
- // YJIT::Block (block version, code block)
- cYjitBlock = rb_define_class_under(mYjit, "Block", rb_cObject);
- rb_undef_alloc_func(cYjitBlock);
- rb_define_method(cYjitBlock, "address", block_address, 0);
- rb_define_method(cYjitBlock, "id", block_id, 0);
- rb_define_method(cYjitBlock, "code", block_code, 0);
- rb_define_method(cYjitBlock, "iseq_start_index", iseq_start_index, 0);
- rb_define_method(cYjitBlock, "iseq_end_index", iseq_end_index, 0);
- rb_define_method(cYjitBlock, "outgoing_ids", outgoing_ids, 0);
-
- // YJIT disassembler interface
-#ifdef HAVE_LIBCAPSTONE
- cYjitDisasm = rb_define_class_under(mYjit, "Disasm", rb_cObject);
- rb_define_alloc_func(cYjitDisasm, yjit_disasm_init);
- rb_define_method(cYjitDisasm, "disasm", yjit_disasm, 2);
- cYjitDisasmInsn = rb_struct_define_under(cYjitDisasm, "Insn", "address", "mnemonic", "op_str", NULL);
-#if RUBY_DEBUG
- cYjitCodeComment = rb_struct_define_under(cYjitDisasm, "Comment", "address", "comment", NULL);
-#endif
-#endif
-
- // Make dependency tables
- method_lookup_dependency = st_init_numtable();
- cme_validity_dependency = st_init_numtable();
-
- // Initialize the GC hooks
- struct yjit_root_struct *root;
- VALUE yjit_root = TypedData_Make_Struct(0, struct yjit_root_struct, &yjit_root_type, root);
- rb_gc_register_mark_object(yjit_root);
-
- (void)yjit_get_cb;
- (void)yjit_get_ocb;
- (void)yjit_get_code_page;
-}
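Option normalization above follows a simple rule: a boolean can be switched on by either the command line or an environment variable, and numeric options that were left unset (below 1) fall back to defaults. A standalone sketch of that pattern, with illustrative option fields and default values:

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct options {
    bool gen_stats;
    int exec_mem_size;   /* in MiB */
    int call_threshold;
};

static void
normalize_options(struct options *opts)
{
    /* CLI flag OR environment variable can enable stats collection. */
    opts->gen_stats = opts->gen_stats || getenv("RUBY_YJIT_STATS") != NULL;

    /* Unset numeric options fall back to defaults. */
    if (opts->exec_mem_size < 1) opts->exec_mem_size = 256;
    if (opts->call_threshold < 1) opts->call_threshold = 10;
}

int
main(void)
{
    struct options opts = { false, 0, 0 };
    normalize_options(&opts);
    printf("stats=%d mem=%d threshold=%d\n",
           opts.gen_stats, opts.exec_mem_size, opts.call_threshold);
    return 0;
}
```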
diff --git a/yjit_iface.h b/yjit_iface.h
deleted file mode 100644
index 7fed846b4d..0000000000
--- a/yjit_iface.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//
-// These are definitions YJIT uses to interface with the CRuby codebase,
-// but which are only used internally by YJIT.
-//
-
-#ifndef YJIT_IFACE_H
-#define YJIT_IFACE_H 1
-
-#include "ruby/internal/config.h"
-#include "ruby_assert.h" // for RUBY_DEBUG
-#include "yjit.h" // for YJIT_STATS
-#include "vm_core.h"
-#include "yjit_core.h"
-
-#ifndef YJIT_DEFAULT_CALL_THRESHOLD
-# define YJIT_DEFAULT_CALL_THRESHOLD 10
-#endif
-
-RUBY_EXTERN struct rb_yjit_options rb_yjit_opts;
-
-static VALUE *yjit_iseq_pc_at_idx(const rb_iseq_t *iseq, uint32_t insn_idx);
-static int yjit_opcode_at_pc(const rb_iseq_t *iseq, const VALUE *pc);
-static void yjit_print_iseq(const rb_iseq_t *iseq);
-
-#if YJIT_STATS
-// this function *must* return the passed exit_pc
-static const VALUE *yjit_count_side_exit_op(const VALUE *exit_pc);
-#endif
-
-static void yjit_unlink_method_lookup_dependency(block_t *block);
-static void yjit_block_assumptions_free(block_t *block);
-
-static VALUE yjit_get_code_page(uint32_t cb_bytes_needed, uint32_t ocb_bytes_needed);
-//code_page_t *rb_yjit_code_page_unwrap(VALUE cp_obj);
-//void rb_yjit_get_cb(codeblock_t* cb, uint8_t* code_ptr);
-//void rb_yjit_get_ocb(codeblock_t* cb, uint8_t* code_ptr);
-
-#endif // #ifndef YJIT_IFACE_H
diff --git a/yjit_utils.c b/yjit_utils.c
deleted file mode 100644
index cbc50a4932..0000000000
--- a/yjit_utils.c
+++ /dev/null
@@ -1,109 +0,0 @@
-// This file is a fragment of the yjit.o compilation unit. See yjit.c.
-
-// Save caller-save registers on the stack before a C call
-static void
-push_regs(codeblock_t *cb)
-{
- push(cb, RAX);
- push(cb, RCX);
- push(cb, RDX);
- push(cb, RSI);
- push(cb, RDI);
- push(cb, R8);
- push(cb, R9);
- push(cb, R10);
- push(cb, R11);
- pushfq(cb);
-}
-
-// Restore caller-save registers from the stack after a C call
-static void
-pop_regs(codeblock_t *cb)
-{
- popfq(cb);
- pop(cb, R11);
- pop(cb, R10);
- pop(cb, R9);
- pop(cb, R8);
- pop(cb, RDI);
- pop(cb, RSI);
- pop(cb, RDX);
- pop(cb, RCX);
- pop(cb, RAX);
-}
-
-static void
-print_int_cfun(int64_t val)
-{
- fprintf(stderr, "%lld\n", (long long int)val);
-}
-
-RBIMPL_ATTR_MAYBE_UNUSED()
-static void
-print_int(codeblock_t *cb, x86opnd_t opnd)
-{
- push_regs(cb);
-
- if (opnd.num_bits < 64 && opnd.type != OPND_IMM)
- movsx(cb, RDI, opnd);
- else
- mov(cb, RDI, opnd);
-
- // Call the print function
- mov(cb, RAX, const_ptr_opnd((void*)&print_int_cfun));
- call(cb, RAX);
-
- pop_regs(cb);
-}
-
-static void
-print_ptr_cfun(void *val)
-{
- fprintf(stderr, "%p\n", val);
-}
-
-RBIMPL_ATTR_MAYBE_UNUSED()
-static void
-print_ptr(codeblock_t *cb, x86opnd_t opnd)
-{
- assert (opnd.num_bits == 64);
-
- push_regs(cb);
-
- mov(cb, RDI, opnd);
- mov(cb, RAX, const_ptr_opnd((void*)&print_ptr_cfun));
- call(cb, RAX);
-
- pop_regs(cb);
-}
-
-static void
-print_str_cfun(const char *str)
-{
- fprintf(stderr, "%s\n", str);
-}
-
-// Print a constant string to stderr
-static void
-print_str(codeblock_t *cb, const char *str)
-{
- //as.comment("printStr(\"" ~ str ~ "\")");
- size_t len = strlen(str);
-
- push_regs(cb);
-
- // Load the string address and jump over the string data
- lea(cb, RDI, mem_opnd(8, RIP, 5));
- jmp32(cb, (int32_t)len + 1);
-
- // Write the string chars and a null terminator
- for (size_t i = 0; i < len; ++i)
- cb_write_byte(cb, (uint8_t)str[i]);
- cb_write_byte(cb, 0);
-
- // Call the print function
- mov(cb, RAX, const_ptr_opnd((void*)&print_str_cfun));
- call(cb, RAX);
-
- pop_regs(cb);
-}
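`print_str` embeds the string bytes directly in the instruction stream: a RIP-relative `lea` loads their address, and a `jmp` of `len + 1` bytes skips over the data (including its NUL terminator) so execution never falls into it. A small sketch of that layout arithmetic, assuming the 5-byte `jmp rel32` encoding the code above relies on:

```c
#include <stdio.h>
#include <string.h>

int
main(void)
{
    const char *str = "hi";
    size_t len = strlen(str);

    size_t jmp_size = 5;        /* E9 + 4-byte relative displacement */
    size_t data_size = len + 1; /* string bytes + NUL terminator */

    /* lea RDI, [RIP + jmp_size]: RIP points just past the lea (i.e. at the
     * jmp), so skipping the jmp lands on the inline string data. */
    printf("lea displacement: %zu\n", jmp_size);

    /* jmp32 hops over exactly the inline data. */
    printf("jmp displacement: %zu\n", data_size);
    return 0;
}
```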