Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add create_mcjit_execution_engine_with_memory_manager for custom MCJIT memory management #566

Merged
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub mod debug_info;
pub mod execution_engine;
pub mod intrinsics;
pub mod memory_buffer;
pub mod memory_manager;
#[deny(missing_docs)]
pub mod module;
pub mod object_file;
Expand Down
180 changes: 180 additions & 0 deletions src/memory_manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
use llvm_sys::prelude::LLVMBool;

/// A trait for user-defined memory management in MCJIT.
///
/// Implementors can override how LLVM's MCJIT engine allocates memory for code
/// and data sections. This is sometimes needed for:
/// - custom allocators,
/// - sandboxed or restricted environments,
/// - capturing stack map sections (e.g., for garbage collection),
/// - or other specialized JIT memory management requirements.
///
/// The `Debug` supertrait is required because [`MemoryManagerAdapter`], which
/// stores the boxed manager, derives `Debug`.
///
/// # StackMap and GC Integration
///
/// By examining the `section_name` argument in
/// [`allocate_data_section`](McjitMemoryManager::allocate_data_section), you
/// can detect sections such as `.llvm_stackmaps` (on ELF) or `__llvm_stackmaps`
/// (on Mach-O). Recording the location of these sections may be useful for
/// custom garbage collectors. For more information, refer to the [LLVM
/// StackMaps documentation](https://llvm.org/docs/StackMaps.html#stack-map-section).
///
/// Typically, on Darwin (Mach-O), the stack map section name is `__llvm_stackmaps`,
/// and on Linux (ELF), it is `.llvm_stackmaps`.
pub trait McjitMemoryManager: std::fmt::Debug {
/// Allocates a block of memory for a code section.
///
/// # Parameters
///
/// * `size` - The size in bytes for the code section.
/// * `alignment` - The required alignment in bytes.
/// * `section_id` - A numeric ID that LLVM uses to identify this section.
/// * `section_name` - A name for this section, if provided by LLVM. The
///   adapter passes an empty string when LLVM supplies a null pointer or a
///   name that is not valid UTF-8.
///
/// # Returns
///
/// Returns a pointer to the allocated memory. Implementors must ensure it is
/// at least `size` bytes long and meets `alignment` requirements. The pointer
/// is handed back to LLVM unchanged.
fn allocate_code_section(
&mut self,
size: libc::uintptr_t,
alignment: libc::c_uint,
section_id: libc::c_uint,
section_name: &str,
) -> *mut u8;

/// Allocates a block of memory for a data section.
///
/// # Parameters
///
/// * `size` - The size in bytes for the data section.
/// * `alignment` - The required alignment in bytes.
/// * `section_id` - A numeric ID that LLVM uses to identify this section.
/// * `section_name` - A name for this section, if provided by LLVM. The
///   adapter passes an empty string when LLVM supplies a null pointer or a
///   name that is not valid UTF-8.
/// * `is_read_only` - Whether this data section should be read-only. The
///   adapter derives it from LLVM's `LLVMBool` (any non-zero value is `true`).
///
/// # Returns
///
/// Returns a pointer to the allocated memory. Implementors must ensure it is
/// at least `size` bytes long and meets `alignment` requirements. The pointer
/// is handed back to LLVM unchanged.
fn allocate_data_section(
&mut self,
size: libc::uintptr_t,
alignment: libc::c_uint,
section_id: libc::c_uint,
section_name: &str,
is_read_only: bool,
) -> *mut u8;

/// Finalizes memory permissions for all allocated sections.
///
/// This is called once all sections have been allocated. Implementors can set
/// permissions such as making code sections executable or data sections
/// read-only.
///
/// # Errors
///
/// If any error occurs (for example, failing to set page permissions),
/// return an `Err(String)`. The adapter then signals failure to LLVM and
/// reports the message back as a C string.
fn finalize_memory(&mut self) -> Result<(), String>;

/// Cleans up or deallocates resources before the memory manager is destroyed.
///
/// This is called when LLVM has finished using the memory manager. Any
/// additional allocations or references should be released here if needed.
/// The crate's destroy callback invokes this method and then drops the
/// adapter that owns the boxed manager, so the implementor's own `Drop`
/// (if any) runs right afterwards.
fn destroy(&mut self);
}

/// Holds a boxed `McjitMemoryManager` and passes it to LLVM as an opaque pointer.
///
/// A `Box<dyn McjitMemoryManager>` is a fat pointer and cannot be squeezed into
/// a single `*mut c_void`. Wrapping it in this struct and boxing the struct
/// yields one thin pointer that can be handed to LLVM and cast back later.
///
/// LLVM calls into the adapter using the extern "C" function pointers defined below;
/// each of them casts the opaque pointer back to `MemoryManagerAdapter` and
/// delegates to the boxed memory manager.
#[derive(Debug)]
pub struct MemoryManagerAdapter {
/// The user-supplied memory manager that every callback delegates to.
pub memory_manager: Box<dyn McjitMemoryManager>,
}

// ------ Extern "C" Adapters ------

/// FFI trampoline for [`McjitMemoryManager::allocate_code_section`].
///
/// LLVM invokes this callback with the raw `opaque` pointer. The pointer is
/// reinterpreted as a [`MemoryManagerAdapter`], the section name is converted
/// to a `&str` (empty when null or not valid UTF-8), and the request is
/// forwarded to the wrapped memory manager. Its result is returned unchanged.
pub(crate) extern "C" fn allocate_code_section_adapter(
    opaque: *mut libc::c_void,
    size: libc::uintptr_t,
    alignment: libc::c_uint,
    section_id: libc::c_uint,
    section_name: *const libc::c_char,
) -> *mut u8 {
    // SAFETY: `opaque` is expected to be the live pointer produced by
    // `Box::into_raw` on a `Box<MemoryManagerAdapter>`.
    let state = unsafe { &mut *opaque.cast::<MemoryManagerAdapter>() };
    // SAFETY: `section_name` is either null or a NUL-terminated string owned
    // by LLVM for the duration of this call.
    let name = unsafe { c_str_to_str(section_name) };

    let manager = &mut state.memory_manager;
    manager.allocate_code_section(size, alignment, section_id, name)
}

/// FFI trampoline for [`McjitMemoryManager::allocate_data_section`].
///
/// Works like the code-section adapter, with one extra argument: LLVM passes
/// the read-only flag as an `LLVMBool`, which is treated as `true` for any
/// non-zero value before being forwarded to the wrapped memory manager.
pub(crate) extern "C" fn allocate_data_section_adapter(
    opaque: *mut libc::c_void,
    size: libc::uintptr_t,
    alignment: libc::c_uint,
    section_id: libc::c_uint,
    section_name: *const libc::c_char,
    is_read_only: LLVMBool,
) -> *mut u8 {
    // SAFETY: `opaque` is expected to be the live pointer produced by
    // `Box::into_raw` on a `Box<MemoryManagerAdapter>`.
    let state = unsafe { &mut *opaque.cast::<MemoryManagerAdapter>() };
    // SAFETY: `section_name` is either null or a NUL-terminated string owned
    // by LLVM for the duration of this call.
    let name = unsafe { c_str_to_str(section_name) };
    let read_only = is_read_only != 0;

    let manager = &mut state.memory_manager;
    manager.allocate_data_section(size, alignment, section_id, name, read_only)
}

/// Adapter for `finalize_memory`.
///
/// If an error is returned, the message is converted into a C string and set in `err_msg_out`.
pub(crate) extern "C" fn finalize_memory_adapter(
opaque: *mut libc::c_void,
err_msg_out: *mut *mut libc::c_char,
) -> libc::c_int {
let adapter = unsafe { &mut *(opaque as *mut MemoryManagerAdapter) };
match adapter.memory_manager.finalize_memory() {
Ok(()) => 0,
Err(e) => {
let cstring = std::ffi::CString::new(e).unwrap_or_default();
unsafe {
*err_msg_out = cstring.into_raw();
}
1
},
}
}

/// FFI trampoline for [`McjitMemoryManager::destroy`].
///
/// LLVM calls this once it no longer needs the memory manager. Ownership of
/// the adapter is taken back from the raw pointer, the user's `destroy` hook
/// runs, and then the adapter (together with the boxed memory manager) is
/// dropped, releasing its heap allocation.
pub(crate) extern "C" fn destroy_adapter(opaque: *mut libc::c_void) {
    let raw = opaque.cast::<MemoryManagerAdapter>();

    // SAFETY: `opaque` must be the pointer obtained from `Box::into_raw` on a
    // `Box<MemoryManagerAdapter>`, and must not be used again after this call.
    let mut owned = unsafe { Box::from_raw(raw) };

    // Let the implementor release its own resources first.
    owned.memory_manager.destroy();

    // Free the adapter and the boxed memory manager.
    drop(owned);
}

/// Borrows a C string as a Rust `&str`, falling back to `""`.
///
/// A null `ptr` yields an empty string, as does a string whose bytes are not
/// valid UTF-8.
///
/// # Safety
///
/// When `ptr` is non-null it must point to a NUL-terminated string that stays
/// valid and unmodified for the whole of the caller-chosen lifetime `'a`.
unsafe fn c_str_to_str<'a>(ptr: *const libc::c_char) -> &'a str {
    if ptr.is_null() {
        return "";
    }

    // SAFETY: `ptr` is non-null, and the caller guarantees it addresses a
    // NUL-terminated string that outlives `'a`.
    let raw = unsafe { std::ffi::CStr::from_ptr(ptr) };

    match raw.to_str() {
        Ok(text) => text,
        Err(_) => "",
    }
}
145 changes: 139 additions & 6 deletions src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,18 @@ use llvm_sys::core::LLVMGetTypeByName;

use llvm_sys::core::{
LLVMAddFunction, LLVMAddGlobal, LLVMAddGlobalInAddressSpace, LLVMAddNamedMetadataOperand, LLVMCloneModule,
LLVMDisposeModule, LLVMDumpModule, LLVMGetFirstFunction, LLVMGetFirstGlobal, LLVMGetLastFunction,
LLVMGetLastGlobal, LLVMGetModuleContext, LLVMGetModuleIdentifier, LLVMGetNamedFunction, LLVMGetNamedGlobal,
LLVMGetNamedMetadataNumOperands, LLVMGetNamedMetadataOperands, LLVMGetTarget, LLVMPrintModuleToFile,
LLVMPrintModuleToString, LLVMSetDataLayout, LLVMSetModuleIdentifier, LLVMSetTarget, LLVMDisposeMessage
LLVMDisposeMessage, LLVMDisposeModule, LLVMDumpModule, LLVMGetFirstFunction, LLVMGetFirstGlobal,
LLVMGetLastFunction, LLVMGetLastGlobal, LLVMGetModuleContext, LLVMGetModuleIdentifier, LLVMGetNamedFunction,
LLVMGetNamedGlobal, LLVMGetNamedMetadataNumOperands, LLVMGetNamedMetadataOperands, LLVMGetTarget,
LLVMPrintModuleToFile, LLVMPrintModuleToString, LLVMSetDataLayout, LLVMSetModuleIdentifier, LLVMSetTarget,
};
#[llvm_versions(7..)]
use llvm_sys::core::{LLVMAddModuleFlag, LLVMGetModuleFlag};
#[llvm_versions(13..)]
use llvm_sys::error::LLVMGetErrorMessage;
use llvm_sys::execution_engine::{
LLVMCreateExecutionEngineForModule, LLVMCreateInterpreterForModule, LLVMCreateJITCompilerForModule,
LLVMCreateSimpleMCJITMemoryManager,
};
use llvm_sys::prelude::{LLVMModuleRef, LLVMValueRef};
#[llvm_versions(13..)]
Expand All @@ -29,7 +30,7 @@ use llvm_sys::LLVMLinkage;
use llvm_sys::LLVMModuleFlagBehavior;

use std::cell::{Cell, Ref, RefCell};
use std::ffi::CStr;
use std::ffi::{c_void, CStr};
use std::fs::File;
use std::marker::PhantomData;
use std::mem::{forget, MaybeUninit};
Expand All @@ -45,12 +46,16 @@ use crate::data_layout::DataLayout;
use crate::debug_info::{DICompileUnit, DWARFEmissionKind, DWARFSourceLanguage, DebugInfoBuilder};
use crate::execution_engine::ExecutionEngine;
use crate::memory_buffer::MemoryBuffer;
use crate::memory_manager::{
allocate_code_section_adapter, allocate_data_section_adapter, destroy_adapter, finalize_memory_adapter,
McjitMemoryManager, MemoryManagerAdapter,
};
#[llvm_versions(13..)]
use crate::passes::PassBuilderOptions;
use crate::support::{to_c_str, LLVMString};
#[llvm_versions(13..)]
use crate::targets::TargetMachine;
use crate::targets::{InitializationConfig, Target, TargetTriple};
use crate::targets::{CodeModel, InitializationConfig, Target, TargetTriple};
use crate::types::{AsTypeRef, BasicType, FunctionType, StructType};
#[llvm_versions(7..)]
use crate::values::BasicValue;
Expand Down Expand Up @@ -609,6 +614,134 @@ impl<'ctx> Module<'ctx> {
Ok(execution_engine)
}

/// Creates an MCJIT `ExecutionEngine` for this `Module` using a custom memory manager.
///
/// # Parameters
///
/// * `memory_manager` - Specifies how LLVM allocates and finalizes code and data sections.
/// Implement the [`McjitMemoryManager`] trait to customize these operations.
/// * `opt_level` - Sets the desired optimization level (e.g. `None`, `Less`, `Default`, `Aggressive`).
/// Higher levels generally produce faster code at the expense of longer compilation times.
/// * `code_model` - Determines how code addresses are represented. Common values include
/// `CodeModel::Default` or `CodeModel::JITDefault`. This impacts the generated machine code layout.
/// * `no_frame_pointer_elim` - If true, frame pointer elimination is disabled. This may assist
/// with certain debugging or profiling tasks but can incur a performance cost.
/// * `enable_fast_isel` - If true, uses a faster instruction selector where possible. This can
/// improve compilation speed, though it may produce less optimized code in some cases.
///
/// # Returns
///
/// Returns a newly created [`ExecutionEngine`] for MCJIT on success. Returns an error if:
/// - The native target fails to initialize,
/// - The `Module` is already owned by another `ExecutionEngine`,
/// - Or MCJIT fails to create the engine (in which case an error string is returned from LLVM).
///
/// # Notes
///
/// Using a custom memory manager can help intercept or manage allocations for specific
/// sections (for example, capturing `.llvm_stackmaps` or applying custom permissions).
/// For details, refer to the [`McjitMemoryManager`] documentation.
///
/// # Safety
///
/// The returned [`ExecutionEngine`] takes ownership of the memory manager. Do not move
/// or free the `memory_manager` after calling this method. When the `ExecutionEngine`
/// is dropped, LLVM will destroy the memory manager by calling
/// [`McjitMemoryManager::destroy()`] and freeing its adapter.
pub fn create_mcjit_execution_engine_with_memory_manager(
&self,
memory_manager: impl McjitMemoryManager + 'static,
opt_level: OptimizationLevel,
code_model: CodeModel,
no_frame_pointer_elim: bool,
enable_fast_isel: bool,
) -> Result<ExecutionEngine<'ctx>, LLVMString> {
use std::mem::MaybeUninit;
// ...

// 1) Initialize the native target
Target::initialize_native(&InitializationConfig::default()).map_err(|mut err_string| {
err_string.push('\0');
LLVMString::create_from_str(&err_string)
})?;

// Check if the module is already owned by an ExecutionEngine
if self.owned_by_ee.borrow().is_some() {
let string = "This module is already owned by an ExecutionEngine.\0";
return Err(LLVMString::create_from_str(string));
}

// 2) Box the memory_manager into a MemoryManagerAdapter
let adapter = MemoryManagerAdapter {
memory_manager: Box::new(memory_manager),
};
let adapter_box = Box::new(adapter);
// Convert the Box into a raw pointer for LLVM.
// In `destroy_adapter`, we use `Box::from_raw` to safely reclaim ownership.
let opaque = Box::into_raw(adapter_box) as *mut c_void;

// 3) Create the LLVMMCJITMemoryManager using the custom callbacks
let mmgr = unsafe {
LLVMCreateSimpleMCJITMemoryManager(
opaque,
allocate_code_section_adapter,
allocate_data_section_adapter,
finalize_memory_adapter,
Some(destroy_adapter),
)
};
if mmgr.is_null() {
let msg = "Failed to create SimpleMCJITMemoryManager.\0";
return Err(LLVMString::create_from_str(msg));
}

// 4) Build LLVMMCJITCompilerOptions
let mut options_uninit = MaybeUninit::<llvm_sys::execution_engine::LLVMMCJITCompilerOptions>::zeroed();
unsafe {
// Ensure defaults are initialized
llvm_sys::execution_engine::LLVMInitializeMCJITCompilerOptions(
options_uninit.as_mut_ptr(),
std::mem::size_of::<llvm_sys::execution_engine::LLVMMCJITCompilerOptions>(),
);
}
let mut options = unsafe { options_uninit.assume_init() };

// Override fields
options.OptLevel = opt_level as u32;
options.CodeModel = code_model.into();
options.NoFramePointerElim = no_frame_pointer_elim as i32;
options.EnableFastISel = enable_fast_isel as i32;
options.MCJMM = mmgr;

// 5) Create MCJIT
let mut execution_engine = MaybeUninit::uninit();
let mut err_string = MaybeUninit::uninit();
let code = unsafe {
llvm_sys::execution_engine::LLVMCreateMCJITCompilerForModule(
execution_engine.as_mut_ptr(),
self.module.get(),
&mut options,
std::mem::size_of::<llvm_sys::execution_engine::LLVMMCJITCompilerOptions>(),
err_string.as_mut_ptr(),
)
};

// If creation fails, extract the error string
if code == 1 {
unsafe {
return Err(LLVMString::new(err_string.assume_init()));
}
}

// Otherwise, it succeeded, so wrap the raw pointer
let execution_engine = unsafe { execution_engine.assume_init() };
let execution_engine = unsafe { ExecutionEngine::new(Rc::new(execution_engine), true) };

*self.owned_by_ee.borrow_mut() = Some(execution_engine.clone());

Ok(execution_engine)
}

/// Creates a `GlobalValue` based on a type in an address space.
///
/// # Example
Expand Down
Loading