refactor: split project into multiple crates

This commit is contained in:
Romain Paquet 2025-11-05 20:23:17 +01:00
parent 486af67fc2
commit 857f747524
27 changed files with 308 additions and 222 deletions

15
lila-jit/Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
# Manifest of the `lila-jit` crate.
[package]
name = "lila-jit"
version = "0.0.1"
edition = "2021"
[dependencies]
# Diagnostic rendering.
ariadne = "0.4.1" # TODO: use ariadne only in CLI
# Cranelift code generation stack; all four crates are pinned to the same version.
cranelift = "0.109.0"
cranelift-jit = "0.109.0"
cranelift-module = "0.109.0"
cranelift-native = "0.109.0"
# Sibling crates, referenced by relative path.
lila = { path = "../lila" }
lila-ast = { path = "../lila-ast", features = ["ariadne"] } # TODO: don't include ariadne feature
lila-checking = { path = "../lila-checking" }
lila-parsing = { path = "../lila-parsing" }

530
lila-jit/src/lib.rs Normal file
View file

@ -0,0 +1,530 @@
use ariadne::Cache as _;
use cranelift::{codegen::ir::UserFuncName, prelude::*};
use cranelift_jit::{JITBuilder, JITModule};
use cranelift_module::{DataDescription, FuncId, FuncOrDataId, Linkage, Module};
use lila::report::ToReport as _;
use lila::source::SourceCache;
use lila_ast as ast;
use lila_ast::typing::{self, Type};
use lila_ast::{
expr::BinaryExpression, BinaryOperator, Expr, FunctionDefinition, ModulePath, ReturnStatement,
SourceId, Statement, UnaryOperator,
};
use lila_checking::TypeCheckModule as _;
use lila_parsing::{DefaultParser, Parser};
use std::collections::HashMap;
/// A just-in-time compiler that parses, type-checks and lowers a module to
/// native code through Cranelift.
///
/// The Cranelift state (`builder_context`, `ctx`, `data_desc`, `module`) is
/// private; callers only configure `dump_clir` and `parser`.
pub struct JIT {
/// The function builder context, which is reused across multiple
/// FunctionBuilder instances.
builder_context: FunctionBuilderContext,
/// The main Cranelift context, which holds the state for codegen. Cranelift
/// separates this from `Module` to allow for parallel compilation, with a
/// context per thread; this JIT keeps a single context and reuses it for
/// every function it translates.
ctx: codegen::Context,
/// The data description, which is to data objects what `ctx` is to functions.
data_desc: DataDescription,
/// The module, with the jit backend, which manages the JIT'd functions.
module: JITModule,
/// Whether to print the Cranelift IR of each function to stdout once it
/// has been defined.
pub dump_clir: bool,
/// Parser used to build the AST
pub parser: DefaultParser,
}
impl Default for JIT {
    /// Creates a JIT targeting the host machine, with CLIR dumping disabled
    /// and a default parser.
    ///
    /// # Panics
    ///
    /// Panics if Cranelift does not support the host machine, or if the
    /// code generation flags or the target ISA cannot be set up.
    fn default() -> Self {
        let mut flags = codegen::settings::builder();
        for (name, value) in [("use_colocated_libcalls", "false"), ("is_pic", "false")] {
            flags.set(name, value).unwrap();
        }

        let isa_builder = match cranelift_native::builder() {
            Ok(isa_builder) => isa_builder,
            Err(msg) => panic!("host machine is not supported: {msg}"),
        };
        let isa = isa_builder.finish(settings::Flags::new(flags)).unwrap();

        let module = JITModule::new(JITBuilder::with_isa(
            isa,
            cranelift_module::default_libcall_names(),
        ));

        // The codegen context is created from the module and asked to
        // produce a disassembly alongside the compiled code.
        let mut ctx = module.make_context();
        ctx.set_disasm(true);

        Self {
            builder_context: FunctionBuilderContext::new(),
            ctx,
            data_desc: DataDescription::new(),
            module,
            dump_clir: false,
            parser: DefaultParser::default(),
        }
    }
}
impl JIT {
/// Compile source code into machine code.
///
/// Fetches the text registered under `id` in `source_cache`, parses it as the
/// module `namespace`, type-checks it, lowers it to Cranelift IR and finalizes
/// the generated definitions.
///
/// Returns a pointer to the machine code of the function named `main`.
///
/// # Errors
///
/// Returns a message when the source cannot be fetched, when parsing or type
/// checking fails (every typing error is also reported on stderr), when the
/// translation fails, when the definitions cannot be finalized, or when the
/// module declares no `main` function.
///
/// # Panics
///
/// Panics if a typing report cannot be written to stderr.
pub fn compile(
    &mut self,
    namespace: ModulePath,
    id: SourceId,
    source_cache: &mut SourceCache,
) -> Result<*const u8, String> {
    let input = source_cache
        .fetch(&id)
        .map(|s| s.text())
        .map_err(|e| format!("{e:?}"))?;

    // Parse the source code into an AST
    let mut ast = self
        .parser
        .parse_as_module(input, namespace, id)
        .map_err(|x| format!("Parsing error: {x}"))?;

    // Report every typing error before giving up, so the user sees them all.
    if let Err(errors) = ast.type_check() {
        for error in errors.iter() {
            error
                .to_report(&ast)
                .eprint(&mut *source_cache)
                .expect("failed to write a typing report to stderr");
        }
        return Err("Typing errors, cannot compile".into());
    }

    // Translate the AST into Cranelift IR
    self.translate(&ast)?;

    // Finalize the functions which we just defined, which resolves any
    // outstanding relocations (patching in addresses, now that they're
    // available). A failure is reported to the caller instead of panicking,
    // like every other step of this function.
    self.module
        .finalize_definitions()
        .map_err(|e| e.to_string())?;

    // We can now retrieve a pointer to the machine code.
    match self.module.get_name("main") {
        Some(FuncOrDataId::Func(main_id)) => Ok(self.module.get_finalized_function(main_id)),
        _ => Err("no main function".into()),
    }
}
pub fn compile_file(
&mut self,
path: &str,
id: SourceId,
source_cache: &mut SourceCache,
) -> Result<*const u8, String> {
self.compile(
AsRef::<std::path::Path>::as_ref(path).into(),
id,
source_cache,
)
}
/// Translate language AST into Cranelift IR.
///
/// Works in two passes over `ast.functions`:
///
/// 1. every function is declared in the module with its signature, so that
///    it can be looked up by name while bodies are translated;
/// 2. every function body is translated and defined, reusing `self.ctx`,
///    which is cleared after each function whether or not it succeeded.
///
/// When `dump_clir` is set, the IR of each successfully defined function is
/// printed to stdout.
///
/// # Errors
///
/// Returns the Cranelift error message when a function cannot be declared
/// or defined, or the error of [`Self::translate_function`].
///
/// # Panics
///
/// Panics if a parameter has type `Type::Unit`, or if a parameter or return
/// type has no ABI lowering yet (see `to_abi_params`).
fn translate(&mut self, ast: &ast::Module) -> Result<(), String> {
    // Declare functions
    let mut declared: Vec<(FuncId, Signature)> = Vec::with_capacity(ast.functions.len());
    for func in &ast.functions {
        // Create the signature
        let mut sig = self.module.make_signature();
        for param in &func.parameters {
            assert_ne!(param.typ, Type::Unit);
            sig.params.extend(to_abi_params(&param.typ));
        }
        if let Some(return_type) = &func.return_type {
            // A unit return type is lowered to "no return value".
            if *return_type != Type::Unit {
                sig.returns = to_abi_params(return_type);
            }
        }

        let id = self
            .module
            .declare_function(&func.name, Linkage::Export, &sig)
            .map_err(|e| e.to_string())?;
        declared.push((id, sig));
    }

    // Translate functions
    for (i, (func, (id, sig))) in ast.functions.iter().zip(declared).enumerate() {
        self.ctx.func.signature = sig;
        self.ctx.func.name = UserFuncName::user(0, id.as_u32());

        let outcome = match self.translate_function(func) {
            Ok(()) => self
                .module
                .define_function(id, &mut self.ctx)
                .map(drop)
                .map_err(|e| e.to_string()),
            Err(e) => Err(e),
        };

        if outcome.is_ok() && self.dump_clir {
            println!("// {}", ast.full_func_path(i));
            println!("{}", self.ctx.func);
        }

        // Always leave the shared context clean, even on failure, so that a
        // later compilation does not start from a half-built function.
        self.module.clear_context(&mut self.ctx);
        outcome?;
    }
    Ok(())
}
fn translate_function(&mut self, function: &FunctionDefinition) -> Result<(), String> {
// Create the builder to build a function.
let mut builder = FunctionBuilder::new(&mut self.ctx.func, &mut self.builder_context);
// Create the entry block, to start emitting code in.
let entry_block = builder.create_block();
// Since this is the entry block, add block parameters corresponding to
// the function's parameters.
builder.append_block_params_for_function_params(entry_block);
// Tell the builder to emit code in this block.
builder.switch_to_block(entry_block);
// And, tell the builder that this block will have no further
// predecessors. Since it's the entry block, it won't have any
// predecessors.
builder.seal_block(entry_block);
// Walk the AST and declare all implicitly-declared variables.
let variables = HashMap::<String, Variable>::default(); // TODO: actually do this
let mut translator = FunctionTranslator {
builder,
variables,
module: &mut self.module,
data_desc: &mut self.data_desc,
};
// Add a variable for each parameter.
let param_values: Box<[Value]> = translator.builder.block_params(entry_block).into();
assert_eq!(param_values.len(), function.parameters.len());
for (i, param) in function.parameters.iter().enumerate() {
let var = Variable::from_u32(translator.variables.len() as u32);
translator.variables.insert(param.name.clone(), var);
let value = param_values[i];
let typ = translator.translate_type(&param.typ);
translator.builder.declare_var(var, typ);
translator.builder.def_var(var, value);
}
// Now translate the statements of the function body.
for stmt in &function.body.statements {
translator.translate_statement(stmt);
}
// Emit the final return instruction.
if let Some(return_expr) = &function.body.value {
let return_value = translator.translate_expr(&return_expr.expr);
translator.builder.ins().return_(&[return_value]);
} else {
translator.builder.ins().return_(&[]);
}
// Tell the builder we're done with this function.
translator.builder.finalize();
Ok(())
}
}
/// Lower a language type to the ABI parameters used in a function signature.
///
/// `Bool`, `Int` and `Float` each map to a single parameter (`I8`, `I32` and
/// `F32` respectively).
///
/// # Panics
///
/// Panics with `unimplemented!` for every other type.
fn to_abi_params(value: &Type) -> Vec<AbiParam> {
    let clif_type = match value {
        Type::Bool => types::I8,
        Type::Int => types::I32,
        Type::Float => types::F32,
        _ => unimplemented!(),
    };
    vec![AbiParam::new(clif_type)]
}
/// A collection of state used for translating from AST nodes
/// into Cranelift IR.
///
/// One translator is built per function being translated; it borrows the
/// module and data description of the owning `JIT` for the duration `'a`.
struct FunctionTranslator<'a> {
/// Builder emitting instructions into the function being translated.
builder: FunctionBuilder<'a>,
/// Maps the name of each parameter and declared local to its Cranelift
/// variable. New variables are numbered from the current size of this map.
variables: HashMap<String, Variable>,
/// Module used to resolve callees by name and to declare data objects.
module: &'a mut JITModule,
/// Scratch data description used to define string literals; it is cleared
/// after each use.
data_desc: &'a mut DataDescription,
}
impl<'a> FunctionTranslator<'a> {
/// Translate a single statement.
///
/// Returns the value produced by the statement, if any: the assigned value
/// for assignments and declarations, the call result for call statements,
/// and `None` for `return` and `if` statements.
///
/// # Panics
///
/// Panics on an assignment to an unknown variable, and on `use` and `while`
/// statements, which are not supported yet.
fn translate_statement(&mut self, stmt: &Statement) -> Option<Value> {
    match stmt {
        Statement::AssignStatement {
            lhs: name,
            rhs: expr,
            ..
        } => {
            // `def_var` is used to write the value of a variable. Note that
            // variables can have multiple definitions. Cranelift will
            // convert them into SSA form for itself automatically.
            let new_value = self.translate_expr(&expr.expr);
            let variable = *self
                .variables
                .get(name)
                .expect("assignment to an undeclared variable");
            self.builder.def_var(variable, new_value);
            Some(new_value)
        }
        Statement::DeclareStatement {
            lhs: name,
            rhs: expr,
            ..
        } => {
            let value = self.translate_expr(&expr.expr);
            // NOTE(review): numbering from `variables.len()` reuses an index
            // if a name is declared twice, since `insert` then replaces the
            // entry without growing the map — confirm shadowing is rejected
            // before code generation.
            let variable = Variable::from_u32(self.variables.len() as u32);
            let clif_type = self.translate_type(&expr.ty());
            self.builder.declare_var(variable, clif_type);
            self.builder.def_var(variable, value);
            self.variables.insert(name.clone(), variable);
            Some(value)
        }
        Statement::ReturnStatement(ReturnStatement {
            expr: maybe_expr, ..
        }) => {
            // TODO: investigate tail call
            match maybe_expr {
                Some(expr) => {
                    let value = self.translate_expr(&expr.expr);
                    self.builder.ins().return_(&[value]);
                }
                None => {
                    self.builder.ins().return_(&[]);
                }
            }
            None
        }
        Statement::CallStatement { call, .. } => self.translate_call(call),
        Statement::UseStatement { .. } => todo!(),
        Statement::IfStatement {
            condition: cond,
            then_block: then_body,
            ..
        } => {
            let condition_value = self.translate_expr(&cond.expr);
            let then_block = self.builder.create_block();
            let merge_block = self.builder.create_block();
            self.builder
                .ins()
                .brif(condition_value, then_block, &[], merge_block, &[]);

            self.builder.switch_to_block(then_block);
            self.builder.seal_block(then_block);
            self.translate_block(then_body);

            // When the then-block ends with an explicit `return`, the
            // current block is already terminated: adding a jump to it
            // would put a second terminator in the same block.
            let then_returns = matches!(
                then_body.statements.last(),
                Some(Statement::ReturnStatement(_))
            );
            if !then_returns {
                self.builder.ins().jump(merge_block, &[]);
            }

            // Continue after the `if`; all predecessors of the merge block
            // are known at this point.
            self.builder.switch_to_block(merge_block);
            self.builder.seal_block(merge_block);
            None
        }
        Statement::WhileStatement { .. } => todo!(),
    }
}
fn translate_expr(&mut self, expr: &Expr) -> Value {
match expr {
Expr::UnitLiteral => unreachable!(),
Expr::BooleanLiteral(imm, ..) => self.builder.ins().iconst(types::I8, i64::from(*imm)),
Expr::IntegerLiteral(imm, ..) => self.builder.ins().iconst(types::I32, *imm),
Expr::FloatLiteral(imm, ..) => self.builder.ins().f64const(*imm),
Expr::StringLiteral(s, ..) => {
let id = self.module.declare_anonymous_data(false, false).unwrap();
let bytes: Box<[u8]> = s.as_bytes().into();
self.data_desc.define(bytes);
self.module.define_data(id, self.data_desc).unwrap();
let gv = self.module.declare_data_in_func(id, self.builder.func);
self.data_desc.clear();
self.builder
.ins()
.global_value(self.module.isa().pointer_type(), gv)
}
Expr::BinaryExpression(BinaryExpression { lhs, rhs, op, .. }) => {
let lhs_value = self.translate_expr(&lhs.expr);
let rhs_value = self.translate_expr(&rhs.expr);
match (lhs.ty(), lhs.ty()) {
(Type::Int, Type::Int) => match op {
BinaryOperator::Add => self.builder.ins().iadd(lhs_value, rhs_value),
BinaryOperator::Sub => self.builder.ins().isub(lhs_value, rhs_value),
BinaryOperator::Mul => self.builder.ins().imul(lhs_value, rhs_value),
// TODO: investigate division (case rhs <= 0)
BinaryOperator::Div => self.builder.ins().udiv(lhs_value, rhs_value),
BinaryOperator::Modulo => todo!(),
BinaryOperator::Equal => {
self.builder.ins().icmp(IntCC::Equal, lhs_value, rhs_value)
}
BinaryOperator::NotEqual => {
self.builder
.ins()
.icmp(IntCC::NotEqual, lhs_value, rhs_value)
}
_ => unreachable!(),
},
(Type::Bool, Type::Bool) => match op {
// XXX: Is min and max ok or should it be something else?
BinaryOperator::And => self.builder.ins().umin(lhs_value, rhs_value),
BinaryOperator::Or => self.builder.ins().umax(lhs_value, rhs_value),
_ => unreachable!(),
},
_ => unimplemented!(),
}
}
Expr::IfExpr {
cond,
then_body,
else_body,
typ,
..
} => {
let condition_value = self.translate_expr(&cond.expr);
let then_block = self.builder.create_block();
let else_block = self.builder.create_block();
let merge_block = self.builder.create_block();
// If-else constructs in the language have a return value.
// In traditional SSA form, this would produce a PHI between
// the then and else bodies. Cranelift uses block parameters,
// so set up a parameter in the merge block, and we'll pass
// the return values to it from the branches.
self.builder
.append_block_param(merge_block, self.translate_type(typ));
// Test the if condition and conditionally branch.
self.builder
.ins()
.brif(condition_value, then_block, &[], else_block, &[]);
self.builder.switch_to_block(then_block);
self.builder.seal_block(then_block);
for stmt in &then_body.statements {
self.translate_statement(stmt);
}
let then_return_value = match &then_body.value {
Some(val) => vec![self.translate_expr(&val.expr)],
None => Vec::with_capacity(0),
};
// Jump to the merge block, passing it the block return value.
self.builder.ins().jump(merge_block, &then_return_value);
self.builder.switch_to_block(else_block);
self.builder.seal_block(else_block);
// XXX: the else can be just an expression: do we always need to
// make a second branch in that case? Or leave it to cranelift?
let else_return_value = match else_body.expr {
Expr::UnitLiteral => Vec::with_capacity(0),
_ => vec![self.translate_expr(&else_body.expr)],
};
// Jump to the merge block, passing it the block return value.
self.builder.ins().jump(merge_block, &else_return_value);
// Switch to the merge block for subsequent statements.
self.builder.switch_to_block(merge_block);
// We've now seen all the predecessors of the merge block.
self.builder.seal_block(merge_block);
// Read the value of the if-else by reading the merge block
// parameter.
let phi = self.builder.block_params(merge_block)[0];
phi
}
Expr::UnaryExpression { op, inner, .. } => {
let inner_value = self.translate_expr(&inner.expr);
match op {
// XXX: This should not be a literal translation
UnaryOperator::Not => {
let one = self.builder.ins().iconst(types::I8, 1);
self.builder.ins().isub(one, inner_value)
}
}
}
Expr::Identifier { name, .. } => {
self.builder.use_var(*self.variables.get(name).unwrap())
}
Expr::Call(call, ..) => self.translate_call(call).unwrap(),
Expr::Block(block, ..) => self.translate_block(block).unwrap(),
}
}
/// Translate every statement of `block` in order, then its trailing
/// expression.
///
/// Returns the value of the trailing expression, or `None` when the block
/// has none.
fn translate_block(&mut self, block: &ast::Block) -> Option<Value> {
    for statement in &block.statements {
        self.translate_statement(statement);
    }

    let trailing = block.value.as_ref()?;
    Some(self.translate_expr(&trailing.expr))
}
/// Translate a call to a function of the module.
///
/// The callee is resolved by name in the module before the arguments are
/// translated, in order.
///
/// Returns the first result of the call, or `None` when the callee returns
/// nothing.
///
/// # Panics
///
/// Panics if the callee is not a plain identifier, if the name refers to a
/// data object, or if no function of that name is declared in the module
/// (both not supported yet).
fn translate_call(&mut self, call: &ast::Call) -> Option<Value> {
    let Expr::Identifier { name, .. } = &call.callee.expr else {
        unimplemented!()
    };

    let func_ref = match self.module.get_name(name.as_ref()) {
        Some(FuncOrDataId::Func(func_id)) => {
            self.module.declare_func_in_func(func_id, self.builder.func)
        }
        Some(_) => panic!("`{name}` does not name a function"),
        None => todo!(),
    };

    let args: Vec<Value> = call
        .args
        .iter()
        .map(|a| self.translate_expr(&a.expr))
        .collect();
    let call_inst = self.builder.ins().call(func_ref, &args);

    // A function without return value has no result: indexing the results
    // unconditionally would panic on such calls.
    self.builder.inst_results(call_inst).first().copied()
}
fn translate_type(&self, value: &crate::typing::Type) -> types::Type {
match value {
Type::Bool => types::I8,
Type::Int => types::I32,
Type::Float => types::F32,
Type::Unit => unreachable!(),
Type::Str => self.module.isa().pointer_type(),
Type::Custom(_) => todo!(),
Type::Function {
params: _,
returns: _,
} => todo!(),
Type::Undefined => unreachable!(),
}
}
}