From 43df8c4b0ad633f254afb44c500b1ddd77c9a5b0 Mon Sep 17 00:00:00 2001
From: Romain Paquet
Date: Mon, 12 Jun 2023 20:19:19 +0200
Subject: [PATCH] initial commit

---
Reviewer notes (the text between this "---" line and the first "diff --git"
is not part of the commit message):

* Restored from a copy in which every line break was collapsed and every
  "<...>" span was stripped. Line structure follows the "+" markers and
  matches the hunk line counts. Generic arguments (Box<Expr>, Box<Call>,
  Vec<Ast>, Option<Type>, HashMap<Identifier, Type>, Pair<Rule>,
  PrattParser<Rule>, Result<Type, TypeError>, Result<Ast, Error<Rule>>, ...)
  were re-inferred from how the values are used -- confirm against the
  original commit. Indentation was regenerated, so the blob ids on the
  "index" lines no longer describe this content.
* The e-mail address after "Romain Paquet" in the From: header and after
  "Romain P. " in the clap `author` attribute was lost and has NOT been
  invented; fill both in before running `git am`.
* Line-count-neutral fixes applied while restoring:
  - src/main.rs: a type-check failure now exits with status 1 instead of
    returning normally (status 0).
  - src/main.rs: dropped the needless `&` in fs::read_to_string(&file).
  - src/parsing/grammar.pest: WHITESPACE also accepts "\r" (CRLF sources).
  - src/ast/mod.rs: removed a redundant `typ.clone()`; doc typo.
  - src/ast/typ.rs: unused bindings renamed to `_ctx` / `_call`.
* Known issues left untouched (fixing them changes the line counts):
  - The grammar accepts a `!` prefix, but parse_expression registers no
    prefix operator; `!x` appears to reach the `unreachable!` arm of
    map_primary and panic -- TODO confirm against pest's PrattParser.
  - Block::typ only looks at the trailing expression and nothing inserts
    `set` targets into TypeContext::variables, so a variable assigned in a
    block and then used is reported as UnknownIdentifier.
  - FunctionDefinition::typ ignores the context it is given and treats a
    missing return type as Unit, so check_return_types can only ever fill
    in Unit.
  - `mod tests` in src/parsing/mod.rs has no #[cfg(test)].
  - Integer literals that do not fit in i64 panic in parse_expression.

 Cargo.toml               |  14 +++
 src/ast/expr.rs          |  19 ++++
 src/ast/mod.rs           |  71 +++++++++++++++
 src/ast/typ.rs           | 158 ++++++++++++++++++++++++++++++++++
 src/lib.rs               |   1 +
 src/main.rs              |  62 ++++++++++++++
 src/parsing/grammar.pest |  52 +++++++++++
 src/parsing/mod.rs       |  38 ++++++++
 src/parsing/pest.rs      | 181 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 596 insertions(+)
 create mode 100644 Cargo.toml
 create mode 100644 src/ast/expr.rs
 create mode 100644 src/ast/mod.rs
 create mode 100644 src/ast/typ.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/main.rs
 create mode 100644 src/parsing/grammar.pest
 create mode 100644 src/parsing/mod.rs
 create mode 100644 src/parsing/pest.rs

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..59f89ad
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "kronec"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+clap = { version = "4.3.0", features = ["derive"] }
+cranelift = "0.96.3"
+cranelift-jit = "0.96.3"
+cranelift-module = "0.96.3"
+cranelift-native = "0.96.3"
+lazy_static = "1.4.0"
+pest = "2.6.0"
+pest_derive = "2.6.0"
diff --git a/src/ast/expr.rs b/src/ast/expr.rs
new file mode 100644
index 0000000..e031f83
--- /dev/null
+++ b/src/ast/expr.rs
@@ -0,0 +1,19 @@
+use crate::ast::*;
+
+#[derive(Debug, PartialEq)]
+pub enum Expr {
+    BinaryExpression(Box<Expr>, BinaryOperator, Box<Expr>),
+    Identifier(Identifier),
+    IntegerLiteral(i64),
+    FloatLiteral(f64),
+    StringLiteral(String),
+    Call(Box<Call>),
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum BinaryOperator {
+    Add,
+    Sub,
+    Mul,
+    Div,
+}
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
new file mode 100644
index 0000000..3e63c19
--- /dev/null
+++ b/src/ast/mod.rs
@@ -0,0 +1,71 @@
+pub mod expr;
+pub mod typ;
+
+pub use crate::ast::expr::{BinaryOperator, Expr};
+pub use crate::ast::typ::*;
+
+// XXX: Is this enum actually useful? Is 3:30 AM btw
+#[derive(Debug, PartialEq)]
+pub enum Ast {
+    FunctionDefinition(FunctionDefinition),
+    Expr(Expr),
+    Module(Vec<Ast>),
+    Block(Block),
+    Statement(Statement),
+}
+
+#[derive(Debug, PartialEq)]
+pub struct FunctionDefinition {
+    pub name: Identifier,
+    pub parameters: Vec<Parameter>,
+    pub return_type: Option<Type>,
+    pub body: Box<Block>,
+}
+
+#[derive(Debug, PartialEq)]
+pub struct Block {
+    pub statements: Vec<Statement>,
+    pub value: Option<Expr>,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum Statement {
+    AssignStatement(Identifier, Expr),
+    ReturnStatement(Option<Expr>),
+    CallStatement(Call),
+}
+
+#[derive(Debug, PartialEq)]
+pub struct Call {
+    pub callee: Expr,
+    pub args: Vec<Expr>,
+}
+
+pub type Identifier = String;
+
+#[derive(Debug, PartialEq)]
+pub struct Parameter {
+    pub name: Identifier,
+    pub typ: Type,
+}
+
+impl Ast {
+    /// Type checks the AST and adds missing return types.
+    pub fn check_return_types(&mut self) -> Result<(), TypeError> {
+        match self {
+            Ast::Module(defs) => {
+                for def in defs {
+                    if let Ast::FunctionDefinition { .. } = def {
+                        def.check_return_types()?;
+                    }
+                }
+            }
+            Ast::FunctionDefinition(func) => {
+                let typ = func.typ(&mut TypeContext::default())?;
+                func.return_type = Some(typ);
+            }
+            _ => unreachable!(),
+        }
+        Ok(())
+    }
+}
diff --git a/src/ast/typ.rs b/src/ast/typ.rs
new file mode 100644
index 0000000..5f102a1
--- /dev/null
+++ b/src/ast/typ.rs
@@ -0,0 +1,158 @@
+use std::collections::HashMap;
+
+use crate::ast::*;
+
+#[derive(Debug, PartialEq, Clone)]
+pub enum Type {
+    Int,
+    Float,
+    Unit,
+    Str,
+    Custom(Identifier),
+}
+
+impl From<&str> for Type {
+    fn from(value: &str) -> Self {
+        match value {
+            "int" => Type::Int,
+            "float" => Type::Float,
+            _ => Type::Custom(Identifier::from(value)),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum TypeError {
+    InvalidBinaryOperator {
+        operator: BinaryOperator,
+        lht: Type,
+        rht: Type,
+    },
+    BlockTypeDoesNotMatchFunctionType {
+        function_name: String,
+        function_type: Type,
+        block_type: Type,
+    },
+    ReturnTypeDoesNotMatchFunctionType {
+        function_name: String,
+        function_type: Type,
+        ret_type: Type,
+    },
+    UnknownIdentifier {
+        identifier: String,
+    },
+}
+
+#[derive(Default)]
+pub struct TypeContext {
+    pub function: Option<Identifier>,
+    pub variables: HashMap<Identifier, Type>,
+}
+
+/// Trait for nodes which have a deducible type.
+pub trait Typ {
+    /// Try to resolve the type of the node.
+    fn typ(&self, ctx: &mut TypeContext) -> Result<Type, TypeError>;
+}
+
+impl Typ for FunctionDefinition {
+    fn typ(&self, _ctx: &mut TypeContext) -> Result<Type, TypeError> {
+        let func = self;
+
+        let mut ctx = TypeContext {
+            function: Some(func.name.clone()),
+            ..Default::default()
+        };
+        for param in &func.parameters {
+            ctx.variables.insert(param.name.clone(), param.typ.clone());
+        }
+
+        let body_type = &func.body.typ(&mut ctx)?;
+
+        // If the return type is not specified, it is unit.
+        let func_return_type = match &func.return_type {
+            Some(typ) => typ,
+            None => &Type::Unit,
+        };
+
+        // Check coherence with the body's type.
+        if *func_return_type != *body_type {
+            return Err(TypeError::BlockTypeDoesNotMatchFunctionType {
+                function_name: func.name.clone(),
+                function_type: func_return_type.clone(),
+                block_type: body_type.clone(),
+            })
+        }
+
+        // Check coherence with return statements.
+        for statement in &func.body.statements {
+            if let Statement::ReturnStatement(value) = statement {
+                let ret_type = match value {
+                    Some(expr) => expr.typ(&mut ctx)?,
+                    None => Type::Unit,
+                };
+                if ret_type != *func_return_type {
+                    return Err(TypeError::ReturnTypeDoesNotMatchFunctionType {
+                        function_name: func.name.clone(),
+                        function_type: func_return_type.clone(),
+                        ret_type,
+                    })
+                }
+            }
+        }
+
+        Ok(func_return_type.clone())
+    }
+}
+
+impl Typ for Block {
+    fn typ(&self, ctx: &mut TypeContext) -> Result<Type, TypeError> {
+        // Check if there is an expression at the end of the block.
+        if let Some(expr) = &self.value {
+            expr.typ(ctx)
+        } else {
+            Ok(Type::Unit)
+        }
+    }
+}
+
+impl Typ for Expr {
+    fn typ(&self, ctx: &mut TypeContext) -> Result<Type, TypeError> {
+        match self {
+            Expr::Identifier(identifier) => {
+                if let Some(typ) = ctx.variables.get(identifier) {
+                    Ok(typ.clone())
+                } else {
+                    Err(TypeError::UnknownIdentifier {
+                        identifier: identifier.clone(),
+                    })
+                }
+            }
+            Expr::IntegerLiteral(_) => Ok(Type::Int),
+            Expr::FloatLiteral(_) => Ok(Type::Float),
+            Expr::BinaryExpression(lhs, op, rhs) => match op {
+                BinaryOperator::Add
+                | BinaryOperator::Sub
+                | BinaryOperator::Mul
+                | BinaryOperator::Div => {
+                    let left_type = &lhs.typ(ctx)?;
+                    let right_type = &rhs.typ(ctx)?;
+                    match (left_type, right_type) {
+                        (Type::Int, Type::Int) => Ok(Type::Int),
+                        (Type::Float, Type::Int | Type::Float) => Ok(Type::Float),
+                        (Type::Int, Type::Float) => Ok(Type::Float),
+                        (_, _) => Err(TypeError::InvalidBinaryOperator {
+                            operator: op.clone(),
+                            lht: left_type.clone(),
+                            rht: right_type.clone(),
+                        }),
+                    }
+                }
+            },
+            Expr::StringLiteral(_) => Ok(Type::Str),
+            Expr::Call(_call) => {
+                todo!("resolve call type using ctx");
+            }
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..851c0bc
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1 @@
+pub mod ast;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..6820f10
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,62 @@
+mod ast;
+mod parsing;
+
+use clap::{Parser, Subcommand};
+use std::fs;
+
+/// Experimental compiler for krone
+#[derive(Parser, Debug)]
+#[command(author = "Romain P. ")]
+#[command(version, about, long_about = None)]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Subcommand, Debug)]
+enum Commands {
+    Parse {
+        /// Path to the source file
+        file: String,
+
+        /// Dump the AST to stdout
+        #[arg(long)]
+        dump_ast: bool,
+
+        /// Add missing return types in the AST
+        #[arg(long)]
+        complete_ast: bool,
+    },
+}
+
+fn main() {
+    let cli = Cli::parse();
+
+    match &cli.command {
+        Commands::Parse {
+            file,
+            dump_ast,
+            complete_ast,
+        } => {
+            let source = fs::read_to_string(file).expect("could not read the source file");
+            let mut ast = match parsing::parse(&source) {
+                Ok(ast) => ast,
+                Err(e) => panic!("Parsing error: {:#?}", e),
+            };
+
+            if *complete_ast {
+                if let Err(e) = ast.check_return_types() {
+                    eprintln!("{:#?}", e);
+                    std::process::exit(1);
+                }
+            }
+
+            if *dump_ast {
+                println!("{:#?}", &ast);
+                return;
+            }
+
+            println!("Parsing OK");
+        }
+    }
+}
diff --git a/src/parsing/grammar.pest b/src/parsing/grammar.pest
new file mode 100644
index 0000000..bf80948
--- /dev/null
+++ b/src/parsing/grammar.pest
@@ -0,0 +1,52 @@
+// This file is just a little test of pest.rs
+
+source_file = { SOI ~ definition* ~ EOI }
+
+statement = { assign_statement | return_statement | call_statement }
+assign_statement = { "set" ~ ident ~ "=" ~ expr ~ ";" }
+return_statement = { "return" ~ expr? ~ ";" }
+call_statement = { call ~ ";" }
+
+// Function calls
+call = { ident ~ "(" ~ args ~ ")" }
+args = { (expr ~ ",")* ~ expr? }
+
+definition = { func_def }
+
+func_def = { "fn" ~ ident ~ "(" ~ parameters ~ ")" ~ typ? ~ block }
+parameters = {
+    (parameter ~ ",")* ~ (parameter)?
+}
+parameter = { ident ~ ":" ~ typ }
+
+block = { "{" ~ statement* ~ expr? ~ "}" }
+
+// Operators
+infix = _{ add | subtract | multiply | divide }
+add = { "+" }
+subtract = { "-" }
+multiply = { "*" }
+divide = { "/" }
+
+prefix = _{ not }
+not = { "!" }
+
+expr = { prefix? ~ atom ~ (infix ~ prefix? ~ atom)* }
+atom = _{ call | ident | literal | "(" ~ expr ~ ")" }
+
+ident = @{ (ASCII_ALPHA | "_")+ }
+typ = _{ ident }
+
+// Literals
+literal = _{ float_literal | integer_literal | string_literal }
+string_literal = ${ "\"" ~ string_content ~ "\"" }
+string_content = @{ char* }
+char = {
+    !("\"" | "\\") ~ ANY
+    | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
+    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
+}
+integer_literal = @{ ASCII_DIGIT+ }
+float_literal = @{ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ "." ~ ASCII_DIGIT* }
+
+WHITESPACE = _{ " " | "\n" | "\r" | "\t" }
diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs
new file mode 100644
index 0000000..6d354dc
--- /dev/null
+++ b/src/parsing/mod.rs
@@ -0,0 +1,38 @@
+pub mod pest;
+
+pub use self::pest::parse;
+
+mod tests {
+    #[test]
+    fn test_addition_function() {
+        use crate::ast::*;
+        use crate::parsing::pest::parse;
+
+        let source = "fn add(a: int, b: int) int { a + b }";
+
+        let ast = Ast::FunctionDefinition(FunctionDefinition {
+            name: Identifier::from("add"),
+            parameters: vec![
+                Parameter {
+                    name: Identifier::from("a"),
+                    typ: Type::Int,
+                },
+                Parameter {
+                    name: Identifier::from("b"),
+                    typ: Type::Int,
+                },
+            ],
+            return_type: Some(Type::Int),
+            body: Box::new(Block {
+                statements: vec![],
+                value: Some(Expr::BinaryExpression(
+                    Box::new(Expr::Identifier(Identifier::from("a"))),
+                    BinaryOperator::Add,
+                    Box::new(Expr::Identifier(Identifier::from("b"))),
+                )),
+            }),
+        });
+
+        assert_eq!(parse(source).unwrap(), Ast::Module(vec![ast]));
+    }
+}
diff --git a/src/parsing/pest.rs b/src/parsing/pest.rs
new file mode 100644
index 0000000..e56eb78
--- /dev/null
+++ b/src/parsing/pest.rs
@@ -0,0 +1,181 @@
+use lazy_static;
+use pest::error::Error;
+use pest::iterators::Pair;
+use pest::pratt_parser::PrattParser;
+use pest::Parser;
+
+use crate::ast::*;
+
+#[derive(pest_derive::Parser)]
+#[grammar = "parsing/grammar.pest"]
+struct KrParser;
+
+lazy_static::lazy_static! {
+    static ref PRATT_PARSER: PrattParser<Rule> = {
+        use pest::pratt_parser::{Assoc::*, Op};
+        use Rule::*;
+
+        // Precedence is defined lowest to highest
+        PrattParser::new()
+            // Addition and subtract have equal precedence
+            .op(Op::infix(add, Left) | Op::infix(subtract, Left))
+            .op(Op::infix(multiply, Left) | Op::infix(divide, Left))
+    };
+}
+
+pub fn parse(source: &str) -> Result<Ast, Error<Rule>> {
+    let mut definitions: Vec<Ast> = vec![];
+
+    let pairs = KrParser::parse(Rule::source_file, source)?;
+    for pair in pairs {
+        match pair.as_rule() {
+            Rule::source_file => {
+                let pairs = pair.into_inner();
+                for pair in pairs {
+                    match pair.as_rule() {
+                        Rule::definition => {
+                            let definition = parse_definition(pair.into_inner().next().unwrap());
+                            definitions.push(definition);
+                        }
+                        Rule::EOI => {}
+                        _ => panic!("unexpected rule in source_file: {:?}", pair.as_rule()),
+                    }
+                }
+            }
+            _ => eprintln!("unexpected top-level rule {:?}", pair.as_rule()),
+        }
+    }
+
+    Ok(Ast::Module(definitions))
+}
+
+fn parse_block(pair: Pair<Rule>) -> Block {
+    let mut statements = vec![];
+    let mut value = None;
+
+    for pair in pair.into_inner() {
+        match pair.as_rule() {
+            Rule::statement => statements.push(parse_statement(pair)),
+            Rule::expr => value = Some(parse_expression(pair)),
+            _ => panic!("unexpected rule {:?} in block", pair.as_rule()),
+        }
+    }
+
+    Block { statements, value }
+}
+
+fn parse_statement(pair: Pair<Rule>) -> Statement {
+    let pair = pair.into_inner().next().unwrap();
+    match pair.as_rule() {
+        Rule::assign_statement => {
+            let mut pairs = pair.into_inner();
+            let identifier = pairs.next().unwrap().as_str().to_string();
+            let expr = parse_expression(pairs.next().unwrap());
+            Statement::AssignStatement(identifier, expr)
+        }
+        Rule::return_statement => {
+            let expr = if let Some(pair) = pair.into_inner().next() {
+                Some(parse_expression(pair))
+            } else {
+                None
+            };
+            Statement::ReturnStatement(expr)
+        }
+        Rule::call_statement => {
+            let call = parse_call(pair.into_inner().next().unwrap());
+            Statement::CallStatement(call)
+        }
+        _ => unreachable!("unexpected rule '{:?}' in parse_statement", pair.as_rule()),
+    }
+}
+
+fn parse_call(pair: Pair<Rule>) -> Call {
+    let mut pairs = pair.into_inner();
+    // TODO: support calls on more than identifiers (needs grammar change)
+    let callee = Expr::Identifier(pairs.next().unwrap().as_str().to_string());
+    let args: Vec<Expr> = pairs
+        .next()
+        .unwrap()
+        .into_inner()
+        .map(parse_expression)
+        .collect();
+    Call { callee, args }
+}
+
+fn parse_expression(pair: Pair<Rule>) -> Expr {
+    let pairs = pair.into_inner();
+    PRATT_PARSER
+        .map_primary(|primary| match primary.as_rule() {
+            Rule::integer_literal => Expr::IntegerLiteral(primary.as_str().parse().unwrap()),
+            Rule::float_literal => Expr::FloatLiteral(primary.as_str().parse().unwrap()),
+            Rule::string_literal => Expr::StringLiteral(
+                primary
+                    .into_inner()
+                    .next()
+                    .unwrap()
+                    .as_str()
+                    .parse()
+                    .unwrap(),
+            ),
+            Rule::ident => Expr::Identifier(primary.as_str().to_string()),
+            Rule::expr => parse_expression(primary),
+            Rule::call => Expr::Call(Box::new(parse_call(primary))),
+            _ => unreachable!(
+                "Unexpected rule '{:?}' in primary expression",
+                primary.as_rule()
+            ),
+        })
+        .map_infix(|lhs, op, rhs| {
+            let operator = match op.as_rule() {
+                Rule::add => BinaryOperator::Add,
+                Rule::subtract => BinaryOperator::Sub,
+                Rule::multiply => BinaryOperator::Mul,
+                Rule::divide => BinaryOperator::Div,
+                _ => unreachable!(),
+            };
+            Expr::BinaryExpression(Box::new(lhs), operator, Box::new(rhs))
+        })
+        .parse(pairs)
+}
+
+fn parse_parameter(pair: Pair<Rule>) -> Parameter {
+    assert!(pair.as_rule() == Rule::parameter);
+    let mut pair = pair.into_inner();
+    let name: String = pair.next().unwrap().as_str().to_string();
+    let typ = Type::from(pair.next().unwrap().as_str());
+    Parameter { name, typ }
+}
+
+fn parse_definition(pair: Pair<Rule>) -> Ast {
+    match pair.as_rule() {
+        Rule::func_def => {
+            let mut pairs = pair.into_inner();
+            let name = pairs.next().unwrap().as_str().to_string();
+            let parameters: Vec<Parameter> = pairs
+                .next()
+                .unwrap()
+                .into_inner()
+                .map(parse_parameter)
+                .collect();
+            let pair = pairs.next().unwrap();
+            // Before the block there is an optional return type
+            let (return_type, pair) = match pair.as_rule() {
+                Rule::ident => (Some(Type::from(pair.as_str())), pairs.next().unwrap()),
+                Rule::block => (None, pair),
+                _ => unreachable!(
+                    "Unexpected rule '{:?}' in function definition, expected return type or block",
+                    pair.as_rule()
+                ),
+            };
+            let body = parse_block(pair);
+            let body = Box::new(body);
+            Ast::FunctionDefinition(FunctionDefinition {
+                name,
+                parameters,
+                return_type,
+                body,
+            })
+        }
+        _ => panic!("unexpected node for definition: {:?}", pair.as_rule()),
+    }
+}