initial commit

This commit is contained in:
Romain Paquet 2023-06-12 20:19:19 +02:00
commit 43df8c4b0a
9 changed files with 596 additions and 0 deletions

14
Cargo.toml Normal file
View file

@ -0,0 +1,14 @@
[package]
name = "kronec"
version = "0.1.0"
edition = "2021"
[dependencies]
clap = { version = "4.3.0", features = ["derive"] }
cranelift = "0.96.3"
cranelift-jit = "0.96.3"
cranelift-module = "0.96.3"
cranelift-native = "0.96.3"
lazy_static = "1.4.0"
pest = "2.6.0"
pest_derive = "2.6.0"

19
src/ast/expr.rs Normal file
View file

@ -0,0 +1,19 @@
use crate::ast::*;
/// An expression node of the AST.
#[derive(Debug, PartialEq)]
pub enum Expr {
    /// A binary operation: left operand, operator, right operand.
    BinaryExpression(Box<Expr>, BinaryOperator, Box<Expr>),
    /// A reference to a name.
    Identifier(Identifier),
    /// An integer literal, stored as an `i64`.
    IntegerLiteral(i64),
    /// A floating-point literal, stored as an `f64`.
    FloatLiteral(f64),
    /// A string literal.
    StringLiteral(String),
    /// A call expression; boxed because `Call` itself contains `Expr` values.
    Call(Box<Call>),
}
/// The arithmetic operators that can appear in a binary expression.
///
/// The enum carries no data, so it is `Copy` and can be used as a hash key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinaryOperator {
    /// Addition.
    Add,
    /// Subtraction.
    Sub,
    /// Multiplication.
    Mul,
    /// Division.
    Div,
}

71
src/ast/mod.rs Normal file
View file

@ -0,0 +1,71 @@
pub mod expr;
pub mod typ;
pub use crate::ast::expr::{BinaryOperator, Expr};
pub use crate::ast::typ::*;
// XXX: Is this enum actually useful? Is 3:30 AM btw
/// Top-level wrapper over the different kinds of AST nodes.
#[derive(Debug, PartialEq)]
pub enum Ast {
    /// A function definition.
    FunctionDefinition(FunctionDefinition),
    /// A standalone expression.
    Expr(Expr),
    /// A module: a list of AST nodes.
    Module(Vec<Ast>),
    /// A block of statements.
    Block(Block),
    /// A single statement.
    Statement(Statement),
}
/// A function definition: name, parameters, optional return type and body.
#[derive(Debug, PartialEq)]
pub struct FunctionDefinition {
    /// Name of the function.
    pub name: Identifier,
    /// Parameters, in declaration order.
    pub parameters: Vec<Parameter>,
    /// Return type, or `None` when it is not specified.
    pub return_type: Option<Type>,
    /// Body of the function.
    pub body: Box<Block>,
}
/// A sequence of statements with an optional value expression.
#[derive(Debug, PartialEq)]
pub struct Block {
    /// Statements of the block, in source order.
    pub statements: Vec<Statement>,
    /// Expression giving the block its value, if there is one.
    pub value: Option<Expr>,
}
/// A statement inside a block.
#[derive(Debug, PartialEq)]
pub enum Statement {
    /// Assignment of an expression to an identifier.
    AssignStatement(Identifier, Expr),
    /// A return, with an optional returned expression.
    ReturnStatement(Option<Expr>),
    /// A call used as a statement.
    CallStatement(Call),
}
/// A call: the expression being called and its arguments.
#[derive(Debug, PartialEq)]
pub struct Call {
    /// Expression that is being called.
    pub callee: Expr,
    /// Argument expressions, in call order.
    pub args: Vec<Expr>,
}
/// Name of a variable, function, parameter or custom type, stored as an owned string.
pub type Identifier = String;
/// A function parameter: a name together with its type.
#[derive(Debug, PartialEq)]
pub struct Parameter {
    /// Name of the parameter.
    pub name: Identifier,
    /// Type of the parameter.
    pub typ: Type,
}
impl Ast {
    /// Type checks the AST and fills in the return type of function definitions.
    ///
    /// For a module, every directly contained function definition is checked and
    /// other nodes are skipped. For a function definition, the type resolved by
    /// [`Typ::typ`] is stored in its `return_type`.
    ///
    /// # Errors
    ///
    /// Returns the first [`TypeError`] raised while resolving a function's type.
    ///
    /// # Panics
    ///
    /// Panics when called on a variant other than `Module` or `FunctionDefinition`.
    pub fn check_return_types(&mut self) -> Result<(), TypeError> {
        match self {
            Ast::Module(defs) => defs
                .iter_mut()
                .filter(|def| matches!(**def, Ast::FunctionDefinition(_)))
                .try_for_each(|def| def.check_return_types()),
            Ast::FunctionDefinition(func) => {
                let resolved = func.typ(&mut TypeContext::default())?;
                func.return_type = Some(resolved);
                Ok(())
            }
            _ => unreachable!(),
        }
    }
}

158
src/ast/typ.rs Normal file
View file

@ -0,0 +1,158 @@
use std::collections::HashMap;
use crate::ast::*;
/// The types known to the type checker.
#[derive(Debug, PartialEq, Clone)]
pub enum Type {
    /// Integer type.
    Int,
    /// Floating-point type.
    Float,
    /// Unit type, used where no value or return type is present.
    Unit,
    /// String type.
    Str,
    /// Any other type, referred to by name.
    Custom(Identifier),
}
impl From<&str> for Type {
    /// Maps a type name to a [`Type`].
    ///
    /// `"int"` and `"float"` map to the built-in variants; every other name
    /// becomes a [`Type::Custom`] carrying that name.
    fn from(value: &str) -> Self {
        if value == "int" {
            return Type::Int;
        }
        if value == "float" {
            return Type::Float;
        }
        Type::Custom(Identifier::from(value))
    }
}
/// Errors reported by the type checker.
#[derive(Debug)]
pub enum TypeError {
    /// A binary operator was applied to operand types it does not accept.
    InvalidBinaryOperator {
        /// The offending operator.
        operator: BinaryOperator,
        /// Type of the left-hand operand.
        lht: Type,
        /// Type of the right-hand operand.
        rht: Type,
    },
    /// The type of a function's body differs from the function's return type.
    BlockTypeDoesNotMatchFunctionType {
        /// Name of the function being checked.
        function_name: String,
        /// Return type of the function.
        function_type: Type,
        /// Type resolved for the body block.
        block_type: Type,
    },
    /// The type of a `return` statement differs from the function's return type.
    ReturnTypeDoesNotMatchFunctionType {
        /// Name of the function being checked.
        function_name: String,
        /// Return type of the function.
        function_type: Type,
        /// Type resolved for the return statement.
        ret_type: Type,
    },
    /// An identifier was used that is not present in the type context.
    UnknownIdentifier {
        /// The unknown name.
        identifier: String,
    },
}
/// State carried around while resolving types.
#[derive(Default)]
pub struct TypeContext {
    /// Name of the function currently being checked, if any.
    pub function: Option<Identifier>,
    /// Types of the variables that are currently known, keyed by name.
    pub variables: HashMap<Identifier, Type>,
}
/// Implemented by AST nodes whose type can be deduced.
pub trait Typ {
    /// Attempts to resolve the type of this node within `ctx`.
    ///
    /// Returns a [`TypeError`] when the type cannot be resolved.
    fn typ(&self, ctx: &mut TypeContext) -> Result<Type, TypeError>;
}
impl Typ for FunctionDefinition {
    /// Resolves the return type of the function and checks that its body agrees.
    ///
    /// The incoming context is intentionally ignored: the function is checked in
    /// a fresh [`TypeContext`] that knows only the function's name and its
    /// parameters. A missing return type is treated as [`Type::Unit`].
    ///
    /// # Errors
    ///
    /// * [`TypeError::BlockTypeDoesNotMatchFunctionType`] when the body's type
    ///   differs from the return type.
    /// * [`TypeError::ReturnTypeDoesNotMatchFunctionType`] when a `return`
    ///   statement of the body has a different type than the return type.
    /// * Any error raised while resolving the body or a returned expression.
    fn typ(&self, _ctx: &mut TypeContext) -> Result<Type, TypeError> {
        let mut ctx = TypeContext {
            function: Some(self.name.clone()),
            ..Default::default()
        };
        ctx.variables.extend(
            self.parameters
                .iter()
                .map(|param| (param.name.clone(), param.typ.clone())),
        );

        let body_type = self.body.typ(&mut ctx)?;

        // If the return type is not specified, it is unit.
        let declared = self.return_type.clone().unwrap_or(Type::Unit);

        // Check coherence with the body's type.
        if declared != body_type {
            return Err(TypeError::BlockTypeDoesNotMatchFunctionType {
                function_name: self.name.clone(),
                function_type: declared,
                block_type: body_type,
            });
        }

        // Check coherence with return statements.
        for statement in &self.body.statements {
            if let Statement::ReturnStatement(value) = statement {
                let ret_type = match value {
                    Some(expr) => expr.typ(&mut ctx)?,
                    None => Type::Unit,
                };
                if ret_type != declared {
                    return Err(TypeError::ReturnTypeDoesNotMatchFunctionType {
                        function_name: self.name.clone(),
                        function_type: declared,
                        ret_type,
                    });
                }
            }
        }

        Ok(declared)
    }
}
impl Typ for Block {
    /// Resolves the type of the block.
    ///
    /// The type is that of the block's value expression, or [`Type::Unit`] when
    /// the block has none. The block's statements are not inspected here.
    fn typ(&self, ctx: &mut TypeContext) -> Result<Type, TypeError> {
        match &self.value {
            Some(expr) => expr.typ(ctx),
            None => Ok(Type::Unit),
        }
    }
}
impl Typ for Expr {
    /// Resolves the type of the expression.
    ///
    /// Literals map to their built-in type, identifiers are looked up in
    /// `ctx.variables`, and arithmetic yields `Int` for two integers and `Float`
    /// as soon as one operand is a float.
    ///
    /// # Errors
    ///
    /// * [`TypeError::UnknownIdentifier`] when an identifier is not in the context.
    /// * [`TypeError::InvalidBinaryOperator`] when an operand is neither `Int`
    ///   nor `Float`.
    ///
    /// # Panics
    ///
    /// Call expressions are not supported yet and hit a `todo!`.
    fn typ(&self, ctx: &mut TypeContext) -> Result<Type, TypeError> {
        match self {
            Expr::IntegerLiteral(_) => Ok(Type::Int),
            Expr::FloatLiteral(_) => Ok(Type::Float),
            Expr::StringLiteral(_) => Ok(Type::Str),
            Expr::Identifier(identifier) => {
                ctx.variables
                    .get(identifier)
                    .cloned()
                    .ok_or_else(|| TypeError::UnknownIdentifier {
                        identifier: identifier.clone(),
                    })
            }
            Expr::BinaryExpression(lhs, op, rhs) => match op {
                // Listed explicitly so that adding an operator forces a decision here.
                BinaryOperator::Add
                | BinaryOperator::Sub
                | BinaryOperator::Mul
                | BinaryOperator::Div => {
                    let left_type = lhs.typ(ctx)?;
                    let right_type = rhs.typ(ctx)?;
                    let resolved = match (&left_type, &right_type) {
                        (Type::Int, Type::Int) => Some(Type::Int),
                        (Type::Int | Type::Float, Type::Int | Type::Float) => Some(Type::Float),
                        _ => None,
                    };
                    resolved.ok_or_else(|| TypeError::InvalidBinaryOperator {
                        operator: op.clone(),
                        lht: left_type,
                        rht: right_type,
                    })
                }
            },
            Expr::Call(_) => todo!("resolve call type using ctx"),
        }
    }
}

1
src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod ast;

62
src/main.rs Normal file
View file

@ -0,0 +1,62 @@
mod ast;
mod parsing;
use clap::{Parser, Subcommand};
use std::fs;
/// Experimental compiler for krone
// NOTE(review): clap's derive can use `///` doc comments as help text, so the
// line above is kept verbatim.
#[derive(Parser, Debug)]
#[command(author = "Romain P. <rpqt@rpqt.fr>")]
#[command(version, about, long_about = None)]
struct Cli {
    // The subcommand selected on the command line.
    #[command(subcommand)]
    command: Commands,
}
// Subcommands of the command-line interface.
// NOTE(review): clap's derive can use `///` doc comments as help text, so the
// existing ones are kept verbatim and new notes use `//`.
#[derive(Subcommand, Debug)]
enum Commands {
    // Parses a source file, optionally completing and dumping its AST.
    Parse {
        /// Path to the source file
        file: String,

        /// Dump the AST to stdout
        #[arg(long)]
        dump_ast: bool,

        /// Add missing return types in the AST
        #[arg(long)]
        complete_ast: bool,
    },
}
/// Entry point of the command-line interface.
///
/// For the `parse` subcommand: reads the source file, parses it, optionally
/// checks and completes return types, and optionally dumps the AST to stdout.
/// Every failure is reported on stderr and ends the process with exit status 1,
/// so that callers and scripts can detect it.
fn main() {
    let cli = Cli::parse();
    match &cli.command {
        Commands::Parse {
            file,
            dump_ast,
            complete_ast,
        } => {
            let source = match fs::read_to_string(file) {
                Ok(source) => source,
                Err(e) => {
                    eprintln!("could not read the source file: {}", e);
                    std::process::exit(1);
                }
            };

            let mut ast = match parsing::parse(&source) {
                Ok(ast) => ast,
                Err(e) => {
                    eprintln!("Parsing error: {:#?}", e);
                    std::process::exit(1);
                }
            };

            if *complete_ast {
                if let Err(e) = ast.check_return_types() {
                    eprintln!("{:#?}", e);
                    // A type error is a failure: do not exit with status 0.
                    std::process::exit(1);
                }
            }

            if *dump_ast {
                println!("{:#?}", &ast);
                return;
            }

            println!("Parsing OK");
        }
    }
}

52
src/parsing/grammar.pest Normal file
View file

@ -0,0 +1,52 @@
// This file is just a little test of pest.rs
// Entry point: zero or more definitions spanning the whole input.
source_file = { SOI ~ definition* ~ EOI }

// Statements
statement = { assign_statement | return_statement | call_statement }
assign_statement = { "set" ~ ident ~ "=" ~ expr ~ ";" }
return_statement = { "return" ~ expr? ~ ";" }
call_statement = { call ~ ";" }

// Function calls
call = { ident ~ "(" ~ args ~ ")" }
// Comma-separated arguments; a trailing comma is accepted.
args = { (expr ~ ",")* ~ expr? }

// Definitions
definition = { func_def }
// The optional `typ` between the parameter list and the block is the return type.
func_def = { "fn" ~ ident ~ "(" ~ parameters ~ ")" ~ typ? ~ block }
// Comma-separated parameters; a trailing comma is accepted.
parameters = {
    (parameter ~ ",")* ~ (parameter)?
}
parameter = { ident ~ ":" ~ typ }
// Statements optionally followed by a trailing expression.
block = { "{" ~ statement* ~ expr? ~ "}" }

// Operators
infix = _{ add | subtract | multiply | divide }
add = { "+" }
subtract = { "-" }
multiply = { "*" }
divide = { "/" }
// NOTE(review): `!` is accepted by the grammar, but the Pratt parser in
// pest.rs only registers infix operators -- confirm prefix expressions are handled.
prefix = _{ not }
not = { "!" }

// Expressions: a flat sequence of atoms and operators.
expr = { prefix? ~ atom ~ (infix ~ prefix? ~ atom)* }
atom = _{ call | ident | literal | "(" ~ expr ~ ")" }
ident = @{ (ASCII_ALPHA | "_")+ }
// Silent rule: a type shows up as a plain `ident` pair.
typ = _{ ident }

// Literals
literal = _{ float_literal | integer_literal | string_literal }
string_literal = ${ "\"" ~ string_content ~ "\"" }
string_content = @{ char* }
char = {
    !("\"" | "\\") ~ ANY
    | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
}
integer_literal = @{ ASCII_DIGIT+ }
float_literal = @{ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ "." ~ ASCII_DIGIT* }

// Implicit whitespace between tokens; "\r" is included so that files with
// CRLF line endings parse as well.
WHITESPACE = _{ " " | "\n" | "\r" | "\t" }

38
src/parsing/mod.rs Normal file
View file

@ -0,0 +1,38 @@
pub mod pest;
pub use self::pest::parse;
// Only compiled for test builds.
#[cfg(test)]
mod tests {
    /// Parsing a two-parameter function whose body is a single addition yields
    /// a module containing the matching function definition.
    #[test]
    fn test_addition_function() {
        use crate::ast::*;
        use crate::parsing::pest::parse;

        let source = "fn add(a: int, b: int) int { a + b }";

        let ast = Ast::FunctionDefinition(FunctionDefinition {
            name: Identifier::from("add"),
            parameters: vec![
                Parameter {
                    name: Identifier::from("a"),
                    typ: Type::Int,
                },
                Parameter {
                    name: Identifier::from("b"),
                    typ: Type::Int,
                },
            ],
            return_type: Some(Type::Int),
            body: Box::new(Block {
                statements: vec![],
                value: Some(Expr::BinaryExpression(
                    Box::new(Expr::Identifier(Identifier::from("a"))),
                    BinaryOperator::Add,
                    Box::new(Expr::Identifier(Identifier::from("b"))),
                )),
            }),
        });

        assert_eq!(parse(source).unwrap(), Ast::Module(vec![ast]));
    }
}

181
src/parsing/pest.rs Normal file
View file

@ -0,0 +1,181 @@
use lazy_static;
use pest::error::Error;
use pest::iterators::Pair;
use pest::pratt_parser::PrattParser;
use pest::Parser;
use crate::ast::*;
/// Parser generated by `pest_derive` from the grammar file named in the
/// `grammar` attribute below.
#[derive(pest_derive::Parser)]
#[grammar = "parsing/grammar.pest"]
struct KrParser;
lazy_static::lazy_static! {
    /// Operator table used to parse expressions; built once on first use.
    static ref PRATT_PARSER: PrattParser<Rule> = {
        use pest::pratt_parser::{Assoc, Op};

        // Addition and subtraction share the lowest precedence level.
        let additive = Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::subtract, Assoc::Left);
        // Multiplication and division share the next, higher level.
        let multiplicative =
            Op::infix(Rule::multiply, Assoc::Left) | Op::infix(Rule::divide, Assoc::Left);

        // Levels are registered from lowest to highest precedence.
        PrattParser::new().op(additive).op(multiplicative)
    };
}
/// Parses `source` into an [`Ast::Module`] holding every definition of the file.
///
/// # Errors
///
/// Returns the pest error when `source` does not match the grammar.
///
/// # Panics
///
/// Panics when the parse tree contains a rule this function does not expect
/// inside `source_file`.
pub fn parse(source: &str) -> Result<Ast, Error<Rule>> {
    let mut definitions: Vec<Ast> = Vec::new();

    for top_level in KrParser::parse(Rule::source_file, source)? {
        if top_level.as_rule() != Rule::source_file {
            eprintln!("unexpected top-level rule {:?}", top_level.as_rule());
            continue;
        }

        for node in top_level.into_inner() {
            match node.as_rule() {
                Rule::definition => {
                    // A `definition` wraps exactly one concrete definition rule.
                    let inner = node.into_inner().next().unwrap();
                    definitions.push(parse_definition(inner));
                }
                Rule::EOI => {}
                other => panic!("unexpected rule in source_file: {:?}", other),
            }
        }
    }

    Ok(Ast::Module(definitions))
}
/// Builds a [`Block`] from a `block` pair.
///
/// Statements are collected in order; an `expr` child becomes the block's value.
///
/// # Panics
///
/// Panics when a child is neither a `statement` nor an `expr`.
fn parse_block(pair: Pair<Rule>) -> Block {
    let mut block = Block {
        statements: Vec::new(),
        value: None,
    };

    for child in pair.into_inner() {
        match child.as_rule() {
            Rule::statement => block.statements.push(parse_statement(child)),
            Rule::expr => block.value = Some(parse_expression(child)),
            rule => panic!("unexpected rule {:?} in block", rule),
        }
    }

    block
}
fn parse_statement(pair: Pair<Rule>) -> Statement {
let pair = pair.into_inner().next().unwrap();
match pair.as_rule() {
Rule::assign_statement => {
let mut pairs = pair.into_inner();
let identifier = pairs.next().unwrap().as_str().to_string();
let expr = parse_expression(pairs.next().unwrap());
Statement::AssignStatement(identifier, expr)
}
Rule::return_statement => {
let expr = if let Some(pair) = pair.into_inner().next() {
Some(parse_expression(pair))
} else {
None
};
Statement::ReturnStatement(expr)
}
Rule::call_statement => {
let call = parse_call(pair.into_inner().next().unwrap());
Statement::CallStatement(call)
}
_ => unreachable!("unexpected rule '{:?}' in parse_statement", pair.as_rule()),
}
}
/// Builds a [`Call`] from a `call` pair: an identifier followed by its arguments.
///
/// # Panics
///
/// Panics when the callee or the argument list is missing from the pair.
fn parse_call(pair: Pair<Rule>) -> Call {
    let mut parts = pair.into_inner();

    // TODO: support calls on more than identifiers (needs grammar change)
    let callee_name = parts.next().unwrap().as_str().to_string();
    let arg_list = parts.next().unwrap();

    Call {
        callee: Expr::Identifier(callee_name),
        args: arg_list.into_inner().map(parse_expression).collect(),
    }
}
/// Builds an [`Expr`] from an `expr` pair using the shared Pratt parser.
///
/// Primaries (literals, identifiers, calls, parenthesised expressions) are
/// mapped to leaf expressions and infix operators are folded into
/// [`Expr::BinaryExpression`] nodes.
///
/// # Panics
///
/// Panics on a rule that is not handled as a primary or an infix operator, and
/// when a numeric literal cannot be parsed into its Rust type.
fn parse_expression(pair: Pair<Rule>) -> Expr {
    PRATT_PARSER
        .map_primary(|primary| match primary.as_rule() {
            Rule::ident => Expr::Identifier(primary.as_str().to_string()),
            Rule::integer_literal => Expr::IntegerLiteral(primary.as_str().parse().unwrap()),
            Rule::float_literal => Expr::FloatLiteral(primary.as_str().parse().unwrap()),
            Rule::string_literal => {
                // The literal's only child is its content, without the quotes.
                let content = primary.into_inner().next().unwrap();
                Expr::StringLiteral(content.as_str().to_string())
            }
            Rule::call => Expr::Call(Box::new(parse_call(primary))),
            // Parenthesised sub-expression.
            Rule::expr => parse_expression(primary),
            _ => unreachable!(
                "Unexpected rule '{:?}' in primary expression",
                primary.as_rule()
            ),
        })
        .map_infix(|lhs, op, rhs| {
            let operator = match op.as_rule() {
                Rule::add => BinaryOperator::Add,
                Rule::subtract => BinaryOperator::Sub,
                Rule::multiply => BinaryOperator::Mul,
                Rule::divide => BinaryOperator::Div,
                _ => unreachable!(),
            };
            Expr::BinaryExpression(Box::new(lhs), operator, Box::new(rhs))
        })
        .parse(pair.into_inner())
}
/// Builds a [`Parameter`] from a `parameter` pair: a name followed by a type.
///
/// # Panics
///
/// Panics when `pair` is not a `parameter` or when one of its parts is missing.
fn parse_parameter(pair: Pair<Rule>) -> Parameter {
    assert!(pair.as_rule() == Rule::parameter);

    let mut parts = pair.into_inner();
    let name_pair = parts.next().unwrap();
    let type_pair = parts.next().unwrap();

    Parameter {
        name: name_pair.as_str().to_string(),
        typ: Type::from(type_pair.as_str()),
    }
}
/// Builds an [`Ast::FunctionDefinition`] from a `func_def` pair.
///
/// The pair holds the function name, the parameter list, an optional return
/// type (which appears as an `ident` pair) and the body block.
///
/// # Panics
///
/// Panics when `pair` is not a `func_def`, or when its children do not have
/// the expected shape.
fn parse_definition(pair: Pair<Rule>) -> Ast {
    if pair.as_rule() != Rule::func_def {
        panic!("unexpected node for definition: {:?}", pair.as_rule());
    }

    let mut parts = pair.into_inner();
    let name = parts.next().unwrap().as_str().to_string();
    let parameters: Vec<Parameter> = parts
        .next()
        .unwrap()
        .into_inner()
        .map(parse_parameter)
        .collect();

    // Before the block there is an optional return type.
    let next = parts.next().unwrap();
    let (return_type, block_pair) = match next.as_rule() {
        Rule::ident => (Some(Type::from(next.as_str())), parts.next().unwrap()),
        Rule::block => (None, next),
        rule => unreachable!(
            "Unexpected rule '{:?}' in function definition, expected return type or block",
            rule
        ),
    };

    Ast::FunctionDefinition(FunctionDefinition {
        name,
        parameters,
        return_type,
        body: Box::new(parse_block(block_pair)),
    })
}