diff --git a/Cargo.toml b/Cargo.toml index 438ac82..a0dfdff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,11 @@ name = "lilac" version = "0.0.1" edition = "2021" +[features] +default = ["pest"] +pest = ["dep:pest", "dep:pest_derive"] +tree-sitter = ["dep:tree-sitter", "dep:tree-sitter-lila"] + [dependencies] clap = { version = "4.5.7", features = ["derive"] } cranelift = "0.109.0" @@ -10,10 +15,19 @@ cranelift-jit = "0.109.0" cranelift-module = "0.109.0" cranelift-native = "0.109.0" lazy_static = "1.4.0" -pest = "2.7.4" -pest_derive = "2.7.4" +pest = { version = "2.7.4", optional = true } +pest_derive = { version = "2.7.4", optional = true } +tree-sitter = { version = "0.22", optional = true } ariadne = "0.4.1" anyhow = "1.0.86" +[dependencies.tree-sitter-lila] +version = "0.0.1" +optional = true +path = "./tree-sitter-lila" + [dev-dependencies] pretty_assertions = "1.4.0" + +[build-dependencies] +cc = "*" diff --git a/src/parsing/backend/mod.rs b/src/parsing/backend/mod.rs index b63ce4a..551f911 100644 --- a/src/parsing/backend/mod.rs +++ b/src/parsing/backend/mod.rs @@ -1 +1,5 @@ +#[cfg(feature = "pest")] pub mod pest; + +#[cfg(feature = "tree-sitter")] +pub mod tree_sitter; diff --git a/src/parsing/backend/pest/mod.rs b/src/parsing/backend/pest/mod.rs index 82ba471..3dd58fa 100644 --- a/src/parsing/backend/pest/mod.rs +++ b/src/parsing/backend/pest/mod.rs @@ -1,6 +1,3 @@ -use std::fs; -use std::path::Path; - use expr::BinaryExpression; use pest::iterators::Pair; use pest::pratt_parser::PrattParser; @@ -38,14 +35,6 @@ pub struct Parser { } impl crate::parsing::Parser for Parser { - fn parse_file(&mut self, path: &Path, id: SourceId) -> anyhow::Result { - let source = fs::read_to_string(path)?; - let module_path = ModulePath::from(path); - let mut module = self.parse_as_module(&source, module_path, id)?; - module.file = Some(path.to_owned()); - Ok(module) - } - fn parse_as_module( &mut self, source: &str, diff --git a/src/parsing/backend/tree_sitter/lib.rs b/src/parsing/backend/tree_sitter/lib.rs deleted file mode 100644 index 775fdf2..0000000 --- a/src/parsing/backend/tree_sitter/lib.rs +++ /dev/null @@ -1,222 +0,0 @@ -use tree_sitter::{self, Language, Parser, TreeCursor}; - -enum Ast { - FuncDef(FuncDef), - Expr(Expr), - Module(Vec), - Block(Vec, Option), - Statement(Statement), -} - -enum BinaryOperator { - Add, - Sub, - Mul, - Div, -} - -enum Expr { - BinaryExpression(Box, BinaryOperator, Box), -} - -enum Statement { - AssignStatement(Identifier, Expr), -} - -type Identifier = String; -type Type = String; - -struct Parameter { - name: Identifier, - typ: Type, -} - -struct FuncDef { - name: Identifier, - parameters: Vec, - return_type: Option, - body: Box, -} - -#[derive(Debug)] -struct AstError { - message: String, -} - -impl AstError { - fn new(message: &str) -> Self { - AstError { - message: message.into(), - } - } -} - -extern "C" { - fn tree_sitter_krone() -> Language; -} - -struct TreeCursorChildrenIter<'a, A: AsRef<[u8]>> { - source: A, - cursor: &'a mut TreeCursor<'a>, - on_child: bool, -} - -impl<'a, A: AsRef<[u8]>> Iterator for TreeCursorChildrenIter<'a, A> { - type Item = Result; - - fn next(&mut self) -> Option { - if self.on_child { - if self.cursor.goto_next_sibling() { - Some(parse_from_cursor(&self.source, self.cursor)) - } else { - self.cursor.goto_parent(); - None - } - } else { - if self.cursor.goto_first_child() { - self.on_child = true; - Some(parse_from_cursor(&self.source, self.cursor)) - } else { - None - } - } - } -} - -fn iter_children<'a, A: AsRef<[u8]>>( - source: A, - cursor: &'a mut TreeCursor<'a>, -) -> TreeCursorChildrenIter<'a, A> { - TreeCursorChildrenIter { - source, - cursor, - on_child: false, - } -} - -fn parse_from_cursor<'a>( - source: impl AsRef<[u8]>, - cursor: &'a mut TreeCursor<'a>, -) -> Result { - match cursor.node().kind() { - "block" => { - let mut statements = Vec::new(); - let mut value = None; - - for child in iter_children(source, cursor) { - match child.unwrap() { - Ast::Statement(statement) => { - if value.is_none() { - statements.push(statement); - } else { - return Err(AstError::new( - "cannot have a statement after an expression in a block", - )); - // perhaps there is a missing semicolon ; - } - } - Ast::Expr(expr) => value = Some(expr), - _ => return Err(AstError::new("invalid node type")), - }; - } - - let block = Ast::Block(statements, value); - Ok(block) - } - - "function_definition" => { - // 1: name - assert!(cursor.goto_first_child()); - assert!(cursor.field_name() == Some("name")); - let name: String = cursor - .node() - .utf8_text(source.as_ref()) - .expect("utf8 error") - .into(); - - // 2: parameters - assert!(cursor.goto_next_sibling()); - assert!(cursor.field_name() == Some("parameters")); - let mut parameters = Vec::new(); - - if cursor.goto_first_child() { - loop { - let param = cursor.node(); - - assert!(cursor.goto_first_child()); - let name = cursor - .node() - .utf8_text(source.as_ref()) - .expect("utf8 error") - .into(); - - assert!(cursor.goto_next_sibling()); - let typ = cursor - .node() - .utf8_text(source.as_ref()) - .expect("utf8 error") - .into(); - - cursor.goto_parent(); - - parameters.push(Parameter { name, typ }); - - if !cursor.goto_next_sibling() { - break; - } - } - - cursor.goto_parent(); - } - - // 3: return type - assert!(cursor.goto_next_sibling()); - assert!(cursor.field_name() == Some("return_type")); - let return_type = Some( - cursor - .node() - .utf8_text(source.as_ref()) - .expect("utf8 error") - .into(), - ); - - // 4: body - assert!(cursor.goto_next_sibling()); - assert!(cursor.field_name() == Some("body")); - let body = parse_from_cursor(source, cursor).unwrap(); - let body = Box::new(body); - - Ok(Ast::FuncDef(FuncDef { - name, - parameters, - return_type, - body, - })) - } - - _ => panic!("unexpected node kind: {}", cursor.node().kind()), - } -} - -fn parse_with_tree_sitter(source: impl AsRef<[u8]>) -> Result { - let mut parser = Parser::new(); - let language = unsafe { tree_sitter_krone() }; - parser.set_language(language).unwrap(); - - let tree = parser.parse(&source, None).unwrap(); - - let mut cursor = tree.walk(); - let node = cursor.node(); - assert!(node.kind() == "source_file"); - let mut top_level_nodes = Vec::new(); - - for node in iter_children(source, &mut cursor) { - let node = node.unwrap(); - match node { - Ast::FuncDef(_) => top_level_nodes.push(node), - _ => panic!("unexpected top-level node type"), - }; - } - - Ok(Ast::Module(top_level_nodes)) -} diff --git a/src/parsing/backend/tree_sitter/mod.rs b/src/parsing/backend/tree_sitter/mod.rs new file mode 100644 index 0000000..2acf031 --- /dev/null +++ b/src/parsing/backend/tree_sitter/mod.rs @@ -0,0 +1,203 @@ +use anyhow::{anyhow, bail, ensure}; +use tree_sitter::{Node, TreeCursor}; + +use crate::ast::*; + +pub struct Parser(tree_sitter::Parser); + +impl Default for Parser { + fn default() -> Self { + let mut parser = tree_sitter::Parser::new(); + let language = tree_sitter_lila::language(); + parser.set_language(&language).unwrap(); + + Self(parser) + } +} + +struct SourceParsingContext<'a> { + source: &'a str, + source_id: SourceId, + cursor: &'a mut TreeCursor<'a>, +} + +impl<'a> SourceParsingContext<'a> { + fn span(&self) -> Span { + Span { + source: self.source_id, + start: self.cursor.node().start_byte(), + end: self.cursor.node().end_byte(), + } + } + + fn iter_children(&'a mut self) -> NodeIterator<'a> { + NodeIterator { + is_child: false, + cursor: self.cursor, + } + } +} + +struct NodeIterator<'a> { + is_child: bool, + cursor: &'a mut TreeCursor<'a>, +} + +impl<'a> Iterator for NodeIterator<'a> { + type Item = Node<'a>; + + fn next(&mut self) -> Option { + if self.is_child { + match self.cursor.goto_next_sibling() { + true => Some(self.cursor.node()), + false => None, + } + } else { + match self.cursor.goto_first_child() { + true => Some(self.cursor.node()), + false => None, + } + } + } +} + +impl Parser { + fn parse_param<'a>(&self, ctx: &mut SourceParsingContext) -> anyhow::Result { + ensure!(ctx.cursor.goto_first_child()); + let name = ctx + .cursor + .node() + .utf8_text(ctx.source.as_ref()) + .expect("utf8 error") + .into(); + + ensure!(ctx.cursor.goto_next_sibling()); + let typ = ctx + .cursor + .node() + .utf8_text(ctx.source.as_ref()) + .expect("utf8 error") + .into(); + + ctx.cursor.goto_parent(); + + Ok(Parameter { name, typ }) + } + + fn parse_function<'a>( + &self, + ctx: &mut SourceParsingContext, + ) -> anyhow::Result { + let span = ctx.span(); + + // 1: name + assert!(ctx.cursor.goto_first_child()); + assert!(ctx.cursor.field_name() == Some("name")); + let name: String = ctx + .cursor + .node() + .utf8_text(ctx.source.as_ref()) + .expect("utf8 error") + .into(); + + // 2: parameters + assert!(ctx.cursor.goto_next_sibling()); + assert!(ctx.cursor.field_name() == Some("parameters")); + let mut parameters = Vec::new(); + + if ctx.cursor.goto_first_child() { + loop { + let param = self.parse_param(ctx)?; + + parameters.push(param); + + if !ctx.cursor.goto_next_sibling() { + break; + } + } + + ctx.cursor.goto_parent(); + } + + // 3: return type + assert!(ctx.cursor.goto_next_sibling()); + assert!(ctx.cursor.field_name() == Some("return_type")); + let return_type = Some(ctx.cursor.node().utf8_text(ctx.source.as_ref())?.into()); + let return_type_span = Some(ctx.span()); + + // 4: body + assert!(ctx.cursor.goto_next_sibling()); + assert!(ctx.cursor.field_name() == Some("body")); + let body = Box::new(self.parse_block(ctx)?); + + Ok(FunctionDefinition { + name, + parameters, + return_type, + return_type_span, + body, + span, + }) + } + + fn parse_block(&self, ctx: &mut SourceParsingContext) -> anyhow::Result { + let mut statements = Vec::new(); + let mut value = None; + + for child in ctx.iter_children() { + match child.kind() { + "statement" => { + ensure!( + value.is_none(), + "cannot have a statement after an expression in a block" + ); + let statement = self.parse_statement(ctx)?; + statements.push(statement); + } + "expr" => value = Some(self.parse_expr(ctx)), + }; + } + + let block = Block { statements, value }; + Ok(block) + } + + fn parse_statement(&self, ctx: &mut SourceParsingContext) -> anyhow::Result { + assert_eq!(ctx.cursor.node().kind(), "statement"); + + ctx.cursor.Some(statement) + } + + fn parse_expr(&self, ctx: &mut SourceParsingContext) -> anyhow::Result { + assert_eq!(ctx.cursor.node().kind(), "expr"); + + Some() + } +} + +impl crate::parsing::Parser for Parser { + fn parse_as_module( + &mut self, + source: &str, + path: ModulePath, + id: SourceId, + ) -> anyhow::Result { + let tree = self.0.parse(&source, None).unwrap(); + + let mut cursor = tree.walk(); + { + let node = cursor.node(); + assert!(node.kind() == "source_file"); + } + + let ctx = SourceParsingContext { + source, + source_id: id, + cursor: &mut cursor, + }; + + let mut module = Module::new(path); + + Ok(module) + } +} diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 8cf64e4..5984bfd 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -4,7 +4,13 @@ mod tests; use crate::ast::{Module, ModulePath, SourceId}; pub trait Parser: Default { - fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result; + fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result { + let source = std::fs::read_to_string(path)?; + let module_path = ModulePath::from(path); + let mut module = self.parse_as_module(&source, module_path, id)?; + module.file = Some(path.to_owned()); + Ok(module) + } fn parse_as_module( &mut self, @@ -14,5 +20,8 @@ pub trait Parser: Default { ) -> anyhow::Result; } -pub use self::backend::pest::Parser as PestParser; -pub use PestParser as DefaultParser; +#[cfg(feature = "pest")] +pub use self::backend::pest::Parser as DefaultParser; + +#[cfg(feature = "tree-sitter")] +pub use self::backend::tree_sitter::Parser as DefaultParser; diff --git a/tree-sitter-lila b/tree-sitter-lila index ef98449..a44288e 160000 --- a/tree-sitter-lila +++ b/tree-sitter-lila @@ -1 +1 @@ -Subproject commit ef984491f7d650f910f65605bd07ad3bf34484b9 +Subproject commit a44288effcf367d693d02608cf8528fdef89c080