begin tree-sitter parsing backend

This commit is contained in:
Romain Paquet 2024-12-06 18:23:55 +01:00
parent 9c0a3042d5
commit 1b91ee53d4
7 changed files with 236 additions and 239 deletions

View file

@ -3,6 +3,11 @@ name = "lilac"
version = "0.0.1" version = "0.0.1"
edition = "2021" edition = "2021"
[features]
default = ["pest"]
pest = ["dep:pest", "dep:pest_derive"]
tree-sitter = ["dep:tree-sitter", "dep:tree-sitter-lila"]
[dependencies] [dependencies]
clap = { version = "4.5.7", features = ["derive"] } clap = { version = "4.5.7", features = ["derive"] }
cranelift = "0.109.0" cranelift = "0.109.0"
@ -10,10 +15,19 @@ cranelift-jit = "0.109.0"
cranelift-module = "0.109.0" cranelift-module = "0.109.0"
cranelift-native = "0.109.0" cranelift-native = "0.109.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
pest = "2.7.4" pest = { version = "2.7.4", optional = true }
pest_derive = "2.7.4" pest_derive = { version = "2.7.4", optional = true }
tree-sitter = { version = "0.22", optional = true }
ariadne = "0.4.1" ariadne = "0.4.1"
anyhow = "1.0.86" anyhow = "1.0.86"
[dependencies.tree-sitter-lila]
version = "0.0.1"
optional = true
path = "./tree-sitter-lila"
[dev-dependencies] [dev-dependencies]
pretty_assertions = "1.4.0" pretty_assertions = "1.4.0"
[build-dependencies]
cc = "*"

View file

@ -1 +1,5 @@
#[cfg(feature = "pest")]
pub mod pest; pub mod pest;
#[cfg(feature = "tree-sitter")]
pub mod tree_sitter;

View file

@ -1,6 +1,3 @@
use std::fs;
use std::path::Path;
use expr::BinaryExpression; use expr::BinaryExpression;
use pest::iterators::Pair; use pest::iterators::Pair;
use pest::pratt_parser::PrattParser; use pest::pratt_parser::PrattParser;
@ -38,14 +35,6 @@ pub struct Parser {
} }
impl crate::parsing::Parser for Parser { impl crate::parsing::Parser for Parser {
fn parse_file(&mut self, path: &Path, id: SourceId) -> anyhow::Result<Module> {
let source = fs::read_to_string(path)?;
let module_path = ModulePath::from(path);
let mut module = self.parse_as_module(&source, module_path, id)?;
module.file = Some(path.to_owned());
Ok(module)
}
fn parse_as_module( fn parse_as_module(
&mut self, &mut self,
source: &str, source: &str,

View file

@ -1,222 +0,0 @@
use tree_sitter::{self, Language, Parser, TreeCursor};
/// Node of the syntax tree produced by this tree-sitter front end.
enum Ast {
/// A function definition.
FuncDef(FuncDef),
/// An expression.
Expr(Expr),
/// A list of nodes; `parse_with_tree_sitter` only ever stores `FuncDef` nodes in it.
Module(Vec<Ast>),
/// A block: its statements, then the optional expression that closes it.
Block(Vec<Statement>, Option<Expr>),
/// A statement.
Statement(Statement),
}
/// Operator carried by `Expr::BinaryExpression`.
enum BinaryOperator {
Add,
Sub,
Mul,
Div,
}
/// Expression node; binary expressions are the only form declared here.
enum Expr {
/// Two boxed operands around an operator.
// NOTE(review): presumably (left operand, operator, right operand) -- nothing in
// this file constructs the variant, so confirm the order where it gets built.
BinaryExpression(Box<Expr>, BinaryOperator, Box<Expr>),
}
/// Statement node; assignments are the only form declared here.
enum Statement {
/// An identifier paired with an expression.
// NOTE(review): presumably (assigned name, assigned value) -- nothing in this
// file constructs the variant, so confirm where it gets built.
AssignStatement(Identifier, Expr),
}
/// Name of a function or parameter, stored as the source text of its node.
type Identifier = String;
/// Type annotation, stored as the source text of its node (it is not resolved here).
type Type = String;
/// One parameter of a function definition.
struct Parameter {
/// Source text of the parameter node's first child.
name: Identifier,
/// Source text of the parameter node's second child.
typ: Type,
}
/// A function definition as read from a `function_definition` node.
struct FuncDef {
/// Source text of the `name` field.
name: Identifier,
/// One entry per child of the `parameters` field, in source order.
parameters: Vec<Parameter>,
/// Source text of the `return_type` field.
// NOTE(review): the parsing code in this file always stores `Some(..)` here.
return_type: Option<Type>,
/// Result of parsing the `body` field.
body: Box<Ast>,
}
/// Error raised while turning a tree-sitter syntax tree into an `Ast`.
///
/// It implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// propagated with `?` into `anyhow::Error` or `Box<dyn Error>`.
#[derive(Debug)]
struct AstError {
    /// Human-readable description of the problem.
    message: String,
}

impl AstError {
    /// Builds an error holding an owned copy of `message`.
    fn new(message: &str) -> Self {
        AstError {
            message: message.to_owned(),
        }
    }
}

impl std::fmt::Display for AstError {
    /// Writes the stored message unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for AstError {}
// Foreign declaration of the grammar entry point used by `parse_with_tree_sitter`.
// NOTE(review): presumably the symbol comes from the generated C parser that is
// compiled and linked by a build script -- confirm the link setup and that the
// `Language` layout matches the linked tree-sitter runtime.
extern "C" {
/// Returns the `Language` that `parse_with_tree_sitter` passes to `Parser::set_language`.
fn tree_sitter_krone() -> Language;
}
struct TreeCursorChildrenIter<'a, A: AsRef<[u8]>> {
source: A,
cursor: &'a mut TreeCursor<'a>,
on_child: bool,
}
impl<'a, A: AsRef<[u8]>> Iterator for TreeCursorChildrenIter<'a, A> {
type Item = Result<Ast, AstError>;
fn next(&mut self) -> Option<Self::Item> {
if self.on_child {
if self.cursor.goto_next_sibling() {
Some(parse_from_cursor(&self.source, self.cursor))
} else {
self.cursor.goto_parent();
None
}
} else {
if self.cursor.goto_first_child() {
self.on_child = true;
Some(parse_from_cursor(&self.source, self.cursor))
} else {
None
}
}
}
}
fn iter_children<'a, A: AsRef<[u8]>>(
source: A,
cursor: &'a mut TreeCursor<'a>,
) -> TreeCursorChildrenIter<'a, A> {
TreeCursorChildrenIter {
source,
cursor,
on_child: false,
}
}
fn parse_from_cursor<'a>(
source: impl AsRef<[u8]>,
cursor: &'a mut TreeCursor<'a>,
) -> Result<Ast, AstError> {
match cursor.node().kind() {
"block" => {
let mut statements = Vec::new();
let mut value = None;
for child in iter_children(source, cursor) {
match child.unwrap() {
Ast::Statement(statement) => {
if value.is_none() {
statements.push(statement);
} else {
return Err(AstError::new(
"cannot have a statement after an expression in a block",
));
// perhaps there is a missing semicolon ;
}
}
Ast::Expr(expr) => value = Some(expr),
_ => return Err(AstError::new("invalid node type")),
};
}
let block = Ast::Block(statements, value);
Ok(block)
}
"function_definition" => {
// 1: name
assert!(cursor.goto_first_child());
assert!(cursor.field_name() == Some("name"));
let name: String = cursor
.node()
.utf8_text(source.as_ref())
.expect("utf8 error")
.into();
// 2: parameters
assert!(cursor.goto_next_sibling());
assert!(cursor.field_name() == Some("parameters"));
let mut parameters = Vec::new();
if cursor.goto_first_child() {
loop {
let param = cursor.node();
assert!(cursor.goto_first_child());
let name = cursor
.node()
.utf8_text(source.as_ref())
.expect("utf8 error")
.into();
assert!(cursor.goto_next_sibling());
let typ = cursor
.node()
.utf8_text(source.as_ref())
.expect("utf8 error")
.into();
cursor.goto_parent();
parameters.push(Parameter { name, typ });
if !cursor.goto_next_sibling() {
break;
}
}
cursor.goto_parent();
}
// 3: return type
assert!(cursor.goto_next_sibling());
assert!(cursor.field_name() == Some("return_type"));
let return_type = Some(
cursor
.node()
.utf8_text(source.as_ref())
.expect("utf8 error")
.into(),
);
// 4: body
assert!(cursor.goto_next_sibling());
assert!(cursor.field_name() == Some("body"));
let body = parse_from_cursor(source, cursor).unwrap();
let body = Box::new(body);
Ok(Ast::FuncDef(FuncDef {
name,
parameters,
return_type,
body,
}))
}
_ => panic!("unexpected node kind: {}", cursor.node().kind()),
}
}
fn parse_with_tree_sitter(source: impl AsRef<[u8]>) -> Result<Ast, AstError> {
let mut parser = Parser::new();
let language = unsafe { tree_sitter_krone() };
parser.set_language(language).unwrap();
let tree = parser.parse(&source, None).unwrap();
let mut cursor = tree.walk();
let node = cursor.node();
assert!(node.kind() == "source_file");
let mut top_level_nodes = Vec::new();
for node in iter_children(source, &mut cursor) {
let node = node.unwrap();
match node {
Ast::FuncDef(_) => top_level_nodes.push(node),
_ => panic!("unexpected top-level node type"),
};
}
Ok(Ast::Module(top_level_nodes))
}

View file

@ -0,0 +1,203 @@
use anyhow::{anyhow, bail, ensure};
use tree_sitter::{Node, TreeCursor};
use crate::ast::*;
/// Parsing backend that wraps a `tree_sitter::Parser`.
///
/// The `Default` implementation configures the wrapped parser with the
/// `tree_sitter_lila` grammar.
pub struct Parser(tree_sitter::Parser);
impl Default for Parser {
    /// Creates a parser already configured with the `tree_sitter_lila` grammar.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped tree-sitter parser rejects the grammar.
    fn default() -> Self {
        let mut inner = tree_sitter::Parser::new();
        inner.set_language(&tree_sitter_lila::language()).unwrap();
        Self(inner)
    }
}
/// State shared by the parsing routines of one source file: the source text, its id,
/// and a cursor over its syntax tree.
///
/// `'a` is the lifetime of the borrows held by the context, `'tree` the lifetime of the
/// syntax tree the cursor walks. Keeping them apart lets the context be reborrowed for
/// short periods; with a single lifetime the cursor stays locked for as long as the
/// tree lives.
struct SourceParsingContext<'a, 'tree> {
    source: &'a str,
    source_id: SourceId,
    cursor: &'a mut TreeCursor<'tree>,
}

impl<'a, 'tree> SourceParsingContext<'a, 'tree> {
    /// Returns the byte span of the node under the cursor, tagged with the source id.
    fn span(&self) -> Span {
        let node = self.cursor.node();
        Span {
            source: self.source_id,
            start: node.start_byte(),
            end: node.end_byte(),
        }
    }

    /// Returns an iterator over the direct children of the node under the cursor.
    ///
    /// The context stays borrowed only while the iterator is alive.
    fn iter_children(&mut self) -> NodeIterator<'_, 'tree> {
        NodeIterator {
            state: ChildCursorState::OnParent,
            cursor: &mut *self.cursor,
        }
    }
}

/// Position of a [`NodeIterator`] relative to the node whose children it yields.
enum ChildCursorState {
    /// Nothing was yielded yet; the cursor points at the parent node.
    OnParent,
    /// The cursor points at the child that was yielded last.
    OnChild,
    /// Every child was yielded and the cursor is back on the parent node.
    Exhausted,
}

/// Iterator over the direct children of one node, driven by a borrowed cursor.
///
/// While iterating, the cursor points at the node that was yielded last. Once the
/// children are exhausted the cursor is moved back to the parent node and the iterator
/// keeps returning `None`.
struct NodeIterator<'c, 'tree> {
    state: ChildCursorState,
    cursor: &'c mut TreeCursor<'tree>,
}

impl<'c, 'tree> Iterator for NodeIterator<'c, 'tree> {
    type Item = Node<'tree>;

    fn next(&mut self) -> Option<Self::Item> {
        let moved = match self.state {
            ChildCursorState::OnParent => self.cursor.goto_first_child(),
            ChildCursorState::OnChild => {
                let moved = self.cursor.goto_next_sibling();
                if !moved {
                    // Leave the cursor where the caller had put it.
                    self.cursor.goto_parent();
                }
                moved
            }
            ChildCursorState::Exhausted => return None,
        };

        if moved {
            // Recording the descent matters: without it every call would go one level
            // deeper instead of moving on to the next sibling.
            self.state = ChildCursorState::OnChild;
            Some(self.cursor.node())
        } else {
            self.state = ChildCursorState::Exhausted;
            None
        }
    }
}
impl Parser {
fn parse_param<'a>(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Parameter> {
ensure!(ctx.cursor.goto_first_child());
let name = ctx
.cursor
.node()
.utf8_text(ctx.source.as_ref())
.expect("utf8 error")
.into();
ensure!(ctx.cursor.goto_next_sibling());
let typ = ctx
.cursor
.node()
.utf8_text(ctx.source.as_ref())
.expect("utf8 error")
.into();
ctx.cursor.goto_parent();
Ok(Parameter { name, typ })
}
fn parse_function<'a>(
&self,
ctx: &mut SourceParsingContext,
) -> anyhow::Result<FunctionDefinition> {
let span = ctx.span();
// 1: name
assert!(ctx.cursor.goto_first_child());
assert!(ctx.cursor.field_name() == Some("name"));
let name: String = ctx
.cursor
.node()
.utf8_text(ctx.source.as_ref())
.expect("utf8 error")
.into();
// 2: parameters
assert!(ctx.cursor.goto_next_sibling());
assert!(ctx.cursor.field_name() == Some("parameters"));
let mut parameters = Vec::new();
if ctx.cursor.goto_first_child() {
loop {
let param = self.parse_param(ctx)?;
parameters.push(param);
if !ctx.cursor.goto_next_sibling() {
break;
}
}
ctx.cursor.goto_parent();
}
// 3: return type
assert!(ctx.cursor.goto_next_sibling());
assert!(ctx.cursor.field_name() == Some("return_type"));
let return_type = Some(ctx.cursor.node().utf8_text(ctx.source.as_ref())?.into());
let return_type_span = Some(ctx.span());
// 4: body
assert!(ctx.cursor.goto_next_sibling());
assert!(ctx.cursor.field_name() == Some("body"));
let body = Box::new(self.parse_block(ctx)?);
Ok(FunctionDefinition {
name,
parameters,
return_type,
return_type_span,
body,
span,
})
}
fn parse_block(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Block> {
let mut statements = Vec::new();
let mut value = None;
for child in ctx.iter_children() {
match child.kind() {
"statement" => {
ensure!(
value.is_none(),
"cannot have a statement after an expression in a block"
);
let statement = self.parse_statement(ctx)?;
statements.push(statement);
}
"expr" => value = Some(self.parse_expr(ctx)),
};
}
let block = Block { statements, value };
Ok(block)
}
fn parse_statement(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Statement> {
assert_eq!(ctx.cursor.node().kind(), "statement");
ctx.cursor.Some(statement)
}
fn parse_expr(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Expr> {
assert_eq!(ctx.cursor.node().kind(), "expr");
Some()
}
}
impl crate::parsing::Parser for Parser {
fn parse_as_module(
&mut self,
source: &str,
path: ModulePath,
id: SourceId,
) -> anyhow::Result<Module> {
let tree = self.0.parse(&source, None).unwrap();
let mut cursor = tree.walk();
{
let node = cursor.node();
assert!(node.kind() == "source_file");
}
let ctx = SourceParsingContext {
source,
source_id: id,
cursor: &mut cursor,
};
let mut module = Module::new(path);
Ok(module)
}
}

View file

@ -4,7 +4,13 @@ mod tests;
use crate::ast::{Module, ModulePath, SourceId}; use crate::ast::{Module, ModulePath, SourceId};
pub trait Parser: Default { pub trait Parser: Default {
fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result<Module>; fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result<Module> {
let source = std::fs::read_to_string(path)?;
let module_path = ModulePath::from(path);
let mut module = self.parse_as_module(&source, module_path, id)?;
module.file = Some(path.to_owned());
Ok(module)
}
fn parse_as_module( fn parse_as_module(
&mut self, &mut self,
@ -14,5 +20,8 @@ pub trait Parser: Default {
) -> anyhow::Result<Module>; ) -> anyhow::Result<Module>;
} }
pub use self::backend::pest::Parser as PestParser; #[cfg(feature = "pest")]
pub use PestParser as DefaultParser; pub use self::backend::pest::Parser as DefaultParser;
#[cfg(feature = "tree-sitter")]
pub use self::backend::tree_sitter::Parser as DefaultParser;

@ -1 +1 @@
Subproject commit ef984491f7d650f910f65605bd07ad3bf34484b9 Subproject commit a44288effcf367d693d02608cf8528fdef89c080