begin tree-sitter parsing backend
This commit is contained in:
parent
9c0a3042d5
commit
1b91ee53d4
7 changed files with 236 additions and 239 deletions
18
Cargo.toml
18
Cargo.toml
|
|
@ -3,6 +3,11 @@ name = "lilac"
|
|||
version = "0.0.1"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
default = ["pest"]
|
||||
pest = ["dep:pest", "dep:pest_derive"]
|
||||
tree-sitter = ["dep:tree-sitter", "dep:tree-sitter-lila"]
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.5.7", features = ["derive"] }
|
||||
cranelift = "0.109.0"
|
||||
|
|
@ -10,10 +15,19 @@ cranelift-jit = "0.109.0"
|
|||
cranelift-module = "0.109.0"
|
||||
cranelift-native = "0.109.0"
|
||||
lazy_static = "1.4.0"
|
||||
pest = "2.7.4"
|
||||
pest_derive = "2.7.4"
|
||||
pest = { version = "2.7.4", optional = true }
|
||||
pest_derive = { version = "2.7.4", optional = true }
|
||||
tree-sitter = { version = "0.22", optional = true }
|
||||
ariadne = "0.4.1"
|
||||
anyhow = "1.0.86"
|
||||
|
||||
[dependencies.tree-sitter-lila]
|
||||
version = "0.0.1"
|
||||
optional = true
|
||||
path = "./tree-sitter-lila"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "1.4.0"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "*"
|
||||
|
|
|
|||
|
|
@ -1 +1,5 @@
|
|||
#[cfg(feature = "pest")]
|
||||
pub mod pest;
|
||||
|
||||
#[cfg(feature = "tree-sitter")]
|
||||
pub mod tree_sitter;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,3 @@
|
|||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use expr::BinaryExpression;
|
||||
use pest::iterators::Pair;
|
||||
use pest::pratt_parser::PrattParser;
|
||||
|
|
@ -38,14 +35,6 @@ pub struct Parser {
|
|||
}
|
||||
|
||||
impl crate::parsing::Parser for Parser {
|
||||
fn parse_file(&mut self, path: &Path, id: SourceId) -> anyhow::Result<Module> {
|
||||
let source = fs::read_to_string(path)?;
|
||||
let module_path = ModulePath::from(path);
|
||||
let mut module = self.parse_as_module(&source, module_path, id)?;
|
||||
module.file = Some(path.to_owned());
|
||||
Ok(module)
|
||||
}
|
||||
|
||||
fn parse_as_module(
|
||||
&mut self,
|
||||
source: &str,
|
||||
|
|
|
|||
|
|
@ -1,222 +0,0 @@
|
|||
use tree_sitter::{self, Language, Parser, TreeCursor};
|
||||
|
||||
enum Ast {
|
||||
FuncDef(FuncDef),
|
||||
Expr(Expr),
|
||||
Module(Vec<Ast>),
|
||||
Block(Vec<Statement>, Option<Expr>),
|
||||
Statement(Statement),
|
||||
}
|
||||
|
||||
/// Arithmetic operator of a binary expression.
///
/// The enum carries no data, so it is cheap to copy and can be compared and
/// debug-printed directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BinaryOperator {
    /// Addition.
    Add,
    /// Subtraction.
    Sub,
    /// Multiplication.
    Mul,
    /// Division.
    Div,
}
|
||||
|
||||
enum Expr {
|
||||
BinaryExpression(Box<Expr>, BinaryOperator, Box<Expr>),
|
||||
}
|
||||
|
||||
enum Statement {
|
||||
AssignStatement(Identifier, Expr),
|
||||
}
|
||||
|
||||
/// Name of a function or parameter, stored as the raw source text.
type Identifier = String;

/// Type annotation, stored as the raw source text.
type Type = String;
|
||||
|
||||
struct Parameter {
|
||||
name: Identifier,
|
||||
typ: Type,
|
||||
}
|
||||
|
||||
struct FuncDef {
|
||||
name: Identifier,
|
||||
parameters: Vec<Parameter>,
|
||||
return_type: Option<Type>,
|
||||
body: Box<Ast>,
|
||||
}
|
||||
|
||||
/// Error raised while turning a tree-sitter parse tree into the syntax tree.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// boxed, propagated with `?`, and converted into `anyhow::Error`.
#[derive(Debug)]
struct AstError {
    /// Human-readable description of the failure.
    message: String,
}

impl AstError {
    /// Creates an error carrying `message`.
    ///
    /// Accepts anything convertible into a `String` (`&str`, `String`, ...),
    /// so an already-owned message is not copied again.
    fn new(message: impl Into<String>) -> Self {
        AstError {
            message: message.into(),
        }
    }
}

impl std::fmt::Display for AstError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for AstError {}
|
||||
|
||||
extern "C" {
|
||||
fn tree_sitter_krone() -> Language;
|
||||
}
|
||||
|
||||
struct TreeCursorChildrenIter<'a, A: AsRef<[u8]>> {
|
||||
source: A,
|
||||
cursor: &'a mut TreeCursor<'a>,
|
||||
on_child: bool,
|
||||
}
|
||||
|
||||
impl<'a, A: AsRef<[u8]>> Iterator for TreeCursorChildrenIter<'a, A> {
|
||||
type Item = Result<Ast, AstError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.on_child {
|
||||
if self.cursor.goto_next_sibling() {
|
||||
Some(parse_from_cursor(&self.source, self.cursor))
|
||||
} else {
|
||||
self.cursor.goto_parent();
|
||||
None
|
||||
}
|
||||
} else {
|
||||
if self.cursor.goto_first_child() {
|
||||
self.on_child = true;
|
||||
Some(parse_from_cursor(&self.source, self.cursor))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_children<'a, A: AsRef<[u8]>>(
|
||||
source: A,
|
||||
cursor: &'a mut TreeCursor<'a>,
|
||||
) -> TreeCursorChildrenIter<'a, A> {
|
||||
TreeCursorChildrenIter {
|
||||
source,
|
||||
cursor,
|
||||
on_child: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_from_cursor<'a>(
|
||||
source: impl AsRef<[u8]>,
|
||||
cursor: &'a mut TreeCursor<'a>,
|
||||
) -> Result<Ast, AstError> {
|
||||
match cursor.node().kind() {
|
||||
"block" => {
|
||||
let mut statements = Vec::new();
|
||||
let mut value = None;
|
||||
|
||||
for child in iter_children(source, cursor) {
|
||||
match child.unwrap() {
|
||||
Ast::Statement(statement) => {
|
||||
if value.is_none() {
|
||||
statements.push(statement);
|
||||
} else {
|
||||
return Err(AstError::new(
|
||||
"cannot have a statement after an expression in a block",
|
||||
));
|
||||
// perhaps there is a missing semicolon ;
|
||||
}
|
||||
}
|
||||
Ast::Expr(expr) => value = Some(expr),
|
||||
_ => return Err(AstError::new("invalid node type")),
|
||||
};
|
||||
}
|
||||
|
||||
let block = Ast::Block(statements, value);
|
||||
Ok(block)
|
||||
}
|
||||
|
||||
"function_definition" => {
|
||||
// 1: name
|
||||
assert!(cursor.goto_first_child());
|
||||
assert!(cursor.field_name() == Some("name"));
|
||||
let name: String = cursor
|
||||
.node()
|
||||
.utf8_text(source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into();
|
||||
|
||||
// 2: parameters
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert!(cursor.field_name() == Some("parameters"));
|
||||
let mut parameters = Vec::new();
|
||||
|
||||
if cursor.goto_first_child() {
|
||||
loop {
|
||||
let param = cursor.node();
|
||||
|
||||
assert!(cursor.goto_first_child());
|
||||
let name = cursor
|
||||
.node()
|
||||
.utf8_text(source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into();
|
||||
|
||||
assert!(cursor.goto_next_sibling());
|
||||
let typ = cursor
|
||||
.node()
|
||||
.utf8_text(source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into();
|
||||
|
||||
cursor.goto_parent();
|
||||
|
||||
parameters.push(Parameter { name, typ });
|
||||
|
||||
if !cursor.goto_next_sibling() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cursor.goto_parent();
|
||||
}
|
||||
|
||||
// 3: return type
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert!(cursor.field_name() == Some("return_type"));
|
||||
let return_type = Some(
|
||||
cursor
|
||||
.node()
|
||||
.utf8_text(source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into(),
|
||||
);
|
||||
|
||||
// 4: body
|
||||
assert!(cursor.goto_next_sibling());
|
||||
assert!(cursor.field_name() == Some("body"));
|
||||
let body = parse_from_cursor(source, cursor).unwrap();
|
||||
let body = Box::new(body);
|
||||
|
||||
Ok(Ast::FuncDef(FuncDef {
|
||||
name,
|
||||
parameters,
|
||||
return_type,
|
||||
body,
|
||||
}))
|
||||
}
|
||||
|
||||
_ => panic!("unexpected node kind: {}", cursor.node().kind()),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_with_tree_sitter(source: impl AsRef<[u8]>) -> Result<Ast, AstError> {
|
||||
let mut parser = Parser::new();
|
||||
let language = unsafe { tree_sitter_krone() };
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
let tree = parser.parse(&source, None).unwrap();
|
||||
|
||||
let mut cursor = tree.walk();
|
||||
let node = cursor.node();
|
||||
assert!(node.kind() == "source_file");
|
||||
let mut top_level_nodes = Vec::new();
|
||||
|
||||
for node in iter_children(source, &mut cursor) {
|
||||
let node = node.unwrap();
|
||||
match node {
|
||||
Ast::FuncDef(_) => top_level_nodes.push(node),
|
||||
_ => panic!("unexpected top-level node type"),
|
||||
};
|
||||
}
|
||||
|
||||
Ok(Ast::Module(top_level_nodes))
|
||||
}
|
||||
203
src/parsing/backend/tree_sitter/mod.rs
Normal file
203
src/parsing/backend/tree_sitter/mod.rs
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
use anyhow::{anyhow, bail, ensure};
|
||||
use tree_sitter::{Node, TreeCursor};
|
||||
|
||||
use crate::ast::*;
|
||||
|
||||
/// Parsing backend that wraps a [`tree_sitter::Parser`].
pub struct Parser(tree_sitter::Parser);
|
||||
|
||||
impl Default for Parser {
|
||||
fn default() -> Self {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
let language = tree_sitter_lila::language();
|
||||
parser.set_language(&language).unwrap();
|
||||
|
||||
Self(parser)
|
||||
}
|
||||
}
|
||||
|
||||
struct SourceParsingContext<'a> {
|
||||
source: &'a str,
|
||||
source_id: SourceId,
|
||||
cursor: &'a mut TreeCursor<'a>,
|
||||
}
|
||||
|
||||
impl<'a> SourceParsingContext<'a> {
|
||||
fn span(&self) -> Span {
|
||||
Span {
|
||||
source: self.source_id,
|
||||
start: self.cursor.node().start_byte(),
|
||||
end: self.cursor.node().end_byte(),
|
||||
}
|
||||
}
|
||||
|
||||
fn iter_children(&'a mut self) -> NodeIterator<'a> {
|
||||
NodeIterator {
|
||||
is_child: false,
|
||||
cursor: self.cursor,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct NodeIterator<'a> {
|
||||
is_child: bool,
|
||||
cursor: &'a mut TreeCursor<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for NodeIterator<'a> {
|
||||
type Item = Node<'a>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.is_child {
|
||||
match self.cursor.goto_next_sibling() {
|
||||
true => Some(self.cursor.node()),
|
||||
false => None,
|
||||
}
|
||||
} else {
|
||||
match self.cursor.goto_first_child() {
|
||||
true => Some(self.cursor.node()),
|
||||
false => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
fn parse_param<'a>(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Parameter> {
|
||||
ensure!(ctx.cursor.goto_first_child());
|
||||
let name = ctx
|
||||
.cursor
|
||||
.node()
|
||||
.utf8_text(ctx.source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into();
|
||||
|
||||
ensure!(ctx.cursor.goto_next_sibling());
|
||||
let typ = ctx
|
||||
.cursor
|
||||
.node()
|
||||
.utf8_text(ctx.source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into();
|
||||
|
||||
ctx.cursor.goto_parent();
|
||||
|
||||
Ok(Parameter { name, typ })
|
||||
}
|
||||
|
||||
fn parse_function<'a>(
|
||||
&self,
|
||||
ctx: &mut SourceParsingContext,
|
||||
) -> anyhow::Result<FunctionDefinition> {
|
||||
let span = ctx.span();
|
||||
|
||||
// 1: name
|
||||
assert!(ctx.cursor.goto_first_child());
|
||||
assert!(ctx.cursor.field_name() == Some("name"));
|
||||
let name: String = ctx
|
||||
.cursor
|
||||
.node()
|
||||
.utf8_text(ctx.source.as_ref())
|
||||
.expect("utf8 error")
|
||||
.into();
|
||||
|
||||
// 2: parameters
|
||||
assert!(ctx.cursor.goto_next_sibling());
|
||||
assert!(ctx.cursor.field_name() == Some("parameters"));
|
||||
let mut parameters = Vec::new();
|
||||
|
||||
if ctx.cursor.goto_first_child() {
|
||||
loop {
|
||||
let param = self.parse_param(ctx)?;
|
||||
|
||||
parameters.push(param);
|
||||
|
||||
if !ctx.cursor.goto_next_sibling() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
ctx.cursor.goto_parent();
|
||||
}
|
||||
|
||||
// 3: return type
|
||||
assert!(ctx.cursor.goto_next_sibling());
|
||||
assert!(ctx.cursor.field_name() == Some("return_type"));
|
||||
let return_type = Some(ctx.cursor.node().utf8_text(ctx.source.as_ref())?.into());
|
||||
let return_type_span = Some(ctx.span());
|
||||
|
||||
// 4: body
|
||||
assert!(ctx.cursor.goto_next_sibling());
|
||||
assert!(ctx.cursor.field_name() == Some("body"));
|
||||
let body = Box::new(self.parse_block(ctx)?);
|
||||
|
||||
Ok(FunctionDefinition {
|
||||
name,
|
||||
parameters,
|
||||
return_type,
|
||||
return_type_span,
|
||||
body,
|
||||
span,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_block(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Block> {
|
||||
let mut statements = Vec::new();
|
||||
let mut value = None;
|
||||
|
||||
for child in ctx.iter_children() {
|
||||
match child.kind() {
|
||||
"statement" => {
|
||||
ensure!(
|
||||
value.is_none(),
|
||||
"cannot have a statement after an expression in a block"
|
||||
);
|
||||
let statement = self.parse_statement(ctx)?;
|
||||
statements.push(statement);
|
||||
}
|
||||
"expr" => value = Some(self.parse_expr(ctx)),
|
||||
};
|
||||
}
|
||||
|
||||
let block = Block { statements, value };
|
||||
Ok(block)
|
||||
}
|
||||
|
||||
fn parse_statement(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Statement> {
|
||||
assert_eq!(ctx.cursor.node().kind(), "statement");
|
||||
|
||||
ctx.cursor.Some(statement)
|
||||
}
|
||||
|
||||
fn parse_expr(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Expr> {
|
||||
assert_eq!(ctx.cursor.node().kind(), "expr");
|
||||
|
||||
Some()
|
||||
}
|
||||
}
|
||||
|
||||
impl crate::parsing::Parser for Parser {
|
||||
fn parse_as_module(
|
||||
&mut self,
|
||||
source: &str,
|
||||
path: ModulePath,
|
||||
id: SourceId,
|
||||
) -> anyhow::Result<Module> {
|
||||
let tree = self.0.parse(&source, None).unwrap();
|
||||
|
||||
let mut cursor = tree.walk();
|
||||
{
|
||||
let node = cursor.node();
|
||||
assert!(node.kind() == "source_file");
|
||||
}
|
||||
|
||||
let ctx = SourceParsingContext {
|
||||
source,
|
||||
source_id: id,
|
||||
cursor: &mut cursor,
|
||||
};
|
||||
|
||||
let mut module = Module::new(path);
|
||||
|
||||
Ok(module)
|
||||
}
|
||||
}
|
||||
|
|
@ -4,7 +4,13 @@ mod tests;
|
|||
use crate::ast::{Module, ModulePath, SourceId};
|
||||
|
||||
pub trait Parser: Default {
|
||||
fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result<Module>;
|
||||
fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result<Module> {
|
||||
let source = std::fs::read_to_string(path)?;
|
||||
let module_path = ModulePath::from(path);
|
||||
let mut module = self.parse_as_module(&source, module_path, id)?;
|
||||
module.file = Some(path.to_owned());
|
||||
Ok(module)
|
||||
}
|
||||
|
||||
fn parse_as_module(
|
||||
&mut self,
|
||||
|
|
@ -14,5 +20,8 @@ pub trait Parser: Default {
|
|||
) -> anyhow::Result<Module>;
|
||||
}
|
||||
|
||||
pub use self::backend::pest::Parser as PestParser;
|
||||
pub use PestParser as DefaultParser;
|
||||
// Cargo features are additive, so both backends can be enabled at once
// (`pest` is a default feature). `DefaultParser` must then resolve to exactly
// one backend; `pest` keeps priority so that turning on `tree-sitter` as well
// does not change existing behavior.
#[cfg(feature = "pest")]
pub use self::backend::pest::Parser as DefaultParser;

#[cfg(all(feature = "tree-sitter", not(feature = "pest")))]
pub use self::backend::tree_sitter::Parser as DefaultParser;
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
Subproject commit ef984491f7d650f910f65605bd07ad3bf34484b9
|
||||
Subproject commit a44288effcf367d693d02608cf8528fdef89c080
|
||||
Loading…
Add table
Add a link
Reference in a new issue