refactor: split project into multiple crates
parent 486af67fc2
commit 857f747524

27 changed files with 308 additions and 222 deletions
lila-parsing/Cargo.toml (new file, 29 lines)
@@ -0,0 +1,29 @@
[package]
name = "lila-parsing"
version = "0.0.1"
edition = "2021"

[features]
default = ["pest"]
pest = ["dep:pest", "dep:pest_derive", "dep:lazy_static"]
tree-sitter = ["dep:tree-sitter", "dep:tree-sitter-lila"]

[dependencies]
anyhow = "1.0.86"
lazy_static = { version = "1.4.0", optional = true }
lila-ast = { path = "../lila-ast" }
pest = { version = "2.7.4", optional = true }
pest_derive = { version = "2.7.4", optional = true }
tree-sitter = { version = "0.22", optional = true }

[dependencies.tree-sitter-lila]
version = "0.0.1"
optional = true
git = "https://git.sr.ht/~rpqt/tree-sitter-lila"
branch = "main"

[build-dependencies]
cc = "*"

[dev-dependencies]
pretty_assertions = "1.4.0"
lila-parsing/src/backend/handmade/lex.rs (new file, 157 lines)
@@ -0,0 +1,157 @@
use std::iter::Peekable;
use std::str::Chars;

#[derive(Debug)]
pub enum Token {
    LeftBracket,
    RightBracket,
    If,
    Else,
    Identifier(String),
    LeftParenthesis,
    RightParenthesis,
    Func,
    Colon,
    While,
    Set,
    LineComment,
    Mul,
    Sub,
    Add,
    Slash,
    Modulo,
    NotEqual,
    Equal,
    DoubleEquals,
    Exclamation,
    NumberLiteral,
}

#[derive(Debug)]
pub enum TokenError {
    InvalidToken,
}

pub struct Lexer {
    line: usize,
    column: usize,
}

impl Lexer {
    pub fn new() -> Self {
        Self { line: 1, column: 1 }
    }

    pub fn tokenize(&mut self, input: String) -> Result<Vec<Token>, TokenError> {
        let mut tokens: Vec<Token> = Vec::new();
        let mut chars = input.chars().peekable();
        while let Some(tok_or_err) = self.get_next_token(&mut chars) {
            match tok_or_err {
                Ok(token) => tokens.push(token),
                Err(err) => return Err(err),
            };
        }
        Ok(tokens)
    }

    fn get_next_token(&mut self, chars: &mut Peekable<Chars>) -> Option<Result<Token, TokenError>> {
        if let Some(ch) = chars.next() {
            let tok_or_err = match ch {
                '(' => Ok(Token::LeftParenthesis),
                ')' => Ok(Token::RightParenthesis),
                '{' => Ok(Token::LeftBracket),
                '}' => Ok(Token::RightBracket),
                '+' => Ok(Token::Add),
                '-' => Ok(Token::Sub),
                '*' => Ok(Token::Mul),
                '%' => Ok(Token::Modulo),
                '/' => {
                    if let Some('/') = chars.peek() {
                        chars.next();
                        let comment = chars.take_while(|c| c != &'\n');
                        self.column += comment.count() + 1;
                        Ok(Token::LineComment)
                    } else {
                        Ok(Token::Slash)
                    }
                }
                '=' => {
                    if let Some(ch2) = chars.peek() {
                        match ch2 {
                            '=' => {
                                chars.next();
                                self.column += 1;
                                Ok(Token::DoubleEquals)
                            }
                            ' ' => Ok(Token::Equal),
                            _ => Err(TokenError::InvalidToken),
                        }
                    } else {
                        Ok(Token::Equal)
                    }
                }
                '!' => {
                    if let Some(ch2) = chars.next() {
                        match ch2 {
                            '=' => {
                                self.column += 1;
                                Ok(Token::NotEqual)
                            }
                            _ => Err(TokenError::InvalidToken),
                        }
                    } else {
                        Ok(Token::Exclamation)
                    }
                }
                'a'..='z' | 'A'..='Z' => {
                    let mut word = String::from(ch);
                    while let Some(ch2) = chars.peek() {
                        if ch2.is_alphanumeric() {
                            if let Some(ch2) = chars.next() {
                                word.push(ch2);
                            }
                        } else {
                            break;
                        }
                    }
                    self.column += word.len();
                    match word.as_str() {
                        "func" => Ok(Token::Func),
                        "if" => Ok(Token::If),
                        "else" => Ok(Token::Else),
                        "set" => Ok(Token::Set),
                        "while" => Ok(Token::While),
                        _ => Ok(Token::Identifier(word)),
                    }
                }
                '0'..='9' | '.' => {
                    let word = chars
                        .take_while(|c| c.is_numeric() || c == &'.')
                        .collect::<String>();
                    self.column += word.len();
                    // XXX: handle syntax error in number literals
                    Ok(Token::NumberLiteral)
                }
                ':' => Ok(Token::Colon),
                '\n' => {
                    self.line += 1;
                    self.column = 1;
                    return self.get_next_token(chars);
                }
                ' ' => {
                    self.column += 1;
                    return self.get_next_token(chars);
                }
                '\t' => {
                    self.column += 8;
                    return self.get_next_token(chars);
                }
                _ => Err(TokenError::InvalidToken),
            };
            self.column += 1;
            Some(tok_or_err)
        } else {
            None
        }
    }
}
lila-parsing/src/backend/handmade/mod.rs (new file, 149 lines)
@@ -0,0 +1,149 @@
// In progress parser from scratch

use crate::lex::Token;
use std::cell::RefCell;
use std::rc::Rc;

#[derive(Debug)]
pub enum NodeType {
    Document, // This is the root node
    LineComment,
    FunctionDefinition,
    FunctionParam,
    VariableName(String),
    Type(String),
}

use NodeType::*;

pub struct Node {
    kind: NodeType,
    parent: Option<Rc<RefCell<Node>>>,
    children: Vec<Box<Node>>,
}

impl Node {
    fn new() -> Self {
        Node::default()
    }

    fn with_kind(mut self, kind: NodeType) -> Self {
        self.kind = kind;
        self
    }

    fn with_children(mut self, children: Vec<Node>) -> Self {
        for child in children {
            self.push_child(child);
        }
        self
    }

    fn push_child(&mut self, child: Node) {
        // TODO: wire up the parent back-reference; that needs nodes to live behind Rc<RefCell<..>>.
        self.children.push(Box::new(child));
    }

    pub fn print_tree(&self) {
        self.print_tree_rec(0);
    }

    fn print_tree_rec(&self, indent: u8) {
        for _ in 1..=indent {
            print!(" ");
        }
        println!("{:?}", self.kind);
        for child in &self.children {
            child.print_tree_rec(indent + 2);
        }
    }
}

impl Default for Node {
    fn default() -> Self {
        Node {
            kind: Document,
            parent: None,
            children: Vec::new(),
        }
    }
}

impl From<NodeType> for Node {
    fn from(value: NodeType) -> Self {
        Node {
            kind: value,
            ..Node::default()
        }
    }
}

#[derive(Debug)]
pub enum SyntaxError {
    FuncExpectedIdentifier,
    FuncExpectedLeftParenthesisAfterIdentifier,
    UnexpectedToken,
}

pub struct Parser {}

impl Parser {
    pub fn new() -> Self {
        Parser {}
    }

    pub fn parse_tokens(&mut self, tokens: Vec<Token>) -> Result<Node, SyntaxError> {
        let mut tokens = tokens.iter().peekable();
        let mut root_node = Node::new();

        while let Some(token) = tokens.next() {
            let node_or_err = match token {
                Token::LineComment => Ok(Node {
                    kind: LineComment,
                    ..Node::default()
                }),

                Token::Func => {
                    let identifier = if let Some(ident) = tokens.next() {
                        match ident {
                            Token::Identifier(id) => Some(id),
                            _ => return Err(SyntaxError::FuncExpectedIdentifier),
                        }
                    } else {
                        None
                    };

                    if let Some(Token::LeftParenthesis) = tokens.next() {
                    } else {
                        return Err(SyntaxError::FuncExpectedLeftParenthesisAfterIdentifier);
                    };

                    let mut params: Vec<Node> = Vec::new();
                    while let Some(Token::Identifier(_)) = tokens.peek() {
                        if let Some(Token::Identifier(param_name)) = tokens.next() {
                            if let Some(Token::Colon) = tokens.next() {
                                if let Some(Token::Identifier(type_name)) = tokens.next() {
                                    let node =
                                        Node::new().with_kind(FunctionParam).with_children(vec![
                                            VariableName(param_name.into()).into(),
                                            Type(type_name.into()).into(),
                                        ]);
                                    params.push(node);
                                }
                            }
                        }
                    }
                    let node = Node::from(NodeType::FunctionDefinition).with_children(params);
                    Ok(node)
                }

                _ => Err(SyntaxError::UnexpectedToken),
            };
            if let Ok(node) = node_or_err {
                root_node.push_child(node);
            } else {
            };
        }
        Ok(root_node)
    }
}
lila-parsing/src/backend/mod.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
#[cfg(feature = "pest")]
pub mod pest;

#[cfg(feature = "tree-sitter")]
pub mod tree_sitter;
lila-parsing/src/backend/pest/grammar.pest (new file, 77 lines)
@@ -0,0 +1,77 @@
// This file is just a little test of pest.rs

source_file = { SOI ~ module_items ~ EOI }
module_items = { (use_statement | definition)* }

// Statements
statement = { assign_statement | declare_statement | return_statement | call_statement | use_statement | while_statement | if_statement }
declare_statement = { ident ~ "=" ~ expr ~ ";" }
assign_statement = { "set" ~ ident ~ "=" ~ expr ~ ";" }
return_statement = { "return" ~ expr? ~ ";" }
call_statement = { call ~ ";" }
use_statement = { "use" ~ import_path ~ ";" }
while_statement = { "while" ~ expr ~ block ~ ";" }
if_statement = { if_branch ~ ("else" ~ (if_branch | block))? ~ ";" }

if_branch = _{ "if" ~ expr ~ block }

// Module paths
import_path = { ident ~ ("::" ~ ident)* }

// Function call
call = { ident ~ "(" ~ args ~ ")" }
args = { (expr ~ ",")* ~ expr? }

definition = { func_def }

// Function definition
func_def = { "fn" ~ ident ~ "(" ~ parameters ~ ")" ~ typ? ~ block }
parameters = {
    (parameter ~ ",")* ~ (parameter)?
}
parameter = { ident ~ ":" ~ typ }

// Operators
infix = _{ arithmetic_operator | logical_operator }

arithmetic_operator = _{ add | subtract | multiply | divide | not_equal | equal | modulo }
add = { "+" }
subtract = { "-" }
multiply = { "*" }
divide = { "/" }
modulo = { "%" }
equal = { "==" }
not_equal = { "!=" }

logical_operator = _{ and | or }
and = { "&&" }
or = { "||" }

prefix = _{ not }
not = { "!" }

// Expressions
expr = { prefix? ~ atom ~ (infix ~ prefix? ~ atom)* }
atom = _{ call | if_expr | block | literal | ident | "(" ~ expr ~ ")" }
block = { "{" ~ statement* ~ expr? ~ "}" }
if_expr = { "if" ~ expr ~ block ~ "else" ~ (block | if_expr) }
//tuple = { "(" ~ (expr ~ ",")+ ~ expr ~ ")" }

ident = @{ (ASCII_ALPHANUMERIC | "_")+ }
typ = _{ ident }

// Literals
literal = _{ boolean_literal | float_literal | integer_literal | string_literal }
boolean_literal = @{ "true" | "false" }
string_literal = ${ "\"" ~ string_content ~ "\"" }
string_content = @{ char* }
char = {
    !("\"" | "\\") ~ ANY
    | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t")
    | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})
}
integer_literal = @{ ASCII_DIGIT+ }
float_literal = @{ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ "." ~ ASCII_DIGIT* }

WHITESPACE = _{ " " | "\n" | "\t" }
COMMENT = _{ "//" ~ (!NEWLINE ~ ANY)* }
lila-parsing/src/backend/pest/mod.rs (new file, 376 lines)
@@ -0,0 +1,376 @@
use lila_ast::typing::Type;
use pest::iterators::Pair;
use pest::pratt_parser::PrattParser;
use pest::Parser as PestParser;

use lila_ast::*;

#[derive(pest_derive::Parser)]
#[grammar = "src/backend/pest/grammar.pest"]
struct LilaParser;

use lazy_static;
lazy_static::lazy_static! {
    static ref PRATT_PARSER: PrattParser<Rule> = {
        use pest::pratt_parser::{Assoc::*, Op};
        use Rule::*;

        // Precedence is defined lowest to highest
        PrattParser::new()
            .op(Op::infix(and, Left))
            .op(Op::infix(or, Left))
            .op(Op::prefix(not))
            .op(Op::infix(equal, Left) | Op::infix(not_equal, Left))
            .op(Op::infix(add, Left) | Op::infix(subtract, Left))
            .op(Op::infix(modulo, Left))
            .op(Op::infix(multiply, Left) | Op::infix(divide, Left))
    };
}

#[derive(Default)]
pub struct Parser {
    source: SourceId,
}

impl crate::Parser for Parser {
    fn parse_as_module(
        &mut self,
        source: &str,
        path: ModulePath,
        id: SourceId,
    ) -> anyhow::Result<Module> {
        self.source = id;
        let mut pairs = LilaParser::parse(Rule::source_file, source)?;

        assert!(pairs.len() == 1);
        let module = self.parse_module(pairs.next().unwrap().into_inner().next().unwrap(), path);

        Ok(module)
    }
}

impl Parser {
    fn parse_module(&self, pair: Pair<Rule>, path: ModulePath) -> Module {
        assert!(pair.as_rule() == Rule::module_items);

        let mut module = Module::new(path);

        let pairs = pair.into_inner();
        for pair in pairs {
            match pair.as_rule() {
                Rule::definition => {
                    let def = self.parse_definition(pair.into_inner().next().unwrap());
                    match def {
                        Definition::FunctionDefinition(func) => module.functions.push(func),
                    }
                }
                Rule::use_statement => {
                    let path = self.parse_import(pair.into_inner().next().unwrap());
                    module.imports.push(path);
                }
                _ => panic!("unexpected rule in source_file: {:?}", pair.as_rule()),
            }
        }

        module
    }

    fn parse_block(&self, pair: Pair<Rule>) -> Block {
        let mut statements = vec![];
        let mut value = None;
        let span = self.make_span(&pair);

        for pair in pair.into_inner() {
            match pair.as_rule() {
                Rule::statement => statements.push(self.parse_statement(pair)),
                Rule::expr => value = Some(self.parse_expression(pair)),
                _ => panic!("unexpected rule {:?} in block", pair.as_rule()),
            }
        }

        Block {
            statements,
            value,
            typ: Type::Undefined,
            span: Some(span),
        }
    }

    fn parse_statement(&self, pair: Pair<Rule>) -> Statement {
        let pair = pair.into_inner().next().unwrap();
        let span = self.make_span(&pair);

        match pair.as_rule() {
            Rule::assign_statement => {
                let mut pairs = pair.into_inner();
                let identifier = pairs.next().unwrap().as_str().to_string();
                let expr = self.parse_expression(pairs.next().unwrap());
                Statement::AssignStatement {
                    lhs: identifier,
                    rhs: Box::new(expr),
                    span,
                }
            }
            Rule::declare_statement => {
                let mut pairs = pair.into_inner();
                let identifier = pairs.next().unwrap().as_str().to_string();
                let expr = self.parse_expression(pairs.next().unwrap());
                Statement::DeclareStatement {
                    lhs: identifier,
                    rhs: Box::new(expr),
                    span,
                }
            }
            Rule::return_statement => {
                let expr = pair
                    .into_inner()
                    .next()
                    .map(|expr| self.parse_expression(expr));
                Statement::ReturnStatement(ReturnStatement { expr, span })
            }
            Rule::call_statement => {
                let call = self.parse_call(pair.into_inner().next().unwrap());
                Statement::CallStatement {
                    call: Box::new(call),
                    span,
                }
            }
            Rule::use_statement => {
                let import = self.parse_import(pair.into_inner().next().unwrap());
                Statement::UseStatement {
                    import: Box::new(import),
                    span,
                }
            }
            Rule::if_statement => {
                let mut pairs = pair.into_inner();
                let condition = self.parse_expression(pairs.next().unwrap());
                let block = self.parse_block(pairs.next().unwrap());
                if pairs.next().is_some() {
                    todo!("implement if-statements with else branch (and else if)")
                }
                Statement::IfStatement {
                    condition: Box::new(condition),
                    then_block: Box::new(block),
                    span,
                }
            }
            Rule::while_statement => {
                let mut pairs = pair.into_inner();
                let condition = self.parse_expression(pairs.next().unwrap());
                let block = self.parse_block(pairs.next().unwrap());
                Statement::WhileStatement {
                    condition: Box::new(condition),
                    loop_block: Box::new(block),
                    span,
                }
            }
            _ => unreachable!("unexpected rule '{:?}' in parse_statement", pair.as_rule()),
        }
    }

    fn parse_import(&self, pair: Pair<Rule>) -> Import {
        Import(pair.as_str().to_string())
    }

    fn parse_call(&self, pair: Pair<Rule>) -> Call {
        let mut pairs = pair.into_inner();
        // TODO: support calls on more than identifiers (needs grammar change)

        let pair = pairs.next().unwrap();
        let callee = SExpr {
            expr: Expr::Identifier {
                name: pair.as_str().to_string(),
                typ: Type::Undefined,
            },
            span: self.make_span(&pair),
        };
        let args: Vec<SExpr> = pairs
            .next()
            .unwrap()
            .into_inner()
            .map(|arg| self.parse_expression(arg))
            .collect();

        Call {
            callee: Box::new(callee),
            args,
            typ: Type::Undefined,
        }
    }

    fn parse_expression(&self, pair: Pair<Rule>) -> SExpr {
        let span = self.make_span(&pair);
        let pairs = pair.into_inner();
        let mut map = PRATT_PARSER
            .map_primary(|primary| {
                let span = self.make_span(&primary);
                match primary.as_rule() {
                    Rule::integer_literal => SExpr {
                        expr: Expr::IntegerLiteral(primary.as_str().parse().unwrap()),
                        span,
                    },

                    Rule::float_literal => SExpr {
                        expr: Expr::FloatLiteral(primary.as_str().parse().unwrap()),
                        span,
                    },

                    Rule::string_literal => SExpr {
                        expr: Expr::StringLiteral(
                            primary
                                .into_inner()
                                .next()
                                .unwrap()
                                .as_str()
                                .parse()
                                .unwrap(),
                        ),
                        span,
                    },

                    Rule::expr => self.parse_expression(primary),

                    Rule::ident => SExpr {
                        expr: Expr::Identifier {
                            name: primary.as_str().to_string(),
                            typ: Type::Undefined,
                        },
                        span,
                    },

                    Rule::call => SExpr {
                        expr: Expr::Call(Box::new(self.parse_call(primary))),
                        span,
                    },

                    Rule::block => SExpr {
                        expr: Expr::Block(Box::new(self.parse_block(primary))),
                        span,
                    },

                    Rule::if_expr => {
                        let mut pairs = primary.into_inner();
                        let condition = self.parse_expression(pairs.next().unwrap());
                        let true_block = self.parse_block(pairs.next().unwrap());
                        let else_value = self.parse_expression(pairs.next().unwrap());
                        SExpr {
                            expr: Expr::IfExpr {
                                cond: Box::new(condition),
                                then_body: Box::new(true_block),
                                else_body: Box::new(else_value),
                                typ: Type::Undefined,
                            },
                            span,
                        }
                    }

                    Rule::boolean_literal => SExpr {
                        expr: Expr::BooleanLiteral(match primary.as_str() {
                            "true" => true,
                            "false" => false,
                            _ => unreachable!(),
                        }),
                        span,
                    },

                    _ => unreachable!(
                        "Unexpected rule '{:?}' in primary expression",
                        primary.as_rule()
                    ),
                }
            })
            .map_infix(|lhs, op, rhs| {
                let operator = match op.as_rule() {
                    Rule::add => BinaryOperator::Add,
                    Rule::subtract => BinaryOperator::Sub,
                    Rule::multiply => BinaryOperator::Mul,
                    Rule::divide => BinaryOperator::Div,
                    Rule::modulo => BinaryOperator::Modulo,
                    Rule::equal => BinaryOperator::Equal,
                    Rule::not_equal => BinaryOperator::NotEqual,
                    Rule::and => BinaryOperator::And,
                    Rule::or => BinaryOperator::Or,
                    _ => unreachable!(),
                };
                let expr = Expr::BinaryExpression(BinaryExpression {
                    lhs: Box::new(lhs),
                    op: operator,
                    op_span: self.make_span(&op),
                    rhs: Box::new(rhs),
                    typ: Type::Undefined,
                });
                SExpr { expr, span }
            })
            .map_prefix(|op, inner| {
                let operator = match op.as_rule() {
                    Rule::not => UnaryOperator::Not,
                    _ => unreachable!(),
                };
                let expr = Expr::UnaryExpression {
                    op: operator,
                    inner: Box::new(inner),
                };
                SExpr { expr, span }
            });
        map.parse(pairs)
    }

    fn parse_parameter(&self, pair: Pair<Rule>) -> Parameter {
        assert!(pair.as_rule() == Rule::parameter);
        let mut pair = pair.into_inner();
        let name = pair.next().unwrap().as_str().to_string();
        let typ = Type::from(pair.next().unwrap().as_str());
        Parameter { name, typ }
    }

    fn parse_definition(&self, pair: Pair<Rule>) -> Definition {
        match pair.as_rule() {
            Rule::func_def => {
                let span = self.make_span(&pair);
                let mut pairs = pair.into_inner();
                let name = pairs.next().unwrap().as_str().to_string();
                let parameters: Vec<Parameter> = pairs
                    .next()
                    .unwrap()
                    .into_inner()
                    .map(|param| self.parse_parameter(param))
                    .collect();
                let pair = pairs.next().unwrap();
                // Before the block there is an optional return type
                let (return_type, return_type_span, pair) = match pair.as_rule() {
                    Rule::ident => (
                        Some(Type::from(pair.as_str())),
                        Some(self.make_span(&pair)),
                        pairs.next().unwrap(),
                    ),
                    Rule::block => (None, None, pair),
                    _ => unreachable!(
                        "Unexpected rule '{:?}' in function definition, expected return type or block",
                        pair.as_rule()
                    ),
                };
                let body = self.parse_block(pair);
                let body = Box::new(body);
                Definition::FunctionDefinition(FunctionDefinition {
                    name,
                    parameters,
                    return_type,
                    return_type_span,
                    span,
                    body,
                })
            }
            _ => panic!("unexpected node for definition: {:?}", pair.as_rule()),
        }
    }

    fn make_span(&self, pair: &Pair<Rule>) -> Span {
        let span = pair.as_span();
        Span {
            source: self.source,
            start: span.start(),
            end: span.end(),
        }
    }
}
lila-parsing/src/backend/tree_sitter/mod.rs (new file, 203 lines)
@@ -0,0 +1,203 @@
use anyhow::{anyhow, bail, ensure};
use tree_sitter::{Node, TreeCursor};

use crate::ast::*;

pub struct Parser(tree_sitter::Parser);

impl Default for Parser {
    fn default() -> Self {
        let mut parser = tree_sitter::Parser::new();
        let language = tree_sitter_lila::language();
        parser.set_language(&language).unwrap();

        Self(parser)
    }
}

struct SourceParsingContext<'a> {
    source: &'a str,
    source_id: SourceId,
    cursor: &'a mut TreeCursor<'a>,
}

impl<'a> SourceParsingContext<'a> {
    fn span(&self) -> Span {
        Span {
            source: self.source_id,
            start: self.cursor.node().start_byte(),
            end: self.cursor.node().end_byte(),
        }
    }

    fn iter_children(&'a mut self) -> NodeIterator<'a> {
        NodeIterator {
            is_child: false,
            cursor: self.cursor,
        }
    }
}

struct NodeIterator<'a> {
    is_child: bool,
    cursor: &'a mut TreeCursor<'a>,
}

impl<'a> Iterator for NodeIterator<'a> {
    type Item = Node<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.is_child {
            match self.cursor.goto_next_sibling() {
                true => Some(self.cursor.node()),
                false => None,
            }
        } else {
            match self.cursor.goto_first_child() {
                true => Some(self.cursor.node()),
                false => None,
            }
        }
    }
}

impl Parser {
    fn parse_param<'a>(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Parameter> {
        ensure!(ctx.cursor.goto_first_child());
        let name = ctx
            .cursor
            .node()
            .utf8_text(ctx.source.as_ref())
            .expect("utf8 error")
            .into();

        ensure!(ctx.cursor.goto_next_sibling());
        let typ = ctx
            .cursor
            .node()
            .utf8_text(ctx.source.as_ref())
            .expect("utf8 error")
            .into();

        ctx.cursor.goto_parent();

        Ok(Parameter { name, typ })
    }

    fn parse_function<'a>(
        &self,
        ctx: &mut SourceParsingContext,
    ) -> anyhow::Result<FunctionDefinition> {
        let span = ctx.span();

        // 1: name
        assert!(ctx.cursor.goto_first_child());
        assert!(ctx.cursor.field_name() == Some("name"));
        let name: String = ctx
            .cursor
            .node()
            .utf8_text(ctx.source.as_ref())
            .expect("utf8 error")
            .into();

        // 2: parameters
        assert!(ctx.cursor.goto_next_sibling());
        assert!(ctx.cursor.field_name() == Some("parameters"));
        let mut parameters = Vec::new();

        if ctx.cursor.goto_first_child() {
            loop {
                let param = self.parse_param(ctx)?;

                parameters.push(param);

                if !ctx.cursor.goto_next_sibling() {
                    break;
                }
            }

            ctx.cursor.goto_parent();
        }

        // 3: return type
        assert!(ctx.cursor.goto_next_sibling());
        assert!(ctx.cursor.field_name() == Some("return_type"));
        let return_type = Some(ctx.cursor.node().utf8_text(ctx.source.as_ref())?.into());
        let return_type_span = Some(ctx.span());

        // 4: body
        assert!(ctx.cursor.goto_next_sibling());
        assert!(ctx.cursor.field_name() == Some("body"));
        let body = Box::new(self.parse_block(ctx)?);

        Ok(FunctionDefinition {
            name,
            parameters,
            return_type,
            return_type_span,
            body,
            span,
        })
    }

    fn parse_block(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Block> {
        let mut statements = Vec::new();
        let mut value = None;

        for child in ctx.iter_children() {
            match child.kind() {
                "statement" => {
                    ensure!(
                        value.is_none(),
                        "cannot have a statement after an expression in a block"
                    );
                    let statement = self.parse_statement(ctx)?;
                    statements.push(statement);
                }
                "expr" => value = Some(self.parse_expr(ctx)),
            };
        }

        let block = Block { statements, value };
        Ok(block)
    }

    fn parse_statement(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Statement> {
        assert_eq!(ctx.cursor.node().kind(), "statement");

        todo!("statement lowering for the tree-sitter backend is not implemented yet")
    }

    fn parse_expr(&self, ctx: &mut SourceParsingContext) -> anyhow::Result<Expr> {
        assert_eq!(ctx.cursor.node().kind(), "expr");

        todo!("expression lowering for the tree-sitter backend is not implemented yet")
    }
}

impl crate::parsing::Parser for Parser {
    fn parse_as_module(
        &mut self,
        source: &str,
        path: ModulePath,
        id: SourceId,
    ) -> anyhow::Result<Module> {
        let tree = self.0.parse(&source, None).unwrap();

        let mut cursor = tree.walk();
        {
            let node = cursor.node();
            assert!(node.kind() == "source_file");
        }

        let ctx = SourceParsingContext {
            source,
            source_id: id,
            cursor: &mut cursor,
        };

        let mut module = Module::new(path);

        Ok(module)
    }
}
lila-parsing/src/lib.rs (new file, 27 lines)
@@ -0,0 +1,27 @@
mod backend;
mod tests;

use lila_ast::{Module, ModulePath, SourceId};

pub trait Parser: Default {
    fn parse_file(&mut self, path: &std::path::Path, id: SourceId) -> anyhow::Result<Module> {
        let source = std::fs::read_to_string(path)?;
        let module_path = ModulePath::from(path);
        let mut module = self.parse_as_module(&source, module_path, id)?;
        module.file = Some(path.to_owned());
        Ok(module)
    }

    fn parse_as_module(
        &mut self,
        source: &str,
        path: ModulePath,
        id: SourceId,
    ) -> anyhow::Result<Module>;
}

#[cfg(feature = "pest")]
pub use self::backend::pest::Parser as DefaultParser;

#[cfg(feature = "tree-sitter")]
pub use self::backend::tree_sitter::Parser as DefaultParser;
lila-parsing/src/tests.rs (new file, 94 lines)
@@ -0,0 +1,94 @@
#[cfg(test)]
use pretty_assertions::assert_eq;

#[test]
fn test_addition_function() {
    use crate::*;
    use lila_ast::typing::*;
    use lila_ast::*;

    let source = "fn add(a: int, b: int) int { a + b }";
    let path = ModulePath::from("test");
    let module = DefaultParser::default()
        .parse_as_module(source, path.clone(), 0)
        .expect("parsing error");

    let expected_module = Module {
        file: None,
        imports: vec![],
        functions: vec![FunctionDefinition {
            name: Identifier::from("add"),
            parameters: vec![
                Parameter {
                    name: Identifier::from("a"),
                    typ: Type::Int,
                },
                Parameter {
                    name: Identifier::from("b"),
                    typ: Type::Int,
                },
            ],
            return_type: Some(Type::Int),
            body: Box::new(Block {
                statements: vec![],
                value: Some(SExpr {
                    expr: Expr::BinaryExpression(BinaryExpression {
                        lhs: Box::new(SExpr {
                            expr: Expr::Identifier {
                                name: Identifier::from("a"),
                                typ: Type::Undefined,
                            },
                            span: Span {
                                source: 0,
                                start: 29,
                                end: 30,
                            },
                        }),
                        op: BinaryOperator::Add,
                        op_span: Span {
                            source: 0,
                            start: 31,
                            end: 32,
                        },
                        rhs: Box::new(SExpr {
                            expr: Expr::Identifier {
                                name: Identifier::from("b"),
                                typ: Type::Undefined,
                            },
                            span: Span {
                                source: 0,
                                start: 33,
                                end: 34,
                            },
                        }),
                        typ: Type::Undefined,
                    }),
                    span: Span {
                        source: 0,
                        start: 29,
                        end: 34,
                    },
                }),
                typ: Type::Undefined,
                span: Some(Span {
                    source: 0,
                    start: 27,
                    end: source.len(),
                }),
            }),
            span: Span {
                source: 0,
                start: 0,
                end: source.len(),
            },
            return_type_span: Some(Span {
                source: 0,
                start: 23,
                end: 26,
            }),
        }],
        path,
    };

    assert_eq!(module, expected_module);
}
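Not part of the diff: as a quick orientation, here is a minimal sketch of how a downstream crate might drive the new lila-parsing crate through its Parser trait, mirroring the call pattern in lila-parsing/src/tests.rs. The module path "example", the source id 0, and the consumer's own anyhow and lila-ast dependencies are illustrative assumptions, not part of this commit.

// Hypothetical consumer of the new crate (default "pest" feature assumed).
use lila_ast::ModulePath;
use lila_parsing::{DefaultParser, Parser};

fn main() -> anyhow::Result<()> {
    // Parse a source string as a module; the source id (0) is an arbitrary placeholder.
    let source = "fn add(a: int, b: int) int { a + b }";
    let module = DefaultParser::default()
        .parse_as_module(source, ModulePath::from("example"), 0)?;
    println!("parsed {} function definition(s)", module.functions.len());
    Ok(())
}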