Yard/src/parser/mod.rs

591 lines
21 KiB
Rust

use core::{panic};
use std::{collections::{VecDeque}, vec};
use crate::token::{Token, Operator, Assoc, Prim, MessageType, Keyword};
pub mod data;
use data::*;
/// simple brace-counting parser to detect functions
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str) -> (Vec<Func<'a>>, Vec<Declr<'a>>) {
let mut funcs = Vec::new();
let mut declrs = Vec::new();
// function to currently identifiy
let mut func = Func::new();
let mut declr = Declr::new();
// count open brackets
let mut brace_cnt = 0;
let mut paren_cnt = 0;
let mut single_line = false;
macro_rules! finish_func {
($dbginf:expr) => {
if declrs.contains(&declr) {
panic!("Function defined multiple times: {declr}")
}
if declr.results && declr.result_typ.is_none() {
$dbginf.print(MessageType::Error, format!("Function is missing return type: {}", declr).as_str(), source);
panic!();
}
funcs.push(func);
declrs.push(declr);
declr = Declr::new();
func = Func::new();
single_line = false;
};
}
while let Some(top) = tokens.pop_front() {
// function body detection
// has highest priority
match &top {
Token::Delemiter(char, dbginf) => match char {
'{' => {
brace_cnt += 1;
if brace_cnt == 1 {
if declr.name.is_none() {
dbginf.print(MessageType::Error, "Anonymous function not permitted", source);
panic!();
}
if paren_cnt > 0 {
dbginf.print(MessageType::Error, "Unclosed parameter list", source);
panic!();
}
single_line = false;
func.raw = Some(VecDeque::new());
continue;
}
},
'}' => {
brace_cnt -= 1;
if brace_cnt == 0 {
finish_func!(dbginf);
continue;
}
}
_ => ()
}
Token::Type(typ, dbginf) => {
if declr.results {
if declr.result_typ.is_some() {
dbginf.print(MessageType::Error, "Function must return either nothing or a single type", source);
panic!();
}
declr.result_typ = Some(*typ);
continue;
} else {
dbginf.print(MessageType::Error, "Missing equal sign", source);
panic!();
}
},
Token::LineBreak(dbginf) => if single_line {
finish_func!(dbginf);
continue;
}
_ => if single_line && func.raw.is_none() {
func.raw = Some(VecDeque::new());
}
}
if func.raw.is_none() {
match &top {
Token::Operator(op, dbginf) => match op {
Operator::Assign => {
if declr.results {
dbginf.print(MessageType::Error, "double function assignment", source);
panic!();
}
if declr.name.is_none() {
dbginf.print(MessageType::Error, "Anonymous function", source);
panic!();
}
declr.results = true;
single_line = true;
continue;
}
_ => ()
}
Token::Word(text, dbginf) => {
if declr.name.is_some() {
if declr.args.is_none() {
dbginf.print(MessageType::Error, "multiple function names", source);
panic!();
}
} else if brace_cnt > 0 {
dbginf.print(MessageType::Error, "brace count missmatch", source);
panic!();
}
else {
declr.name = Some(text);
continue;
}
},
Token::Assign(name, _, dbginf) => {
if declr.results {
dbginf.print(MessageType::Error, "double function assignment", source);
panic!();
}
if declr.name.is_some() {
dbginf.print(MessageType::Error, "multiple function names", source);
panic!();
}
func.raw = Some(VecDeque::new());
declr.name = Some(name);
declr.results = true;
single_line = true;
continue;
}
Token::Delemiter(char, dbginf) => match char {
'(' => if func.raw.is_none() {
paren_cnt += 1;
if paren_cnt == 1 {
if declr.args.is_some() {
dbginf.print(MessageType::Error, "double parameter list", source);
panic!();
}
declr.args = Some(Vec::new());
continue;
}
},
')' => {
paren_cnt -= 1;
if paren_cnt == 0 {
continue;
}
}
_ => ()
}
_ => ()
}
}
if let Some(body) = &mut func.raw {
body.push_back(top);
continue;
}
else if let Some(args) = &mut declr.args {
if paren_cnt == 0 {
top.print(MessageType::Error, "token is no parameter", source);
panic!();
}
match &top {
Token::Decl(name, typ, _dbginf) => args.push((name, *typ)),
Token::Word(_, dbginf) => {
dbginf.print(MessageType::Error, "type declaration missing", source);
panic!()
},
_ => {
top.print(MessageType::Error, "argument must be declaration", source);
panic!()
}
}
continue;
}
// if we have anything left it might be an error
match &top {
Token::LineBreak(_) | Token::Terminator(_) => (), // valid whitespace
_ => {
top.print(MessageType::Error, "unresolvable token", source);
panic!()
}
}
}
if let Some(raw) = func.raw {
if let Some(front) = raw.front() {
front.print(MessageType::Error, "Open function body", source);
panic!();
}
}
(funcs, declrs)
}
/// parse the functions raw content to expr for easy compilation using a brace-counter.
/// - ```{...}``` surround a block
/// - line breaks seperate expressions
fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>, _: &Vec<Declr<'a>>, source: &'a str) {
for func in functions.iter_mut() {
let mut blocks = vec![Block::new()];
let mut expr = VecDeque::new();
while let Some(top) = func.raw.as_mut().unwrap().pop_front() {
match &top {
Token::LineBreak(dbginf) | Token::Terminator(dbginf) => if !expr.is_empty() {
blocks.last_mut().unwrap_or_else(|| {
dbginf.print(MessageType::Error, "curly brace missmatch", source);
panic!()
}).push_back(Expr::Term(expr));
expr = VecDeque::new();
continue;
}
Token::Delemiter(char, dbginf) => match char {
'{' => {
blocks.last_mut().unwrap_or_else(|| {
dbginf.print(MessageType::Error, "curly brace missmatch", source);
panic!()
}).push_back(Expr::Term(expr));
expr = VecDeque::new();
blocks.push(Block::new());
continue;
},
'}' => {
// pop topmost block of the stack, storing it in the next lower block
if let Some(block) = blocks.pop() {
blocks.last_mut().unwrap_or_else(|| {
dbginf.print(MessageType::Error, "curly brace missmatch", source);
panic!()
}).push_back(Expr::Block(block));
} else {
panic!("Curly brace missmatch")
}
continue;
},
_ => ()
},
_ => ()
}
expr.push_back(top)
}
if !expr.is_empty() {
blocks.last_mut().unwrap_or_else(|| {
expr.back().unwrap().print(MessageType::Error, "curly brace missmatch", source);
panic!()
}).push_back(Expr::Term(expr));
}
if let Some(block) = blocks.pop() {
func.expr = Some(Expr::Block(block));
} else {
panic!("curly brace missmatch")
}
}
}
fn check_var_typ(typ: &mut Option<Prim>, operands: &mut Vec<Prim>, dbginf: &crate::token::DebugInfo, source: &str) {
if let Some(value) = operands.pop() {
if !operands.is_empty() {
dbginf.print(MessageType::Error, format!("Expr does't resolve to a single value but multiple").as_str(), source);
panic!();
}
if let Some(typ) = typ {
if !typ.is_equal(value) {
dbginf.print(MessageType::Error, format!("Variable has type {:?} but {:?} was given", typ, value).as_str(), source);
panic!();
}
} else {
// assign a type to untyped variable
dbginf.print(MessageType::Info, format!("Variable has no fixed type, guessing type: {:?}", value).as_str(), source);
*typ = Some(value);
}
} else {
dbginf.print(MessageType::Error, "No result to bind variable to", source);
panic!();
}
}
fn process_keyword(keyword: Keyword, _: &Vec<Declr>, scope: &mut Scope, operands: &mut Vec<Prim>, dbginf: &crate::token::DebugInfo, source: &str) {
match keyword {
Keyword::If | Keyword::While => {
if operands.len() != 1 {
dbginf.print(MessageType::Error, format!("Expected single boolean got {} values", operands.len()).as_str(), source);
panic!();
}
if let Some(operand) = operands.pop() {
match operand {
Prim::Bool => (),
_ => {
dbginf.print(MessageType::Error, format!("Expected boolean, got {:?}", operand).as_str(), source);
panic!();
}
}
}
},
Keyword::Return => {
if scope.func_return_typ.is_some() {
dbginf.print(MessageType::Error, "cannot return function, did u mean to use `yield`?", source);
panic!();
}
}
Keyword::Yield => {
if operands.len() != 1 {
dbginf.print(MessageType::Error, format!("Expected single value but got {} values", operands.len()).as_str(), source);
panic!();
}
if let Some(operand) = operands.pop() {
if let Some(typ) = scope.func_return_typ {
if typ != operand {
dbginf.print(MessageType::Error, format!("Expected {:?} but got {:?}", typ, operand).as_str(), source);
panic!();
}
if scope.cond_scope {
scope.yields = true;
}
} else {
dbginf.print(MessageType::Error, format!("Function does not return anything").as_str(), source);
panic!();
}
} else {
dbginf.print(MessageType::Error, format!("Yield must return something").as_str(), source);
panic!();
}
}
_ => ()
}
}
fn collapse_operation(operation: &Token, declrs: &Vec<Declr>, scope: &mut Scope, operands: &mut Vec<Prim>, source: &str) {
match operation {
Token::Operator(op, dbginf) => op.operate(operands, &dbginf, source),
Token::Assign(name, mut typ, dbginf) => {
check_var_typ(&mut typ, operands, &dbginf, source);
scope.decl_var((*name).to_owned(), typ.to_owned());
},
Token::Func(name, dbginf) => call_func(name, declrs, scope, operands, &dbginf, source),
Token::Keyword(keyword, dbginf) => process_keyword(*keyword, declrs, scope, operands, &dbginf, source),
_ => ()
}
}
fn call_func(name: &str, declrs: &Vec<Declr>, _: &mut Scope, operands: &mut Vec<Prim>, dbginf: &crate::token::DebugInfo, source: &str) {
for declr in declrs {
if declr.name.is_some() && declr.name.unwrap() == name {
if let Some(args) = &declr.args {
if args.len() > operands.len() {
dbginf.print(MessageType::Error, format!("Expected {} parameters but got {}", args.len(), operands.len()).as_str(), source);
panic!()
}
for (x, arg) in args.iter().enumerate() {
let operand = operands.first().unwrap();
if !operand.is_equal(arg.1) {
dbginf.print(MessageType::Error, format!("Expected {:?} as parameter {x}, but got {:?}", arg, operand).as_str(), source);
panic!()
}
operands.remove(0);
}
}
if let Some(typ) = declr.result_typ {
operands.push(typ);
}
break
}
}
}
/// parse a single term using a modified shunting yard
fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, declrs: &Vec<Declr<'a>>, scope: &mut Scope, source: & str) {
let mut op_stack = vec![];
let mut output = VecDeque::with_capacity(term.len());
let mut value_stack = vec![];
'outer:
while let Some(token) = term.pop_front() {
match &token {
Token::Word(text, dbginf) => {
if is_func(declrs, text) {
op_stack.push(Token::Func(text, *dbginf));
continue;
} else if scope.is_arg(text) {
value_stack.push(scope.get_arg_type(text));
output.push_back(Token::Arg(text, *dbginf));
continue;
} else if scope.is_var(text).is_some() {
value_stack.push(scope.get_var_type(text));
output.push_back(Token::Var(text, *dbginf));
continue;
}
dbginf.print(MessageType::Error, "Unknown word", source);
panic!()
}
Token::Bool(_, _) => {
output.push_back(token);
value_stack.push(Prim::Bool)
},
Token::Number(_, hint, _) => {
output.push_back(token);
value_stack.push(Prim::UntypedNum(*hint))
},
Token::Assign(_, _, _) => {
op_stack.push(token);
},
Token::Keyword(_, _) => op_stack.push(token),
Token::Delemiter(char, _) => {
match char {
'(' => op_stack.push(token),
')' => {
while let Some(token) = op_stack.pop() {
match &token {
Token::Delemiter(char, _) => if *char == '(' {
if let Some(next) = op_stack.last() {
match &next {
Token::Func(_, _) => {
let token = op_stack.pop().unwrap();
collapse_operation(&token, declrs, scope, &mut value_stack, source);
output.push_back(token);
},
_ => ()
}
}
continue 'outer;
},
_ => {
collapse_operation(&token, declrs, scope, &mut value_stack, source);
output.push_back(token)
}
}
}
panic!("Mismatched right parenthesis")
},
_ => panic!("Misplaced character: '{char}'")
}
}
Token::Operator(op, _) => {
let prec0 = op.prec();
while let Some(top) = op_stack.last(){
match &top {
Token::Operator(op1, _) => {
let prec1 = op1.prec();
if prec1 > prec0 || prec0 == prec1 && op.assoc() == Assoc::Left {
collapse_operation(top, declrs, scope, &mut value_stack, source);
output.push_back(op_stack.pop().unwrap());
continue
}
break
},
_ => break
}
}
op_stack.push(token);
}
_ => ()
}
}
while let Some(token) = op_stack.pop() {
match &token {
Token::Delemiter(char, _) => if *char == '(' {
panic!("Mismatched parenthesis")
},
_ => {
collapse_operation(&token, declrs, scope, &mut value_stack, source);
output.push_back(token)
}
}
}
if value_stack.len() > 1 {
output[0].print(MessageType::Error, "expression resolves to multiple results", source);
panic!();
}
term.append(&mut output);
}
fn is_func(declrs: &[Declr], text: &str) -> bool {
for declr in declrs {
if declr.name.is_some() && declr.name.unwrap() == text {
return true;
}
}
return false;
}
fn parse_block<'a>(block: &mut Block<'a>, declrs: &Vec<Declr<'a>>, scope: &mut Scope, source: &str) {
scope.alloc_scope();
for expr in block.iter_mut() {
match expr {
Expr::Block(block) => parse_block(block, declrs, scope, source),
Expr::Term(term) => parse_term(term, declrs, scope, source)
}
}
scope.pop_scope();
}
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>, declrs: &Vec<Declr<'a>>, source: &'a str) {
let mut scope = Scope {
args: None,
vars: vec![],
func_return_typ: None,
cond_scope: false,
yields: false,
};
for (x, func) in funcs.iter_mut().enumerate() {
match func.expr.as_mut().expect("Function has no body") {
Expr::Block(block) => {
scope.args = declrs[x].args.as_ref();
scope.func_return_typ = declrs[x].result_typ;
scope.cond_scope = false;
scope.yields = false;
parse_block(block, declrs, &mut scope, source);
if scope.func_return_typ.is_some() && !scope.yields {
crate::message(MessageType::Error, format!("Function {} missing return value at some point", declrs[x]));
panic!();
}
},
_ => panic!("Fatal-Compilier-Error: function must have a block")
}
}
}
/// reorder and organize a listing of instructions to a RPN based format:
/// any program is made out of functions.
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &'a str) -> Vec<Func<'a>> {
let (mut funcs, declrs) = discover_functions(tokens, source);
discover_exprs(&mut funcs, &declrs, source);
parse_exprs(&mut funcs, &declrs, source);
for (x, f) in funcs.iter().enumerate() {
println!("{:#?}{:#?}", declrs[x], f);
}
funcs
}