From a9b150a40eb6cfc83be02d3ec4950a0d30309163 Mon Sep 17 00:00:00 2001 From: Servostar Date: Mon, 24 Oct 2022 21:08:54 +0200 Subject: [PATCH] finished static type system --- README.md | 2 +- src/main.rs | 18 ++-- src/parser/data.rs | 62 ++++++++---- src/parser/mod.rs | 228 +++++++++++++++++++++++++++++++++++---------- src/token/mod.rs | 114 ++++++++++++++++++++++- 5 files changed, 341 insertions(+), 83 deletions(-) diff --git a/README.md b/README.md index b462aee..e4d3429 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,4 @@ It *will* contain features such as: 1. a COMEFROM keyword (inverse goto) 2. a ```don't``` code block which never executes 3. ```rand(x)``` returns x, always. -3. swapped meaning of "" (for single characters) and '' (now for string literal) \ No newline at end of file +4. no if. only `unless`, an inverted version of if. Meaning a block get executed if the is false and doesn't if it is true \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 2f7ba7a..48f46d8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,19 +20,15 @@ fn main() { r" pi = 3.1415926535 -sin(x: f4) = { { - x +foo(x:i4, y:f4) { + } -man() { - - x:i4 = 0 - loop { - x = x + 1 - if sin(x > 5) { - break - } - } +main() { + a:i4 = 8 + b:f4 = 9 + + foo(a, 6) } "; diff --git a/src/parser/data.rs b/src/parser/data.rs index 4859e49..6b6d5f7 100644 --- a/src/parser/data.rs +++ b/src/parser/data.rs @@ -1,17 +1,12 @@ +use core::panic; use std::collections::{VecDeque}; use crate::token::{Token}; use crate::Prim; #[derive(Debug)] pub struct Func<'a> { - /// name of this function - pub name: Option<&'a str>, - /// parameter names - pub args: Option>, /// raw tokens pub raw: Option>>, - /// if the function returns a single value - pub results: bool, /// parsed content pub expr: Option>, } @@ -19,22 +14,33 @@ pub struct Func<'a> { impl<'a> Func<'a> { pub fn new() -> Self { Self { - args: None, raw: None, - name: None, - results: false, expr: None, } } } -impl<'a> PartialEq for Func<'a> { - fn eq(&self, other: &Self) -> bool { - self.args == other.args && self.name == self.name +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Declr<'a> { + /// name of this function + pub name: Option<&'a str>, + /// parameter names + pub args: Option>, + /// if the function returns a single value + pub results: bool, +} + +impl<'a> Declr<'a> { + pub fn new() -> Self { + Self { + name: None, + args: None, + results: false + } } } -impl<'a> std::fmt::Display for Func<'a> { +impl<'a> std::fmt::Display for Declr<'a> { /// print this functions declaration in the form of ```foo(x,y) = {}``` fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("{}", &self.name.unwrap()))?; @@ -72,7 +78,6 @@ pub enum Expr<'a> { } pub struct Scope<'a> { - pub funcs: Vec<&'a str>, pub args: Option<&'a Vec<(&'a str, Prim)>>, /// stack of scoped block variables pub vars: Vec)>>, @@ -90,10 +95,6 @@ impl<'a> Scope<'a> { pub fn decl_var(&mut self, name: String, typ: Option) { self.vars.last_mut().unwrap().push((name, typ)) } - - pub fn is_func(&self, name: &'a str) -> bool { - self.funcs.contains(&name) - } pub fn is_arg(&self, name: &'a str) -> bool { if let Some(args) = self.args { @@ -106,6 +107,31 @@ impl<'a> Scope<'a> { false } + pub fn get_arg_type(&self, name: &'a str) -> Prim { + if let Some(args) = self.args { + for arg in args.iter() { + if arg.0 == name { + return arg.1; + } + } + } + panic!("No argument of name: {name}"); + } + + pub fn get_var_type(&self, name: &'a str) -> Prim { + // create an owned version of the string + let owned = &name.to_owned(); + + for vars in self.vars.iter() { + for var in vars.iter() { + if &var.0 == owned { + return var.1.expect("Untyped variable"); + } + } + } + panic!("No variable of name: {name}"); + } + pub fn is_var(&self, name: &'a str) -> Option { // create an owned version of the string let owned = &name.to_owned(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0ecd860..0042f38 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,17 +1,19 @@ use core::{panic}; use std::{collections::{VecDeque}, vec}; -use crate::token::{Token, Operator, Assoc, Prim, MessageType}; +use crate::token::{Token, Operator, Assoc, Prim, MessageType, Keyword}; pub mod data; use data::*; /// simple brace-counting parser to detect functions -fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) -> Vec> { +fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) -> (Vec>, Vec>) { let mut funcs = Vec::new(); + let mut declrs = Vec::new(); // function to currently identifiy let mut func = Func::new(); + let mut declr = Declr::new(); // count open brackets let mut brace_cnt = 0; @@ -21,11 +23,13 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) macro_rules! finish_func { () => { - if funcs.contains(&func) { - panic!("Function defined multiple times: {func}") + if declrs.contains(&declr) { + panic!("Function defined multiple times: {declr}") } funcs.push(func); + declrs.push(declr); + declr = Declr::new(); func = Func::new(); single_line = false; }; @@ -40,10 +44,16 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) '{' => { brace_cnt += 1; if brace_cnt == 1 { - if func.name.is_none() { + if declr.name.is_none() { dbginf.print(MessageType::Error, "Anonymous function not permitted", source); panic!(); } + + if paren_cnt > 0 { + dbginf.print(MessageType::Error, "Unclosed parameter list", source); + panic!(); + } + single_line = false; func.raw = Some(VecDeque::new()); continue; @@ -73,16 +83,16 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) match &top { Token::Operator(op, dbginf) => match op { Operator::Assign => { - if func.results { + if declr.results { dbginf.print(MessageType::Error, "double function assignment", source); panic!(); } - if func.name.is_none() { + if declr.name.is_none() { dbginf.print(MessageType::Error, "Anonymous function", source); panic!(); } - func.results = true; + declr.results = true; single_line = true; continue; } @@ -90,18 +100,18 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) } Token::Assign(name, _, dbginf) => { - if func.results { + if declr.results { dbginf.print(MessageType::Error, "double function assignment", source); panic!(); } - if func.name.is_some() { + if declr.name.is_some() { dbginf.print(MessageType::Error, "multiple function names", source); panic!(); } func.raw = Some(VecDeque::new()); - func.name = Some(name); - func.results = true; + declr.name = Some(name); + declr.results = true; single_line = true; continue; } @@ -112,12 +122,12 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) paren_cnt += 1; if paren_cnt == 1 { - if func.args.is_some() { + if declr.args.is_some() { dbginf.print(MessageType::Error, "double parameter list", source); panic!(); } - func.args = Some(Vec::new()); + declr.args = Some(Vec::new()); continue; } }, @@ -132,14 +142,17 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) Token::Word(text, dbginf) => { - if func.name.is_some() { - if func.args.is_none() { + if declr.name.is_some() { + if declr.args.is_none() { dbginf.print(MessageType::Error, "multiple function names", source); panic!(); } + } else if brace_cnt > 0 { + dbginf.print(MessageType::Error, "brace count missmatch", source); + panic!(); } else { - func.name = Some(text); + declr.name = Some(text); continue; } } @@ -151,7 +164,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) body.push_back(top); continue; } - else if let Some(args) = &mut func.args { + else if let Some(args) = &mut declr.args { if paren_cnt == 0 { top.print(MessageType::Error, "token is no parameter", source); @@ -181,14 +194,21 @@ fn discover_functions<'a>(tokens: &mut VecDeque>, source: &str) } } } + + if let Some(raw) = func.raw { + if let Some(front) = raw.front() { + front.print(MessageType::Error, "Open function body", source); + panic!(); + } + } - funcs + (funcs, declrs) } /// parse the functions raw content to expr for easy compilation using a brace-counter. /// - ```{...}``` surround a block /// - line breaks seperate expressions -fn discover_exprs<'a>(functions: &mut Vec>, source: &'a str) { +fn discover_exprs<'a>(functions: &mut Vec>, _: &Vec>, source: &'a str) { for func in functions.iter_mut() { let mut blocks = vec![Block::new()]; @@ -251,42 +271,131 @@ fn discover_exprs<'a>(functions: &mut Vec>, source: &'a str) { } } +fn check_var_typ(typ: &mut Option, operands: &mut Vec, dbginf: &crate::token::DebugInfo, source: &str) { + if let Some(value) = operands.pop() { + if !operands.is_empty() { + dbginf.print(MessageType::Error, format!("Expr does't resolve to a single value but multiple").as_str(), source); + panic!(); + } + + if let Some(typ) = typ { + if !typ.is_equal(value) { + dbginf.print(MessageType::Error, format!("Variable has type {:?} but {:?} was given", typ, value).as_str(), source); + panic!(); + } + } else { + // assign a type to untyped variable + dbginf.print(MessageType::Info, format!("Variable has no fixed type, guessing type: {:?}", value).as_str(), source); + + *typ = Some(value); + } + + } else { + dbginf.print(MessageType::Error, "No result to bind variable to", source); + panic!(); + } +} + +fn process_keyword(keyword: Keyword, _: &Vec, _: &mut Scope, operands: &mut Vec, dbginf: &crate::token::DebugInfo, source: &str) { + match keyword { + Keyword::If => { + if operands.len() != 1 { + dbginf.print(MessageType::Error, format!("Expected single boolean got {} values", operands.len()).as_str(), source); + panic!(); + } + + if let Some(operand) = operands.pop() { + match operand { + Prim::Bool => (), + _ => { + dbginf.print(MessageType::Error, format!("Expected boolean, got {:?}", operand).as_str(), source); + panic!(); + } + } + } + }, + _ => () + } +} + +fn collapse_operation(operation: &Token, declrs: &Vec, scope: &mut Scope, operands: &mut Vec, source: &str) { + match operation { + Token::Operator(op, dbginf) => op.operate(operands, &dbginf, source), + Token::Assign(name, mut typ, dbginf) => { + check_var_typ(&mut typ, operands, &dbginf, source); + scope.decl_var((*name).to_owned(), typ.to_owned()); + }, + Token::Func(name, dbginf) => call_func(name, declrs, scope, operands, &dbginf, source), + Token::Keyword(keyword, dbginf) => process_keyword(*keyword, declrs, scope, operands, &dbginf, source), + _ => () + } +} + +fn call_func(name: &str, declrs: &Vec, scope: &mut Scope, operands: &mut Vec, dbginf: &crate::token::DebugInfo, source: &str) { + for declr in declrs { + if declr.name.is_some() && declr.name.unwrap() == name { + + if let Some(args) = &declr.args { + + if args.len() > operands.len() { + dbginf.print(MessageType::Error, format!("Expected {} parameters but got {}", args.len(), operands.len()).as_str(), source); + panic!() + } + + for (x, arg) in args.iter().enumerate() { + let operand = operands.first().unwrap(); + + if !operand.is_equal(arg.1) { + dbginf.print(MessageType::Error, format!("Expected {:?} as parameter {x}, but got {:?}", arg, operand).as_str(), source); + panic!() + } + + operands.remove(0); + } + } + + // TODO: push result type + // operands.push(); + + break + } + } +} + /// parse a single term using a modified shunting yard -fn parse_term<'a, 'b>(term: &mut VecDeque>, scope: &mut Scope, source: &'b str) { +fn parse_term<'a>(term: &mut VecDeque>, declrs: &Vec>, scope: &mut Scope, source: & str) { let mut op_stack = vec![]; let mut output = VecDeque::with_capacity(term.len()); let mut value_stack = vec![]; - /* - - Token::Number(text) => value_stack.push(CompileTimeType::UntypedNum(text)), - Token::Bool(_) => value_stack.push(CompileTimeType::Prim(Prim::Bool)), - - */ - 'outer: while let Some(token) = term.pop_front() { match &token { Token::Word(text, dbginf) => { - if scope.is_func(text) { + if is_func(declrs, text) { op_stack.push(Token::Func(text, *dbginf)); continue; } else if scope.is_arg(text) { + value_stack.push(scope.get_arg_type(text)); output.push_back(Token::Arg(text, *dbginf)); continue; } else if scope.is_var(text).is_some() { + value_stack.push(scope.get_var_type(text)); output.push_back(Token::Var(text, *dbginf)); continue; } dbginf.print(MessageType::Error, "Unknown word", source); panic!() } + Token::Bool(_, _) => { + output.push_back(token); + value_stack.push(Prim::Bool) + }, Token::Number(_, _) => { output.push_back(token); - value_stack.push(CompileTimeType::UntypedNum) + value_stack.push(Prim::UntypedNum) }, - Token::Assign(text, typ, _) => { - scope.decl_var((*text).to_owned(), typ.to_owned()); + Token::Assign(_, _, _) => { op_stack.push(token); }, Token::Keyword(_, _) => op_stack.push(token), @@ -300,13 +409,20 @@ fn parse_term<'a, 'b>(term: &mut VecDeque>, scope: &mut Scope, source: Token::Delemiter(char, _) => if *char == '(' { if let Some(next) = op_stack.last() { match &next { - Token::Func(_, _) => output.push_back(op_stack.pop().unwrap()), + Token::Func(_, _) => { + let token = op_stack.pop().unwrap(); + collapse_operation(&token, declrs, scope, &mut value_stack, source); + output.push_back(token); + }, _ => () } } continue 'outer; }, - _ => output.push_back(token) + _ => { + collapse_operation(&token, declrs, scope, &mut value_stack, source); + output.push_back(token) + } } } panic!("Mismatched right parenthesis") @@ -323,6 +439,7 @@ fn parse_term<'a, 'b>(term: &mut VecDeque>, scope: &mut Scope, source: let prec1 = op1.prec(); if prec1 > prec0 || prec0 == prec1 && op.assoc() == Assoc::Left { + collapse_operation(top, declrs, scope, &mut value_stack, source); output.push_back(op_stack.pop().unwrap()); continue } @@ -342,42 +459,53 @@ fn parse_term<'a, 'b>(term: &mut VecDeque>, scope: &mut Scope, source: Token::Delemiter(char, _) => if *char == '(' { panic!("Mismatched parenthesis") }, - _ => output.push_back(token) + _ => { + collapse_operation(&token, declrs, scope, &mut value_stack, source); + output.push_back(token) + } } } + if value_stack.len() > 1 { + output[0].print(MessageType::Error, "expression resolves to multiple results", source); + panic!(); + } + term.append(&mut output); } -enum CompileTimeType { - Prim(Prim), - UntypedNum, +fn is_func(declrs: &[Declr], text: &str) -> bool { + for declr in declrs { + if declr.name.is_some() && declr.name.unwrap() == text { + return true; + } + } + return false; } -fn parse_block<'a>(block: &mut Block, scope: &mut Scope, source: &'a str) { +fn parse_block<'a>(block: &mut Block<'a>, declrs: &Vec>, scope: &mut Scope, source: &str) { scope.alloc_scope(); for expr in block.iter_mut() { match expr { - Expr::Block(block) => parse_block(block, scope, source), - Expr::Term(term) => parse_term(term, scope, source) + Expr::Block(block) => parse_block(block, declrs, scope, source), + Expr::Term(term) => parse_term(term, declrs, scope, source) } } scope.pop_scope(); } -fn parse_exprs<'a>(funcs: &mut Vec>, source: &'a str) { +fn parse_exprs<'a>(funcs: &mut Vec>, declrs: &Vec>, source: &'a str) { let mut scope = Scope { - funcs: funcs.iter().map(|f| f.name.unwrap()).collect(), args: None, vars: vec![] }; - for func in funcs.iter_mut() { + for (x, func) in funcs.iter_mut().enumerate() { match func.expr.as_mut().expect("Function has no body") { Expr::Block(block) => { - scope.args = func.args.as_ref(); - - parse_block(block, &mut scope, source) + scope.args = declrs[x].args.as_ref(); + + parse_block(block, declrs, &mut scope, source) }, _ => panic!("Fatal-Compilier-Error: function must have a block") } @@ -388,10 +516,10 @@ fn parse_exprs<'a>(funcs: &mut Vec>, source: &'a str) { /// any program is made out of functions. /// A function has a name followed by an optional parameter list, followed by an optional equal sign and block. pub fn parse<'a>(tokens: &mut VecDeque>, source: &'a str) -> Vec> { - let mut funcs = discover_functions(tokens, source); + let (mut funcs, declrs) = discover_functions(tokens, source); - discover_exprs(&mut funcs, source); - parse_exprs(&mut funcs, source); + discover_exprs(&mut funcs, &declrs, source); + parse_exprs(&mut funcs, &declrs, source); funcs.iter().for_each(|f| println!("{:?}", f)); diff --git a/src/token/mod.rs b/src/token/mod.rs index 55f2a74..4b2f693 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -79,6 +79,95 @@ impl Operator { _ => Assoc::Right } } + + fn present_types(operands: &[Prim], types: &[Prim], r#yield: Prim, dbginf: &DebugInfo, source: &str) -> Option { + if operands.len() < types.len() { + dbginf.print(MessageType::Error, format!("Missing {} operands", types.len() - operands.len()).as_str(), source); + panic!() + } + + for (x, typ) in types.iter().enumerate() { + if typ != &operands[x] { + return None + } + } + Some(r#yield) + } + + fn check_types(operands: &[Prim], types: &[(Vec, Prim)], dbginf: &DebugInfo, source: &str) -> Option { + for combination in types.iter() { + + if let Some(result) = Self::present_types(operands, &combination.0, combination.1, dbginf, source) { + return Some(result); + } + } + None + } + + pub fn operate(&self, operands: &mut Vec, dbginf: &DebugInfo, source: &str) { + match self { + Operator::Add | Operator::Sub | Operator::Mul | Operator::Div=> { + let types_valid = Self::check_types(operands, &[ + // +-----------------------------------+---------------------------------+ + // | Parameter list of types | result type | + // +-----------------------------------+---------------------------------+ + (vec![Prim::Int, Prim::Int ], Prim::Int ), + (vec![Prim::Real, Prim::Real ], Prim::Real), + (vec![Prim::UntypedNum, Prim::Int ], Prim::Int ), + (vec![Prim::UntypedNum, Prim::Real ], Prim::Real), + (vec![Prim::Int, Prim::UntypedNum], Prim::Int ), + (vec![Prim::Real, Prim::UntypedNum], Prim::Real), + (vec![Prim::UntypedNum, Prim::UntypedNum], Prim::UntypedNum) + ], dbginf, source); + + if let Some(result) = types_valid { + operands.pop(); + operands.pop(); + operands.push(result); + } else { + dbginf.print(MessageType::Error, format!("Missmatched types for {:?}, expected either two integer or reals", self).as_str(), source); + panic!() + } + }, + Operator::And | Operator::Or | Operator::Xor => { + let types_valid = Self::check_types(operands, &[ + (vec![Prim::Bool, Prim::Bool ], Prim::Bool), + ], dbginf, source); + + if let Some(result) = types_valid { + operands.pop(); + operands.pop(); + operands.push(result); + } else { + dbginf.print(MessageType::Error, format!("Missmatched types for {:?}, expected two booleans", self).as_str(), source); + panic!() + } + }, + Operator::Eq | Operator::NotEq | Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => { + let types_valid = Self::check_types(operands, &[ + (vec![Prim::Int, Prim::Int ], Prim::Bool ), + (vec![Prim::Real, Prim::Real ], Prim::Bool ), + (vec![Prim::UntypedNum, Prim::Int ], Prim::Bool ), + (vec![Prim::UntypedNum, Prim::Real ], Prim::Bool ), + (vec![Prim::Int, Prim::UntypedNum], Prim::Bool ), + (vec![Prim::Real, Prim::UntypedNum], Prim::Bool ), + (vec![Prim::UntypedNum, Prim::UntypedNum], Prim::Bool ) + ], dbginf, source); + + if let Some(result) = types_valid { + println!("checked: {:?} for: {:?}", self, operands); + + operands.pop(); + operands.pop(); + operands.push(result); + } else { + dbginf.print(MessageType::Error, format!("Missmatched types for {:?}, expected two numbers", self).as_str(), source); + panic!() + } + }, + _ => panic!("Unknown operator: {:?}", self) + } + } } #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] @@ -94,7 +183,7 @@ pub enum Keyword { impl Keyword { pub fn parse<'a>(text: &'a str) -> Keyword { return match text { - "if" => Keyword::If, + "unless" => Keyword::If, "while" => Keyword::While, "loop" => Keyword::Loop, "break" => Keyword::Break, @@ -110,6 +199,7 @@ pub enum Prim { Int, Real, Bool, + UntypedNum } impl Prim { @@ -125,6 +215,24 @@ impl Prim { } } } + + pub fn is_equal(&self, value: Prim) -> bool { + return match self { + Prim::Bool => *self == value, + Prim::Real => return match value { + Prim::UntypedNum => true, + _ => *self == value, + }, + Prim::Int => return match value { + Prim::UntypedNum => true, + _ => *self == value, + }, + Prim::UntypedNum => return match value { + Prim::Real | Prim::Int => true, + _ => *self == value, + }, + } + } } #[derive(Debug, PartialEq, Eq, Copy, Clone)] @@ -217,7 +325,7 @@ impl<'a> Token<'a> { } } -const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)"; +const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(unless|while|loop|break|continue)|(true|false|ye|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)"; lazy_static::lazy_static! { static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap(); @@ -291,7 +399,7 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque> { } fn parse_bool(text: &str) -> bool { - return match text.to_ascii_lowercase().as_str() { + return match text { "true" | "ye" => true, "false" |"no" => false, "maybe" => rand::random(),