finished static type system

This commit is contained in:
Sven Vogel 2022-10-24 21:08:54 +02:00
parent e8e6ce32cf
commit a9b150a40e
5 changed files with 341 additions and 83 deletions

View File

@ -4,4 +4,4 @@ It *will* contain features such as:
1. a COMEFROM keyword (inverse goto)
2. a ```don't``` code block which never executes
3. ```rand(x)``` returns x, always.
3. swapped meaning of "" (for single characters) and '' (now for string literal)
4. no if. only `unless`, an inverted version of if. Meaning a block gets executed if the condition is false and doesn't if it is true

View File

@ -20,19 +20,15 @@ fn main() {
r"
pi = 3.1415926535
sin(x: f4) = { {
x
foo(x:i4, y:f4) {
}
man() {
main() {
a:i4 = 8
b:f4 = 9
x:i4 = 0
loop {
x = x + 1
if sin(x > 5) {
break
}
}
foo(a, 6)
}
";

View File

@ -1,17 +1,12 @@
use core::panic;
use std::collections::{VecDeque};
use crate::token::{Token};
use crate::Prim;
#[derive(Debug)]
pub struct Func<'a> {
/// name of this function
pub name: Option<&'a str>,
/// parameter names
pub args: Option<Vec<(&'a str, Prim)>>,
/// raw tokens
pub raw: Option<VecDeque<Token<'a>>>,
/// if the function returns a single value
pub results: bool,
/// parsed content
pub expr: Option<Expr<'a>>,
}
@ -19,22 +14,33 @@ pub struct Func<'a> {
impl<'a> Func<'a> {
pub fn new() -> Self {
Self {
args: None,
raw: None,
name: None,
results: false,
expr: None,
}
}
}
impl<'a> PartialEq for Func<'a> {
fn eq(&self, other: &Self) -> bool {
self.args == other.args && self.name == self.name
/// Declaration of a function: its name, parameter list and whether it yields a value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Declr<'a> {
    /// name of this function, `None` as long as no name has been assigned
    pub name: Option<&'a str>,
    /// parameters as `(name, type)` pairs, `None` as long as no parameter list has been assigned
    pub args: Option<Vec<(&'a str, Prim)>>,
    /// if the function returns a single value
    pub results: bool,
}

impl<'a> Declr<'a> {
    /// Create an empty declaration: no name, no parameter list, no result.
    ///
    /// Equivalent to [`Declr::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
impl<'a> std::fmt::Display for Func<'a> {
impl<'a> std::fmt::Display for Declr<'a> {
/// print this functions declaration in the form of ```foo(x,y) = {}```
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!("{}", &self.name.unwrap()))?;
@ -72,7 +78,6 @@ pub enum Expr<'a> {
}
pub struct Scope<'a> {
pub funcs: Vec<&'a str>,
pub args: Option<&'a Vec<(&'a str, Prim)>>,
/// stack of scoped block variables
pub vars: Vec<Vec<(String, Option<Prim>)>>,
@ -91,10 +96,6 @@ impl<'a> Scope<'a> {
self.vars.last_mut().unwrap().push((name, typ))
}
pub fn is_func(&self, name: &'a str) -> bool {
self.funcs.contains(&name)
}
pub fn is_arg(&self, name: &'a str) -> bool {
if let Some(args) = self.args {
for arg in args.iter() {
@ -106,6 +107,31 @@ impl<'a> Scope<'a> {
false
}
/// Look up the type of the argument called `name`.
///
/// # Panics
/// Panics if no argument list is set or it holds no argument of that name.
pub fn get_arg_type(&self, name: &'a str) -> Prim {
    self.args
        .and_then(|args| args.iter().find(|arg| arg.0 == name))
        .map(|arg| arg.1)
        .unwrap_or_else(|| panic!("No argument of name: {name}"))
}
/// Look up the type of the variable called `name`.
///
/// The scopes are searched in stack order (first allocated scope first) and the
/// first variable with a matching name wins.
///
/// # Panics
/// Panics with "Untyped variable" if the variable was found but has no type yet,
/// and with "No variable of name" if no scope holds such a variable.
pub fn get_var_type(&self, name: &'a str) -> Prim {
    self.vars
        .iter()
        .flatten()
        // `String` compares directly against `&str`, no owned copy of `name` required
        .find(|var| var.0 == name)
        .map(|var| var.1.expect("Untyped variable"))
        .unwrap_or_else(|| panic!("No variable of name: {name}"))
}
pub fn is_var(&self, name: &'a str) -> Option<Prim> {
// create an owned version of the string
let owned = &name.to_owned();

View File

@ -1,17 +1,19 @@
use core::{panic};
use std::{collections::{VecDeque}, vec};
use crate::token::{Token, Operator, Assoc, Prim, MessageType};
use crate::token::{Token, Operator, Assoc, Prim, MessageType, Keyword};
pub mod data;
use data::*;
/// simple brace-counting parser to detect functions
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str) -> Vec<Func<'a>> {
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str) -> (Vec<Func<'a>>, Vec<Declr<'a>>) {
let mut funcs = Vec::new();
let mut declrs = Vec::new();
// function to currently identifiy
let mut func = Func::new();
let mut declr = Declr::new();
// count open brackets
let mut brace_cnt = 0;
@ -21,11 +23,13 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
macro_rules! finish_func {
() => {
if funcs.contains(&func) {
panic!("Function defined multiple times: {func}")
if declrs.contains(&declr) {
panic!("Function defined multiple times: {declr}")
}
funcs.push(func);
declrs.push(declr);
declr = Declr::new();
func = Func::new();
single_line = false;
};
@ -40,10 +44,16 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
'{' => {
brace_cnt += 1;
if brace_cnt == 1 {
if func.name.is_none() {
if declr.name.is_none() {
dbginf.print(MessageType::Error, "Anonymous function not permitted", source);
panic!();
}
if paren_cnt > 0 {
dbginf.print(MessageType::Error, "Unclosed parameter list", source);
panic!();
}
single_line = false;
func.raw = Some(VecDeque::new());
continue;
@ -73,16 +83,16 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
match &top {
Token::Operator(op, dbginf) => match op {
Operator::Assign => {
if func.results {
if declr.results {
dbginf.print(MessageType::Error, "double function assignment", source);
panic!();
}
if func.name.is_none() {
if declr.name.is_none() {
dbginf.print(MessageType::Error, "Anonymous function", source);
panic!();
}
func.results = true;
declr.results = true;
single_line = true;
continue;
}
@ -90,18 +100,18 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
}
Token::Assign(name, _, dbginf) => {
if func.results {
if declr.results {
dbginf.print(MessageType::Error, "double function assignment", source);
panic!();
}
if func.name.is_some() {
if declr.name.is_some() {
dbginf.print(MessageType::Error, "multiple function names", source);
panic!();
}
func.raw = Some(VecDeque::new());
func.name = Some(name);
func.results = true;
declr.name = Some(name);
declr.results = true;
single_line = true;
continue;
}
@ -112,12 +122,12 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
paren_cnt += 1;
if paren_cnt == 1 {
if func.args.is_some() {
if declr.args.is_some() {
dbginf.print(MessageType::Error, "double parameter list", source);
panic!();
}
func.args = Some(Vec::new());
declr.args = Some(Vec::new());
continue;
}
},
@ -132,14 +142,17 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
Token::Word(text, dbginf) => {
if func.name.is_some() {
if func.args.is_none() {
if declr.name.is_some() {
if declr.args.is_none() {
dbginf.print(MessageType::Error, "multiple function names", source);
panic!();
}
} else if brace_cnt > 0 {
dbginf.print(MessageType::Error, "brace count missmatch", source);
panic!();
}
else {
func.name = Some(text);
declr.name = Some(text);
continue;
}
}
@ -151,7 +164,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
body.push_back(top);
continue;
}
else if let Some(args) = &mut func.args {
else if let Some(args) = &mut declr.args {
if paren_cnt == 0 {
top.print(MessageType::Error, "token is no parameter", source);
@ -182,13 +195,20 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str)
}
}
funcs
if let Some(raw) = func.raw {
if let Some(front) = raw.front() {
front.print(MessageType::Error, "Open function body", source);
panic!();
}
}
(funcs, declrs)
}
/// parse the functions raw content to expr for easy compilation using a brace-counter.
/// - ```{...}``` surround a block
/// - line breaks seperate expressions
fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>, source: &'a str) {
fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>, _: &Vec<Declr<'a>>, source: &'a str) {
for func in functions.iter_mut() {
let mut blocks = vec![Block::new()];
@ -251,42 +271,131 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>, source: &'a str) {
}
}
fn check_var_typ(typ: &mut Option<Prim>, operands: &mut Vec<Prim>, dbginf: &crate::token::DebugInfo, source: &str) {
if let Some(value) = operands.pop() {
if !operands.is_empty() {
dbginf.print(MessageType::Error, format!("Expr does't resolve to a single value but multiple").as_str(), source);
panic!();
}
if let Some(typ) = typ {
if !typ.is_equal(value) {
dbginf.print(MessageType::Error, format!("Variable has type {:?} but {:?} was given", typ, value).as_str(), source);
panic!();
}
} else {
// assign a type to untyped variable
dbginf.print(MessageType::Info, format!("Variable has no fixed type, guessing type: {:?}", value).as_str(), source);
*typ = Some(value);
}
} else {
dbginf.print(MessageType::Error, "No result to bind variable to", source);
panic!();
}
}
/// Type check the operands consumed by a keyword.
///
/// Only `Keyword::If` is checked: it requires exactly one operand on the stack, which is
/// popped and has to be a boolean. All other keywords leave the stack untouched.
/// Violations are reported through `dbginf` and followed by a panic.
fn process_keyword(keyword: Keyword, _: &Vec<Declr>, _: &mut Scope, operands: &mut Vec<Prim>, dbginf: &crate::token::DebugInfo, source: &str) {
    if !matches!(keyword, Keyword::If) {
        return;
    }

    if operands.len() != 1 {
        dbginf.print(MessageType::Error, format!("Expected single boolean got {} values", operands.len()).as_str(), source);
        panic!();
    }

    match operands.pop() {
        Some(Prim::Bool) | None => (),
        Some(operand) => {
            dbginf.print(MessageType::Error, format!("Expected boolean, got {:?}", operand).as_str(), source);
            panic!();
        }
    }
}
/// Apply the type effect of `operation` to the operand stack.
///
/// - operators are checked via `Operator::operate`
/// - function calls are checked via `call_func`
/// - keywords are checked via `process_keyword`
/// - assignments check the remaining operand against the declared type and
///   declare the variable in the current scope
///
/// Every other token is ignored.
fn collapse_operation(operation: &Token, declrs: &Vec<Declr>, scope: &mut Scope, operands: &mut Vec<Prim>, source: &str) {
    match operation {
        Token::Operator(op, dbginf) => op.operate(operands, dbginf, source),
        Token::Func(name, dbginf) => call_func(name, declrs, scope, operands, dbginf, source),
        Token::Keyword(keyword, dbginf) => process_keyword(*keyword, declrs, scope, operands, dbginf, source),
        Token::Assign(name, typ, dbginf) => {
            // work on a copy of the declared type, the token itself stays unchanged
            let mut resolved = *typ;
            check_var_typ(&mut resolved, operands, dbginf, source);
            scope.decl_var((*name).to_owned(), resolved);
        },
        _ => ()
    }
}
/// Type check a call to the function `name` against the operand stack.
///
/// The first declaration in `declrs` named `name` is used. Its parameters are compared,
/// in declaration order, with the topmost operands of the stack (the values pushed most
/// recently), which are removed afterwards. If no declaration matches, or the declaration
/// has no parameter list, the stack is left untouched.
///
/// Too few operands or a type mismatch is reported through `dbginf` and followed by a panic.
fn call_func(name: &str, declrs: &Vec<Declr>, _scope: &mut Scope, operands: &mut Vec<Prim>, dbginf: &crate::token::DebugInfo, source: &str) {
    let declr = match declrs.iter().find(|declr| declr.name == Some(name)) {
        Some(declr) => declr,
        None => return,
    };

    if let Some(args) = &declr.args {
        if args.len() > operands.len() {
            dbginf.print(MessageType::Error, format!("Expected {} parameters but got {}", args.len(), operands.len()).as_str(), source);
            panic!()
        }

        // The arguments of the call are the last operands pushed. Anything below them
        // belongs to the surrounding expression and must neither be checked nor removed.
        let first_arg = operands.len() - args.len();

        for (x, (arg, operand)) in args.iter().zip(&operands[first_arg..]).enumerate() {
            if !operand.is_equal(arg.1) {
                dbginf.print(MessageType::Error, format!("Expected {:?} as parameter {x}, but got {:?}", arg, operand).as_str(), source);
                panic!()
            }
        }

        // consume the arguments in one step
        operands.truncate(first_arg);
    }

    // TODO: push result type
}
/// parse a single term using a modified shunting yard
fn parse_term<'a, 'b>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope, source: &'b str) {
fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, declrs: &Vec<Declr<'a>>, scope: &mut Scope, source: & str) {
let mut op_stack = vec![];
let mut output = VecDeque::with_capacity(term.len());
let mut value_stack = vec![];
/*
Token::Number(text) => value_stack.push(CompileTimeType::UntypedNum(text)),
Token::Bool(_) => value_stack.push(CompileTimeType::Prim(Prim::Bool)),
*/
'outer:
while let Some(token) = term.pop_front() {
match &token {
Token::Word(text, dbginf) => {
if scope.is_func(text) {
if is_func(declrs, text) {
op_stack.push(Token::Func(text, *dbginf));
continue;
} else if scope.is_arg(text) {
value_stack.push(scope.get_arg_type(text));
output.push_back(Token::Arg(text, *dbginf));
continue;
} else if scope.is_var(text).is_some() {
value_stack.push(scope.get_var_type(text));
output.push_back(Token::Var(text, *dbginf));
continue;
}
dbginf.print(MessageType::Error, "Unknown word", source);
panic!()
}
Token::Bool(_, _) => {
output.push_back(token);
value_stack.push(Prim::Bool)
},
Token::Number(_, _) => {
output.push_back(token);
value_stack.push(CompileTimeType::UntypedNum)
value_stack.push(Prim::UntypedNum)
},
Token::Assign(text, typ, _) => {
scope.decl_var((*text).to_owned(), typ.to_owned());
Token::Assign(_, _, _) => {
op_stack.push(token);
},
Token::Keyword(_, _) => op_stack.push(token),
@ -300,13 +409,20 @@ fn parse_term<'a, 'b>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope, source:
Token::Delemiter(char, _) => if *char == '(' {
if let Some(next) = op_stack.last() {
match &next {
Token::Func(_, _) => output.push_back(op_stack.pop().unwrap()),
Token::Func(_, _) => {
let token = op_stack.pop().unwrap();
collapse_operation(&token, declrs, scope, &mut value_stack, source);
output.push_back(token);
},
_ => ()
}
}
continue 'outer;
},
_ => output.push_back(token)
_ => {
collapse_operation(&token, declrs, scope, &mut value_stack, source);
output.push_back(token)
}
}
}
panic!("Mismatched right parenthesis")
@ -323,6 +439,7 @@ fn parse_term<'a, 'b>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope, source:
let prec1 = op1.prec();
if prec1 > prec0 || prec0 == prec1 && op.assoc() == Assoc::Left {
collapse_operation(top, declrs, scope, &mut value_stack, source);
output.push_back(op_stack.pop().unwrap());
continue
}
@ -342,42 +459,53 @@ fn parse_term<'a, 'b>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope, source:
Token::Delemiter(char, _) => if *char == '(' {
panic!("Mismatched parenthesis")
},
_ => output.push_back(token)
_ => {
collapse_operation(&token, declrs, scope, &mut value_stack, source);
output.push_back(token)
}
}
}
if value_stack.len() > 1 {
output[0].print(MessageType::Error, "expression resolves to multiple results", source);
panic!();
}
term.append(&mut output);
}
enum CompileTimeType {
Prim(Prim),
UntypedNum,
/// Returns `true` if one of the declarations in `declrs` is named `text`.
fn is_func(declrs: &[Declr], text: &str) -> bool {
    declrs.iter().any(|declr| declr.name == Some(text))
}
fn parse_block<'a>(block: &mut Block, scope: &mut Scope, source: &'a str) {
fn parse_block<'a>(block: &mut Block<'a>, declrs: &Vec<Declr<'a>>, scope: &mut Scope, source: &str) {
scope.alloc_scope();
for expr in block.iter_mut() {
match expr {
Expr::Block(block) => parse_block(block, scope, source),
Expr::Term(term) => parse_term(term, scope, source)
Expr::Block(block) => parse_block(block, declrs, scope, source),
Expr::Term(term) => parse_term(term, declrs, scope, source)
}
}
scope.pop_scope();
}
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>, source: &'a str) {
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>, declrs: &Vec<Declr<'a>>, source: &'a str) {
let mut scope = Scope {
funcs: funcs.iter().map(|f| f.name.unwrap()).collect(),
args: None,
vars: vec![]
};
for func in funcs.iter_mut() {
for (x, func) in funcs.iter_mut().enumerate() {
match func.expr.as_mut().expect("Function has no body") {
Expr::Block(block) => {
scope.args = func.args.as_ref();
scope.args = declrs[x].args.as_ref();
parse_block(block, &mut scope, source)
parse_block(block, declrs, &mut scope, source)
},
_ => panic!("Fatal-Compilier-Error: function must have a block")
}
@ -388,10 +516,10 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>, source: &'a str) {
/// any program is made out of functions.
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &'a str) -> Vec<Func<'a>> {
let mut funcs = discover_functions(tokens, source);
let (mut funcs, declrs) = discover_functions(tokens, source);
discover_exprs(&mut funcs, source);
parse_exprs(&mut funcs, source);
discover_exprs(&mut funcs, &declrs, source);
parse_exprs(&mut funcs, &declrs, source);
funcs.iter().for_each(|f| println!("{:?}", f));

View File

@ -79,6 +79,95 @@ impl Operator {
_ => Assoc::Right
}
}
/// Check the topmost operands against a single operator signature.
///
/// `types` is compared position by position with the last `types.len()` entries of
/// `operands`. Returns `Some(r#yield)` if all of them are equal and `None` otherwise.
///
/// If there are fewer operands than `types`, the error is reported through `dbginf`
/// and followed by a panic.
fn present_types(operands: &[Prim], types: &[Prim], r#yield: Prim, dbginf: &DebugInfo, source: &str) -> Option<Prim> {
    if operands.len() < types.len() {
        dbginf.print(MessageType::Error, format!("Missing {} operands", types.len() - operands.len()).as_str(), source);
        panic!()
    }

    // `operate` consumes its operands from the end of the stack, so these are the ones
    // to validate. Entries further down belong to operations that are still pending.
    let top = &operands[operands.len() - types.len()..];

    if top == types {
        Some(r#yield)
    } else {
        None
    }
}
/// Find the first signature in `types` accepted by `present_types` for `operands`.
///
/// Every entry of `types` is a pair of parameter types and the resulting type.
/// Returns the result type of the first accepted signature or `None` if none is accepted.
fn check_types(operands: &[Prim], types: &[(Vec<Prim>, Prim)], dbginf: &DebugInfo, source: &str) -> Option<Prim> {
    types
        .iter()
        .find_map(|(params, result)| Self::present_types(operands, params, *result, dbginf, source))
}
pub fn operate(&self, operands: &mut Vec<Prim>, dbginf: &DebugInfo, source: &str) {
match self {
Operator::Add | Operator::Sub | Operator::Mul | Operator::Div=> {
let types_valid = Self::check_types(operands, &[
// +-----------------------------------+---------------------------------+
// | Parameter list of types | result type |
// +-----------------------------------+---------------------------------+
(vec![Prim::Int, Prim::Int ], Prim::Int ),
(vec![Prim::Real, Prim::Real ], Prim::Real),
(vec![Prim::UntypedNum, Prim::Int ], Prim::Int ),
(vec![Prim::UntypedNum, Prim::Real ], Prim::Real),
(vec![Prim::Int, Prim::UntypedNum], Prim::Int ),
(vec![Prim::Real, Prim::UntypedNum], Prim::Real),
(vec![Prim::UntypedNum, Prim::UntypedNum], Prim::UntypedNum)
], dbginf, source);
if let Some(result) = types_valid {
operands.pop();
operands.pop();
operands.push(result);
} else {
dbginf.print(MessageType::Error, format!("Missmatched types for {:?}, expected either two integer or reals", self).as_str(), source);
panic!()
}
},
Operator::And | Operator::Or | Operator::Xor => {
let types_valid = Self::check_types(operands, &[
(vec![Prim::Bool, Prim::Bool ], Prim::Bool),
], dbginf, source);
if let Some(result) = types_valid {
operands.pop();
operands.pop();
operands.push(result);
} else {
dbginf.print(MessageType::Error, format!("Missmatched types for {:?}, expected two booleans", self).as_str(), source);
panic!()
}
},
Operator::Eq | Operator::NotEq | Operator::Lt | Operator::Gt | Operator::GtEq | Operator::LtEq => {
let types_valid = Self::check_types(operands, &[
(vec![Prim::Int, Prim::Int ], Prim::Bool ),
(vec![Prim::Real, Prim::Real ], Prim::Bool ),
(vec![Prim::UntypedNum, Prim::Int ], Prim::Bool ),
(vec![Prim::UntypedNum, Prim::Real ], Prim::Bool ),
(vec![Prim::Int, Prim::UntypedNum], Prim::Bool ),
(vec![Prim::Real, Prim::UntypedNum], Prim::Bool ),
(vec![Prim::UntypedNum, Prim::UntypedNum], Prim::Bool )
], dbginf, source);
if let Some(result) = types_valid {
println!("checked: {:?} for: {:?}", self, operands);
operands.pop();
operands.pop();
operands.push(result);
} else {
dbginf.print(MessageType::Error, format!("Missmatched types for {:?}, expected two numbers", self).as_str(), source);
panic!()
}
},
_ => panic!("Unknown operator: {:?}", self)
}
}
}
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
@ -94,7 +183,7 @@ pub enum Keyword {
impl Keyword {
pub fn parse<'a>(text: &'a str) -> Keyword {
return match text {
"if" => Keyword::If,
"unless" => Keyword::If,
"while" => Keyword::While,
"loop" => Keyword::Loop,
"break" => Keyword::Break,
@ -110,6 +199,7 @@ pub enum Prim {
Int,
Real,
Bool,
UntypedNum
}
impl Prim {
@ -125,6 +215,24 @@ impl Prim {
}
}
}
/// Check whether `value` is compatible with this type.
///
/// An untyped number is compatible with `Int` and `Real` in both directions.
/// In every other case the two types have to be identical.
pub fn is_equal(&self, value: Prim) -> bool {
    match (*self, value) {
        (Prim::Real, Prim::UntypedNum)
        | (Prim::Int, Prim::UntypedNum)
        | (Prim::UntypedNum, Prim::Real)
        | (Prim::UntypedNum, Prim::Int) => true,
        (own, other) => own == other,
    }
}
}
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
@ -217,7 +325,7 @@ impl<'a> Token<'a> {
}
}
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)";
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(unless|while|loop|break|continue)|(true|false|ye|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)";
lazy_static::lazy_static! {
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
@ -291,7 +399,7 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
}
fn parse_bool(text: &str) -> bool {
return match text.to_ascii_lowercase().as_str() {
return match text {
"true" | "ye" => true,
"false" |"no" => false,
"maybe" => rand::random(),