From 843f8dbcf0ddb10e2dae7f6e41ea684257c81fe6 Mon Sep 17 00:00:00 2001 From: servostar Date: Tue, 11 Oct 2022 19:54:45 +0200 Subject: [PATCH] first part of static type system --- src/main.rs | 2 +- src/parser/data.rs | 35 +++++++++++++--------- src/parser/mod.rs | 35 ++++++++++++++++------ src/token/mod.rs | 74 ++++++++++++++++++++++++++++++---------------- 4 files changed, 96 insertions(+), 50 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7ef8214..3fb3d6a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ fn main() { r" pi = 3.1415926535 -sin(x) = { +sin(x: f4) = { x } diff --git a/src/parser/data.rs b/src/parser/data.rs index aec68dc..4859e49 100644 --- a/src/parser/data.rs +++ b/src/parser/data.rs @@ -1,12 +1,13 @@ use std::collections::{VecDeque}; use crate::token::{Token}; +use crate::Prim; -#[derive(Eq, Debug)] +#[derive(Debug)] pub struct Func<'a> { /// name of this function pub name: Option<&'a str>, /// parameter names - pub args: Option>, + pub args: Option>, /// raw tokens pub raw: Option>>, /// if the function returns a single value @@ -43,11 +44,11 @@ impl<'a> std::fmt::Display for Func<'a> { f.write_str("(")?; for (x, arg) in args.iter().enumerate() { if x == 0 { - f.write_fmt(format_args!("{}", arg))?; + f.write_fmt(format_args!("{}", arg.0))?; continue; } - f.write_fmt(format_args!(", {}", arg))?; + f.write_fmt(format_args!(", {}", arg.0))?; } f.write_str(")")?; } @@ -62,7 +63,7 @@ impl<'a> std::fmt::Display for Func<'a> { pub type Block<'a> = VecDeque>; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Expr<'a> { /// group of more expressions Block(Block<'a>), @@ -72,9 +73,9 @@ pub enum Expr<'a> { pub struct Scope<'a> { pub funcs: Vec<&'a str>, - pub args: Option<&'a Vec<&'a str>>, + pub args: Option<&'a Vec<(&'a str, Prim)>>, /// stack of scoped block variables - pub vars: Vec>, + pub vars: Vec)>>, } impl<'a> Scope<'a> { @@ -86,8 +87,8 @@ impl<'a> Scope<'a> { self.vars.pop(); } - pub fn decl_var(&mut self, name: String) { - self.vars.last_mut().unwrap().push(name) + pub fn decl_var(&mut self, name: String, typ: Option) { + self.vars.last_mut().unwrap().push((name, typ)) } pub fn is_func(&self, name: &'a str) -> bool { @@ -96,21 +97,27 @@ impl<'a> Scope<'a> { pub fn is_arg(&self, name: &'a str) -> bool { if let Some(args) = self.args { - return args.contains(&name); + for arg in args.iter() { + if arg.0 == name { + return true; + } + } } false } - pub fn is_var(&self, name: &'a str) -> bool { + pub fn is_var(&self, name: &'a str) -> Option { // create an owned version of the string let owned = &name.to_owned(); // search for vars in self.vars.iter() { - if vars.contains(owned) { - return true; + for var in vars.iter() { + if &var.0 == owned { + return var.1; + } } } - false + None } } \ No newline at end of file diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d43db00..3853b74 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,6 +1,6 @@ -use core::panic; +use core::{panic}; use std::{collections::{VecDeque}, vec}; -use crate::token::{Token, Operator, Assoc}; +use crate::token::{Token, Operator, Assoc, Prim}; pub mod data; @@ -86,7 +86,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque>) -> Vec () } - Token::Assign(name) => { + Token::Assign(name, _) => { if func.results { panic!("double function assignment not permitted") } @@ -151,8 +151,9 @@ fn discover_functions<'a>(tokens: &mut VecDeque>) -> Vec args.push(text), - _ => panic!("Argument is not a word {:?}", &top) + Token::Decl(name, typ) => args.push((name, *typ)), + Token::Word(name) => panic!("Missing type declaration {name}"), + _ => panic!("Argument is not a declaration {:?}", &top) } continue; } @@ -221,6 +222,14 @@ fn discover_exprs<'a>(functions: &mut Vec>) { fn parse_term<'a>(term: &mut VecDeque>, scope: &mut Scope) { let mut op_stack = vec![]; let mut output = VecDeque::with_capacity(term.len()); + let mut value_stack = vec![]; + + /* + + Token::Number(text) => value_stack.push(CompileTimeType::UntypedNum(text)), + Token::Bool(_) => value_stack.push(CompileTimeType::Prim(Prim::Bool)), + + */ 'outer: while let Some(token) = term.pop_front() { @@ -232,15 +241,18 @@ fn parse_term<'a>(term: &mut VecDeque>, scope: &mut Scope) { } else if scope.is_arg(text) { output.push_back(Token::Arg(text)); continue; - } else if scope.is_var(text) { + } else if scope.is_var(text).is_some() { output.push_back(Token::Var(text)); continue; } panic!("Unknwon word: {text}") } - Token::Number(_) => output.push_back(token), - Token::Assign(text) => { - scope.decl_var((*text).to_owned()); + Token::Number(_) => { + output.push_back(token); + value_stack.push(CompileTimeType::UntypedNum) + }, + Token::Assign(text, typ) => { + scope.decl_var((*text).to_owned(), typ.to_owned()); op_stack.push(token); }, Token::Keyword(_) => op_stack.push(token), @@ -303,6 +315,11 @@ fn parse_term<'a>(term: &mut VecDeque>, scope: &mut Scope) { term.append(&mut output); } +enum CompileTimeType { + Prim(Prim), + UntypedNum, +} + fn parse_block(block: &mut Block, scope: &mut Scope) { scope.alloc_scope(); for expr in block.iter_mut() { diff --git a/src/token/mod.rs b/src/token/mod.rs index b735458..83b3ffb 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -57,13 +57,7 @@ impl Operator { Operator::Lt => 2, Operator::Gt => 2, Operator::LtEq => 2, - Operator::GtEq => 2, Operator::Eq => 2, - Operator::Lt => 2, - Operator::Gt => 2, - Operator::LtEq => 2, Operator::GtEq => 2, - Operator::NotEq => 2, - Operator::NotEq => 2, Operator::Or => 0, Operator::Xor => 0, @@ -108,15 +102,25 @@ impl Keyword { } } -pub struct SourceString<'a> { - pub string: &'a str, - /// line in which the source string is to be found - pub line: usize, - /// index in source where the token starts - pub start: usize +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum Prim { + Int, + Real, + Bool, } -#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)] +impl Prim { + fn from<'a>(text: &'a str) -> Prim { + return match text { + "i4" => Prim::Int, + "f4" => Prim::Real, + "bool" => Prim::Bool, + _ => panic!("Unknown type declaration: {text}") + } + } +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] /// A token represents a basic building block for source code. /// They give a meaning to patterns of chars allowing to interpret them. pub enum Token<'a> { @@ -129,13 +133,13 @@ pub enum Token<'a> { Func(&'a str), Var(&'a str), Arg(&'a str), - Assign(&'a str), + Assign(&'a str, Option), + Decl(&'a str, Prim), Bool(bool), Keyword(Keyword), - TypeDecl(&'a str) } -const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*(i4|f4|bool))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)"; +const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n+)"; lazy_static::lazy_static! { static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap(); @@ -146,7 +150,14 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque> { let mut tokens = VecDeque::new(); for cap in TOKEN_REGEX.captures_iter(source) { - for (i, group) in cap.iter().enumerate() { + let mut enumerator = cap.iter().enumerate(); + loop { + let next = enumerator.next(); + if next.is_none() { + break + } + + let (i, group) = next.unwrap(); // ignore first group as its the entire match, // as well as the 1st group (= comments) @@ -155,18 +166,29 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque> { } // if we have a match, save it as token - if let Some(mat) = group { - tokens.push_back(match i { + if let Some(mat) = group { + tokens.push_back(match i { 2 => Token::Keyword(Keyword::parse(mat.as_str())), 3 => Token::Bool(parse_bool(mat.as_str())), - 4 => Token::Assign(mat.as_str()), - 5 => Token::Word(mat.as_str()), - 6 => Token::Number(mat.as_str()), - 7 => Token::Operator(Operator::parse(mat.as_str())), - 8 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()), - 9 => Token::LineBreak, + 4 => { + let var_type = Prim::from(enumerator.next().unwrap().1.unwrap().as_str()); + Token::Decl(mat.as_str(), var_type) + }, + 6 => { + let var_type = if let Some(mat) = enumerator.next().unwrap().1 { + Some(Prim::from(mat.as_str())) + } else { + None + }; + Token::Assign(mat.as_str(), var_type) + }, + 8 => Token::Word(mat.as_str()), + 9 => Token::Number(mat.as_str()), + 10 => Token::Operator(Operator::parse(mat.as_str())), + 11 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()), + 12 => Token::LineBreak, - _ => panic!("Unknown match to tokenize: {}", mat.as_str()) + _ => panic!("Unknown match to tokenize ({i}): {}", mat.as_str()) }); break; }