first part of static type system

This commit is contained in:
Sven Vogel 2022-10-11 19:54:45 +02:00
parent 85e0c299af
commit 843f8dbcf0
4 changed files with 96 additions and 50 deletions

View File

@ -15,7 +15,7 @@ fn main() {
r" r"
pi = 3.1415926535 pi = 3.1415926535
sin(x) = { sin(x: f4) = {
x x
} }

View File

@ -1,12 +1,13 @@
use std::collections::{VecDeque}; use std::collections::{VecDeque};
use crate::token::{Token}; use crate::token::{Token};
use crate::Prim;
#[derive(Eq, Debug)] #[derive(Debug)]
pub struct Func<'a> { pub struct Func<'a> {
/// name of this function /// name of this function
pub name: Option<&'a str>, pub name: Option<&'a str>,
/// parameter names /// parameter names
pub args: Option<Vec<&'a str>>, pub args: Option<Vec<(&'a str, Prim)>>,
/// raw tokens /// raw tokens
pub raw: Option<VecDeque<Token<'a>>>, pub raw: Option<VecDeque<Token<'a>>>,
/// if the function returns a single value /// if the function returns a single value
@ -43,11 +44,11 @@ impl<'a> std::fmt::Display for Func<'a> {
f.write_str("(")?; f.write_str("(")?;
for (x, arg) in args.iter().enumerate() { for (x, arg) in args.iter().enumerate() {
if x == 0 { if x == 0 {
f.write_fmt(format_args!("{}", arg))?; f.write_fmt(format_args!("{}", arg.0))?;
continue; continue;
} }
f.write_fmt(format_args!(", {}", arg))?; f.write_fmt(format_args!(", {}", arg.0))?;
} }
f.write_str(")")?; f.write_str(")")?;
} }
@ -62,7 +63,7 @@ impl<'a> std::fmt::Display for Func<'a> {
pub type Block<'a> = VecDeque<Expr<'a>>; pub type Block<'a> = VecDeque<Expr<'a>>;
#[derive(Debug)] #[derive(Debug, Clone)]
pub enum Expr<'a> { pub enum Expr<'a> {
/// group of more expressions /// group of more expressions
Block(Block<'a>), Block(Block<'a>),
@ -72,9 +73,9 @@ pub enum Expr<'a> {
pub struct Scope<'a> { pub struct Scope<'a> {
pub funcs: Vec<&'a str>, pub funcs: Vec<&'a str>,
pub args: Option<&'a Vec<&'a str>>, pub args: Option<&'a Vec<(&'a str, Prim)>>,
/// stack of scoped block variables /// stack of scoped block variables
pub vars: Vec<Vec<String>>, pub vars: Vec<Vec<(String, Option<Prim>)>>,
} }
impl<'a> Scope<'a> { impl<'a> Scope<'a> {
@ -86,8 +87,8 @@ impl<'a> Scope<'a> {
self.vars.pop(); self.vars.pop();
} }
pub fn decl_var(&mut self, name: String) { pub fn decl_var(&mut self, name: String, typ: Option<Prim>) {
self.vars.last_mut().unwrap().push(name) self.vars.last_mut().unwrap().push((name, typ))
} }
pub fn is_func(&self, name: &'a str) -> bool { pub fn is_func(&self, name: &'a str) -> bool {
@ -96,21 +97,27 @@ impl<'a> Scope<'a> {
pub fn is_arg(&self, name: &'a str) -> bool { pub fn is_arg(&self, name: &'a str) -> bool {
if let Some(args) = self.args { if let Some(args) = self.args {
return args.contains(&name); for arg in args.iter() {
if arg.0 == name {
return true;
}
}
} }
false false
} }
pub fn is_var(&self, name: &'a str) -> bool { pub fn is_var(&self, name: &'a str) -> Option<Prim> {
// create an owned version of the string // create an owned version of the string
let owned = &name.to_owned(); let owned = &name.to_owned();
// search // search
for vars in self.vars.iter() { for vars in self.vars.iter() {
if vars.contains(owned) { for var in vars.iter() {
return true; if &var.0 == owned {
return var.1;
} }
} }
false }
None
} }
} }

View File

@ -1,6 +1,6 @@
use core::panic; use core::{panic};
use std::{collections::{VecDeque}, vec}; use std::{collections::{VecDeque}, vec};
use crate::token::{Token, Operator, Assoc}; use crate::token::{Token, Operator, Assoc, Prim};
pub mod data; pub mod data;
@ -86,7 +86,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
_ => () _ => ()
} }
Token::Assign(name) => { Token::Assign(name, _) => {
if func.results { if func.results {
panic!("double function assignment not permitted") panic!("double function assignment not permitted")
} }
@ -151,8 +151,9 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
} }
match &top { match &top {
Token::Word(text) => args.push(text), Token::Decl(name, typ) => args.push((name, *typ)),
_ => panic!("Argument is not a word {:?}", &top) Token::Word(name) => panic!("Missing type declaration {name}"),
_ => panic!("Argument is not a declaration {:?}", &top)
} }
continue; continue;
} }
@ -221,6 +222,14 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) { fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
let mut op_stack = vec![]; let mut op_stack = vec![];
let mut output = VecDeque::with_capacity(term.len()); let mut output = VecDeque::with_capacity(term.len());
let mut value_stack = vec![];
/*
Token::Number(text) => value_stack.push(CompileTimeType::UntypedNum(text)),
Token::Bool(_) => value_stack.push(CompileTimeType::Prim(Prim::Bool)),
*/
'outer: 'outer:
while let Some(token) = term.pop_front() { while let Some(token) = term.pop_front() {
@ -232,15 +241,18 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
} else if scope.is_arg(text) { } else if scope.is_arg(text) {
output.push_back(Token::Arg(text)); output.push_back(Token::Arg(text));
continue; continue;
} else if scope.is_var(text) { } else if scope.is_var(text).is_some() {
output.push_back(Token::Var(text)); output.push_back(Token::Var(text));
continue; continue;
} }
panic!("Unknwon word: {text}") panic!("Unknwon word: {text}")
} }
Token::Number(_) => output.push_back(token), Token::Number(_) => {
Token::Assign(text) => { output.push_back(token);
scope.decl_var((*text).to_owned()); value_stack.push(CompileTimeType::UntypedNum)
},
Token::Assign(text, typ) => {
scope.decl_var((*text).to_owned(), typ.to_owned());
op_stack.push(token); op_stack.push(token);
}, },
Token::Keyword(_) => op_stack.push(token), Token::Keyword(_) => op_stack.push(token),
@ -303,6 +315,11 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
term.append(&mut output); term.append(&mut output);
} }
enum CompileTimeType {
Prim(Prim),
UntypedNum,
}
fn parse_block(block: &mut Block, scope: &mut Scope) { fn parse_block(block: &mut Block, scope: &mut Scope) {
scope.alloc_scope(); scope.alloc_scope();
for expr in block.iter_mut() { for expr in block.iter_mut() {

View File

@ -57,13 +57,7 @@ impl Operator {
Operator::Lt => 2, Operator::Lt => 2,
Operator::Gt => 2, Operator::Gt => 2,
Operator::LtEq => 2, Operator::LtEq => 2,
Operator::GtEq => 2, Operator::Eq => 2,
Operator::Lt => 2,
Operator::Gt => 2,
Operator::LtEq => 2,
Operator::GtEq => 2, Operator::GtEq => 2,
Operator::NotEq => 2,
Operator::NotEq => 2,
Operator::Or => 0, Operator::Or => 0,
Operator::Xor => 0, Operator::Xor => 0,
@ -108,15 +102,25 @@ impl Keyword {
} }
} }
pub struct SourceString<'a> { #[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub string: &'a str, pub enum Prim {
/// line in which the source string is to be found Int,
pub line: usize, Real,
/// index in source where the token starts Bool,
pub start: usize
} }
#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)] impl Prim {
fn from<'a>(text: &'a str) -> Prim {
return match text {
"i4" => Prim::Int,
"f4" => Prim::Real,
"bool" => Prim::Bool,
_ => panic!("Unknown type declaration: {text}")
}
}
}
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
/// A token represents a basic building block for source code. /// A token represents a basic building block for source code.
/// They give a meaning to patterns of chars allowing to interpret them. /// They give a meaning to patterns of chars allowing to interpret them.
pub enum Token<'a> { pub enum Token<'a> {
@ -129,13 +133,13 @@ pub enum Token<'a> {
Func(&'a str), Func(&'a str),
Var(&'a str), Var(&'a str),
Arg(&'a str), Arg(&'a str),
Assign(&'a str), Assign(&'a str, Option<Prim>),
Decl(&'a str, Prim),
Bool(bool), Bool(bool),
Keyword(Keyword), Keyword(Keyword),
TypeDecl(&'a str)
} }
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*(i4|f4|bool))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)"; const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n+)";
lazy_static::lazy_static! { lazy_static::lazy_static! {
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap(); static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
@ -146,7 +150,14 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
let mut tokens = VecDeque::new(); let mut tokens = VecDeque::new();
for cap in TOKEN_REGEX.captures_iter(source) { for cap in TOKEN_REGEX.captures_iter(source) {
for (i, group) in cap.iter().enumerate() { let mut enumerator = cap.iter().enumerate();
loop {
let next = enumerator.next();
if next.is_none() {
break
}
let (i, group) = next.unwrap();
// ignore first group as it's the entire match, // ignore first group as it's the entire match,
// as well as the 1st group (= comments) // as well as the 1st group (= comments)
@ -159,14 +170,25 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
tokens.push_back(match i { tokens.push_back(match i {
2 => Token::Keyword(Keyword::parse(mat.as_str())), 2 => Token::Keyword(Keyword::parse(mat.as_str())),
3 => Token::Bool(parse_bool(mat.as_str())), 3 => Token::Bool(parse_bool(mat.as_str())),
4 => Token::Assign(mat.as_str()), 4 => {
5 => Token::Word(mat.as_str()), let var_type = Prim::from(enumerator.next().unwrap().1.unwrap().as_str());
6 => Token::Number(mat.as_str()), Token::Decl(mat.as_str(), var_type)
7 => Token::Operator(Operator::parse(mat.as_str())), },
8 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()), 6 => {
9 => Token::LineBreak, let var_type = if let Some(mat) = enumerator.next().unwrap().1 {
Some(Prim::from(mat.as_str()))
} else {
None
};
Token::Assign(mat.as_str(), var_type)
},
8 => Token::Word(mat.as_str()),
9 => Token::Number(mat.as_str()),
10 => Token::Operator(Operator::parse(mat.as_str())),
11 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
12 => Token::LineBreak,
_ => panic!("Unknown match to tokenize: {}", mat.as_str()) _ => panic!("Unknown match to tokenize ({i}): {}", mat.as_str())
}); });
break; break;
} }