From 85e0c299af7077a4c7dc4cbe3e4f38460ee6d356 Mon Sep 17 00:00:00 2001
From: servostar
Date: Mon, 10 Oct 2022 15:16:55 +0200
Subject: [PATCH] added keywords

---
 src/main.rs        | 19 ++++++++++++++++---
 src/parser/data.rs | 14 ++++++++++++--
 src/parser/mod.rs  | 15 +++++++++++----
 src/token/mod.rs   | 31 ++++++++++++++++++++++++++-----
 4 files changed, 65 insertions(+), 14 deletions(-)

diff --git a/src/main.rs b/src/main.rs
index 47fa7ee..7ef8214 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,10 @@
+// tokenizer
 mod token;
+// to parse a queue of tokens into functions with expressions
 mod parser;
 
+// translate a tree of functions and expressions to pseudo assembly
+// designed for a virtual stack machine
 use token::*;
 use parser::*;
 
@@ -9,11 +13,20 @@ fn main() {
 
     let source = r"
-pi = 3.1415926
+pi = 3.1415926535
+
+sin(x) = {
+    x
+}
 
 main() {
-    if 4 > 2 {
-        val = 9 / 5
+
+    x:i4 = 0
+    loop {
+        x = x + 1
+        if sin(x > 5) {
+            break
+        }
     }
 }
 ";
 
diff --git a/src/parser/data.rs b/src/parser/data.rs
index a57352c..aec68dc 100644
--- a/src/parser/data.rs
+++ b/src/parser/data.rs
@@ -74,7 +74,7 @@ pub struct Scope<'a> {
     pub funcs: Vec<&'a str>,
     pub args: Option<&'a Vec<&'a str>>,
     /// stack of scoped block variables
-    pub vars: Vec<Vec<&'a str>>,
+    pub vars: Vec<Vec<String>>,
 }
 
 impl<'a> Scope<'a> {
@@ -86,18 +86,28 @@ impl<'a> Scope<'a> {
         self.vars.pop();
     }
 
+    pub fn decl_var(&mut self, name: String) {
+        self.vars.last_mut().unwrap().push(name)
+    }
+
     pub fn is_func(&self, name: &'a str) -> bool {
         self.funcs.contains(&name)
     }
+
     pub fn is_arg(&self, name: &'a str) -> bool {
         if let Some(args) = self.args {
             return args.contains(&name);
         }
         false
     }
+
     pub fn is_var(&self, name: &'a str) -> bool {
+        // create an owned version of the string
+        let owned = &name.to_owned();
+
+        // search every scope block for the variable
         for vars in self.vars.iter() {
-            if vars.contains(&name) {
+            if vars.contains(owned) {
                 return true;
             }
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 32a051c..d43db00 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2,7 +2,7 @@ use core::panic;
 use std::{collections::{VecDeque}, vec};
 use crate::token::{Token, Operator, Assoc};
 
-mod data;
+pub mod data;
 
 use data::*;
 
@@ -239,7 +239,10 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
                     panic!("Unknwon word: {text}")
                 }
                 Token::Number(_) => output.push_back(token),
-                Token::Assign(_) => op_stack.push(token),
+                Token::Assign(text) => {
+                    scope.decl_var((*text).to_owned());
+                    op_stack.push(token);
+                },
                 Token::Keyword(_) => op_stack.push(token),
 
                 Token::Delemiter(char) => {
@@ -269,7 +272,7 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
             Token::Operator(op) => {
                 let prec0 = op.prec();
 
                 while let Some(top) = op_stack.last(){
-                    match &top {
+                    match &top {
                         Token::Operator(op1) => {
                             let prec1 = op1.prec();
@@ -301,12 +304,14 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
 }
 
 fn parse_block(block: &mut Block, scope: &mut Scope) {
+    scope.alloc_scope();
     for expr in block.iter_mut() {
         match expr {
             Expr::Block(block) => parse_block(block, scope),
             Expr::Term(term) => parse_term(term, scope)
         }
     }
+    scope.pop_scope();
 }
 
 fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
@@ -331,11 +336,13 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
 /// reorder and organize a listing of instructions to a RPN based format:
 /// any program is made out of functions.
 /// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
-pub fn parse<'a>(tokens: &mut VecDeque<Token<'a>>) {
+pub fn parse<'a>(tokens: &mut VecDeque<Token<'a>>) -> Vec<Func<'a>> {
     let mut funcs = discover_functions(tokens);
 
     discover_exprs(&mut funcs);
     parse_exprs(&mut funcs);
 
     funcs.iter().for_each(|f| println!("{:?}", f));
+
+    funcs
 }
\ No newline at end of file
diff --git a/src/token/mod.rs b/src/token/mod.rs
index 0ed519d..b735458 100644
--- a/src/token/mod.rs
+++ b/src/token/mod.rs
@@ -1,6 +1,6 @@
 use std::{collections::{VecDeque}};
 
-#[derive(Debug, Hash, PartialEq, Eq)]
+#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)]
 pub enum Operator {
     Or,
     And,
@@ -57,8 +57,13 @@ impl Operator {
             Operator::Lt => 2,
             Operator::Gt => 2,
             Operator::LtEq => 2,
+            Operator::GtEq => 2,
             Operator::Eq => 2,
+            Operator::Lt => 2,
+            Operator::Gt => 2,
+            Operator::LtEq => 2,
             Operator::GtEq => 2,
             Operator::NotEq => 2,
+            Operator::NotEq => 2,
             Operator::Or => 0,
             Operator::Xor => 0,
@@ -84,7 +89,10 @@ impl Operator {
 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
 pub enum Keyword {
     If,
-    While
+    While,
+    Loop,
+    Break,
+    Continue,
 }
 
 impl Keyword {
@@ -92,12 +100,23 @@ impl Keyword {
         return match text {
             "if" => Keyword::If,
             "while" => Keyword::While,
+            "loop" => Keyword::Loop,
+            "break" => Keyword::Break,
+            "continue" => Keyword::Continue,
             _ => panic!("Text not a known keyword {text}")
         }
     }
 }
 
-#[derive(Debug, Hash, PartialEq, Eq)]
+pub struct SourceString<'a> {
+    pub string: &'a str,
+    /// line in which the source string is to be found
+    pub line: usize,
+    /// index in source where the token starts
+    pub start: usize
+}
+
+#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)]
 /// A token represents a basic building block for source code.
 /// They give a meaning to patterns of chars allowing to interpret them.
 pub enum Token<'a> {
@@ -112,10 +131,11 @@ pub enum Token<'a> {
     Arg(&'a str),
     Assign(&'a str),
     Bool(bool),
-    Keyword(Keyword)
+    Keyword(Keyword),
+    TypeDecl(&'a str)
 }
 
-const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)";
+const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*(i4|f4|bool))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|>=|[&|+\-*/<>])|([(){}])|(\n)";
 
 lazy_static::lazy_static! {
     static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
@@ -148,6 +168,7 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
             _ => panic!("Unknown match to tokenize: {}", mat.as_str())
         });
+        break;
     }
     }
 
 }
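
As a usage sketch: with `parse` now returning the parsed functions instead of only printing them, `main` can pass that list on to the planned code generation stage for the virtual stack machine. The snippet below assumes the module layout and signatures shown in this patch (`tokenize(&str) -> VecDeque<Token>`, `parse(&mut VecDeque<Token>)` returning the function list); `emit_pseudo_asm` is a hypothetical placeholder for a stage that does not exist yet and only dumps the parsed functions.

    // src/main.rs (sketch only, under the assumptions above)
    mod token;
    mod parser;

    use std::fmt::Debug;

    use token::*;
    use parser::*;

    // hypothetical stand-in for the pseudo-assembly emitter mentioned in the
    // new module comment; for now it just prints the parsed functions
    fn emit_pseudo_asm<T: Debug>(funcs: &[T]) {
        for func in funcs {
            println!("{:?}", func);
        }
    }

    fn main() {
        let source = "pi = 3.1415926535";

        // tokenize the raw source into a queue of tokens
        let mut tokens = tokenize(source);

        // parse the token queue; parse() now hands the functions back
        let funcs = parse(&mut tokens);

        // forward the result to the (future) code generation stage
        emit_pseudo_asm(&funcs);
    }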