added keywords
This commit is contained in:
parent
3db4fd982e
commit
85e0c299af
19
src/main.rs
19
src/main.rs
|
@ -1,6 +1,10 @@
|
||||||
|
|
||||||
|
// tokenizer
|
||||||
mod token;
|
mod token;
|
||||||
|
// to parse a queue of tokens into functions with expressions
|
||||||
mod parser;
|
mod parser;
|
||||||
|
// translate a tree of functions and expressions to pseudo assembly
|
||||||
|
// designed for a virtual stack machine
|
||||||
|
|
||||||
use token::*;
|
use token::*;
|
||||||
use parser::*;
|
use parser::*;
|
||||||
|
@ -9,11 +13,20 @@ fn main() {
|
||||||
|
|
||||||
let source =
|
let source =
|
||||||
r"
|
r"
|
||||||
pi = 3.1415926
|
pi = 3.1415926535
|
||||||
|
|
||||||
|
sin(x) = {
|
||||||
|
x
|
||||||
|
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
if 4 > 2 {
|
|
||||||
val = 9 / 5
|
x:i4 = 0
|
||||||
|
loop {
|
||||||
|
x = x + 1
|
||||||
|
if sin(x > 5) {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
";
|
";
|
||||||
|
|
|
@ -74,7 +74,7 @@ pub struct Scope<'a> {
|
||||||
pub funcs: Vec<&'a str>,
|
pub funcs: Vec<&'a str>,
|
||||||
pub args: Option<&'a Vec<&'a str>>,
|
pub args: Option<&'a Vec<&'a str>>,
|
||||||
/// stack of scoped block variables
|
/// stack of scoped block variables
|
||||||
pub vars: Vec<Vec<&'a str>>,
|
pub vars: Vec<Vec<String>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Scope<'a> {
|
impl<'a> Scope<'a> {
|
||||||
|
@ -86,18 +86,28 @@ impl<'a> Scope<'a> {
|
||||||
self.vars.pop();
|
self.vars.pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn decl_var(&mut self, name: String) {
|
||||||
|
self.vars.last_mut().unwrap().push(name)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_func(&self, name: &'a str) -> bool {
|
pub fn is_func(&self, name: &'a str) -> bool {
|
||||||
self.funcs.contains(&name)
|
self.funcs.contains(&name)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_arg(&self, name: &'a str) -> bool {
|
pub fn is_arg(&self, name: &'a str) -> bool {
|
||||||
if let Some(args) = self.args {
|
if let Some(args) = self.args {
|
||||||
return args.contains(&name);
|
return args.contains(&name);
|
||||||
}
|
}
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_var(&self, name: &'a str) -> bool {
|
pub fn is_var(&self, name: &'a str) -> bool {
|
||||||
|
// create an owned version of the string
|
||||||
|
let owned = &name.to_owned();
|
||||||
|
|
||||||
|
// search
|
||||||
for vars in self.vars.iter() {
|
for vars in self.vars.iter() {
|
||||||
if vars.contains(&name) {
|
if vars.contains(owned) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@ use core::panic;
|
||||||
use std::{collections::{VecDeque}, vec};
|
use std::{collections::{VecDeque}, vec};
|
||||||
use crate::token::{Token, Operator, Assoc};
|
use crate::token::{Token, Operator, Assoc};
|
||||||
|
|
||||||
mod data;
|
pub mod data;
|
||||||
|
|
||||||
use data::*;
|
use data::*;
|
||||||
|
|
||||||
|
@ -239,7 +239,10 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
||||||
panic!("Unknwon word: {text}")
|
panic!("Unknwon word: {text}")
|
||||||
}
|
}
|
||||||
Token::Number(_) => output.push_back(token),
|
Token::Number(_) => output.push_back(token),
|
||||||
Token::Assign(_) => op_stack.push(token),
|
Token::Assign(text) => {
|
||||||
|
scope.decl_var((*text).to_owned());
|
||||||
|
op_stack.push(token);
|
||||||
|
},
|
||||||
Token::Keyword(_) => op_stack.push(token),
|
Token::Keyword(_) => op_stack.push(token),
|
||||||
|
|
||||||
Token::Delemiter(char) => {
|
Token::Delemiter(char) => {
|
||||||
|
@ -269,7 +272,7 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
||||||
Token::Operator(op) => {
|
Token::Operator(op) => {
|
||||||
let prec0 = op.prec();
|
let prec0 = op.prec();
|
||||||
while let Some(top) = op_stack.last(){
|
while let Some(top) = op_stack.last(){
|
||||||
match &top {
|
match &top {
|
||||||
Token::Operator(op1) => {
|
Token::Operator(op1) => {
|
||||||
let prec1 = op1.prec();
|
let prec1 = op1.prec();
|
||||||
|
|
||||||
|
@ -301,12 +304,14 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_block(block: &mut Block, scope: &mut Scope) {
|
fn parse_block(block: &mut Block, scope: &mut Scope) {
|
||||||
|
scope.alloc_scope();
|
||||||
for expr in block.iter_mut() {
|
for expr in block.iter_mut() {
|
||||||
match expr {
|
match expr {
|
||||||
Expr::Block(block) => parse_block(block, scope),
|
Expr::Block(block) => parse_block(block, scope),
|
||||||
Expr::Term(term) => parse_term(term, scope)
|
Expr::Term(term) => parse_term(term, scope)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
scope.pop_scope();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
|
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
|
||||||
|
@ -331,11 +336,13 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
|
||||||
/// reorder and organize a listing of instructions to a RPN based format:
|
/// reorder and organize a listing of instructions to a RPN based format:
|
||||||
/// any program is made out of functions.
|
/// any program is made out of functions.
|
||||||
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
|
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
|
||||||
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) {
|
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'a>> {
|
||||||
let mut funcs = discover_functions(tokens);
|
let mut funcs = discover_functions(tokens);
|
||||||
|
|
||||||
discover_exprs(&mut funcs);
|
discover_exprs(&mut funcs);
|
||||||
parse_exprs(&mut funcs);
|
parse_exprs(&mut funcs);
|
||||||
|
|
||||||
funcs.iter().for_each(|f| println!("{:?}", f));
|
funcs.iter().for_each(|f| println!("{:?}", f));
|
||||||
|
|
||||||
|
funcs
|
||||||
}
|
}
|
|
@ -1,6 +1,6 @@
|
||||||
use std::{collections::{VecDeque}};
|
use std::{collections::{VecDeque}};
|
||||||
|
|
||||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)]
|
||||||
pub enum Operator {
|
pub enum Operator {
|
||||||
Or,
|
Or,
|
||||||
And,
|
And,
|
||||||
|
@ -57,8 +57,13 @@ impl Operator {
|
||||||
Operator::Lt => 2,
|
Operator::Lt => 2,
|
||||||
Operator::Gt => 2,
|
Operator::Gt => 2,
|
||||||
Operator::LtEq => 2,
|
Operator::LtEq => 2,
|
||||||
|
Operator::GtEq => 2, Operator::Eq => 2,
|
||||||
|
Operator::Lt => 2,
|
||||||
|
Operator::Gt => 2,
|
||||||
|
Operator::LtEq => 2,
|
||||||
Operator::GtEq => 2,
|
Operator::GtEq => 2,
|
||||||
Operator::NotEq => 2,
|
Operator::NotEq => 2,
|
||||||
|
Operator::NotEq => 2,
|
||||||
|
|
||||||
Operator::Or => 0,
|
Operator::Or => 0,
|
||||||
Operator::Xor => 0,
|
Operator::Xor => 0,
|
||||||
|
@ -84,7 +89,10 @@ impl Operator {
|
||||||
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
|
||||||
pub enum Keyword {
|
pub enum Keyword {
|
||||||
If,
|
If,
|
||||||
While
|
While,
|
||||||
|
Loop,
|
||||||
|
Break,
|
||||||
|
Continue,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Keyword {
|
impl Keyword {
|
||||||
|
@ -92,12 +100,23 @@ impl Keyword {
|
||||||
return match text {
|
return match text {
|
||||||
"if" => Keyword::If,
|
"if" => Keyword::If,
|
||||||
"while" => Keyword::While,
|
"while" => Keyword::While,
|
||||||
|
"loop" => Keyword::Loop,
|
||||||
|
"break" => Keyword::Break,
|
||||||
|
"continue" => Keyword::Continue,
|
||||||
_ => panic!("Text not a known keyword {text}")
|
_ => panic!("Text not a known keyword {text}")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
pub struct SourceString<'a> {
|
||||||
|
pub string: &'a str,
|
||||||
|
/// line in which the source string is to be found
|
||||||
|
pub line: usize,
|
||||||
|
/// index in source where the token starts
|
||||||
|
pub start: usize
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)]
|
||||||
/// A token represents a basic building block for source code.
|
/// A token represents a basic building block for source code.
|
||||||
/// They give a meaning to patterns of chars allowing to interpret them.
|
/// They give a meaning to patterns of chars allowing to interpret them.
|
||||||
pub enum Token<'a> {
|
pub enum Token<'a> {
|
||||||
|
@ -112,10 +131,11 @@ pub enum Token<'a> {
|
||||||
Arg(&'a str),
|
Arg(&'a str),
|
||||||
Assign(&'a str),
|
Assign(&'a str),
|
||||||
Bool(bool),
|
Bool(bool),
|
||||||
Keyword(Keyword)
|
Keyword(Keyword),
|
||||||
|
TypeDecl(&'a str)
|
||||||
}
|
}
|
||||||
|
|
||||||
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)";
|
/// Source pattern for the tokenizer regex.
///
/// The pattern is one alternation; the capture groups, in order, are:
/// 1. `(#.*)` - a `#` followed by the rest of the line
/// 2. `(if|while|loop|break|continue)` - keywords
/// 3. `(true|false|yes|no|maybe)` - boolean literals
/// 4. `([A-Za-z_]+)` followed by an optional `: type` and a `=` - assignment target
/// 5. `(i4|f4|bool)` - the optional type annotation inside the assignment form
/// 6. `([A-Za-z_]+)` - any other word
/// 7. `(\d*\.?\d+)` - numbers
/// 8. `(!=|==|<=|>=|[&|+\-*/<>])` - operators
/// 9. `([(){}])` - delimiters
/// 10. `(\n)` - line breaks
///
/// The two-character comparison operators are listed before the single
/// character class so that `<=` / `>=` are not split into `<` / `>`.
/// Previously `<=` was listed twice and `>=` was missing.
///
/// NOTE(review): the assignment alternative (group 4) is tried before the
/// operator alternative, so `name ==` looks like it matches as an assignment
/// first - confirm against the tokenizer's expected behaviour.
const TOKEN_REGEX_SRC: &str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*(i4|f4|bool))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|>=|[&|+\-*/<>])|([(){}])|(\n)";
|
||||||
|
|
||||||
lazy_static::lazy_static! {
|
lazy_static::lazy_static! {
|
||||||
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
|
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
|
||||||
|
@ -148,6 +168,7 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
|
||||||
|
|
||||||
_ => panic!("Unknown match to tokenize: {}", mat.as_str())
|
_ => panic!("Unknown match to tokenize: {}", mat.as_str())
|
||||||
});
|
});
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue