proper error message added

Sven Vogel 2022-10-12 09:33:33 +02:00
parent 843f8dbcf0
commit e8e6ce32cf
6 changed files with 277 additions and 90 deletions

Cargo.lock (generated)

@@ -6,6 +6,7 @@ version = 3
 name = "Yard"
 version = "0.1.0"
 dependencies = [
+ "colored",
  "lazy_static",
  "rand",
  "regex",
@@ -20,12 +21,34 @@ dependencies = [
  "memchr",
 ]

+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

+[[package]]
+name = "colored"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
+dependencies = [
+ "atty",
+ "lazy_static",
+ "winapi",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.2.7"
@@ -37,6 +60,15 @@ dependencies = [
  "wasi",
 ]

+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
@@ -113,3 +145,25 @@ name = "wasi"
 version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

Cargo.toml

@@ -9,3 +9,4 @@ edition = "2021"
 regex = "*"
 lazy_static = "1.4.0"
 rand = "0.8.5"
+colored = "*"
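
The only manifest change is the new `colored` dependency, which supplies the terminal-color API used by the new error messages. A rough sketch of how that API is exercised further down (illustrative only; the real formatting lives in `MessageType::to_colored` and `DebugInfo::print` in token.rs):

use colored::Colorize;

fn main() {
    // label in bold red, message in bold bright white, mirroring the new helpers
    println!("{}: {}", "Error".red().bold(), "something went wrong".bold().bright_white());
}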

README.md

@@ -3,4 +3,5 @@ Yard is an funny programming language compiler and interpreter written in pure R
 It *will* contain features such as:
 1. a COMEFROM keyword (inverse goto)
 2. a ```don't``` code block which never executes
+3. ```rand(x)``` returns x, always.
 3. swapped meaning of "" (for single characters) and '' (now for string literal)

src/main.rs

@@ -8,6 +8,11 @@ mod parser;
 use token::*;
 use parser::*;

+use colored::{Colorize};
+
+pub fn message(typ: MessageType, msg: String) {
+    println!("{}: {}", typ.to_colored(), msg.bold().bright_white());
+}
+
 fn main() {
@@ -15,11 +20,11 @@ fn main() {
 r"
 pi = 3.1415926535

-sin(x: f4) = {
+sin(x: f4) = { {
 x
 }

-main() {
+man() {
 x:i4 = 0
 loop {
@@ -31,5 +36,5 @@ main() {
 }
 ";

-    parse(&mut tokenize(source));
+    parse(&mut tokenize(source), source);
 }
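
Two things change in main.rs: a `message` helper for diagnostics that are not tied to any token, and the raw `source` string now being handed to `parse` so later stages can quote it. The `man()` and doubled `{` edits in the embedded test program look like deliberately injected mistakes to exercise the new reporting. A hedged usage sketch of the helper (the call site is hypothetical, the helper is the one added above):

// prints roughly "Error: could not read source file",
// with the label colored via MessageType::to_colored()
message(MessageType::Error, "could not read source file".to_string());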

src/parser.rs

@@ -1,13 +1,13 @@
 use core::{panic};
 use std::{collections::{VecDeque}, vec};

-use crate::token::{Token, Operator, Assoc, Prim};
+use crate::token::{Token, Operator, Assoc, Prim, MessageType};

 pub mod data;
 use data::*;

 /// simple brace-counting parser to detect functions
-fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'a>> {
+fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str) -> Vec<Func<'a>> {
     let mut funcs = Vec::new();

     // function to currently identifiy
@@ -22,7 +22,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
     macro_rules! finish_func {
         () => {
             if funcs.contains(&func) {
-                panic!("Function already defined: {func}")
+                panic!("Function defined multiple times: {func}")
             }

             funcs.push(func);
@@ -36,12 +36,13 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
         // function body detection
         // has highest priority
         match &top {
-            Token::Delemiter(char) => match char {
+            Token::Delemiter(char, dbginf) => match char {
                 '{' => {
                     brace_cnt += 1;
                     if brace_cnt == 1 {
                         if func.name.is_none() {
-                            panic!("Anonymous function not permitted");
+                            dbginf.print(MessageType::Error, "Anonymous function not permitted", source);
+                            panic!();
                         }
                         single_line = false;
                         func.raw = Some(VecDeque::new());
@@ -58,7 +59,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
                 _ => ()
            }

-            Token::LineBreak => if single_line {
+            Token::LineBreak(_) => if single_line {
                finish_func!();
                continue;
            }
@@ -70,13 +71,15 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
         if func.raw.is_none() {
             match &top {
-                Token::Operator(op) => match op {
+                Token::Operator(op, dbginf) => match op {
                     Operator::Assign => {
                         if func.results {
-                            panic!("double function assignment not permitted")
+                            dbginf.print(MessageType::Error, "double function assignment", source);
+                            panic!();
                         }
                         if func.name.is_none() {
-                            panic!("Anonymous function not permitted");
+                            dbginf.print(MessageType::Error, "Anonymous function", source);
+                            panic!();
                         }
                         func.results = true;
@@ -86,12 +89,14 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
                     _ => ()
                 }

-                Token::Assign(name, _) => {
+                Token::Assign(name, _, dbginf) => {
                     if func.results {
-                        panic!("double function assignment not permitted")
+                        dbginf.print(MessageType::Error, "double function assignment", source);
+                        panic!();
                     }
                     if func.name.is_some() {
-                        panic!("function already named");
+                        dbginf.print(MessageType::Error, "multiple function names", source);
+                        panic!();
                     }

                     func.raw = Some(VecDeque::new());
@@ -101,14 +106,15 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
                     continue;
                 }

-                Token::Delemiter(char) => match char {
+                Token::Delemiter(char, dbginf) => match char {
                     '(' => if func.raw.is_none() {
                         paren_cnt += 1;
                         if paren_cnt == 1 {
                             if func.args.is_some() {
-                                panic!("double parameter list not permitted");
+                                dbginf.print(MessageType::Error, "double parameter list", source);
+                                panic!();
                             }
                             func.args = Some(Vec::new());
@@ -124,11 +130,12 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
                     _ => ()
                 }

-                Token::Word(text) => {
+                Token::Word(text, dbginf) => {
                     if func.name.is_some() {
                         if func.args.is_none() {
-                            panic!("Function name already set: {text}")
+                            dbginf.print(MessageType::Error, "multiple function names", source);
+                            panic!();
                         }
                     }
                     else {
@@ -147,21 +154,31 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
         else if let Some(args) = &mut func.args {
             if paren_cnt == 0 {
-                panic!("Token is not in parameter list: {:?}", top)
+                top.print(MessageType::Error, "token is no parameter", source);
+                panic!();
             }

             match &top {
-                Token::Decl(name, typ) => args.push((name, *typ)),
-                Token::Word(name) => panic!("Missing type declaration {name}"),
-                _ => panic!("Argument is not a declaration {:?}", &top)
+                Token::Decl(name, typ, _dbginf) => args.push((name, *typ)),
+                Token::Word(_, dbginf) => {
+                    dbginf.print(MessageType::Error, "type declaration missing", source);
+                    panic!()
+                },
+                _ => {
+                    top.print(MessageType::Error, "argument must be declaration", source);
+                    panic!()
+                }
             }
             continue;
         }

         // if we have anything left it might be an error
         match &top {
-            Token::LineBreak => (), // valid whitespace
-            _ => panic!("Invalid token: {:?}", top)
+            Token::LineBreak(_) => (), // valid whitespace
+            _ => {
+                top.print(MessageType::Error, "unresolvable token", source);
+                panic!()
+            }
         }
     }
@@ -171,7 +188,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
 /// parse the functions raw content to expr for easy compilation using a brace-counter.
 /// - ```{...}``` surround a block
 /// - line breaks seperate expressions
-fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
+fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>, source: &'a str) {
     for func in functions.iter_mut() {

         let mut blocks = vec![Block::new()];
@@ -181,14 +198,20 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
         while let Some(top) = func.raw.as_mut().unwrap().pop_front() {

             match &top {
-                Token::LineBreak => if !expr.is_empty() {
-                    blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Term(expr));
+                Token::LineBreak(dbginf) => if !expr.is_empty() {
+                    blocks.last_mut().unwrap_or_else(|| {
+                        dbginf.print(MessageType::Error, "curly brace missmatch", source);
+                        panic!()
+                    }).push_back(Expr::Term(expr));
                     expr = VecDeque::new();
                     continue;
                 }
-                Token::Delemiter(char) => match char {
+                Token::Delemiter(char, dbginf) => match char {
                     '{' => {
-                        blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Term(expr));
+                        blocks.last_mut().unwrap_or_else(|| {
+                            dbginf.print(MessageType::Error, "curly brace missmatch", source);
+                            panic!()
+                        }).push_back(Expr::Term(expr));
                         expr = VecDeque::new();
                         blocks.push(Block::new());
                         continue;
@@ -196,7 +219,10 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
                     '}' => {
                         // pop topmost block of the stack, storing it in the next lower block
                         if let Some(block) = blocks.pop() {
-                            blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Block(block));
+                            blocks.last_mut().unwrap_or_else(|| {
+                                dbginf.print(MessageType::Error, "curly brace missmatch", source);
+                                panic!()
+                            }).push_back(Expr::Block(block));
                         } else {
                             panic!("Curly brace missmatch")
                         }
@@ -211,15 +237,22 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
         }

         if !expr.is_empty() {
-            blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Term(expr));
+            blocks.last_mut().unwrap_or_else(|| {
+                expr.back().unwrap().print(MessageType::Error, "curly brace missmatch", source);
+                panic!()
+            }).push_back(Expr::Term(expr));
         }

-        func.expr = Some(Expr::Block(blocks.pop().expect("Curly brace missmmatch")));
+        if let Some(block) = blocks.pop() {
+            func.expr = Some(Expr::Block(block));
+        } else {
+            panic!("curly brace missmatch")
+        }
     }
 }

 /// parse a single term using a modified shunting yard
-fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
+fn parse_term<'a, 'b>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope, source: &'b str) {
     let mut op_stack = vec![];
     let mut output = VecDeque::with_capacity(term.len());
     let mut value_stack = vec![];
@@ -234,39 +267,40 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
     'outer:
     while let Some(token) = term.pop_front() {
         match &token {
-            Token::Word(text) => {
+            Token::Word(text, dbginf) => {
                 if scope.is_func(text) {
-                    op_stack.push(Token::Func(text));
+                    op_stack.push(Token::Func(text, *dbginf));
                     continue;
                 } else if scope.is_arg(text) {
-                    output.push_back(Token::Arg(text));
+                    output.push_back(Token::Arg(text, *dbginf));
                     continue;
                 } else if scope.is_var(text).is_some() {
-                    output.push_back(Token::Var(text));
+                    output.push_back(Token::Var(text, *dbginf));
                     continue;
                 }
-                panic!("Unknwon word: {text}")
+                dbginf.print(MessageType::Error, "Unknown word", source);
+                panic!()
             }
-            Token::Number(_) => {
+            Token::Number(_, _) => {
                 output.push_back(token);
                 value_stack.push(CompileTimeType::UntypedNum)
             },
-            Token::Assign(text, typ) => {
+            Token::Assign(text, typ, _) => {
                 scope.decl_var((*text).to_owned(), typ.to_owned());
                 op_stack.push(token);
             },
-            Token::Keyword(_) => op_stack.push(token),
+            Token::Keyword(_, _) => op_stack.push(token),

-            Token::Delemiter(char) => {
+            Token::Delemiter(char, _) => {
                 match char {
                     '(' => op_stack.push(token),
                     ')' => {
                         while let Some(token) = op_stack.pop() {
                             match &token {
-                                Token::Delemiter(char) => if *char == '(' {
+                                Token::Delemiter(char, _) => if *char == '(' {
                                     if let Some(next) = op_stack.last() {
                                         match &next {
-                                            Token::Func(_) => output.push_back(op_stack.pop().unwrap()),
+                                            Token::Func(_, _) => output.push_back(op_stack.pop().unwrap()),
                                             _ => ()
                                         }
                                     }
@@ -281,11 +315,11 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
                 }
             }

-            Token::Operator(op) => {
+            Token::Operator(op, _) => {
                 let prec0 = op.prec();
                 while let Some(top) = op_stack.last(){
                     match &top {
-                        Token::Operator(op1) => {
+                        Token::Operator(op1, _) => {
                             let prec1 = op1.prec();

                             if prec1 > prec0 || prec0 == prec1 && op.assoc() == Assoc::Left {
@@ -305,7 +339,7 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
     while let Some(token) = op_stack.pop() {
         match &token {
-            Token::Delemiter(char) => if *char == '(' {
+            Token::Delemiter(char, _) => if *char == '(' {
                 panic!("Mismatched parenthesis")
             },
             _ => output.push_back(token)
@@ -320,18 +354,18 @@ enum CompileTimeType {
     UntypedNum,
 }

-fn parse_block(block: &mut Block, scope: &mut Scope) {
+fn parse_block<'a>(block: &mut Block, scope: &mut Scope, source: &'a str) {
     scope.alloc_scope();
     for expr in block.iter_mut() {
         match expr {
-            Expr::Block(block) => parse_block(block, scope),
-            Expr::Term(term) => parse_term(term, scope)
+            Expr::Block(block) => parse_block(block, scope, source),
+            Expr::Term(term) => parse_term(term, scope, source)
         }
     }
     scope.pop_scope();
 }

-fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
+fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>, source: &'a str) {
     let mut scope = Scope {
         funcs: funcs.iter().map(|f| f.name.unwrap()).collect(),
         args: None,
@@ -343,7 +377,7 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
             Expr::Block(block) => {
                 scope.args = func.args.as_ref();

-                parse_block(block, &mut scope)
+                parse_block(block, &mut scope, source)
             },
             _ => panic!("Fatal-Compilier-Error: function must have a block")
         }
@@ -353,11 +387,11 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
 /// reorder and organize a listing of instructions to a RPN based format:
 /// any program is made out of functions.
 /// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
-pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'a>> {
-    let mut funcs = discover_functions(tokens);
+pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &'a str) -> Vec<Func<'a>> {
+    let mut funcs = discover_functions(tokens, source);

-    discover_exprs(&mut funcs);
-    parse_exprs(&mut funcs);
+    discover_exprs(&mut funcs, source);
+    parse_exprs(&mut funcs, source);

     funcs.iter().for_each(|f| println!("{:?}", f));
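
The recurring edit throughout parser.rs swaps `expect("…")` for `unwrap_or_else` with a closure that first prints a located message and then panics, so the user gets a readable diagnostic instead of a bare panic string. A minimal, self-contained sketch of that pattern (the types and the `eprintln!` stand-in are illustrative, not the project's own):

fn last_block(blocks: &mut Vec<Vec<u32>>) -> &mut Vec<u32> {
    // previously: blocks.last_mut().expect("Curly brace missmatch")
    blocks.last_mut().unwrap_or_else(|| {
        // stand-in for dbginf.print(MessageType::Error, "curly brace missmatch", source)
        eprintln!("Error: curly brace missmatch");
        panic!()
    })
}

The closure only runs on the `None` path, so the successful path pays nothing extra.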

src/token.rs

@@ -1,4 +1,5 @@
 use std::{collections::{VecDeque}};
+use colored::{Colorize, ColoredString};

 #[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)]
 pub enum Operator {
@@ -84,6 +85,7 @@ impl Operator {
 pub enum Keyword {
     If,
     While,
+    /// while(true) loop
     Loop,
     Break,
     Continue,
@@ -102,7 +104,8 @@ impl Keyword {
     }
 }

-#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
+/// primitve types
 pub enum Prim {
     Int,
     Real,
@@ -110,36 +113,111 @@ pub enum Prim {
 }

 impl Prim {
-    fn from<'a>(text: &'a str) -> Prim {
+    fn from<'a>(text: &'a str, dbginf: &DebugInfo, source: &str) -> Prim {
         return match text {
             "i4" => Prim::Int,
             "f4" => Prim::Real,
             "bool" => Prim::Bool,
-            _ => panic!("Unknown type declaration: {text}")
+            _ => {
+                dbginf.print(MessageType::Error, "Unknown type declaration", source);
+                panic!()
+            }
         }
     }
 }

+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub struct DebugInfo {
+    /// index in source string where the token begins
+    start: usize,
+    /// index in source string where the token ends
+    end: usize,
+    /// line number where the line in which the token is begins
+    line: usize
+}
+
+#[derive(Debug)]
+pub enum MessageType {
+    Error,
+    Warning,
+    Info
+}
+
+impl MessageType {
+    /// return a colorized string representation:
+    /// - Error (in red)
+    /// - Warning (in yellow)
+    /// - Info (in blue)
+    pub fn to_colored(&self) -> ColoredString {
+        let raw = format!("{:#?}", self);
+        return match self {
+            MessageType::Error => raw.red().bold(),
+            MessageType::Warning => raw.yellow().bold(),
+            MessageType::Info => raw.blue().bold()
+        };
+    }
+}
+
+impl DebugInfo {
+    /// print message in the form of:
+    /// ```text
+    /// Error (message) in line 7: token `code`
+    /// somewhere in here:
+    /// --> `code line`
+    /// ```
+    pub fn print<'a>(&self, typ: MessageType, msg: &str, source: &'a str) {
+        println!("{} ({}) in line {}: token `{}`", typ.to_colored(), msg.bold().bright_white(), self.line, &source[self.start..self.end].bold());
+        println!(" somewhere in here:\n --> `{}`\n", source.lines().nth(self.line).unwrap().trim().bold().bright_white())
+    }
+}
+
 #[derive(Debug, PartialEq, Eq, Copy, Clone)]
 /// A token represents a basic building block for source code.
 /// They give a meaning to patterns of chars allowing to interpret them.
 pub enum Token<'a> {
     // base tokens that can simply be split to from raw source code
-    Word(&'a str),
-    Delemiter(char),
-    Operator(Operator),
-    Number(&'a str),
-    LineBreak,
-    Func(&'a str),
-    Var(&'a str),
-    Arg(&'a str),
-    Assign(&'a str, Option<Prim>),
-    Decl(&'a str, Prim),
-    Bool(bool),
-    Keyword(Keyword),
+    Word(&'a str, DebugInfo),
+    /// Single symbol delemiter like ```(```,```}```
+    Delemiter(char, DebugInfo),
+    Operator(Operator, DebugInfo),
+    Number(&'a str, DebugInfo),
+    LineBreak(DebugInfo),
+    Func(&'a str, DebugInfo),
+    /// Variable
+    Var(&'a str, DebugInfo),
+    /// Function argument
+    Arg(&'a str, DebugInfo),
+    /// Variable assignment in the form of ```name = ```
+    Assign(&'a str, Option<Prim>, DebugInfo),
+    /// Variable type declaration in the form of ```name:type```
+    Decl(&'a str, Prim, DebugInfo),
+    Bool(bool, DebugInfo),
+    /// Keywords like ```if```,```break```,```while```
+    Keyword(Keyword, DebugInfo),
 }

-const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n+)";
+impl<'a> Token<'a> {
+    /// redirect for ```DebugInfo.print()```
+    pub fn print(&self, error: MessageType, arg: &str, source: &str) {
+        match self {
+            Token::Word(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Delemiter(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Operator(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Number(_, dbginf) => dbginf.print(error, arg, source),
+            Token::LineBreak(dbginf) => dbginf.print(error, arg, source),
+            Token::Func(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Var(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Arg(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Assign(_, _, dbginf) => dbginf.print(error, arg, source),
+            Token::Decl(_, _, dbginf) => dbginf.print(error, arg, source),
+            Token::Bool(_, dbginf) => dbginf.print(error, arg, source),
+            Token::Keyword(_, dbginf) => dbginf.print(error, arg, source),
+        }
+    }
+}
+
+const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)";

 lazy_static::lazy_static! {
     static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
@@ -149,6 +227,8 @@ lazy_static::lazy_static! {
 pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
     let mut tokens = VecDeque::new();
+    let mut line_count = 0;

     for cap in TOKEN_REGEX.captures_iter(source) {
         let mut enumerator = cap.iter().enumerate();
         loop {
@@ -167,28 +247,40 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
             // if we have a match, save it as token
             if let Some(mat) = group {
+                let debug_info = DebugInfo {
+                    start: mat.start(),
+                    end: mat.end(),
+                    line: line_count
+                };
+
                 tokens.push_back(match i {
-                    2 => Token::Keyword(Keyword::parse(mat.as_str())),
-                    3 => Token::Bool(parse_bool(mat.as_str())),
+                    2 => Token::Keyword(Keyword::parse(mat.as_str()), debug_info),
+                    3 => Token::Bool(parse_bool(mat.as_str()), debug_info),
                     4 => {
-                        let var_type = Prim::from(enumerator.next().unwrap().1.unwrap().as_str());
-                        Token::Decl(mat.as_str(), var_type)
-                    },
-                    6 => {
                         let var_type = if let Some(mat) = enumerator.next().unwrap().1 {
-                            Some(Prim::from(mat.as_str()))
+                            Some(Prim::from(mat.as_str(), &debug_info, source))
                         } else {
                             None
                         };
-                        Token::Assign(mat.as_str(), var_type)
+                        Token::Assign(mat.as_str(), var_type, debug_info)
+                    },
+                    6 => {
+                        let var_type = Prim::from(enumerator.next().unwrap().1.unwrap().as_str(), &debug_info, source);
+                        Token::Decl(mat.as_str(), var_type, debug_info)
+                    },
+                    8 => Token::Word(mat.as_str(), debug_info),
+                    9 => Token::Number(mat.as_str(), debug_info),
+                    10 => Token::Operator(Operator::parse(mat.as_str()), debug_info),
+                    11 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap(), debug_info),
+                    12 => {
+                        line_count += 1;
+                        Token::LineBreak(debug_info)
                     },
-                    8 => Token::Word(mat.as_str()),
-                    9 => Token::Number(mat.as_str()),
-                    10 => Token::Operator(Operator::parse(mat.as_str())),
-                    11 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
-                    12 => Token::LineBreak,
-                    _ => panic!("Unknown match to tokenize ({i}): {}", mat.as_str())
+                    _ => {
+                        debug_info.print(MessageType::Error, "Unable to identify sequence as token", source);
+                        panic!()
+                    }
                 });
                 break;
             }
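
With every `Token` variant now carrying a `DebugInfo` (start and end byte of the match plus a line counter), any later stage can route an error back to the offending source text through `Token::print`. Going by the format strings in `DebugInfo::print`, a report is expected to render roughly like this (values are illustrative):

Error (double parameter list) in line 4: token `(`
 somewhere in here:
 --> `foo(x: i4)(y: i4) = {`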