proper error message added
This commit is contained in:
parent
843f8dbcf0
commit
e8e6ce32cf
|
@ -6,6 +6,7 @@ version = 3
|
|||
name = "Yard"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"colored",
|
||||
"lazy_static",
|
||||
"rand",
|
||||
"regex",
|
||||
|
@ -20,12 +21,34 @@ dependencies = [
|
|||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "colored"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"lazy_static",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.7"
|
||||
|
@ -37,6 +60,15 @@ dependencies = [
|
|||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
|
@ -113,3 +145,25 @@ name = "wasi"
|
|||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
|
|
@ -9,3 +9,4 @@ edition = "2021"
|
|||
regex = "*"
|
||||
lazy_static = "1.4.0"
|
||||
rand = "0.8.5"
|
||||
colored = "*"
|
|
@ -3,4 +3,5 @@ Yard is an funny programming language compiler and interpreter written in pure R
|
|||
It *will* contain features such as:
|
||||
1. a COMEFROM keyword (inverse goto)
|
||||
2. a ```don't``` code block which never executes
|
||||
3. ```rand(x)``` returns x, always.
|
||||
4. swapped meaning of "" (for single characters) and '' (now for string literals)
|
11
src/main.rs
11
src/main.rs
|
@ -8,6 +8,11 @@ mod parser;
|
|||
|
||||
use token::*;
|
||||
use parser::*;
|
||||
use colored::{Colorize};
|
||||
|
||||
pub fn message(typ: MessageType, msg: String) {
|
||||
println!("{}: {}", typ.to_colored(), msg.bold().bright_white());
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
||||
|
@ -15,11 +20,11 @@ fn main() {
|
|||
r"
|
||||
pi = 3.1415926535
|
||||
|
||||
sin(x: f4) = {
|
||||
sin(x: f4) = { {
|
||||
x
|
||||
}
|
||||
|
||||
main() {
|
||||
man() {
|
||||
|
||||
x:i4 = 0
|
||||
loop {
|
||||
|
@ -31,5 +36,5 @@ main() {
|
|||
}
|
||||
";
|
||||
|
||||
parse(&mut tokenize(source));
|
||||
parse(&mut tokenize(source), source);
|
||||
}
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
use core::{panic};
|
||||
use std::{collections::{VecDeque}, vec};
|
||||
use crate::token::{Token, Operator, Assoc, Prim};
|
||||
use crate::token::{Token, Operator, Assoc, Prim, MessageType};
|
||||
|
||||
pub mod data;
|
||||
|
||||
use data::*;
|
||||
|
||||
/// simple brace-counting parser to detect functions
|
||||
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'a>> {
|
||||
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &str) -> Vec<Func<'a>> {
|
||||
let mut funcs = Vec::new();
|
||||
|
||||
// function currently being identified
|
||||
|
@ -22,7 +22,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
macro_rules! finish_func {
|
||||
() => {
|
||||
if funcs.contains(&func) {
|
||||
panic!("Function already defined: {func}")
|
||||
panic!("Function defined multiple times: {func}")
|
||||
}
|
||||
|
||||
funcs.push(func);
|
||||
|
@ -36,12 +36,13 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
// function body detection
|
||||
// has highest priority
|
||||
match &top {
|
||||
Token::Delemiter(char) => match char {
|
||||
Token::Delemiter(char, dbginf) => match char {
|
||||
'{' => {
|
||||
brace_cnt += 1;
|
||||
if brace_cnt == 1 {
|
||||
if func.name.is_none() {
|
||||
panic!("Anonymous function not permitted");
|
||||
dbginf.print(MessageType::Error, "Anonymous function not permitted", source);
|
||||
panic!();
|
||||
}
|
||||
single_line = false;
|
||||
func.raw = Some(VecDeque::new());
|
||||
|
@ -58,7 +59,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
_ => ()
|
||||
}
|
||||
|
||||
Token::LineBreak => if single_line {
|
||||
Token::LineBreak(_) => if single_line {
|
||||
finish_func!();
|
||||
continue;
|
||||
}
|
||||
|
@ -70,13 +71,15 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
|
||||
if func.raw.is_none() {
|
||||
match &top {
|
||||
Token::Operator(op) => match op {
|
||||
Token::Operator(op, dbginf) => match op {
|
||||
Operator::Assign => {
|
||||
if func.results {
|
||||
panic!("double function assignment not permitted")
|
||||
dbginf.print(MessageType::Error, "double function assignment", source);
|
||||
panic!();
|
||||
}
|
||||
if func.name.is_none() {
|
||||
panic!("Anonymous function not permitted");
|
||||
dbginf.print(MessageType::Error, "Anonymous function", source);
|
||||
panic!();
|
||||
}
|
||||
|
||||
func.results = true;
|
||||
|
@ -86,12 +89,14 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
_ => ()
|
||||
}
|
||||
|
||||
Token::Assign(name, _) => {
|
||||
Token::Assign(name, _, dbginf) => {
|
||||
if func.results {
|
||||
panic!("double function assignment not permitted")
|
||||
dbginf.print(MessageType::Error, "double function assignment", source);
|
||||
panic!();
|
||||
}
|
||||
if func.name.is_some() {
|
||||
panic!("function already named");
|
||||
dbginf.print(MessageType::Error, "multiple function names", source);
|
||||
panic!();
|
||||
}
|
||||
|
||||
func.raw = Some(VecDeque::new());
|
||||
|
@ -101,14 +106,15 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
continue;
|
||||
}
|
||||
|
||||
Token::Delemiter(char) => match char {
|
||||
Token::Delemiter(char, dbginf) => match char {
|
||||
|
||||
'(' => if func.raw.is_none() {
|
||||
paren_cnt += 1;
|
||||
if paren_cnt == 1 {
|
||||
|
||||
if func.args.is_some() {
|
||||
panic!("double parameter list not permitted");
|
||||
dbginf.print(MessageType::Error, "double parameter list", source);
|
||||
panic!();
|
||||
}
|
||||
|
||||
func.args = Some(Vec::new());
|
||||
|
@ -124,11 +130,12 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
_ => ()
|
||||
}
|
||||
|
||||
Token::Word(text) => {
|
||||
Token::Word(text, dbginf) => {
|
||||
|
||||
if func.name.is_some() {
|
||||
if func.args.is_none() {
|
||||
panic!("Function name already set: {text}")
|
||||
dbginf.print(MessageType::Error, "multiple function names", source);
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -147,21 +154,31 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
else if let Some(args) = &mut func.args {
|
||||
|
||||
if paren_cnt == 0 {
|
||||
panic!("Token is not in parameter list: {:?}", top)
|
||||
top.print(MessageType::Error, "token is no parameter", source);
|
||||
panic!();
|
||||
}
|
||||
|
||||
match &top {
|
||||
Token::Decl(name, typ) => args.push((name, *typ)),
|
||||
Token::Word(name) => panic!("Missing type declaration {name}"),
|
||||
_ => panic!("Argument is not a declaration {:?}", &top)
|
||||
Token::Decl(name, typ, _dbginf) => args.push((name, *typ)),
|
||||
Token::Word(_, dbginf) => {
|
||||
dbginf.print(MessageType::Error, "type declaration missing", source);
|
||||
panic!()
|
||||
},
|
||||
_ => {
|
||||
top.print(MessageType::Error, "argument must be declaration", source);
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// if we have anything left it might be an error
|
||||
match &top {
|
||||
Token::LineBreak => (), // valid whitespace
|
||||
_ => panic!("Invalid token: {:?}", top)
|
||||
Token::LineBreak(_) => (), // valid whitespace
|
||||
_ => {
|
||||
top.print(MessageType::Error, "unresolvable token", source);
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -171,7 +188,7 @@ fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'
|
|||
/// parse the functions raw content to expr for easy compilation using a brace-counter.
|
||||
/// - ```{...}``` surround a block
|
||||
/// - line breaks separate expressions
|
||||
fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
|
||||
fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>, source: &'a str) {
|
||||
for func in functions.iter_mut() {
|
||||
|
||||
let mut blocks = vec![Block::new()];
|
||||
|
@ -181,14 +198,20 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
|
|||
while let Some(top) = func.raw.as_mut().unwrap().pop_front() {
|
||||
|
||||
match &top {
|
||||
Token::LineBreak => if !expr.is_empty() {
|
||||
blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Term(expr));
|
||||
Token::LineBreak(dbginf) => if !expr.is_empty() {
|
||||
blocks.last_mut().unwrap_or_else(|| {
|
||||
dbginf.print(MessageType::Error, "curly brace missmatch", source);
|
||||
panic!()
|
||||
}).push_back(Expr::Term(expr));
|
||||
expr = VecDeque::new();
|
||||
continue;
|
||||
}
|
||||
Token::Delemiter(char) => match char {
|
||||
Token::Delemiter(char, dbginf) => match char {
|
||||
'{' => {
|
||||
blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Term(expr));
|
||||
blocks.last_mut().unwrap_or_else(|| {
|
||||
dbginf.print(MessageType::Error, "curly brace missmatch", source);
|
||||
panic!()
|
||||
}).push_back(Expr::Term(expr));
|
||||
expr = VecDeque::new();
|
||||
blocks.push(Block::new());
|
||||
continue;
|
||||
|
@ -196,7 +219,10 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
|
|||
'}' => {
|
||||
// pop topmost block of the stack, storing it in the next lower block
|
||||
if let Some(block) = blocks.pop() {
|
||||
blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Block(block));
|
||||
blocks.last_mut().unwrap_or_else(|| {
|
||||
dbginf.print(MessageType::Error, "curly brace missmatch", source);
|
||||
panic!()
|
||||
}).push_back(Expr::Block(block));
|
||||
} else {
|
||||
panic!("Curly brace missmatch")
|
||||
}
|
||||
|
@ -211,15 +237,22 @@ fn discover_exprs<'a>(functions: &mut Vec<Func<'a>>) {
|
|||
}
|
||||
|
||||
if !expr.is_empty() {
|
||||
blocks.last_mut().expect("Curly brace missmatch").push_back(Expr::Term(expr));
|
||||
blocks.last_mut().unwrap_or_else(|| {
|
||||
expr.back().unwrap().print(MessageType::Error, "curly brace missmatch", source);
|
||||
panic!()
|
||||
}).push_back(Expr::Term(expr));
|
||||
}
|
||||
|
||||
func.expr = Some(Expr::Block(blocks.pop().expect("Curly brace missmmatch")));
|
||||
if let Some(block) = blocks.pop() {
|
||||
func.expr = Some(Expr::Block(block));
|
||||
} else {
|
||||
panic!("curly brace missmatch")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// parse a single term using a modified shunting yard
|
||||
fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
||||
fn parse_term<'a, 'b>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope, source: &'b str) {
|
||||
let mut op_stack = vec![];
|
||||
let mut output = VecDeque::with_capacity(term.len());
|
||||
let mut value_stack = vec![];
|
||||
|
@ -234,39 +267,40 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
|||
'outer:
|
||||
while let Some(token) = term.pop_front() {
|
||||
match &token {
|
||||
Token::Word(text) => {
|
||||
Token::Word(text, dbginf) => {
|
||||
if scope.is_func(text) {
|
||||
op_stack.push(Token::Func(text));
|
||||
op_stack.push(Token::Func(text, *dbginf));
|
||||
continue;
|
||||
} else if scope.is_arg(text) {
|
||||
output.push_back(Token::Arg(text));
|
||||
output.push_back(Token::Arg(text, *dbginf));
|
||||
continue;
|
||||
} else if scope.is_var(text).is_some() {
|
||||
output.push_back(Token::Var(text));
|
||||
output.push_back(Token::Var(text, *dbginf));
|
||||
continue;
|
||||
}
|
||||
panic!("Unknwon word: {text}")
|
||||
dbginf.print(MessageType::Error, "Unknown word", source);
|
||||
panic!()
|
||||
}
|
||||
Token::Number(_) => {
|
||||
Token::Number(_, _) => {
|
||||
output.push_back(token);
|
||||
value_stack.push(CompileTimeType::UntypedNum)
|
||||
},
|
||||
Token::Assign(text, typ) => {
|
||||
Token::Assign(text, typ, _) => {
|
||||
scope.decl_var((*text).to_owned(), typ.to_owned());
|
||||
op_stack.push(token);
|
||||
},
|
||||
Token::Keyword(_) => op_stack.push(token),
|
||||
Token::Keyword(_, _) => op_stack.push(token),
|
||||
|
||||
Token::Delemiter(char) => {
|
||||
Token::Delemiter(char, _) => {
|
||||
match char {
|
||||
'(' => op_stack.push(token),
|
||||
')' => {
|
||||
while let Some(token) = op_stack.pop() {
|
||||
match &token {
|
||||
Token::Delemiter(char) => if *char == '(' {
|
||||
Token::Delemiter(char, _) => if *char == '(' {
|
||||
if let Some(next) = op_stack.last() {
|
||||
match &next {
|
||||
Token::Func(_) => output.push_back(op_stack.pop().unwrap()),
|
||||
Token::Func(_, _) => output.push_back(op_stack.pop().unwrap()),
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
|
@ -281,11 +315,11 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
|||
}
|
||||
}
|
||||
|
||||
Token::Operator(op) => {
|
||||
Token::Operator(op, _) => {
|
||||
let prec0 = op.prec();
|
||||
while let Some(top) = op_stack.last(){
|
||||
match &top {
|
||||
Token::Operator(op1) => {
|
||||
Token::Operator(op1, _) => {
|
||||
let prec1 = op1.prec();
|
||||
|
||||
if prec1 > prec0 || prec0 == prec1 && op.assoc() == Assoc::Left {
|
||||
|
@ -305,7 +339,7 @@ fn parse_term<'a>(term: &mut VecDeque<Token<'a>>, scope: &mut Scope) {
|
|||
|
||||
while let Some(token) = op_stack.pop() {
|
||||
match &token {
|
||||
Token::Delemiter(char) => if *char == '(' {
|
||||
Token::Delemiter(char, _) => if *char == '(' {
|
||||
panic!("Mismatched parenthesis")
|
||||
},
|
||||
_ => output.push_back(token)
|
||||
|
@ -320,18 +354,18 @@ enum CompileTimeType {
|
|||
UntypedNum,
|
||||
}
|
||||
|
||||
fn parse_block(block: &mut Block, scope: &mut Scope) {
|
||||
fn parse_block<'a>(block: &mut Block, scope: &mut Scope, source: &'a str) {
|
||||
scope.alloc_scope();
|
||||
for expr in block.iter_mut() {
|
||||
match expr {
|
||||
Expr::Block(block) => parse_block(block, scope),
|
||||
Expr::Term(term) => parse_term(term, scope)
|
||||
Expr::Block(block) => parse_block(block, scope, source),
|
||||
Expr::Term(term) => parse_term(term, scope, source)
|
||||
}
|
||||
}
|
||||
scope.pop_scope();
|
||||
}
|
||||
|
||||
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
|
||||
fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>, source: &'a str) {
|
||||
let mut scope = Scope {
|
||||
funcs: funcs.iter().map(|f| f.name.unwrap()).collect(),
|
||||
args: None,
|
||||
|
@ -343,7 +377,7 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
|
|||
Expr::Block(block) => {
|
||||
scope.args = func.args.as_ref();
|
||||
|
||||
parse_block(block, &mut scope)
|
||||
parse_block(block, &mut scope, source)
|
||||
},
|
||||
_ => panic!("Fatal-Compilier-Error: function must have a block")
|
||||
}
|
||||
|
@ -353,11 +387,11 @@ fn parse_exprs<'a>(funcs: &mut Vec<Func<'a>>) {
|
|||
/// reorder and organize a listing of instructions to a RPN based format:
|
||||
/// any program is made out of functions.
|
||||
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
|
||||
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> Vec<Func<'a>> {
|
||||
let mut funcs = discover_functions(tokens);
|
||||
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>, source: &'a str) -> Vec<Func<'a>> {
|
||||
let mut funcs = discover_functions(tokens, source);
|
||||
|
||||
discover_exprs(&mut funcs);
|
||||
parse_exprs(&mut funcs);
|
||||
discover_exprs(&mut funcs, source);
|
||||
parse_exprs(&mut funcs, source);
|
||||
|
||||
funcs.iter().for_each(|f| println!("{:?}", f));
|
||||
|
||||
|
|
152
src/token/mod.rs
152
src/token/mod.rs
|
@ -1,4 +1,5 @@
|
|||
use std::{collections::{VecDeque}};
|
||||
use colored::{Colorize, ColoredString};
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq, Copy, Clone)]
|
||||
pub enum Operator {
|
||||
|
@ -84,6 +85,7 @@ impl Operator {
|
|||
pub enum Keyword {
|
||||
If,
|
||||
While,
|
||||
/// while(true) loop
|
||||
Loop,
|
||||
Break,
|
||||
Continue,
|
||||
|
@ -102,7 +104,8 @@ impl Keyword {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
|
||||
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
|
||||
/// primitive types
|
||||
pub enum Prim {
|
||||
Int,
|
||||
Real,
|
||||
|
@ -110,36 +113,111 @@ pub enum Prim {
|
|||
}
|
||||
|
||||
impl Prim {
|
||||
fn from<'a>(text: &'a str) -> Prim {
|
||||
fn from<'a>(text: &'a str, dbginf: &DebugInfo, source: &str) -> Prim {
|
||||
return match text {
|
||||
"i4" => Prim::Int,
|
||||
"f4" => Prim::Real,
|
||||
"bool" => Prim::Bool,
|
||||
_ => panic!("Unknown type declaration: {text}")
|
||||
|
||||
_ => {
|
||||
dbginf.print(MessageType::Error, "Unknown type declaration", source);
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Location of a token inside the source string, used when printing
/// diagnostics about that token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct DebugInfo {
    /// Index in the source string where the token begins.
    start: usize,
    /// Index in the source string where the token ends.
    end: usize,
    /// Number of the source line that contains the token.
    line: usize,
}
|
||||
|
||||
/// Kind of diagnostic message; the variant name is what gets printed as
/// the message label.
#[derive(Debug)]
pub enum MessageType {
    /// Label `Error`.
    Error,
    /// Label `Warning`.
    Warning,
    /// Label `Info`.
    Info,
}
|
||||
|
||||
impl MessageType {
|
||||
/// return a colorized string representation:
|
||||
/// - Error (in red)
|
||||
/// - Warning (in yellow)
|
||||
/// - Info (in blue)
|
||||
pub fn to_colored(&self) -> ColoredString {
|
||||
let raw = format!("{:#?}", self);
|
||||
return match self {
|
||||
MessageType::Error => raw.red().bold(),
|
||||
MessageType::Warning => raw.yellow().bold(),
|
||||
MessageType::Info => raw.blue().bold()
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
impl DebugInfo {
|
||||
/// print message in the form of:
|
||||
/// ```text
|
||||
/// Error (message) in line 7: token `code`
|
||||
/// somewhere in here:
|
||||
/// --> `code line`
|
||||
/// ```
|
||||
pub fn print<'a>(&self, typ: MessageType, msg: &str, source: &'a str) {
|
||||
println!("{} ({}) in line {}: token `{}`", typ.to_colored(), msg.bold().bright_white(), self.line, &source[self.start..self.end].bold());
|
||||
println!(" somewhere in here:\n --> `{}`\n", source.lines().nth(self.line).unwrap().trim().bold().bright_white())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
|
||||
/// A token represents a basic building block for source code.
|
||||
/// They give a meaning to patterns of chars allowing to interpret them.
|
||||
pub enum Token<'a> {
|
||||
// base tokens that can simply be split to from raw source code
|
||||
Word(&'a str),
|
||||
Delemiter(char),
|
||||
Operator(Operator),
|
||||
Number(&'a str),
|
||||
LineBreak,
|
||||
Func(&'a str),
|
||||
Var(&'a str),
|
||||
Arg(&'a str),
|
||||
Assign(&'a str, Option<Prim>),
|
||||
Decl(&'a str, Prim),
|
||||
Bool(bool),
|
||||
Keyword(Keyword),
|
||||
Word(&'a str, DebugInfo),
|
||||
/// Single symbol delimiter like ```(```,```}```
|
||||
Delemiter(char, DebugInfo),
|
||||
Operator(Operator, DebugInfo),
|
||||
Number(&'a str, DebugInfo),
|
||||
LineBreak(DebugInfo),
|
||||
Func(&'a str, DebugInfo),
|
||||
/// Variable
|
||||
Var(&'a str, DebugInfo),
|
||||
/// Function argument
|
||||
Arg(&'a str, DebugInfo),
|
||||
/// Variable assignment in the form of ```name = ```
|
||||
Assign(&'a str, Option<Prim>, DebugInfo),
|
||||
/// Variable type declaration in the form of ```name:type```
|
||||
Decl(&'a str, Prim, DebugInfo),
|
||||
Bool(bool, DebugInfo),
|
||||
/// Keywords like ```if```,```break```,```while```
|
||||
Keyword(Keyword, DebugInfo),
|
||||
}
|
||||
|
||||
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n+)";
|
||||
impl<'a> Token<'a> {
|
||||
/// redirect for ```DebugInfo.print()```
|
||||
pub fn print(&self, error: MessageType, arg: &str, source: &str) {
|
||||
match self {
|
||||
Token::Word(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Delemiter(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Operator(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Number(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::LineBreak(dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Func(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Var(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Arg(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Assign(_, _, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Decl(_, _, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Bool(_, dbginf) => dbginf.print(error, arg, source),
|
||||
Token::Keyword(_, dbginf) => dbginf.print(error, arg, source),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|(if|while|loop|break|continue)|(true|false|yes|no|maybe)|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))?\s*=|([A-Za-z_]+)\s*(?::\s*([a-zA-Z0-9]+))|([A-Za-z_]+)|(\d*\.?\d+)|(!=|==|<=|<=|[&|+\-*/<>])|([(){}])|(\n)";
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
|
||||
|
@ -149,6 +227,8 @@ lazy_static::lazy_static! {
|
|||
pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
|
||||
let mut tokens = VecDeque::new();
|
||||
|
||||
let mut line_count = 0;
|
||||
|
||||
for cap in TOKEN_REGEX.captures_iter(source) {
|
||||
let mut enumerator = cap.iter().enumerate();
|
||||
loop {
|
||||
|
@ -167,28 +247,40 @@ pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
|
|||
|
||||
// if we have a match, save it as token
|
||||
if let Some(mat) = group {
|
||||
let debug_info = DebugInfo {
|
||||
start: mat.start(),
|
||||
end: mat.end(),
|
||||
line: line_count
|
||||
};
|
||||
|
||||
tokens.push_back(match i {
|
||||
2 => Token::Keyword(Keyword::parse(mat.as_str())),
|
||||
3 => Token::Bool(parse_bool(mat.as_str())),
|
||||
2 => Token::Keyword(Keyword::parse(mat.as_str()), debug_info),
|
||||
3 => Token::Bool(parse_bool(mat.as_str()), debug_info),
|
||||
4 => {
|
||||
let var_type = Prim::from(enumerator.next().unwrap().1.unwrap().as_str());
|
||||
Token::Decl(mat.as_str(), var_type)
|
||||
},
|
||||
6 => {
|
||||
let var_type = if let Some(mat) = enumerator.next().unwrap().1 {
|
||||
Some(Prim::from(mat.as_str()))
|
||||
Some(Prim::from(mat.as_str(), &debug_info, source))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Token::Assign(mat.as_str(), var_type)
|
||||
Token::Assign(mat.as_str(), var_type, debug_info)
|
||||
},
|
||||
6 => {
|
||||
let var_type = Prim::from(enumerator.next().unwrap().1.unwrap().as_str(), &debug_info, source);
|
||||
Token::Decl(mat.as_str(), var_type, debug_info)
|
||||
},
|
||||
8 => Token::Word(mat.as_str(), debug_info),
|
||||
9 => Token::Number(mat.as_str(), debug_info),
|
||||
10 => Token::Operator(Operator::parse(mat.as_str()), debug_info),
|
||||
11 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap(), debug_info),
|
||||
12 => {
|
||||
line_count += 1;
|
||||
Token::LineBreak(debug_info)
|
||||
},
|
||||
8 => Token::Word(mat.as_str()),
|
||||
9 => Token::Number(mat.as_str()),
|
||||
10 => Token::Operator(Operator::parse(mat.as_str())),
|
||||
11 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
|
||||
12 => Token::LineBreak,
|
||||
|
||||
_ => panic!("Unknown match to tokenize ({i}): {}", mat.as_str())
|
||||
_ => {
|
||||
debug_info.print(MessageType::Error, "Unable to identify sequence as token", source);
|
||||
panic!()
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue