just poosh

This commit is contained in:
Sven Vogel 2022-09-23 13:02:22 +02:00
parent 696bca6f4e
commit d1d3fe73df
3 changed files with 160 additions and 25 deletions

View File

@ -3,15 +3,22 @@ mod token;
mod parser;
use token::*;
use parser::*;
/// Tokenizes a small hard-coded sample program, prints every token using its
/// debug representation and then hands a freshly tokenized copy to the parser.
fn main() {
    let source = r"
main() {
foo = 5 * 6 + 4
foo() = {
c
}
main()(x) {
3 * 5 # comment
}
";

    // dump the token stream for inspection
    for token in tokenize(source).iter() {
        print!("{:?}", token);
    }

    // the parser consumes its input, so it gets its own token queue
    let mut tokens = tokenize(source);
    parse(&mut tokens);
}

View File

@ -1,25 +1,124 @@
use std::collections::{VecDeque, HashSet};
use crate::token::Token;
#[derive(Eq, Hash)]
pub struct Function<'a> {
/// name
name: &'a str,
/// parameter names
params: Vec<&'a str>,
/// wether this function returns a single value or not
ret: bool,
pub params: Option<Vec<&'a str>>,
/// raw tokens
pub raw: Option<VecDeque<Token<'a>>>
}
impl<'a> Function<'a> {
pub fn new() -> Self {
Self {
params: None,
raw: None
}
}
}
/// Two functions are equal when both their parameter lists and their raw
/// bodies are equal.
///
/// `Function` derives `Hash` over all of its fields, and functions are stored
/// in a `HashSet`. Equality therefore has to look at the same fields as the
/// hash: comparing `params` alone would declare values equal whose hashes
/// differ, which violates the `Hash`/`Eq` contract.
impl<'a> PartialEq for Function<'a> {
    fn eq(&self, other: &Self) -> bool {
        self.params == other.params && self.raw == other.raw
    }
}
/// Simple brace-counting pass that splits a token stream into raw function
/// definitions.
///
/// All tokens are drained from `tokens`. Two shapes of definition are
/// recognised:
///
/// * a brace body: everything between an outermost `{` and its matching `}`
///   becomes the body (nested braces stay part of the body),
/// * a one-line body: after a name, everything between `=` and the next line
///   break (or the end of input) becomes the body.
///
/// Words inside a parenthesised list that precedes the body are collected as
/// parameter names. The name of a definition is only used to validate one-line
/// definitions; `Function` has no field to store it.
///
/// # Panics
///
/// Panics if a parameter list contains anything but words, or if a closing
/// brace / parenthesis has no matching opener.
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> HashSet<Function<'a>> {
    let mut funcs = HashSet::new();

    // most recent word seen outside of any parameter list or body
    let mut name: Option<&'a str> = None;
    let mut cur_fun = Function::new();
    // true while the current body was opened by `=` and ends at a line break
    let mut assigned = false;
    let mut brace_cnt = 0usize;
    let mut paren_cnt = 0usize;

    while let Some(top) = tokens.pop_front() {
        // set when this token completes the definition currently being read
        let mut finished = false;

        match &top {
            // `=` outside of a body starts a one-line body
            crate::Token::Operator(crate::Operator::Assign) if cur_fun.raw.is_none() => {
                assigned = true;
                cur_fun.raw = Some(VecDeque::new());
                continue;
            }
            // a line break terminates a one-line body
            crate::Token::LineBreak if assigned && name.is_some() && cur_fun.raw.is_some() => {
                finished = true;
            }
            crate::Token::Delemiter('{') => {
                brace_cnt += 1;
                if brace_cnt == 1 {
                    // start a new body; it now ends at the matching brace,
                    // not at a line break
                    cur_fun.raw = Some(VecDeque::new());
                    assigned = false;
                    continue;
                }
            }
            crate::Token::Delemiter('}') => {
                brace_cnt = brace_cnt.checked_sub(1).expect("Unbalanced closing brace");
                // we have a full body once the outermost brace is closed
                finished = brace_cnt == 0;
            }
            // parentheses only delimit a parameter list before the body starts
            crate::Token::Delemiter('(') if cur_fun.raw.is_none() => {
                paren_cnt += 1;
                if paren_cnt == 1 {
                    // start a new parameter list
                    cur_fun.params = Some(Vec::new());
                    continue;
                }
            }
            crate::Token::Delemiter(')') if cur_fun.raw.is_none() => {
                paren_cnt = paren_cnt
                    .checked_sub(1)
                    .expect("Unbalanced closing parenthesis");
                if paren_cnt == 0 {
                    // the parameter list is complete; the body is still to come
                    continue;
                }
            }
            _ => (),
        }

        if finished {
            funcs.insert(std::mem::replace(&mut cur_fun, Function::new()));
            name = None;
            assigned = false;
            continue;
        }

        if let Some(body) = cur_fun.raw.as_mut() {
            // inside a body every token is kept verbatim
            body.push_back(top);
        } else if paren_cnt > 0 {
            // inside a parameter list only words are allowed
            match (cur_fun.params.as_mut(), &top) {
                (Some(args), crate::Token::Word(text)) => args.push(*text),
                _ => panic!("Argument in list is not a word"),
            }
        } else if let crate::Token::Word(text) = top {
            // a word in front of the parameter list / body names the definition
            name = Some(text);
        }
    }

    // a one-line body that runs to the end of input has no closing line break
    if assigned && name.is_some() && cur_fun.raw.is_some() {
        funcs.insert(cur_fun);
    }

    funcs
}
/// reorder and organize a listing of instructions to a RPN based format:
/// any program is made out of functions.
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
/// ```python
/// foo(a) = {
/// # function
/// }
/// ```
pub fn parse<'a>(tokens: &Vec<crate::Token<'a>>) -> Vec<Function<'a>> {
let mut functions = vec![];
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) {
functions
}

View File

@ -1,23 +1,51 @@
use std::collections::{VecDeque};
/// Operators recognised by the tokenizer.
// A single derive line: deriving `Debug` twice produces conflicting impls.
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum Operator {
    /// `=`
    Assign,
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
}
impl Operator {
pub fn parse<'a>(str: &'a str) -> Self {
return match str {
"=" => Operator::Assign,
"+" => Operator::Add,
"-" => Operator::Sub,
"*" => Operator::Mul,
"/" => Operator::Div,
_ => panic!("Unspecified operator")
};
}
}
#[derive(Debug, Hash, PartialEq, Eq)]
/// A token represents a basic building block for source code.
/// They give a meaning to patterns of chars allowing to interpret them.
pub enum Token<'a> {
// base tokens that can simply be split to from raw source code
Word(&'a str),
Delemiter(char),
Operator,
Operator(Operator),
Number(&'a str),
LineBreak
}
/// Source of the tokenizer regex, one capture group per token class, in this
/// order: comment, word, number, operator, delimiter, line break.
///
/// The operator class includes `/` so that division, which `Operator::parse`
/// already understands, is actually tokenized instead of silently skipped.
const TOKEN_REGEX_SRC: &str = r"(#.*)|([A-Za-z_]+)|(\d*\.?\d+)|([+\-*/=])|([(){}])|(\n)";
lazy_static::lazy_static! {
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
}
/// creates a vector of tokens from the specified str.
pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
let mut tokens = vec![];
pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
let mut tokens = VecDeque::new();
for cap in TOKEN_REGEX.captures_iter(source) {
for (i, group) in cap.iter().enumerate() {
@ -30,11 +58,12 @@ pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
// if we have a match, save it as token
if let Some(mat) = group {
tokens.push(match i {
tokens.push_back(match i {
2 => Token::Word(mat.as_str()),
3 => Token::Number(mat.as_str()),
4 => Token::Operator,
4 => Token::Operator(Operator::parse(mat.as_str())),
5 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
6 => Token::LineBreak,
_ => panic!("Unknown match to tokenize: {}", mat.as_str())
});