just poosh
This commit is contained in:
parent
696bca6f4e
commit
d1d3fe73df
13
src/main.rs
13
src/main.rs
|
@ -3,15 +3,22 @@ mod token;
|
|||
mod parser;
|
||||
|
||||
use token::*;
|
||||
use parser::*;
|
||||
|
||||
fn main() {
|
||||
|
||||
|
||||
let source =
|
||||
r"
|
||||
main() {
|
||||
foo = 5 * 6 + 4
|
||||
|
||||
foo() = {
|
||||
c
|
||||
}
|
||||
|
||||
main()(x) {
|
||||
3 * 5 # comment
|
||||
}
|
||||
";
|
||||
|
||||
tokenize(source).iter().for_each(|t| print!("{:?}", t));
|
||||
parse(&mut tokenize(source));
|
||||
}
|
||||
|
|
|
@ -1,25 +1,124 @@
|
|||
use std::collections::{VecDeque, HashSet};
|
||||
|
||||
use crate::token::Token;
|
||||
|
||||
#[derive(Eq, Hash)]
|
||||
pub struct Function<'a> {
|
||||
/// name
|
||||
name: &'a str,
|
||||
/// parameter names
|
||||
params: Vec<&'a str>,
|
||||
/// wether this function returns a single value or not
|
||||
ret: bool,
|
||||
pub params: Option<Vec<&'a str>>,
|
||||
/// raw tokens
|
||||
pub raw: Option<VecDeque<Token<'a>>>
|
||||
}
|
||||
|
||||
impl<'a> Function<'a> {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
params: None,
|
||||
raw: None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for Function<'a> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.params == other.params
|
||||
}
|
||||
}
|
||||
|
||||
/// simple brace-counting parser to detect abstract token syntaxes
|
||||
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> HashSet<Function<'a>> {
|
||||
let mut funcs = HashSet::new();
|
||||
|
||||
let mut name = None;
|
||||
let mut cur_fun = Function::new();
|
||||
|
||||
let mut assigned = false;
|
||||
let mut brace_cnt = 0;
|
||||
let mut parent_cnt = 0;
|
||||
|
||||
while let Some(top) = tokens.pop_front() {
|
||||
|
||||
match &top {
|
||||
crate::Token::Operator(op) => {
|
||||
match op {
|
||||
crate::Operator::Assign => if cur_fun.raw.is_none() {
|
||||
assigned = true;
|
||||
cur_fun.raw = Some(VecDeque::new());
|
||||
continue;
|
||||
},
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
crate::Token::LineBreak => if name.is_some() && cur_fun.raw.is_some() && assigned {
|
||||
funcs.insert(cur_fun);
|
||||
cur_fun = Function::new();
|
||||
continue;
|
||||
}
|
||||
crate::Token::Delemiter(char) => {
|
||||
match char {
|
||||
|
||||
'{' => {
|
||||
brace_cnt += 1;
|
||||
if brace_cnt == 1 {
|
||||
// start a new body
|
||||
cur_fun.raw = Some(VecDeque::new());
|
||||
assigned = false;
|
||||
continue;
|
||||
}
|
||||
},
|
||||
'}' => {
|
||||
brace_cnt -= 1;
|
||||
|
||||
// we have a full body!
|
||||
if brace_cnt == 0 {
|
||||
funcs.insert(cur_fun);
|
||||
cur_fun = Function::new();
|
||||
continue;
|
||||
}
|
||||
},
|
||||
|
||||
'(' => if cur_fun.raw.is_none() {
|
||||
parent_cnt += 1;
|
||||
if parent_cnt == 1 {
|
||||
// start a new arg list
|
||||
cur_fun.params = Some(Vec::new());
|
||||
continue;
|
||||
}
|
||||
},
|
||||
')' => if cur_body.is_none() {
|
||||
parent_cnt -= 1;
|
||||
|
||||
// we have a full body!
|
||||
if parent_cnt == 0 {
|
||||
funcs.insert(cur_fun);
|
||||
cur_fun = Function::new();
|
||||
continue;
|
||||
}
|
||||
},
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if let Some(body) = &mut cur_body {
|
||||
body.push_back(top);
|
||||
} else if let Some(args) = &mut cur_args {
|
||||
match &top {
|
||||
Token::Word(text) => args.push(text),
|
||||
_ => panic!("Argument in list is not a word")
|
||||
}
|
||||
} else {
|
||||
body.push_back(top)
|
||||
}
|
||||
}
|
||||
|
||||
funcs
|
||||
}
|
||||
|
||||
/// reorder and organize a listing of instructions to a RPN based format:
|
||||
/// any program is made out of functions.
|
||||
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
|
||||
/// ```python
|
||||
/// foo(a) = {
|
||||
/// # function
|
||||
/// }
|
||||
/// ```
|
||||
pub fn parse<'a>(tokens: &Vec<crate::Token<'a>>) -> Vec<Function<'a>> {
|
||||
let mut functions = vec![];
|
||||
|
||||
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) {
|
||||
|
||||
|
||||
functions
|
||||
}
|
|
@ -1,23 +1,51 @@
|
|||
use std::collections::{VecDeque};
|
||||
|
||||
/// Operators the tokenizer can produce.
#[derive(Debug, Hash, PartialEq, Eq)]
pub enum Operator {
    /// `=`
    Assign,

    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
}

impl Operator {
    /// Maps the textual form of an operator to its [`Operator`] variant.
    ///
    /// # Panics
    ///
    /// Panics with `"Unspecified operator"` when `str` is not one of
    /// `=`, `+`, `-`, `*` or `/`.
    pub fn parse(str: &str) -> Self {
        match str {
            "=" => Operator::Assign,

            "+" => Operator::Add,
            "-" => Operator::Sub,
            "*" => Operator::Mul,
            "/" => Operator::Div,

            _ => panic!("Unspecified operator"),
        }
    }
}
|
||||
|
||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||
/// A token represents a basic building block for source code.
|
||||
/// They give a meaning to patterns of chars allowing to interpret them.
|
||||
pub enum Token<'a> {
|
||||
// base tokens that can simply be split to from raw source code
|
||||
Word(&'a str),
|
||||
Delemiter(char),
|
||||
Operator,
|
||||
Operator(Operator),
|
||||
Number(&'a str),
|
||||
LineBreak
|
||||
}
|
||||
|
||||
/// Pattern used to split source text into tokens, one capture group per
/// token class, in this order:
///
/// 1. `#` comments up to the end of the line
/// 2. words (letters and underscores)
/// 3. numbers (optional integer part, optional `.`, at least one digit)
/// 4. operators `+ - * / =`
/// 5. delimiters `( ) { }`
/// 6. line breaks
///
/// The group order is significant: the tokenizer selects the token kind by
/// capture-group index.
const TOKEN_REGEX_SRC: &str = r"(#.*)|([A-Za-z_]+)|(\d*\.?\d+)|([+\-*/=])|([(){}])|(\n)";
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
|
||||
}
|
||||
|
||||
/// creates a vector of tokens from the specified str.
|
||||
pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
|
||||
let mut tokens = vec![];
|
||||
pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
|
||||
let mut tokens = VecDeque::new();
|
||||
|
||||
for cap in TOKEN_REGEX.captures_iter(source) {
|
||||
for (i, group) in cap.iter().enumerate() {
|
||||
|
@ -30,11 +58,12 @@ pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
|
|||
|
||||
// if we have a match, save it as token
|
||||
if let Some(mat) = group {
|
||||
tokens.push(match i {
|
||||
tokens.push_back(match i {
|
||||
2 => Token::Word(mat.as_str()),
|
||||
3 => Token::Number(mat.as_str()),
|
||||
4 => Token::Operator,
|
||||
4 => Token::Operator(Operator::parse(mat.as_str())),
|
||||
5 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
|
||||
6 => Token::LineBreak,
|
||||
|
||||
_ => panic!("Unknown match to tokenize: {}", mat.as_str())
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue