just poosh
This commit is contained in:
parent
696bca6f4e
commit
d1d3fe73df
13
src/main.rs
13
src/main.rs
|
@ -3,15 +3,22 @@ mod token;
|
||||||
mod parser;
|
mod parser;
|
||||||
|
|
||||||
use token::*;
|
use token::*;
|
||||||
|
use parser::*;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
|
||||||
let source =
|
let source =
|
||||||
r"
|
r"
|
||||||
main() {
|
foo = 5 * 6 + 4
|
||||||
|
|
||||||
|
foo() = {
|
||||||
|
c
|
||||||
|
}
|
||||||
|
|
||||||
|
main()(x) {
|
||||||
3 * 5 # comment
|
3 * 5 # comment
|
||||||
}
|
}
|
||||||
";
|
";
|
||||||
|
|
||||||
tokenize(source).iter().for_each(|t| print!("{:?}", t));
|
parse(&mut tokenize(source));
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,25 +1,124 @@
|
||||||
|
use std::collections::{VecDeque, HashSet};
|
||||||
|
|
||||||
|
use crate::token::Token;
|
||||||
|
|
||||||
|
#[derive(Eq, Hash)]
|
||||||
pub struct Function<'a> {
|
pub struct Function<'a> {
|
||||||
/// name
|
|
||||||
name: &'a str,
|
|
||||||
/// parameter names
|
/// parameter names
|
||||||
params: Vec<&'a str>,
|
pub params: Option<Vec<&'a str>>,
|
||||||
/// whether this function returns a single value or not
|
/// raw tokens
|
||||||
ret: bool,
|
pub raw: Option<VecDeque<Token<'a>>>
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Function<'a> {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
params: None,
|
||||||
|
raw: None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> PartialEq for Function<'a> {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
self.params == other.params
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// simple brace-counting parser to detect abstract token syntaxes
|
||||||
|
fn discover_functions<'a>(tokens: &mut VecDeque<crate::Token<'a>>) -> HashSet<Function<'a>> {
|
||||||
|
let mut funcs = HashSet::new();
|
||||||
|
|
||||||
|
let mut name = None;
|
||||||
|
let mut cur_fun = Function::new();
|
||||||
|
|
||||||
|
let mut assigned = false;
|
||||||
|
let mut brace_cnt = 0;
|
||||||
|
let mut parent_cnt = 0;
|
||||||
|
|
||||||
|
while let Some(top) = tokens.pop_front() {
|
||||||
|
|
||||||
|
match &top {
|
||||||
|
crate::Token::Operator(op) => {
|
||||||
|
match op {
|
||||||
|
crate::Operator::Assign => if cur_fun.raw.is_none() {
|
||||||
|
assigned = true;
|
||||||
|
cur_fun.raw = Some(VecDeque::new());
|
||||||
|
continue;
|
||||||
|
},
|
||||||
|
_ => ()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
crate::Token::LineBreak => if name.is_some() && cur_fun.raw.is_some() && assigned {
|
||||||
|
funcs.insert(cur_fun);
|
||||||
|
cur_fun = Function::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
crate::Token::Delemiter(char) => {
|
||||||
|
match char {
|
||||||
|
|
||||||
|
'{' => {
|
||||||
|
brace_cnt += 1;
|
||||||
|
if brace_cnt == 1 {
|
||||||
|
// start a new body
|
||||||
|
cur_fun.raw = Some(VecDeque::new());
|
||||||
|
assigned = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'}' => {
|
||||||
|
brace_cnt -= 1;
|
||||||
|
|
||||||
|
// we have a full body!
|
||||||
|
if brace_cnt == 0 {
|
||||||
|
funcs.insert(cur_fun);
|
||||||
|
cur_fun = Function::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
'(' => if cur_fun.raw.is_none() {
|
||||||
|
parent_cnt += 1;
|
||||||
|
if parent_cnt == 1 {
|
||||||
|
// start a new arg list
|
||||||
|
cur_fun.params = Some(Vec::new());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
')' => if cur_body.is_none() {
|
||||||
|
parent_cnt -= 1;
|
||||||
|
|
||||||
|
// we have a full body!
|
||||||
|
if parent_cnt == 0 {
|
||||||
|
funcs.insert(cur_fun);
|
||||||
|
cur_fun = Function::new();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => ()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(body) = &mut cur_body {
|
||||||
|
body.push_back(top);
|
||||||
|
} else if let Some(args) = &mut cur_args {
|
||||||
|
match &top {
|
||||||
|
Token::Word(text) => args.push(text),
|
||||||
|
_ => panic!("Argument in list is not a word")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
body.push_back(top)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
funcs
|
||||||
}
|
}
|
||||||
|
|
||||||
/// reorder and organize a listing of instructions to a RPN based format:
|
/// reorder and organize a listing of instructions to a RPN based format:
|
||||||
/// any program is made out of functions.
|
/// any program is made out of functions.
|
||||||
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
|
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
|
||||||
/// ```python
|
pub fn parse<'a>(tokens: &mut VecDeque<crate::Token<'a>>) {
|
||||||
/// foo(a) = {
|
|
||||||
/// # function
|
|
||||||
/// }
|
|
||||||
/// ```
|
|
||||||
pub fn parse<'a>(tokens: &Vec<crate::Token<'a>>) -> Vec<Function<'a>> {
|
|
||||||
let mut functions = vec![];
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
functions
|
|
||||||
}
|
}
|
|
@ -1,23 +1,51 @@
|
||||||
|
use std::collections::{VecDeque};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||||
|
pub enum Operator {
|
||||||
|
Assign,
|
||||||
|
|
||||||
|
Add,
|
||||||
|
Sub,
|
||||||
|
Mul,
|
||||||
|
Div
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Operator {
|
||||||
|
pub fn parse<'a>(str: &'a str) -> Self {
|
||||||
|
return match str {
|
||||||
|
"=" => Operator::Assign,
|
||||||
|
|
||||||
|
"+" => Operator::Add,
|
||||||
|
"-" => Operator::Sub,
|
||||||
|
"*" => Operator::Mul,
|
||||||
|
"/" => Operator::Div,
|
||||||
|
|
||||||
|
_ => panic!("Unspecified operator")
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||||
/// A token represents a basic building block for source code.
|
/// A token represents a basic building block for source code.
|
||||||
/// They give a meaning to patterns of chars allowing to interpret them.
|
/// They give a meaning to patterns of chars allowing to interpret them.
|
||||||
pub enum Token<'a> {
|
pub enum Token<'a> {
|
||||||
|
// base tokens that can simply be split from raw source code
|
||||||
Word(&'a str),
|
Word(&'a str),
|
||||||
Delemiter(char),
|
Delemiter(char),
|
||||||
Operator,
|
Operator(Operator),
|
||||||
Number(&'a str),
|
Number(&'a str),
|
||||||
|
LineBreak
|
||||||
}
|
}
|
||||||
|
|
||||||
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|([A-Za-z_]+)|(\d*\.?\d+)|([+\-*])|([(){}])";
|
const TOKEN_REGEX_SRC: &'static str = r"(#.*)|([A-Za-z_]+)|(\d*\.?\d+)|([+\-*=])|([(){}])|(\n)";
|
||||||
|
|
||||||
lazy_static::lazy_static! {
|
lazy_static::lazy_static! {
|
||||||
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
|
static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// creates a vector of tokens from the specified str.
|
/// creates a queue (`VecDeque`) of tokens from the specified str.
|
||||||
pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
|
pub fn tokenize<'a>(source: &'a str) -> VecDeque<Token<'a>> {
|
||||||
let mut tokens = vec![];
|
let mut tokens = VecDeque::new();
|
||||||
|
|
||||||
for cap in TOKEN_REGEX.captures_iter(source) {
|
for cap in TOKEN_REGEX.captures_iter(source) {
|
||||||
for (i, group) in cap.iter().enumerate() {
|
for (i, group) in cap.iter().enumerate() {
|
||||||
|
@ -30,11 +58,12 @@ pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
|
||||||
|
|
||||||
// if we have a match, save it as token
|
// if we have a match, save it as token
|
||||||
if let Some(mat) = group {
|
if let Some(mat) = group {
|
||||||
tokens.push(match i {
|
tokens.push_back(match i {
|
||||||
2 => Token::Word(mat.as_str()),
|
2 => Token::Word(mat.as_str()),
|
||||||
3 => Token::Number(mat.as_str()),
|
3 => Token::Number(mat.as_str()),
|
||||||
4 => Token::Operator,
|
4 => Token::Operator(Operator::parse(mat.as_str())),
|
||||||
5 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
|
5 => Token::Delemiter(mat.as_str().chars().nth(0).unwrap()),
|
||||||
|
6 => Token::LineBreak,
|
||||||
|
|
||||||
_ => panic!("Unknown match to tokenize: {}", mat.as_str())
|
_ => panic!("Unknown match to tokenize: {}", mat.as_str())
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue