added tokenizer
parent 31858f5013
commit 696bca6f4e
.gitignore
@ -0,0 +1 @@
/target
Cargo.lock
@ -0,0 +1,49 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "Yard"
version = "0.1.0"
dependencies = [
 "lazy_static",
 "regex",
]

[[package]]
name = "aho-corasick"
version = "0.7.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
dependencies = [
 "memchr",
]

[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"

[[package]]
name = "regex"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.6.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
Cargo.toml
@ -0,0 +1,10 @@
[package]
name = "Yard"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
regex = "*"
lazy_static = "1.4.0"
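A side note on the manifest, not part of the commit: Cargo.lock already pins regex 1.6.0, so a bounded requirement would make fresh checkouts more reproducible than the wildcard, e.g.:

regex = "1.6"   # hypothetical tightening; "*" accepts any future major version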
src/main.rs
@ -0,0 +1,17 @@

mod token;
mod parser;

use token::*;

fn main() {

    let source =
        r"
main() {
    3 * 5 # comment
}
";

    tokenize(source).iter().for_each(|t| print!("{:?}", t));
}
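For reference, with the tokenizer from src/token.rs below, this program should print the sample source's tokens back-to-back, roughly:

Word("main")Delimiter('(')Delimiter(')')Delimiter('{')Number("3")OperatorNumber("5")Delimiter('}')

The `# comment` is captured by the comment group and never emitted as a token.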
src/parser.rs
@ -0,0 +1,25 @@

pub struct Function<'a> {
    /// name
    name: &'a str,
    /// parameter names
    params: Vec<&'a str>,
    /// whether this function returns a single value or not
    ret: bool,
}

/// Reorders and organizes a listing of instructions into an RPN-based format:
/// any program is made out of functions.
/// A function has a name followed by an optional parameter list, followed by an optional equal sign and block.
/// ```python
/// foo(a) = {
///     # function
/// }
/// ```
pub fn parse<'a>(tokens: &[crate::Token<'a>]) -> Vec<Function<'a>> {
    let mut functions = vec![];

    // TODO: build Function entries from the token stream

    functions
}
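`parse` is still an empty stub in this commit. Given the crate name and the RPN goal stated in the doc comment, here is a minimal, self-contained sketch of the shunting-yard reordering it is presumably heading toward (hypothetical code, not part of the commit; it works on single-digit operands and assumes '*' binds tighter than '+' and '-'):

/// Hypothetical sketch: reorder an infix expression into RPN (shunting yard).
fn shunting_yard(input: &str) -> String {
    // precedence table for the assumed operator set
    let prec = |c: char| match c {
        '+' | '-' => 1,
        '*' => 2,
        _ => 0,
    };
    let mut output = String::new();
    let mut ops: Vec<char> = Vec::new();

    for c in input.chars().filter(|c| !c.is_whitespace()) {
        match c {
            '0'..='9' => output.push(c), // operands go straight to the output
            '(' => ops.push(c),
            ')' => {
                // pop operators back to the matching '('
                while let Some(op) = ops.pop() {
                    if op == '(' { break; }
                    output.push(op);
                }
            }
            _ => {
                // pop operators of equal or higher precedence first
                while let Some(&top) = ops.last() {
                    if top == '(' || prec(top) < prec(c) { break; }
                    output.push(ops.pop().unwrap());
                }
                ops.push(c);
            }
        }
    }
    // flush the remaining operators
    while let Some(op) = ops.pop() {
        output.push(op);
    }
    output
}

For example, shunting_yard("3 + 4 * 2") yields "342*+", and shunting_yard("(3 + 4) * 2") yields "34+2*".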
src/token.rs
@ -0,0 +1,46 @@

#[derive(Debug)]
/// A token represents a basic building block of source code.
/// Tokens give meaning to patterns of chars, allowing them to be interpreted.
pub enum Token<'a> {
    Word(&'a str),
    Delimiter(char),
    Operator,
    Number(&'a str),
}

const TOKEN_REGEX_SRC: &str = r"(#.*)|([A-Za-z_]+)|(\d*\.?\d+)|([+\-*])|([(){}])";

lazy_static::lazy_static! {
    static ref TOKEN_REGEX: regex::Regex = regex::Regex::new(TOKEN_REGEX_SRC).unwrap();
}

/// Creates a vector of tokens from the specified str.
pub fn tokenize<'a>(source: &'a str) -> Vec<Token<'a>> {
    let mut tokens = vec![];

    for cap in TOKEN_REGEX.captures_iter(source) {
        for (i, group) in cap.iter().enumerate() {

            // skip group 0 (the entire match)
            // and group 1 (comments)
            if i <= 1 {
                continue;
            }

            // if a subgroup matched, record it as a token
            if let Some(mat) = group {
                tokens.push(match i {
                    2 => Token::Word(mat.as_str()),
                    3 => Token::Number(mat.as_str()),
                    4 => Token::Operator,
                    5 => Token::Delimiter(mat.as_str().chars().next().unwrap()),

                    _ => panic!("Unknown match to tokenize: {}", mat.as_str()),
                });
            }
        }
    }

    tokens
}
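A quick sanity check for the tokenizer above (hypothetical test module, not part of the commit):

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn skips_comments_and_splits_tokens() {
        let tokens = tokenize("3 * 5 # comment");
        // expected: Number("3"), Operator, Number("5"); the comment is dropped
        assert_eq!(tokens.len(), 3);
        assert!(matches!(tokens[0], Token::Number("3")));
        assert!(matches!(tokens[1], Token::Operator));
        assert!(matches!(tokens[2], Token::Number("5")));
    }
}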