This commit is contained in:
2025-09-07 03:48:48 +10:00
parent 2f3f853134
commit 8d03316b1b
63 changed files with 1941 additions and 420 deletions

View File

@@ -1,7 +1,9 @@
#[derive(Debug, Clone, PartialEq)]
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum Literal {
Integer(i64),
Double(f64),
Number(f64),
Boolean(bool),
String(String),
}
@@ -11,7 +13,8 @@ pub enum Token {
Identifier(String), // Could be a function
Literal(Literal),
Operator(char),
Paren(char),
OpenParen,
CloseParen,
Comma,
Eof,
}
@@ -39,29 +42,38 @@ impl Tokenizer {
break;
}
}
tokens.push(Token::Identifier(ident));
let res = match ident.as_str() {
"true" => Token::Literal(Literal::Boolean(true)),
"false" => Token::Literal(Literal::Boolean(false)),
it => Token::Identifier(it.into()),
};
tokens.push(res);
} else if c.is_ascii_digit() {
// parse number
let mut number = String::new();
let mut is_decimal = false;
let mut is_exp = false;
while let Some(&ch) = chars.peek() {
if ch.is_ascii_digit() {
number.push(ch);
chars.next();
} else if ch == '.' && !is_decimal {
} else if ch == '.' && !is_decimal && !is_exp {
is_decimal = true;
number.push(ch);
chars.next();
} else if ch == 'e' && !is_decimal && !is_exp {
is_exp = true;
number.push(ch);
chars.next();
} else {
break;
}
}
if is_decimal {
tokens.push(Token::Literal(Literal::Double(number.parse().unwrap())))
} else {
tokens.push(Token::Literal(Literal::Integer(number.parse().unwrap())))
};
// TODO: REMOVE UNWRAP
tokens.push(Token::Literal(Literal::Number(number.parse().unwrap())));
} else if c == '"' || c == '\'' {
// parse string literal
let mut string = String::new();
@@ -89,7 +101,11 @@ impl Tokenizer {
tokens.push(Token::Operator(c));
chars.next();
} else if "()".contains(c) {
tokens.push(Token::Paren(c));
if c == '(' {
tokens.push(Token::OpenParen);
} else {
tokens.push(Token::CloseParen);
}
chars.next();
} else if c == ',' {
tokens.push(Token::Comma);
@@ -116,20 +132,140 @@ mod tests {
use super::*;
#[test]
fn test_tokenizer() {
let raw = "hello hello 1.23 this 5 (1+2)";
let expected: Vec<Token> = vec![
Token::Identifier("hello".to_string()),
Token::Identifier("hello".to_string()),
Token::Literal(Literal::Double(1.23)),
Token::Identifier("this".to_string()),
Token::Literal(Literal::Integer(5)),
Token::Paren('('),
Token::Literal(Literal::Integer(1)),
fn test_single_token() {
assert_eq!(
Tokenizer::new("1").unwrap().tokens,
Vec::from([Token::Literal(Literal::Number(1.0))])
);
assert_eq!(
Tokenizer::new("2.0").unwrap().tokens,
Vec::from([Token::Literal(Literal::Number(2.0))])
);
assert_eq!(
Tokenizer::new("\"hello\"").unwrap().tokens,
Vec::from([Token::Literal(Literal::String("hello".into()))])
);
assert_eq!(
Tokenizer::new("\'hello\'").unwrap().tokens,
Vec::from([Token::Literal(Literal::String("hello".into()))])
);
assert_eq!(
Tokenizer::new("hello").unwrap().tokens,
Vec::from([Token::Identifier("hello".into())])
);
assert_eq!(
Tokenizer::new("+").unwrap().tokens,
Vec::from([Token::Operator('+')])
);
assert_eq!(
Tokenizer::new(",").unwrap().tokens,
Vec::from([Token::Comma])
);
assert_eq!(
Tokenizer::new(")").unwrap().tokens,
Vec::from([Token::CloseParen])
);
assert_eq!(
Tokenizer::new("(").unwrap().tokens,
Vec::from([Token::OpenParen])
);
}
#[test]
fn test_token_punctuation() {
let mut exp = Vec::from([
Token::Comma,
Token::CloseParen,
Token::Comma,
Token::OpenParen,
]);
exp.reverse();
assert_eq!(Tokenizer::new(", ) , (").unwrap().tokens, exp);
}
#[test]
fn test_token_operators() {
let mut exp = Vec::from([
Token::Operator('+'),
Token::Literal(Literal::Integer(2)),
Token::Paren(')'),
Token::Operator('-'),
Token::Operator('*'),
Token::Operator('/'),
Token::Operator('^'),
Token::Operator('!'),
Token::Operator('%'),
Token::Operator('&'),
Token::Operator('|'),
]);
exp.reverse();
assert_eq!(Tokenizer::new("+-*/^!%&|").unwrap().tokens, exp);
}
#[test]
fn test_token_string() {
let raw = "\"hello\" \'world\'";
let mut expected: Vec<Token> = vec![
Token::Literal(Literal::String("hello".into())),
Token::Literal(Literal::String("world".into())),
];
expected.reverse();
let t = Tokenizer::new(&raw).unwrap();
assert_eq!(t.tokens, expected);
}
#[test]
fn test_token_number() {
let raw = "123 4.56";
let mut expected: Vec<Token> = vec![
Token::Literal(Literal::Number(123.0)),
Token::Literal(Literal::Number(4.56)),
];
expected.reverse();
let t = Tokenizer::new(&raw).unwrap();
assert_eq!(t.tokens, expected);
}
#[test]
fn test_token_boolean() {
let raw = "false true";
let mut expected: Vec<Token> = vec![
Token::Literal(Literal::Boolean(false)),
Token::Literal(Literal::Boolean(true)),
];
expected.reverse();
let t = Tokenizer::new(&raw).unwrap();
assert_eq!(t.tokens, expected);
}
#[test]
fn test_token_identifier() {
let raw = "hello test";
let mut expected: Vec<Token> = vec![
Token::Identifier("hello".to_string()),
Token::Identifier("test".to_string()),
];
expected.reverse();
let t = Tokenizer::new(&raw).unwrap();
assert_eq!(t.tokens, expected);
}
#[test]
fn test_token_mix() {
let raw = "hello test 1.23 this 5 (1+2)";
let mut expected: Vec<Token> = vec![
Token::Identifier("hello".to_string()),
Token::Identifier("test".to_string()),
Token::Literal(Literal::Number(1.23)),
Token::Identifier("this".to_string()),
Token::Literal(Literal::Number(5.0)),
Token::OpenParen,
Token::Literal(Literal::Number(1.0)),
Token::Operator('+'),
Token::Literal(Literal::Number(2.0)),
Token::CloseParen,
];
expected.reverse();
let t = Tokenizer::new(&raw).unwrap();
assert_eq!(t.tokens, expected);
}