This commit is contained in:
2025-08-29 02:30:08 +10:00
commit 746186ee65
6 changed files with 319 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "lead-rust"
version = "0.1.0"

4
Cargo.toml Normal file
View File

@@ -0,0 +1,4 @@
[package]
name = "lead-rust"
version = "0.1.0"
edition = "2024"

16
src/main.rs Normal file
View File

@@ -0,0 +1,16 @@
mod parser;
mod tokenizer;
use crate::parser::*;
use crate::tokenizer::*;
use std::io;
fn main() {
    // Read one line of input from stdin.
    let mut input = String::new();
    if let Err(err) = io::stdin().read_line(&mut input) {
        eprintln!("Failed to read input: {err}");
        std::process::exit(1);
    }
    // Tokenize, echo the token stream, then parse and pretty-print the AST.
    // Errors are reported on stderr with a non-zero exit instead of panicking.
    let mut tokenizer = match Tokenizer::new(&input) {
        Ok(t) => t,
        Err(err) => {
            eprintln!("{err}");
            std::process::exit(1);
        }
    };
    println!("{:?}", tokenizer.tokens);
    match parser::parse(&mut tokenizer) {
        Ok(ast) => println!("{}", ast.pretty_tree()),
        Err(err) => {
            eprintln!("{err}");
            std::process::exit(1);
        }
    }
}

159
src/parser.rs Normal file
View File

@@ -0,0 +1,159 @@
use crate::tokenizer::*;
use std::fmt;
/// Prefix (unary) operators. No variants are defined yet.
#[derive(Debug, PartialEq)]
pub enum PrefixOp {}

/// Binding power used by the parser: a higher value binds tighter.
pub trait Precedence {
    fn prec(&self) -> u8;
}

/// Binary (infix) operators.
#[derive(Debug, PartialEq)]
pub enum InfixOp {
    MUL,
    DIV,
    ADD,
    SUB,
}

impl Precedence for InfixOp {
    /// Multiplicative operators bind tighter than additive ones.
    fn prec(&self) -> u8 {
        match self {
            InfixOp::ADD | InfixOp::SUB => 1,
            InfixOp::MUL | InfixOp::DIV => 2,
        }
    }
}
/// A node of the expression tree produced by the parser.
#[derive(Debug, PartialEq)]
pub enum Expr {
    // A leaf literal value (integer, double, boolean, or string — see
    // `tokenizer::Literal`).
    Literal(Literal),
    // A grouped (parenthesised) sub-expression.
    Group(Box<Expr>),
    // A unary operator applied to one operand.
    Prefix {
        op: PrefixOp,
        expr: Box<Expr>,
    },
    // A binary operator with left- and right-hand operands.
    Infix {
        op: InfixOp,
        lhs: Box<Expr>,
        rhs: Box<Expr>,
    },
}
impl fmt::Display for Expr {
    /// Render the expression fully parenthesised on one line; leaves and
    /// operators use their `Debug` form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let rendered = match self {
            Expr::Literal(lit) => format!("{:?}", lit),
            Expr::Group(inner) => format!("({})", inner),
            Expr::Prefix { op, expr } => format!("({:?} {})", op, expr),
            Expr::Infix { op, lhs, rhs } => format!("({} {:?} {})", lhs, op, rhs),
        };
        f.write_str(&rendered)
    }
}
impl Expr {
    /// Render the expression as an indented multi-line form, one node per
    /// line, with `indent` leading spaces per nesting level.
    pub fn pretty(&self, indent: usize) -> String {
        let pad = " ".repeat(indent);
        match self {
            Expr::Literal(lit) => format!("{pad}Literal({lit:?})"),
            Expr::Group(expr) => format!("{pad}Group(\n{}\n{pad})", expr.pretty(indent + 1)),
            Expr::Prefix { op, expr } => {
                format!("{pad}Prefix({op:?}\n{}\n{pad})", expr.pretty(indent + 1))
            }
            Expr::Infix { op, lhs, rhs } => format!(
                "{pad}Infix({op:?}\n{}\n{}\n{pad})",
                lhs.pretty(indent + 1),
                rhs.pretty(indent + 1)
            ),
        }
    }

    /// Render the expression as a box-drawing tree, e.g.
    ///
    /// ```text
    /// Infix(ADD)
    /// ├── Infix(MUL)
    /// │   ├── Literal(Integer(1))
    /// │   └── Literal(Integer(2))
    /// └── Literal(Integer(3))
    /// ```
    ///
    /// The root is printed without a branch glyph.
    pub fn pretty_tree(&self) -> String {
        // Children of the root start with an empty continuation prefix.
        let mut result = format!("{}\n", self.node_name());
        result.push_str(&self.pretty_subtree(""));
        result
    }

    /// Append every child of `self`, each line prefixed with
    /// `child_prefix` (the accumulated continuation rails).
    ///
    /// BUG FIX: the previous version extended the prefix by a single
    /// space for tail children and not at all for non-tail children, so
    /// nested branches printed at their parent's column and the `│` rail
    /// was never drawn. Continuations are now the standard 4-column
    /// `"    "` / `"│   "` strings, built in `pretty_branch`.
    fn pretty_subtree(&self, child_prefix: &str) -> String {
        let mut result = String::new();
        match self {
            Expr::Literal(_) => {} // leaf: nothing to append
            Expr::Group(expr) => {
                result.push_str(&expr.pretty_branch(child_prefix, true));
            }
            Expr::Prefix { expr, .. } => {
                result.push_str(&expr.pretty_branch(child_prefix, true));
            }
            Expr::Infix { lhs, rhs, .. } => {
                result.push_str(&lhs.pretty_branch(child_prefix, false));
                result.push_str(&rhs.pretty_branch(child_prefix, true));
            }
        }
        result
    }

    /// Print this node with its branch glyph, then recurse into its
    /// children. `is_tail` marks the last child of a parent: it gets
    /// `└── ` and a blank continuation, while earlier siblings get
    /// `├── ` and a `│` continuation so the vertical rail stays aligned.
    fn pretty_branch(&self, prefix: &str, is_tail: bool) -> String {
        let (branch, cont) = if is_tail {
            ("└── ", "    ")
        } else {
            ("├── ", "│   ")
        };
        let mut result = format!("{}{}{}\n", prefix, branch, self.node_name());
        result.push_str(&self.pretty_subtree(&format!("{prefix}{cont}")));
        result
    }

    /// Short single-line label for this node.
    fn node_name(&self) -> String {
        match self {
            Expr::Literal(lit) => format!("Literal({:?})", lit),
            Expr::Group(_) => "Group".to_string(),
            Expr::Prefix { op, .. } => format!("Prefix({:?})", op),
            Expr::Infix { op, .. } => format!("Infix({:?})", op),
        }
    }
}
/// Parse a complete expression from `input`.
///
/// Entry point for callers: starts the precedence-climbing parse with a
/// minimum binding power of 0 so every operator is accepted.
pub fn parse(input: &mut Tokenizer) -> Result<Expr, String> {
    _parse(input, 0)
}
/// Precedence-climbing parser: parse an expression whose operators all
/// bind at least as tightly as `min_prec`.
///
/// # Errors
/// Returns a parse-error message on an unexpected token, an unknown
/// operator, or an unclosed parenthesis.
pub fn _parse(input: &mut Tokenizer, min_prec: u8) -> Result<Expr, String> {
    // Primary: a literal, or a parenthesised sub-expression.
    let mut lhs = match input.next() {
        Token::Literal(it) => Expr::Literal(it),
        Token::Paren('(') => {
            let inner = _parse(input, 0)?;
            match input.next() {
                Token::Paren(')') => Expr::Group(Box::new(inner)),
                it => return Err(format!("Parse error: expected ')', got {:?}.", it)),
            }
        }
        it => return Err(format!("Parse error: did not expect token {:?}.", it)),
    };
    loop {
        let op = match input.peek() {
            Token::Eof => break,
            // A ')' ends the current sub-expression; the caller that
            // opened the group consumes it.
            Token::Paren(')') => break,
            Token::Operator(op) => match op {
                '+' => InfixOp::ADD,
                '-' => InfixOp::SUB,
                '*' => InfixOp::MUL,
                '/' => InfixOp::DIV,
                it => return Err(format!("Parse error: do not know operator {:?}.", it)),
            },
            it => return Err(format!("Parse error: did not expect token {:?}.", it)),
        };
        if op.prec() < min_prec {
            break;
        }
        input.next(); // consume the operator
        // `prec() + 1` makes operators left-associative: an equal-precedence
        // operator in the recursion breaks out and is handled by this loop,
        // so "1-2-3" parses as "(1-2)-3". Propagate errors with `?` instead
        // of unwrapping, so a bad rhs reports instead of panicking.
        let rhs = _parse(input, op.prec() + 1)?;
        lhs = Expr::Infix {
            op,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        };
    }
    Ok(lhs)
}

132
src/tokenizer.rs Normal file
View File

@@ -0,0 +1,132 @@
/// A literal value recognised by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    Integer(i64),
    Double(f64),
    Boolean(bool),
    String(String),
}

/// A single lexical token.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    Identifier(String), // Could be a function
    Literal(Literal),
    Operator(char),
    Paren(char),
    Eof,
}

/// Token stream. `tokens` is stored in REVERSE source order so that
/// `next`/`peek` can use `Vec::pop`/`last` (O(1)) while still yielding
/// tokens first-in-first-out.
pub struct Tokenizer {
    pub tokens: Vec<Token>,
}

impl Tokenizer {
    /// Tokenize `input` into a `Tokenizer`.
    ///
    /// # Errors
    /// Returns `Err` on an unknown character, an unterminated string
    /// literal, or a numeric literal that `i64`/`f64` cannot represent.
    pub fn new(input: &str) -> Result<Tokenizer, String> {
        let mut tokens = Vec::new();
        let mut chars = input.chars().peekable();
        while let Some(&c) = chars.peek() {
            if c.is_whitespace() {
                chars.next();
            } else if c.is_ascii_alphabetic() {
                // Identifier: leading letter, then letters/digits/underscores.
                let mut ident = String::new();
                while let Some(&ch) = chars.peek() {
                    if ch.is_ascii_alphanumeric() || ch == '_' {
                        ident.push(ch);
                        chars.next();
                    } else {
                        break;
                    }
                }
                tokens.push(Token::Identifier(ident));
            } else if c.is_ascii_digit() {
                // Number: digits with at most one decimal point.
                let mut number = String::new();
                let mut is_decimal = false;
                while let Some(&ch) = chars.peek() {
                    if ch.is_ascii_digit() {
                        number.push(ch);
                        chars.next();
                    } else if ch == '.' && !is_decimal {
                        is_decimal = true;
                        number.push(ch);
                        chars.next();
                    } else {
                        break;
                    }
                }
                // Report out-of-range numbers instead of panicking on unwrap.
                let token = if is_decimal {
                    let value: f64 = number
                        .parse()
                        .map_err(|e| format!("Invalid number {number:?}: {e}"))?;
                    Token::Literal(Literal::Double(value))
                } else {
                    let value: i64 = number
                        .parse()
                        .map_err(|e| format!("Invalid number {number:?}: {e}"))?;
                    Token::Literal(Literal::Integer(value))
                };
                tokens.push(token);
            } else if c == '"' || c == '\'' {
                // String literal, delimited by the same quote char on both
                // ends. A quote preceded by an odd number of backslashes is
                // escaped and does not terminate the string; escape
                // sequences are kept verbatim (the backslash is NOT
                // stripped).
                let quote = c;
                let mut string = String::new();
                let mut escapes = 0; // consecutive backslashes just seen
                let mut terminated = false;
                chars.next(); // consume the opening quote
                for ch in chars.by_ref() {
                    if ch == quote && escapes % 2 == 0 {
                        terminated = true;
                        break;
                    }
                    if ch == '\\' {
                        escapes += 1;
                    } else {
                        escapes = 0;
                    }
                    string.push(ch);
                }
                // Previously a string running to EOF was silently accepted
                // as truncated; make it a tokenizer error.
                if !terminated {
                    return Err(format!("Unterminated string literal: {quote}{string}"));
                }
                tokens.push(Token::Literal(Literal::String(string)));
            } else if "+-*/^".contains(c) {
                tokens.push(Token::Operator(c));
                chars.next();
            } else if "()".contains(c) {
                tokens.push(Token::Paren(c));
                chars.next();
            } else {
                return Err(format!("Encountered unknown token char: {c}"));
            }
        }
        tokens.reverse(); // FIFO order via Vec::pop (which is LIFO)
        Ok(Tokenizer { tokens })
    }

    /// Consume and return the next token, or `Eof` once exhausted.
    pub fn next(&mut self) -> Token {
        self.tokens.pop().unwrap_or(Token::Eof)
    }

    /// Return the next token without consuming it, or `Eof` once exhausted.
    pub fn peek(&mut self) -> Token {
        self.tokens.last().cloned().unwrap_or(Token::Eof)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_tokenizer() {
        // `Tokenizer::new` stores `tokens` in REVERSE source order (so
        // `next` can pop from the back). The old assertion compared the
        // raw reversed storage against a source-order list, which fails;
        // drain through `next()` to check FIFO order instead.
        let raw = "hello hello 1.23 this 5 (1+2)";
        let expected: Vec<Token> = vec![
            Token::Identifier("hello".to_string()),
            Token::Identifier("hello".to_string()),
            Token::Literal(Literal::Double(1.23)),
            Token::Identifier("this".to_string()),
            Token::Literal(Literal::Integer(5)),
            Token::Paren('('),
            Token::Literal(Literal::Integer(1)),
            Token::Operator('+'),
            Token::Literal(Literal::Integer(2)),
            Token::Paren(')'),
        ];
        let mut t = Tokenizer::new(raw).unwrap();
        for want in expected {
            assert_eq!(t.next(), want);
        }
        // The stream must then report end-of-input.
        assert_eq!(t.next(), Token::Eof);
    }
}