From 746186ee654b25b10db120461fd4b2a0e612d002 Mon Sep 17 00:00:00 2001
From: Lloyd
Date: Fri, 29 Aug 2025 02:30:08 +1000
Subject: [PATCH] =?UTF-8?q?=F0=9F=99=83?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore       |   1 +
 Cargo.lock       |   7 ++
 Cargo.toml       |   4 ++
 src/main.rs      |  16 ++++
 src/parser.rs    | 174 +++++++++++++++++++++++++++++++++++++++++++++++
 src/tokenizer.rs | 135 ++++++++++++++++++++++++++++++++++
 6 files changed, 337 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/main.rs
 create mode 100644 src/parser.rs
 create mode 100644 src/tokenizer.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..0718eb3
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "lead-rust"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..09c5a02
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,4 @@
+[package]
+name = "lead-rust"
+version = "0.1.0"
+edition = "2024"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..0c41f55
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,16 @@
+mod parser;
+mod tokenizer;
+
+use crate::parser::*;
+use crate::tokenizer::*;
+use std::io;
+
+fn main() {
+    let mut input = String::new();
+    io::stdin().read_line(&mut input).expect("Expected input.");
+
+    let mut t = Tokenizer::new(&input).unwrap();
+    println!("{:?}", t.tokens);
+    let ast = parser::parse(&mut t).unwrap();
+    println!("{}", ast.pretty_tree());
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..6f1bc2a
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,174 @@
+use crate::tokenizer::*;
+use std::fmt;
+
+#[derive(Debug, PartialEq)]
+pub enum PrefixOp {}
+
+pub trait Precedence {
+    fn prec(&self) -> u8;
+}
+
+#[derive(Debug, PartialEq)]
+pub enum InfixOp {
+    MUL,
+    DIV,
+    ADD,
+    SUB,
+}
+
+impl Precedence for InfixOp {
+    // Higher binds tighter: * and / above + and -.
+    fn prec(&self) -> u8 {
+        match self {
+            InfixOp::MUL | InfixOp::DIV => 2,
+            InfixOp::ADD | InfixOp::SUB => 1,
+        }
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum Expr {
+    Literal(Literal),
+    Group(Box<Expr>),
+    Prefix {
+        op: PrefixOp,
+        expr: Box<Expr>,
+    },
+    Infix {
+        op: InfixOp,
+        lhs: Box<Expr>,
+        rhs: Box<Expr>,
+    },
+}
+
+impl fmt::Display for Expr {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Expr::Literal(lit) => write!(f, "{lit:?}"),
+            Expr::Group(expr) => write!(f, "({expr})"),
+            Expr::Prefix { op, expr } => write!(f, "({op:?} {expr})"),
+            Expr::Infix { op, lhs, rhs } => write!(f, "({lhs} {op:?} {rhs})"),
+        }
+    }
+}
+
+impl Expr {
+    pub fn pretty(&self, indent: usize) -> String {
+        let pad = "  ".repeat(indent);
+        match self {
+            Expr::Literal(lit) => format!("{pad}Literal({lit:?})"),
+            Expr::Group(expr) => format!("{pad}Group(\n{}\n{pad})", expr.pretty(indent + 1)),
+            Expr::Prefix { op, expr } => {
+                format!("{pad}Prefix({op:?}\n{}\n{pad})", expr.pretty(indent + 1))
+            }
+            Expr::Infix { op, lhs, rhs } => format!(
+                "{pad}Infix({op:?}\n{}\n{}\n{pad})",
+                lhs.pretty(indent + 1),
+                rhs.pretty(indent + 1)
+            ),
+        }
+    }
+
+    pub fn pretty_tree(&self) -> String {
+        // entry point for users — root printed without └──
+        let mut result = String::new();
+        result.push_str(&format!("{}\n", self.node_name()));
+        result.push_str(&self.pretty_subtree("", true));
+        result
+    }
+
+    fn pretty_subtree(&self, prefix: &str, is_tail: bool) -> String {
+        let mut result = String::new();
+        let new_prefix = if is_tail {
+            format!("{}    ", prefix)
+        } else {
+            format!("{}│   ", prefix)
+        };
+
+        match self {
+            Expr::Literal(_) => {}
+            Expr::Group(expr) => {
+                result.push_str(&expr.pretty_branch(&new_prefix, true));
+            }
+            Expr::Prefix { expr, .. } => {
+                result.push_str(&expr.pretty_branch(&new_prefix, true));
+            }
+            Expr::Infix { lhs, rhs, .. } => {
+                result.push_str(&lhs.pretty_branch(&new_prefix, false));
+                result.push_str(&rhs.pretty_branch(&new_prefix, true));
+            }
+        }
+        result
+    }
+
+    fn pretty_branch(&self, prefix: &str, is_tail: bool) -> String {
+        let mut result = String::new();
+        let branch = if is_tail { "└── " } else { "├── " };
+        result.push_str(&format!("{}{}{}\n", prefix, branch, self.node_name()));
+        result.push_str(&self.pretty_subtree(prefix, is_tail));
+        result
+    }
+
+    fn node_name(&self) -> String {
+        match self {
+            Expr::Literal(lit) => format!("Literal({:?})", lit),
+            Expr::Group(_) => "Group".to_string(),
+            Expr::Prefix { op, .. } => format!("Prefix({:?})", op),
+            Expr::Infix { op, .. } => format!("Infix({:?})", op),
+        }
+    }
+}
+
+pub fn parse(input: &mut Tokenizer) -> Result<Expr, String> {
+    _parse(input, 0)
+}
+
+// Precedence-climbing parser. `min_prec` is the lowest operator
+// precedence this call is allowed to consume.
+pub fn _parse(input: &mut Tokenizer, min_prec: u8) -> Result<Expr, String> {
+    let mut lhs = match input.next() {
+        Token::Literal(it) => Expr::Literal(it),
+        // A parenthesised sub-expression restarts precedence at 0.
+        Token::Paren('(') => {
+            let expr = _parse(input, 0)?;
+            match input.next() {
+                Token::Paren(')') => Expr::Group(Box::new(expr)),
+                it => return Err(format!("Parse error: expected ')', got {:?}.", it)),
+            }
+        }
+        it => return Err(format!("Parse error: did not expect token {:?}.", it)),
+    };
+
+    loop {
+        let op = match input.peek() {
+            Token::Eof => break,
+            // Leave the ')' for the caller that consumed the '('.
+            Token::Paren(')') => break,
+            Token::Operator(op) => match op {
+                '+' => InfixOp::ADD,
+                '-' => InfixOp::SUB,
+                '*' => InfixOp::MUL,
+                '/' => InfixOp::DIV,
+                it => return Err(format!("Parse error: do not know operator {:?}.", it)),
+            },
+            it => return Err(format!("Parse error: did not expect token {:?}.", it)),
+        };
+
+        if op.prec() < min_prec {
+            break;
+        }
+
+        input.next();
+        lhs = {
+            // prec() + 1 makes equal-precedence operators left-associative,
+            // so 1-2-3 parses as (1-2)-3 rather than 1-(2-3).
+            let rhs = _parse(input, op.prec() + 1)?;
+            Expr::Infix {
+                op,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            }
+        }
+    }
+
+    Ok(lhs)
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
new file mode 100644
index 0000000..600cdff
--- /dev/null
+++ b/src/tokenizer.rs
@@ -0,0 +1,135 @@
+#[derive(Debug, Clone, PartialEq)]
+pub enum Literal {
+    Integer(i64),
+    Double(f64),
+    Boolean(bool),
+    String(String),
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token {
+    Identifier(String), // Could be a function
+    Literal(Literal),
+    Operator(char),
+    Paren(char),
+    Eof,
+}
+
+pub struct Tokenizer {
+    pub tokens: Vec<Token>,
+}
+
+impl Tokenizer {
+    pub fn new(input: &str) -> Result<Tokenizer, String> {
+        let mut tokens = Vec::new();
+        let mut chars = input.chars().peekable();
+
+        while let Some(&c) = chars.peek() {
+            if c.is_whitespace() {
+                chars.next();
+            } else if c.is_ascii_alphabetic() {
+                // parse identifier
+                let mut ident = String::new();
+                while let Some(&ch) = chars.peek() {
+                    if ch.is_ascii_alphanumeric() || ch == '_' {
+                        ident.push(ch);
+                        chars.next();
+                    } else {
+                        break;
+                    }
+                }
+                tokens.push(Token::Identifier(ident));
+            } else if c.is_ascii_digit() {
+                // parse number
+                let mut number = String::new();
+                let mut is_decimal = false;
+
+                while let Some(&ch) = chars.peek() {
+                    if ch.is_ascii_digit() {
+                        number.push(ch);
+                        chars.next();
+                    } else if ch == '.' && !is_decimal {
+                        is_decimal = true;
+                        number.push(ch);
+                        chars.next();
+                    } else {
+                        break;
+                    }
+                }
+                if is_decimal {
+                    tokens.push(Token::Literal(Literal::Double(number.parse().unwrap())))
+                } else {
+                    tokens.push(Token::Literal(Literal::Integer(number.parse().unwrap())))
+                };
+            } else if c == '"' || c == '\'' {
+                // parse string literal
+                let mut string = String::new();
+
+                let quote = c;
+                let mut escapes = 0;
+                chars.next(); // consume opening quote
+
+                while let Some(&ch) = chars.peek() {
+                    chars.next();
+                    if ch == quote && escapes % 2 == 0 {
+                        break;
+                    }
+
+                    if ch == '\\' {
+                        escapes += 1;
+                    } else {
+                        escapes = 0;
+                    }
+
+                    string.push(ch);
+                }
+                tokens.push(Token::Literal(Literal::String(string)));
+            } else if "+-*/^".contains(c) {
+                tokens.push(Token::Operator(c));
+                chars.next();
+            } else if "()".contains(c) {
+                tokens.push(Token::Paren(c));
+                chars.next();
+            } else {
+                return Err(format!("Encountered unknown token char: {c}"));
+            }
+        }
+
+        tokens.reverse(); // Since we want FIFO and next + peek are implemented as LIFO
+        Ok(Tokenizer { tokens })
+    }
+
+    pub fn next(&mut self) -> Token {
+        self.tokens.pop().unwrap_or(Token::Eof)
+    }
+    pub fn peek(&mut self) -> Token {
+        self.tokens.last().cloned().unwrap_or(Token::Eof)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenizer() {
+        let raw = "hello hello 1.23 this 5 (1+2)";
+        let expected: Vec<Token> = vec![
+            Token::Identifier("hello".to_string()),
+            Token::Identifier("hello".to_string()),
+            Token::Literal(Literal::Double(1.23)),
+            Token::Identifier("this".to_string()),
+            Token::Literal(Literal::Integer(5)),
+            Token::Paren('('),
+            Token::Literal(Literal::Integer(1)),
+            Token::Operator('+'),
+            Token::Literal(Literal::Integer(2)),
+            Token::Paren(')'),
+        ];
+        // `tokens` is stored reversed so next/peek can pop from the back;
+        // un-reverse before comparing against the source-order stream.
+        let mut t = Tokenizer::new(&raw).unwrap();
+        t.tokens.reverse();
+        assert_eq!(t.tokens, expected);
+    }
+}