This commit is contained in:
2025-08-30 00:37:36 +10:00
parent 73f2dccf4f
commit 3522b1acc2
4 changed files with 266 additions and 36 deletions

View File

@@ -2,10 +2,15 @@ use crate::tokenizer::*;
use std::fmt;
#[derive(Debug, PartialEq)]
pub enum PrefixOp {}
/// Unary operators that are written before their operand.
pub enum PrefixOp {
    /// Produced by the parser for a leading `+`.
    POS,
    /// Produced by the parser for a leading `-`.
    NEG,
    /// Produced by the parser for a leading `!`.
    NOT,
}
pub trait Precedence {
fn prec(&self) -> u8;
/// Unary operators that are written after their operand.
#[derive(Debug, PartialEq)]
pub enum PostfixOp {
    /// Produced by the parser for a trailing `%`.
    PERCENT,
}
#[derive(Debug, PartialEq)]
@@ -14,25 +19,27 @@ pub enum InfixOp {
DIV,
ADD,
SUB,
}
impl Precedence for InfixOp {
fn prec(&self) -> u8 {
match self {
InfixOp::MUL | InfixOp::DIV => 2,
InfixOp::ADD | InfixOp::SUB => 1,
}
}
AND,
OR,
}
#[derive(Debug, PartialEq)]
pub enum Expr {
Literal(Literal),
CellRef(String),
Function {
name: String,
args: Vec<Expr>,
},
Group(Box<Expr>),
Prefix {
op: PrefixOp,
expr: Box<Expr>,
},
Postfix {
op: PostfixOp,
expr: Box<Expr>,
},
Infix {
op: InfixOp,
lhs: Box<Expr>,
@@ -40,13 +47,46 @@ pub enum Expr {
},
}
// Ref: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
// Every operator has both a left and a right precedence so that operators of
// equal precedence associate the way you would expect and ties are broken predictably.
/// Operator precedence expressed as a pair of binding powers.
pub trait Precedence {
    /// Returns the `(left, right)` precedence of the operator.
    ///
    /// The parser compares the left value against its current minimum
    /// precedence to decide whether to consume the operator, and passes the
    /// right value as the new minimum when parsing the operand that follows.
    fn prec(&self) -> (u8, u8);
}
impl Precedence for InfixOp {
    /// Returns the `(left, right)` precedence pair of a binary operator.
    ///
    /// There are two tiers: `ADD`, `SUB` and `OR` bind loosely, while `MUL`,
    /// `DIV` and `AND` bind tightly. In each pair the right value is one
    /// higher than the left.
    fn prec(&self) -> (u8, u8) {
        const LOOSE: (u8, u8) = (1, 2);
        const TIGHT: (u8, u8) = (3, 4);

        // Kept exhaustive on purpose: a new variant must be given a tier.
        match self {
            InfixOp::ADD | InfixOp::SUB | InfixOp::OR => LOOSE,
            InfixOp::MUL | InfixOp::DIV | InfixOp::AND => TIGHT,
        }
    }
}
impl Precedence for PrefixOp {
    /// Returns the `(left, right)` precedence pair of a prefix operator.
    ///
    /// Every prefix operator shares the same pair. Only the right value is
    /// used by the parser, since a prefix operator has no left operand.
    fn prec(&self) -> (u8, u8) {
        (0, 5)
    }
}
impl Precedence for PostfixOp {
    /// Returns the `(left, right)` precedence pair of a postfix operator.
    ///
    /// Every postfix operator shares the same pair. Only the left value is
    /// used by the parser, since a postfix operator has no right operand.
    fn prec(&self) -> (u8, u8) {
        (6, 0)
    }
}
impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Expr::Literal(lit) => write!(f, "{lit:?}"),
Expr::Group(expr) => write!(f, "({expr})"),
Expr::Prefix { op, expr } => write!(f, "({op:?} {expr})"),
Expr::Postfix { op, expr } => write!(f, "({op:?} {expr})"),
Expr::Infix { op, lhs, rhs } => write!(f, "({lhs} {op:?} {rhs})"),
Expr::Function { name, args } => write!(f, "{name}({args:?})"),
Expr::CellRef(it) => write!(f, "CellRef({it})"),
}
}
}
@@ -70,16 +110,25 @@ impl Expr {
match self {
Expr::Literal(_) => {}
Expr::CellRef(_) => {}
Expr::Group(expr) => {
result.push_str(&expr.pretty_branch(&new_prefix, true));
}
Expr::Prefix { expr, .. } => {
result.push_str(&expr.pretty_branch(&new_prefix, true));
}
Expr::Postfix { expr, .. } => {
result.push_str(&expr.pretty_branch(&new_prefix, true));
}
Expr::Infix { lhs, rhs, .. } => {
result.push_str(&lhs.pretty_branch(&new_prefix, false));
result.push_str(&rhs.pretty_branch(&new_prefix, true));
}
Expr::Function { args, .. } => {
for (idx, arg) in args.iter().enumerate() {
result.push_str(&arg.pretty_branch(&new_prefix, idx == args.len() - 1));
}
}
}
result
}
@@ -97,46 +146,133 @@ impl Expr {
Expr::Literal(lit) => format!("Literal({:?})", lit),
Expr::Group(_) => "Group".to_string(),
Expr::Prefix { op, .. } => format!("Prefix({:?})", op),
Expr::Postfix { op, .. } => format!("Postfix({:?})", op),
Expr::Infix { op, .. } => format!("Infix({:?})", op),
Expr::Function { name, .. } => format!("Function({:?})", name),
Expr::CellRef(it) => format!("CellRef({:?})", it),
}
}
}
pub fn parse(input: &mut Tokenizer) -> Result<Expr, String> {
_parse(input, 0)
pub fn parse(input: &str) -> Result<Expr, String> {
let mut tokenizer = Tokenizer::new(input)?;
// println!("{:?}", tokenizer.tokens);
_parse(&mut tokenizer, 0)
}
pub fn _parse(input: &mut Tokenizer, min_prec: u8) -> Result<Expr, String> {
let mut lhs = match input.next() {
Token::Literal(it) => Expr::Literal(it),
Token::Identifier(id) if id == "true" => Expr::Literal(Literal::Boolean(true)),
Token::Identifier(id) if id == "false" => Expr::Literal(Literal::Boolean(false)),
Token::Paren('(') => {
let lhs = _parse(input, 0)?;
if input.next() != Token::Paren(')') {
return Err(format!("Parse error: expected closing paren."));
}
Expr::Group(Box::new(lhs))
}
Token::Operator(op) => {
let prefix_op = match op {
'+' => PrefixOp::POS,
'-' => PrefixOp::NEG,
'!' => PrefixOp::NOT,
it => return Err(format!("Parse error: unknown prefix operator {:?}.", it)),
};
let rhs = _parse(input, prefix_op.prec().1)?;
Expr::Prefix {
op: prefix_op,
expr: Box::new(rhs),
}
}
Token::Identifier(id) => match input.peek() {
Token::Paren('(') => {
input.next();
let mut args: Vec<Expr> = Vec::new();
loop {
let nxt = input.peek();
if nxt == Token::Paren(')') {
input.next();
break;
} else if nxt != Token::Comma && args.len() != 0 {
return Err(format!(
"Parse error: expected comma while parsing argument of function {:?}.",
id
));
}
if args.len() != 0 {
input.next(); // Skip comma
}
let arg = _parse(input, 0)?;
args.push(arg);
}
Expr::Function {
name: id,
args: args,
}
}
_ => Expr::CellRef(id),
},
it => return Err(format!("Parse error: did not expect token {:?}.", it)),
};
loop {
let op = match input.peek() {
Token::Eof => break,
Token::Operator(op) => match op {
// In the reference article this is a loop with match
// statement that breaks on Eof and closing paren but this is simpler and works as expected
while let Token::Operator(op) = input.peek() {
if "+-*/&|".contains(op) {
let infix_op = match op {
'+' => InfixOp::ADD,
'-' => InfixOp::SUB,
'*' => InfixOp::MUL,
'/' => InfixOp::DIV,
it => return Err(format!("Parse error: do not know operator {:?}.", it)),
},
it => return Err(format!("Parse error: did not expect token {:?}.", it)),
};
'&' => InfixOp::AND,
'|' => InfixOp::OR,
it => {
return Err(format!("Parse error: do not know infix operator {:?}.", it));
}
};
if op.prec() < min_prec {
break;
}
let (l_prec, r_prec) = infix_op.prec();
if l_prec < min_prec {
break;
}
input.next();
lhs = {
let rhs = _parse(input, op.prec()).unwrap();
Expr::Infix {
op: op,
input.next();
let rhs = _parse(input, r_prec)?;
lhs = Expr::Infix {
op: infix_op,
lhs: Box::new(lhs),
rhs: Box::new(rhs),
};
} else if "%".contains(op) {
let postfix_op = match op {
'%' => PostfixOp::PERCENT,
it => {
return Err(format!(
"Parse error: do not know postfix operator {:?}.",
it
));
}
};
let (l_prec, _) = postfix_op.prec();
if l_prec < min_prec {
break;
}
input.next();
lhs = Expr::Postfix {
op: postfix_op,
expr: Box::new(lhs),
};
}
}