// Review notes for this patch. Everything below the notes is the original
// `git diff` text, unchanged.
//
// What the patch does:
// - src/evaluator.rs (new file): defines `Eval` (either a `Literal` or an
//   `Expr`), a `Display` impl for it, and `_evaluate`, which recurses over
//   `Expr::Literal` and `Expr::Infix` only. `eval_add` first tries
//   `eval_numeric_infix` and otherwise concatenates two `Literal::String`
//   values. `eval_sub`, `eval_mul` and `eval_div` are stubs that always return
//   the Todo error. `eval_numeric_infix` returns `None` unless both operands
//   are `Integer` or `Double`; a mixed pair is computed as `f64` through an
//   `as f64` cast.
// - src/main.rs: `parser::parse` is now called with the input string, and the
//   parsed tree is handed to `evaluator::_evaluate`. Both results are unwrapped.
// - src/parser.rs: adds the `PrefixOp` variants, `PostfixOp`, the `AND`/`OR`
//   infix operators, and `Expr::CellRef`, `Expr::Function`, `Expr::Postfix`.
//   `Precedence::prec` now returns a (left, right) pair. `parse` builds the
//   `Tokenizer` itself, and `_parse` handles `true`/`false`, parenthesised
//   groups, prefix operators, function calls, cell references and postfix `%`.
// - src/tokenizer.rs: adds `Token::Comma` and extends the operator characters
//   with `!%&|`.
//
// NOTE(review): this copy looks as if angle-bracket generics were stripped:
// `Result`, `Option`, `Vec` and `Box` appear without type arguments, and
// `eval_numeric_infix` has a `where` clause for `FInt`/`FDouble` that are never
// declared. Check against the real commit before applying - TODO confirm.
//
// NOTE(review): in `_parse`, the `while let Token::Operator(op) = input.peek()`
// loop only handles operators in `+-*/&|` and `%`. The tokenizer also emits `^`
// and `!` as `Token::Operator`. When one of those follows an operand, neither
// branch runs, nothing is consumed and nothing breaks, so the loop presumably
// never terminates (assumes `peek` does not advance the tokenizer - TODO
// confirm). A final `else` arm returning a parse error would close the gap.
//
// NOTE(review): `_evaluate` rejects every `Expr` other than `Literal` and
// `Infix`, so any input the parser turns into `Group`, `Prefix`, `Postfix`,
// `Function` or `CellRef` fails with the unsupported-expression error, and
// `AND`/`OR` fall into the unsupported-operator arm.
//
// NOTE(review): `_evaluate` takes `&mut Expr`, but nothing in its visible body
// writes through the reference; `&Expr` looks sufficient - confirm with callers.
//
// NOTE(review): integer addition in `eval_add` uses plain `+` on `i64`, so
// overflow is not handled explicitly.
//
// NOTE(review): the literals-expected error in `eval_add` is spelled
// `Evalutation`; the other messages in the file use `Evaluation`.
diff --git a/src/evaluator.rs b/src/evaluator.rs new file mode 100644 index 0000000..ee5f412 --- /dev/null +++ b/src/evaluator.rs @@ -0,0 +1,92 @@ +use crate::parser::*; +use crate::tokenizer::Literal; +use std::fmt; + +#[derive(Debug, PartialEq)] +pub enum Eval { + Literal(Literal), + Expr(Expr), +} + +impl fmt::Display for Eval { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Eval::Literal(lit) => write!(f, "{lit:?}"), + Eval::Expr(expr) => write!(f, "({expr})"), + } + } +} + +pub fn _evaluate(expr: &mut Expr) -> Result { + let res = match expr { + Expr::Literal(lit) => Eval::Literal(lit.clone()), + Expr::Infix { op, lhs, rhs } => { + let lval = _evaluate(lhs)?; + let rval = _evaluate(rhs)?; + + match op { + InfixOp::ADD => eval_add(&lval, &rval)?, + InfixOp::SUB => eval_sub(&lval, &rval)?, + InfixOp::MUL => eval_mul(&lval, &rval)?, + InfixOp::DIV => eval_div(&lval, &rval)?, + _ => return Err(format!("Evaluation error: Unsupported operator {:?}", op)), + } + } + it => return Err(format!("Evaluation error: Unsupported expression {:?}", it)), + }; + + Ok(res) +} + +fn eval_add(lval: &Eval, rval: &Eval) -> Result { + match (lval, rval) { + (Eval::Literal(a), Eval::Literal(b)) => { + if let Some(res) = eval_numeric_infix(a, b, |x, y| x + y, |x, y| x + y) { + return Ok(Eval::Literal(res)); + } + + // Try string concatenation + if let (Literal::String(x), Literal::String(y)) = (a, b) { + let mut res = x.to_owned(); + res.push_str(y); + return Ok(Eval::Literal(Literal::String(res))); + } + + Err("Evaluation error: expected string or numeric types for ADD function.".to_string()) + } + _ => return Err("Evalutation error: expected literals for ADD function.".to_string()), + } +} + +fn eval_sub(lval: &Eval, rval: &Eval) -> Result { + Err("Todo.".to_string()) +} +fn eval_mul(lval: &Eval, rval: &Eval) -> Result { + Err("Todo.".to_string()) +} +fn eval_div(lval: &Eval, rval: &Eval) -> Result { + Err("Todo.".to_string()) +} + +pub fn 
eval_numeric_infix( + lhs: &Literal, + rhs: &Literal, + int_op: FInt, + double_op: FDouble, +) -> Option +where + FInt: Fn(i64, i64) -> i64, + FDouble: Fn(f64, f64) -> f64, +{ + match (lhs, rhs) { + (Literal::Integer(a), Literal::Integer(b)) => Some(Literal::Integer(int_op(*a, *b))), + (Literal::Double(a), Literal::Double(b)) => Some(Literal::Double(double_op(*a, *b))), + (Literal::Integer(a), Literal::Double(b)) => { + Some(Literal::Double(double_op(*a as f64, *b))) + } + (Literal::Double(a), Literal::Integer(b)) => { + Some(Literal::Double(double_op(*a, *b as f64))) + } + _ => None, + } +} diff --git a/src/main.rs b/src/main.rs index 434bba9..21ee077 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,14 @@ +mod evaluator; mod parser; mod tokenizer; -use crate::parser::*; -use crate::tokenizer::*; use std::io; fn main() { let mut input = String::new(); io::stdin().read_line(&mut input).expect("Expected input."); - let mut t = Tokenizer::new(&input).unwrap(); - println!("{:?}\n", t.tokens); - let ast = parser::parse(&mut t).unwrap(); + let mut ast = parser::parse(&input).unwrap(); println!("{}", ast.pretty()); + println!("{}", evaluator::_evaluate(&mut ast).unwrap()); } diff --git a/src/parser.rs b/src/parser.rs index b9ee14b..4286d4d 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,10 +2,15 @@ use crate::tokenizer::*; use std::fmt; #[derive(Debug, PartialEq)] -pub enum PrefixOp {} +pub enum PrefixOp { + POS, + NEG, + NOT, +} -pub trait Precedence { - fn prec(&self) -> u8; +#[derive(Debug, PartialEq)] +pub enum PostfixOp { + PERCENT, } #[derive(Debug, PartialEq)] @@ -14,25 +19,27 @@ pub enum InfixOp { DIV, ADD, SUB, -} - -impl Precedence for InfixOp { - fn prec(&self) -> u8 { - match self { - InfixOp::MUL | InfixOp::DIV => 2, - InfixOp::ADD | InfixOp::SUB => 1, - } - } + AND, + OR, } #[derive(Debug, PartialEq)] pub enum Expr { Literal(Literal), + CellRef(String), + Function { + name: String, + args: Vec, + }, Group(Box), Prefix { op: PrefixOp, expr: Box, }, 
+ Postfix { + op: PostfixOp, + expr: Box, + }, Infix { op: InfixOp, lhs: Box, @@ -40,13 +47,46 @@ pub enum Expr { }, } +// Ref: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html +// We have left and right precedence as to allow associative operators +// to parse as you would more expect and to break ties in a predictable manner +pub trait Precedence { + fn prec(&self) -> (u8, u8); +} + +impl Precedence for InfixOp { + fn prec(&self) -> (u8, u8) { + match self { + InfixOp::MUL | InfixOp::DIV | InfixOp::AND => (3, 4), + InfixOp::ADD | InfixOp::SUB | InfixOp::OR => (1, 2), + } + } +} +impl Precedence for PrefixOp { + fn prec(&self) -> (u8, u8) { + match self { + _it => (0, 5), + } + } +} +impl Precedence for PostfixOp { + fn prec(&self) -> (u8, u8) { + match self { + _it => (6, 0), + } + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Expr::Literal(lit) => write!(f, "{lit:?}"), Expr::Group(expr) => write!(f, "({expr})"), Expr::Prefix { op, expr } => write!(f, "({op:?} {expr})"), + Expr::Postfix { op, expr } => write!(f, "({op:?} {expr})"), Expr::Infix { op, lhs, rhs } => write!(f, "({lhs} {op:?} {rhs})"), + Expr::Function { name, args } => write!(f, "{name}({args:?})"), + Expr::CellRef(it) => write!(f, "CellRef({it})"), } } } @@ -70,16 +110,25 @@ impl Expr { match self { Expr::Literal(_) => {} + Expr::CellRef(_) => {} Expr::Group(expr) => { result.push_str(&expr.pretty_branch(&new_prefix, true)); } Expr::Prefix { expr, .. } => { result.push_str(&expr.pretty_branch(&new_prefix, true)); } + Expr::Postfix { expr, .. } => { + result.push_str(&expr.pretty_branch(&new_prefix, true)); + } Expr::Infix { lhs, rhs, .. } => { result.push_str(&lhs.pretty_branch(&new_prefix, false)); result.push_str(&rhs.pretty_branch(&new_prefix, true)); } + Expr::Function { args, .. 
} => { + for (idx, arg) in args.iter().enumerate() { + result.push_str(&arg.pretty_branch(&new_prefix, idx == args.len() - 1)); + } + } } result } @@ -97,46 +146,133 @@ impl Expr { Expr::Literal(lit) => format!("Literal({:?})", lit), Expr::Group(_) => "Group".to_string(), Expr::Prefix { op, .. } => format!("Prefix({:?})", op), + Expr::Postfix { op, .. } => format!("Postfix({:?})", op), Expr::Infix { op, .. } => format!("Infix({:?})", op), + Expr::Function { name, .. } => format!("Function({:?})", name), + Expr::CellRef(it) => format!("CellRef({:?})", it), } } } -pub fn parse(input: &mut Tokenizer) -> Result { - _parse(input, 0) +pub fn parse(input: &str) -> Result { + let mut tokenizer = Tokenizer::new(input)?; + // println!("{:?}", tokenizer.tokens); + _parse(&mut tokenizer, 0) } pub fn _parse(input: &mut Tokenizer, min_prec: u8) -> Result { let mut lhs = match input.next() { Token::Literal(it) => Expr::Literal(it), + Token::Identifier(id) if id == "true" => Expr::Literal(Literal::Boolean(true)), + Token::Identifier(id) if id == "false" => Expr::Literal(Literal::Boolean(false)), + Token::Paren('(') => { + let lhs = _parse(input, 0)?; + if input.next() != Token::Paren(')') { + return Err(format!("Parse error: expected closing paren.")); + } + Expr::Group(Box::new(lhs)) + } + Token::Operator(op) => { + let prefix_op = match op { + '+' => PrefixOp::POS, + '-' => PrefixOp::NEG, + '!' 
=> PrefixOp::NOT, + it => return Err(format!("Parse error: unknown prefix operator {:?}.", it)), + }; + + let rhs = _parse(input, prefix_op.prec().1)?; + + Expr::Prefix { + op: prefix_op, + expr: Box::new(rhs), + } + } + Token::Identifier(id) => match input.peek() { + Token::Paren('(') => { + input.next(); + + let mut args: Vec = Vec::new(); + loop { + let nxt = input.peek(); + + if nxt == Token::Paren(')') { + input.next(); + break; + } else if nxt != Token::Comma && args.len() != 0 { + return Err(format!( + "Parse error: expected comma while parsing argument of function {:?}.", + id + )); + } + + if args.len() != 0 { + input.next(); // Skip comma + } + + let arg = _parse(input, 0)?; + args.push(arg); + } + + Expr::Function { + name: id, + args: args, + } + } + _ => Expr::CellRef(id), + }, + it => return Err(format!("Parse error: did not expect token {:?}.", it)), }; - loop { - let op = match input.peek() { - Token::Eof => break, - Token::Operator(op) => match op { + // In the reference article this is a loop with match + // statement that breaks on Eof and closing paren but this is simpler and works as expected + while let Token::Operator(op) = input.peek() { + if "+-*/&|".contains(op) { + let infix_op = match op { '+' => InfixOp::ADD, '-' => InfixOp::SUB, '*' => InfixOp::MUL, '/' => InfixOp::DIV, - it => return Err(format!("Parse error: do not know operator {:?}.", it)), - }, - it => return Err(format!("Parse error: did not expect token {:?}.", it)), - }; + '&' => InfixOp::AND, + '|' => InfixOp::OR, + it => { + return Err(format!("Parse error: do not know infix operator {:?}.", it)); + } + }; - if op.prec() < min_prec { - break; - } + let (l_prec, r_prec) = infix_op.prec(); + if l_prec < min_prec { + break; + } - input.next(); - lhs = { - let rhs = _parse(input, op.prec()).unwrap(); - Expr::Infix { - op: op, + input.next(); + let rhs = _parse(input, r_prec)?; + lhs = Expr::Infix { + op: infix_op, lhs: Box::new(lhs), rhs: Box::new(rhs), + }; + } else if 
"%".contains(op) { + let postfix_op = match op { + '%' => PostfixOp::PERCENT, + it => { + return Err(format!( + "Parse error: do not know postfix operator {:?}.", + it + )); + } + }; + + let (l_prec, _) = postfix_op.prec(); + if l_prec < min_prec { + break; } + + input.next(); + lhs = Expr::Postfix { + op: postfix_op, + expr: Box::new(lhs), + }; } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 600cdff..547aa2a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -12,6 +12,7 @@ pub enum Token { Literal(Literal), Operator(char), Paren(char), + Comma, Eof, } @@ -84,12 +85,15 @@ impl Tokenizer { string.push(ch); } tokens.push(Token::Literal(Literal::String(string))); - } else if "+-*/^".contains(c) { + } else if "+-*/^!%&|".contains(c) { tokens.push(Token::Operator(c)); chars.next(); } else if "()".contains(c) { tokens.push(Token::Paren(c)); chars.next(); + } else if c == ',' { + tokens.push(Token::Comma); + chars.next(); } else { return Err(format!("Encountered unknown token char: {c}")); }