AST parser for SpamAssassin meta expressions

Mauro D 2023-08-20 17:44:26 +02:00
parent 016a5bde18
commit dbaaff48f0
5 changed files with 612 additions and 413 deletions

@@ -0,0 +1,177 @@
use super::{BinaryOperator, Comparator, Expr, Logical, Operation, Token, UnaryOperator};
pub struct Parser<'x> {
tokens: &'x [Token],
position: usize,
}
impl<'x> Parser<'x> {
pub fn new(tokens: &'x [Token]) -> Self {
Self {
tokens,
position: 0,
}
}
pub fn consume(&mut self) -> Option<&'x Token> {
if self.position < self.tokens.len() {
let token = &self.tokens[self.position];
self.position += 1;
Some(token)
} else {
None
}
}
pub fn peek(&self) -> Option<&'x Token> {
if self.position < self.tokens.len() {
Some(&self.tokens[self.position])
} else {
None
}
}
fn primary(&mut self) -> Result<Expr, String> {
match self.peek() {
Some(&Token::Number(n)) => {
self.consume();
Ok(Expr::Literal(n))
}
Some(Token::Tag(ref id)) => {
self.consume();
Ok(Expr::Identifier(id.clone()))
}
Some(&Token::OpenParen) => {
self.consume();
let expr = self.expr();
if let Some(&Token::CloseParen) = self.peek() {
self.consume();
expr
} else {
Err("Expected closing parenthesis".to_string())
}
}
_ => Err("Unexpected token in factor".to_string()),
}
}
fn unary(&mut self) -> Result<Expr, String> {
match self.peek() {
Some(&Token::Logical(Logical::Not)) => {
self.consume();
let operand = self.primary()?;
Ok(Expr::UnaryOp(UnaryOperator::Not, Box::new(operand)))
}
Some(&Token::Operation(Operation::Subtract)) => {
self.consume();
let operand = self.primary()?;
Ok(Expr::UnaryOp(UnaryOperator::Minus, Box::new(operand)))
}
_ => self.primary(),
}
}
fn factor(&mut self) -> Result<Expr, String> {
let mut left = self.unary()?;
while let Some(op @ Token::Operation(Operation::Multiply | Operation::Divide)) = self.peek()
{
self.consume();
let right = self.unary()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn term(&mut self) -> Result<Expr, String> {
let mut left = self.factor()?;
while let Some(op @ Token::Operation(Operation::Add | Operation::Subtract)) = self.peek() {
self.consume();
let right = self.factor()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn bitwise(&mut self) -> Result<Expr, String> {
let mut left = self.term()?;
while let Some(op @ Token::Operation(Operation::And | Operation::Or)) = self.peek() {
self.consume();
let right = self.term()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn comparison(&mut self) -> Result<Expr, String> {
let mut left = self.bitwise()?;
while let Some(op @ Token::Comparator(_)) = self.peek() {
self.consume();
let right = self.bitwise()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn logical_and(&mut self) -> Result<Expr, String> {
let mut left = self.comparison()?;
while let Some(Token::Logical(Logical::And)) = self.peek() {
self.consume();
let right = self.comparison()?;
left = Expr::BinaryOp(Box::new(left), BinaryOperator::And, Box::new(right));
}
Ok(left)
}
fn logical_or(&mut self) -> Result<Expr, String> {
let mut left = self.logical_and()?;
while let Some(Token::Logical(Logical::Or)) = self.peek() {
self.consume();
let right = self.logical_and()?;
left = Expr::BinaryOp(Box::new(left), BinaryOperator::Or, Box::new(right));
}
Ok(left)
}
fn expr(&mut self) -> Result<Expr, String> {
self.logical_or()
}
pub fn parse(&mut self) -> Result<Expr, String> {
let result = self.expr()?;
if self.position < self.tokens.len() {
println!("{result:#?}\n {} {}", self.position, self.tokens.len());
Err("Unexpected tokens at the end of the expression".to_string())
} else {
Ok(result)
}
}
}
impl From<&Token> for BinaryOperator {
fn from(value: &Token) -> Self {
match value {
Token::Operation(Operation::Add) => Self::Add,
Token::Operation(Operation::Multiply) => Self::Multiply,
Token::Operation(Operation::Divide) => Self::Divide,
Token::Operation(Operation::Subtract) => Self::Subtract,
Token::Operation(Operation::And) => Self::BitwiseAnd,
Token::Operation(Operation::Or) => Self::BitwiseOr,
Token::Logical(Logical::And) => Self::And,
Token::Logical(Logical::Or) => Self::Or,
Token::Comparator(Comparator::Gt) => Self::Greater,
Token::Comparator(Comparator::Lt) => Self::Lesser,
Token::Comparator(Comparator::Ge) => Self::GreaterOrEqual,
Token::Comparator(Comparator::Le) => Self::LesserOrEqual,
Token::Comparator(Comparator::Eq) => Self::Equal,
_ => panic!("Invalid token"),
}
}
}
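// Usage sketch (illustrative, not part of this commit): the precedence chain is
// expr -> logical_or -> logical_and -> comparison -> bitwise -> term -> factor
// -> unary -> primary, so `||` binds loosest and unary `!`/`-` bind tightest.
// Assuming the Tokenizer added in tokenizer.rs below:
//
//     let tokens = Tokenizer::new("(A > 2) && (B < 3)").collect::<Vec<_>>();
//     let expr = Parser::new(&tokens).parse().expect("valid meta expression");
//     // expr is BinaryOp(BinaryOp(A, Greater, 2), And, BinaryOp(B, Lesser, 3))
//     assert!(matches!(expr, Expr::BinaryOp(..)));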

@@ -1,336 +1,40 @@
use std::{collections::HashMap, fmt::Display, iter::Peekable, str::Chars};
use std::fmt::Display;
use super::{Comparator, Logical, Operation, Token};
// Parse a meta expression into a list of tokens that can be easily
// converted into a Sieve test.
// The parser is not very robust but works on all SpamAssassin meta expressions.
// It might be a good idea in the future to instead build a parse tree and
// then convert that into a Sieve expression.
#[derive(Debug, Clone, Default)]
pub struct MetaExpression {
pub tokens: Vec<TokenDepth>,
depth_range: HashMap<u32, DepthRange>,
depth: u32,
}
#[derive(Debug, Clone)]
pub struct TokenDepth {
pub token: Token,
depth: u32,
prefix: Vec<Token>,
}
#[derive(Debug, Clone, Default)]
struct DepthRange {
start: usize,
end: usize,
expr_end: Option<(usize, bool)>,
logic_end: bool,
}
use super::{
ast::Parser, tokenizer::Tokenizer, BinaryOperator, Comparator, Expr, Logical, MetaExpression,
Operation, Token, UnaryOperator, UnwrapResult,
};
// Parse a meta expression into a list of tokens that can be converted into a Sieve test.
impl MetaExpression {
pub fn from_meta(expr: &str) -> Self {
let mut meta = MetaExpression::default();
let mut seen_comp = false;
let mut buf = String::new();
let mut iter = expr.chars().peekable();
let mut tokens = Tokenizer::new(expr).collect::<Vec<_>>();
while let Some(ch) = iter.next() {
match ch {
'A'..='Z' | 'a'..='z' | '0'..='9' | '_' => {
buf.push(ch);
}
_ => {
if !buf.is_empty() {
let token = Token::from(buf);
buf = String::new();
if !seen_comp && !meta.has_comparator(iter.clone()) {
meta.push(token);
meta.push(Token::Comparator(Comparator::Gt));
meta.push(Token::Number(0));
seen_comp = true;
} else {
meta.push(token);
}
// If there are no comparators, we can just turn it into an expression
if !tokens.iter().any(|t| matches!(t, Token::Comparator(_))) {
let prev_tokens = tokens;
tokens = Vec::with_capacity(prev_tokens.len() + 3);
tokens.push(Token::OpenParen);
for token in prev_tokens {
tokens.push(if let Token::Logical(op) = token {
match op {
Logical::And => Token::Operation(Operation::And),
Logical::Or => Token::Operation(Operation::Or),
Logical::Not => Token::Logical(Logical::Not),
}
match ch {
'&' => {
seen_comp = false;
if matches!(iter.next(), Some('&')) {
meta.push(Token::Logical(Logical::And));
} else {
eprintln!("Warning: Single & in meta expression {expr}",);
}
}
'|' => {
seen_comp = false;
if matches!(iter.next(), Some('|')) {
meta.push(Token::Logical(Logical::Or));
} else {
eprintln!("Warning: Single | in meta expression {expr}",);
}
}
'!' => {
seen_comp = false;
meta.push(Token::Logical(Logical::Not))
}
'=' => {
seen_comp = true;
meta.push(match iter.next() {
Some('=') => Token::Comparator(Comparator::Eq),
Some('>') => Token::Comparator(Comparator::Ge),
Some('<') => Token::Comparator(Comparator::Le),
_ => {
eprintln!("Warning: Single = in meta expression {expr}",);
Token::Comparator(Comparator::Eq)
}
});
}
'>' => {
seen_comp = true;
meta.push(match iter.peek() {
Some('=') => {
iter.next();
Token::Comparator(Comparator::Ge)
}
_ => Token::Comparator(Comparator::Gt),
})
}
'<' => {
seen_comp = true;
meta.push(match iter.peek() {
Some('=') => {
iter.next();
Token::Comparator(Comparator::Le)
}
_ => Token::Comparator(Comparator::Lt),
})
}
'(' => meta.push(Token::OpenParen),
')' => {
if meta.depth == 0 {
eprintln!(
"Warning: Unmatched close parenthesis in meta expression {expr}"
);
}
meta.push(Token::CloseParen)
}
'+' => meta.push(Token::Operation(Operation::Add)),
'*' => meta.push(Token::Operation(Operation::Multiply)),
'/' => meta.push(Token::Operation(Operation::Divide)),
' ' => {}
_ => {
eprintln!("Warning: Invalid character {ch} in meta expression {expr}");
break;
}
}
}
}
}
if meta.depth > 0 {
eprintln!("Warning: Unmatched open parenthesis in meta expression {expr}");
}
if !buf.is_empty() {
meta.push(Token::from(buf));
if !seen_comp {
meta.push(Token::Comparator(Comparator::Gt));
meta.push(Token::Number(0));
}
}
meta.finalize();
meta
}
fn push(&mut self, mut token: Token) {
let pos = self.tokens.len();
let depth_range = self
.depth_range
.entry(self.depth)
.or_insert_with(|| DepthRange {
start: pos,
end: pos,
..Default::default()
});
depth_range.end = pos;
let mut depth = self.depth;
let mut prefix = vec![];
match &token {
Token::OpenParen => {
if let Some((pos, true)) = depth_range.expr_end {
depth_range.expr_end = Some((pos, false));
}
self.depth += 1;
}
Token::CloseParen => {
if let Some((pos, is_static)) = depth_range.expr_end.take() {
self.tokens[pos + 2]
.prefix
.push(Token::BeginExpression(is_static));
prefix.push(Token::EndExpression(is_static));
}
if depth_range.logic_end {
prefix.push(Token::CloseParen);
}
self.depth = self.depth.saturating_sub(1);
depth = self.depth;
}
Token::Logical(op) => {
if self
.tokens
.iter()
.any(|t| matches!(t.token, Token::Comparator(_)) && t.depth < depth)
{
token = Token::Operation(match op {
Logical::And => Operation::And,
Logical::Or => Operation::Or,
Logical::Not => Operation::Not,
});
if let Some((pos, true)) = depth_range.expr_end {
depth_range.expr_end = Some((pos, false));
}
} else if matches!(op, Logical::Or | Logical::And) {
let start_prefix = &mut self.tokens[depth_range.start].prefix;
if !start_prefix.contains(&Token::Logical(Logical::And))
&& !start_prefix.contains(&Token::Logical(Logical::Or))
{
start_prefix.insert(0, token.clone());
}
depth_range.logic_end = true;
if let Some((pos, is_static)) = depth_range.expr_end.take() {
self.tokens[pos + 2]
.prefix
.push(Token::BeginExpression(is_static));
prefix.push(Token::EndExpression(is_static));
}
}
}
Token::Comparator(_) => {
let mut is_static = true;
let mut start_pos = usize::MAX;
for (pos, token) in self.tokens.iter_mut().enumerate().rev() {
if token.depth >= depth {
start_pos = pos;
match &token.token {
Token::Logical(op) => {
if token.depth == depth {
start_pos += 1;
break;
} else {
is_static = false;
token.token = Token::Operation(match op {
Logical::And => Operation::And,
Logical::Or => Operation::Or,
Logical::Not => Operation::Not,
});
token.prefix.clear();
}
}
Token::OpenParen
| Token::CloseParen
| Token::Operation(_)
| Token::Tag(_) => {
is_static = false;
}
_ => {}
}
} else {
break;
}
}
if start_pos != usize::MAX {
self.tokens.push(TokenDepth {
token: Token::EndExpression(is_static),
depth,
prefix: vec![],
});
self.tokens[start_pos].prefix =
vec![token.clone(), Token::BeginExpression(is_static)];
depth_range.expr_end = Some((pos, true));
}
}
Token::Tag(_) | Token::Operation(_) => {
if let Some((pos, true)) = depth_range.expr_end {
depth_range.expr_end = Some((pos, false));
}
}
_ => {}
}
self.tokens.push(TokenDepth {
token,
depth,
prefix,
})
}
fn finalize(&mut self) {
if let Some(depth_range) = self.depth_range.get(&self.depth) {
if let Some((pos, is_static)) = depth_range.expr_end {
self.tokens[pos + 2]
.prefix
.push(Token::BeginExpression(is_static));
self.tokens.push(TokenDepth {
token: Token::EndExpression(is_static),
depth: self.depth,
prefix: vec![],
} else {
token
});
}
if depth_range.logic_end {
self.tokens.push(TokenDepth {
token: Token::CloseParen,
depth: self.depth,
prefix: vec![],
});
}
}
}
fn has_comparator(&self, iter: Peekable<Chars<'_>>) -> bool {
let mut d = self.depth;
let mut comp_depth = None;
let mut logic_depth = None;
for (pos, ch) in iter.enumerate() {
match ch {
'(' => {
d += 1;
}
')' => {
d = d.saturating_sub(1);
}
'>' | '<' | '=' => {
comp_depth = Some((pos, d));
break;
}
'&' | '|' => {
if d <= self.depth {
logic_depth = Some((pos, d));
}
}
_ => (),
}
tokens.push(Token::CloseParen);
tokens.push(Token::Comparator(Comparator::Gt));
tokens.push(Token::Number(0));
}
println!("comp_depth: {comp_depth:?} {logic_depth:?}");
match (comp_depth, logic_depth) {
(Some((comp_pos, comp_depth)), Some((logic_pos, logic_depth))) => {
match comp_depth.cmp(&logic_depth) {
std::cmp::Ordering::Less => true,
std::cmp::Ordering::Equal => comp_pos < logic_pos,
_ => false,
}
}
(Some(_), None) => true,
_ => false,
}
let expr = Parser::new(&tokens)
.parse()
.unwrap_result("parse expression");
MetaExpression { tokens, expr }
}
}
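// Worked example (illustrative): a comparator-less meta expression such as
// "A || B" is wrapped before parsing, with its Logical tokens downgraded to
// bitwise-style Operations, so the token stream handed to the parser becomes:
//
//     OpenParen, Tag("A"), Operation(Or), Tag("B"), CloseParen,
//     Comparator(Gt), Number(0)
//
// i.e. the rule is effectively evaluated as "(A | B) > 0".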
@@ -344,112 +48,192 @@ impl From<String> for Token {
}
}
impl Display for MetaExpression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("if ")?;
for token in &self.tokens {
for token in &token.prefix {
token.fmt(f)?;
impl Expr {
fn fmt_child(
&self,
f: &mut std::fmt::Formatter<'_>,
parent: Option<&BinaryOperator>,
in_comp: bool,
) -> std::fmt::Result {
match self {
Expr::UnaryOp(op, expr) => {
let add_p =
in_comp && !matches!(expr.as_ref(), Expr::Literal(_) | Expr::Identifier(_));
match op {
UnaryOperator::Not => f.write_str(if in_comp { "!" } else { "not " })?,
UnaryOperator::Minus => f.write_str("-")?,
}
if add_p {
f.write_str("(")?;
}
expr.fmt_child(f, None, in_comp)?;
if add_p {
f.write_str(")")?;
}
Ok(())
}
Expr::BinaryOp(left, op, right) => match op {
BinaryOperator::Or | BinaryOperator::And => {
let add_p = parent.map_or(true, |pop| pop.precedence() != op.precedence());
if add_p {
write!(f, "{op}(")?;
}
left.fmt_child(f, op.into(), in_comp)?;
f.write_str(", ")?;
right.fmt_child(f, op.into(), in_comp)?;
if add_p {
f.write_str(")")
} else {
Ok(())
}
}
BinaryOperator::Greater
| BinaryOperator::Lesser
| BinaryOperator::GreaterOrEqual
| BinaryOperator::LesserOrEqual
| BinaryOperator::Equal => {
write!(f, "string :value {op} :comparator \"i;ascii-numeric\" \"")?;
let is_literal = matches!(left.as_ref(), Expr::Literal(_));
if !is_literal {
f.write_str("${")?;
}
left.fmt_child(f, None, true)?;
if !is_literal {
f.write_str("}")?;
}
f.write_str("\" \"")?;
let is_literal = matches!(right.as_ref(), Expr::Literal(_));
if !is_literal {
f.write_str("${")?;
}
right.fmt_child(f, None, true)?;
if !is_literal {
f.write_str("}")?;
}
f.write_str("\"")
}
BinaryOperator::Add
| BinaryOperator::Subtract
| BinaryOperator::Multiply
| BinaryOperator::Divide
| BinaryOperator::BitwiseAnd
| BinaryOperator::BitwiseOr => {
let add_p = parent.map_or(false, |pop| pop.precedence() != op.precedence());
if add_p {
f.write_str("(")?;
}
left.fmt_child(f, op.into(), in_comp)?;
op.fmt(f)?;
right.fmt_child(f, op.into(), in_comp)?;
if add_p {
f.write_str(")")?;
}
Ok(())
}
},
Expr::Literal(v) => {
if !in_comp {
write!(
f,
"string :value \"gt\" :comparator \"i;ascii-numeric\" \"{v}\" \"0\""
)
} else {
v.fmt(f)
}
}
Expr::Identifier(i) => {
if !in_comp {
write!(
f,
"string :value \"gt\" :comparator \"i;ascii-numeric\" \"${{{i}}}\" \"0\"",
)
} else {
i.fmt(f)
}
}
match &token.token {
Token::Logical(Logical::And) | Token::Logical(Logical::Or) => f.write_str(", "),
Token::Comparator(Comparator::Gt)
| Token::Comparator(Comparator::Lt)
| Token::Comparator(Comparator::Eq)
| Token::Comparator(Comparator::Ge)
| Token::Comparator(Comparator::Le) => f.write_str(" "),
_ => token.token.fmt(f),
}?;
}
Ok(())
}
}
impl Display for Token {
impl Display for MetaExpression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("if ")?;
self.expr.fmt_child(f, None, false)
}
}
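// Worked example (illustrative): for a meta expression like
// "(A > 2) && (B < 3)" the Display impl should emit a Sieve test roughly like
// (shown wrapped here for readability):
//
//     if allof(string :value "gt" :comparator "i;ascii-numeric" "${A}" "2",
//              string :value "lt" :comparator "i;ascii-numeric" "${B}" "3")
//
// Identifiers are referenced as "${...}" variables while numeric literals are
// emitted verbatim.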
impl Display for BinaryOperator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Token::Tag(t) => t.fmt(f),
Token::Number(n) => n.fmt(f),
Token::Logical(Logical::And) => f.write_str("allof("),
Token::Logical(Logical::Or) => f.write_str("anyof("),
Token::Logical(Logical::Not) => f.write_str("not "),
Token::Comparator(comp) => {
f.write_str("string :value \"")?;
match comp {
Comparator::Eq => f.write_str("eq")?,
Comparator::Gt => f.write_str("gt")?,
Comparator::Lt => f.write_str("lt")?,
Comparator::Ge => f.write_str("ge")?,
Comparator::Le => f.write_str("gt")?,
_ => unreachable!(),
}
f.write_str("\" :comparator \"i;ascii-numeric\" ")
}
Token::OpenParen => f.write_str("("),
Token::CloseParen => f.write_str(")"),
Token::Operation(Operation::Add) => f.write_str(" + "),
Token::Operation(Operation::Multiply) => f.write_str(" * "),
Token::Operation(Operation::Divide) => f.write_str(" / "),
Token::Operation(Operation::And) => f.write_str(" & "),
Token::Operation(Operation::Or) => f.write_str(" | "),
Token::Operation(Operation::Not) => f.write_str("!"),
Token::BeginExpression(is_static) => {
if *is_static {
f.write_str("\"")
} else {
f.write_str("\"${")
}
}
Token::EndExpression(is_static) => {
if *is_static {
f.write_str("\"")
} else {
f.write_str("}\"")
}
}
BinaryOperator::Or => f.write_str("anyof"),
BinaryOperator::And => f.write_str("allof"),
BinaryOperator::BitwiseOr => f.write_str(" | "),
BinaryOperator::BitwiseAnd => f.write_str(" & "),
BinaryOperator::Greater => f.write_str("\"gt\""),
BinaryOperator::Lesser => f.write_str("\"lt\""),
BinaryOperator::GreaterOrEqual => f.write_str("\"ge\""),
BinaryOperator::LesserOrEqual => f.write_str("\"le\""),
BinaryOperator::Equal => f.write_str("\"eq\""),
BinaryOperator::Add => f.write_str(" + "),
BinaryOperator::Subtract => f.write_str(" - "),
BinaryOperator::Multiply => f.write_str(" * "),
BinaryOperator::Divide => f.write_str(" / "),
}
}
}
#[cfg(test)]
mod test {
use super::MetaExpression;
use crate::import::MetaExpression;
#[test]
fn parse_meta() {
for (expr, expected) in [
/*(
for (pos, (expr, expected)) in [
(
concat!(
"( ! HTML_IMAGE_ONLY_16 ) && ",
"( __LOWER_E > 20 ) && ",
"( __E_LIKE_LETTER > ( (__LOWER_E * 14 ) / 10) ) && ",
"( __E_LIKE_LETTER < ( 10 * __LOWER_E ) )"
"( ! A ) && ",
"( B > 20 ) && ",
"( C > ( (D * 14 ) / 10) ) && ",
"( E < ( 10 * F ) )"
),
"",
),
("(__DRUGS_ERECTILE1 || __DRUGS_ERECTILE2)", ""),
("(__HELO_DYNAMIC_IPADDR && !HELO_STATIC_HOST)", ""),
("__ML2 || __ML4", ""),
("(__AT_HOTMAIL_MSGID && (!__FROM_HOTMAIL_COM && !__FROM_MSN_COM && !__FROM_YAHOO_COM))", ""),
("(A || B)", ""),
("(A && !B)", ""),
("A || B", ""),
("(A && (!B && !C && !D))", ""),
("(0)", ""),
("RAZOR2_CHECK + DCC_CHECK + PYZOR_CHECK > 1", ""),
("(SUBJECT_IN_BLOCKLIST)", ""),
("__HAS_MSGID && !(__SANE_MSGID || __MSGID_COMMENT)", ""),
("!__CTYPE_HTML && __X_MAILER_APPLEMAIL && (__MSGID_APPLEMAIL || __MIME_VERSION_APPLEMAIL)", ""),
("((__AUTO_GEN_MS||__AUTO_GEN_3||__AUTO_GEN_4) && !__XM_VBULLETIN && !__X_CRON_ENV)", ""),*/
("(__WEBMAIL_ACCT + __MAILBOX_FULL + (__TVD_PH_SUBJ_META || __TVD_PH_BODY_META) > 3)", ""),
] {
("A + B + C > 1", ""),
("(A)", ""),
("A && !(B || C)", ""),
("!A && B && (C || D)", ""),
("((A||B||C) && !D && !E)", ""),
("(A + B + (C || D) > 3)", ""),
(
"(A || B) > 2 && (C && D) == 0 || ((E+F-G) > 0 || (H||I) <= 4)",
"",
),
("(A || B) > (C && D) && E", ""),
//("", ""),
]
.iter()
.enumerate()
{
let meta = MetaExpression::from_meta(expr);
//println!("{:#?}", meta.tokens);
let result = meta.to_string();
/*if pos != 13 {
continue;
}*/
println!("{expr}");
println!("{}", result);
//let tokens = Tokenizer::new(expr).collect::<Vec<_>>();
//println!("{tokens:?}");
//let mut p = Parser::new(&tokens);
//let expr = p.parse().unwrap();
//println!("{:#?}", expr);
println!("{}\n------------------------------------", meta);
/*assert_eq!(
result,

@@ -1,9 +1,9 @@
use std::collections::HashMap;
use self::meta::MetaExpression;
pub mod ast;
pub mod meta;
pub mod spamassassin;
pub mod tokenizer;
pub mod utils;
#[derive(Debug, Default, Clone)]
@@ -49,6 +49,12 @@ enum RuleType {
None,
}
#[derive(Debug, Clone, Default)]
pub struct MetaExpression {
pub tokens: Vec<Token>,
pub expr: Expr,
}
impl RuleType {
pub fn pattern(&mut self) -> Option<&mut String> {
match self {
@@ -116,10 +122,6 @@ pub enum Token {
OpenParen,
CloseParen,
// Sieve specific
BeginExpression(bool),
EndExpression(bool),
}
#[derive(Debug, PartialEq, Eq, Clone)]
@@ -143,11 +145,66 @@ pub enum Operation {
Add,
Multiply,
Divide,
Subtract,
And,
Or,
Not,
}
#[derive(Debug, PartialEq, Clone)]
pub enum Expr {
UnaryOp(UnaryOperator, Box<Expr>),
BinaryOp(Box<Expr>, BinaryOperator, Box<Expr>),
Literal(u32),
Identifier(String),
}
impl Default for Expr {
fn default() -> Self {
Self::Literal(0)
}
}
#[derive(Debug, PartialEq, Clone)]
pub enum UnaryOperator {
Not,
Minus,
}
#[derive(Debug, PartialEq, Clone)]
pub enum BinaryOperator {
Or,
And,
Greater,
Lesser,
GreaterOrEqual,
LesserOrEqual,
Equal,
Add,
Subtract,
Multiply,
Divide,
BitwiseAnd,
BitwiseOr,
}
impl BinaryOperator {
pub fn precedence(&self) -> u32 {
match self {
Self::Or => 1,
Self::And => 2,
Self::Greater
| Self::Lesser
| Self::GreaterOrEqual
| Self::LesserOrEqual
| Self::Equal => 3,
Self::Add | Self::Subtract => 4,
Self::Multiply | Self::Divide => 5,
Self::BitwiseAnd | Self::BitwiseOr => 6,
}
}
}
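// Note (assumption about intent): precedence() appears to be consulted only by
// Expr::fmt_child when deciding whether a nested node needs parentheses, e.g.
// "(A + B) * C" keeps its parentheses because the levels differ, while
// "A * 14 / 10" needs none:
//
//     assert_ne!(BinaryOperator::Add.precedence(), BinaryOperator::Multiply.precedence());
//     assert_eq!(BinaryOperator::Multiply.precedence(), BinaryOperator::Divide.precedence());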
impl Rule {
fn score(&self) -> f64 {
self.scores.last().copied().unwrap_or_else(|| {

@@ -6,9 +6,9 @@ use std::{
};
use super::{
meta::MetaExpression,
utils::{fix_broken_regex, replace_tags},
Header, HeaderMatches, HeaderPart, Rule, RuleType, TestFlag, Token, UnwrapResult,
Header, HeaderMatches, HeaderPart, MetaExpression, Rule, RuleType, TestFlag, Token,
UnwrapResult,
};
const VERSION: f64 = 4.000000;
@@ -1038,7 +1038,7 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
&& !meta
.tokens
.iter()
.any(|t| matches!(&t.token, Token::Tag(n) if n == &rule.name)))
.any(|t| matches!(&t, Token::Tag(n) if n == &rule.name)))
{
continue;
}

@@ -0,0 +1,181 @@
use super::{Comparator, Logical, Operation, Token};
pub struct Tokenizer<'x> {
expr: &'x str,
iter: std::iter::Peekable<std::str::Chars<'x>>,
buf: String,
depth: u32,
comparator_depth: u32,
next_token: Option<Token>,
}
impl<'x> Tokenizer<'x> {
pub fn new(expr: &'x str) -> Self {
Self {
expr,
iter: expr.chars().peekable(),
buf: String::new(),
depth: 0,
next_token: None,
comparator_depth: u32::MAX,
}
}
}
impl<'x> Iterator for Tokenizer<'x> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
if let Some(token) = self.next_token.take() {
return Some(token);
}
while let Some(ch) = self.iter.next() {
match ch {
'A'..='Z' | 'a'..='z' | '0'..='9' | '_' => {
self.buf.push(ch);
}
_ => {
let mut depth = self.depth;
let prev_token = if !self.buf.is_empty() {
Token::from(std::mem::take(&mut self.buf)).into()
} else {
None
};
let token = match ch {
'&' | '|' => {
if matches!(self.iter.next(), Some(c) if c == ch) {
let is_and = ch == '&';
if self.depth > self.comparator_depth {
Token::Operation(if is_and {
Operation::And
} else {
Operation::Or
})
} else {
let mut depth = self.depth;
let mut found_comp = false;
for ch in self.iter.clone() {
match ch {
'(' => depth += 1,
')' => {
depth -= 1;
}
'<' | '>' | '=' => {
found_comp = true;
break;
}
_ => (),
}
}
if found_comp && depth < self.depth {
self.comparator_depth = depth;
Token::Operation(if is_and {
Operation::And
} else {
Operation::Or
})
} else {
self.comparator_depth = u32::MAX;
Token::Logical(if is_and {
Logical::And
} else {
Logical::Or
})
}
}
} else {
eprintln!("Warning: Single {ch} in meta expression {}", self.expr);
return None;
}
}
'!' => Token::Logical(Logical::Not),
'=' => match self.iter.next() {
Some('=') => Token::Comparator(Comparator::Eq),
Some('>') => Token::Comparator(Comparator::Ge),
Some('<') => Token::Comparator(Comparator::Le),
_ => {
eprintln!("Warning: Single = in meta expression {}", self.expr);
Token::Comparator(Comparator::Eq)
}
},
'>' => match self.iter.peek() {
Some('=') => {
self.iter.next();
Token::Comparator(Comparator::Ge)
}
_ => Token::Comparator(Comparator::Gt),
},
'<' => match self.iter.peek() {
Some('=') => {
self.iter.next();
Token::Comparator(Comparator::Le)
}
_ => Token::Comparator(Comparator::Lt),
},
'(' => {
self.depth += 1;
Token::OpenParen
}
')' => {
if self.depth == 0 {
eprintln!(
"Warning: Unmatched close parenthesis in meta expression {}",
self.expr
);
return None;
}
self.depth -= 1;
depth = self.depth;
Token::CloseParen
}
'+' => Token::Operation(Operation::Add),
'*' => Token::Operation(Operation::Multiply),
'/' => Token::Operation(Operation::Divide),
'-' => Token::Operation(Operation::Subtract),
' ' => {
if let Some(prev_token) = prev_token {
return Some(prev_token);
} else {
continue;
}
}
_ => {
eprintln!(
"Warning: Invalid character {ch} in meta expression {}",
self.expr
);
return None;
}
};
if matches!(token, Token::Comparator(_)) {
self.comparator_depth = depth;
}
return Some(if let Some(prev_token) = prev_token {
self.next_token = Some(token);
prev_token
} else {
token
});
}
}
}
if self.depth > 0 {
eprintln!(
"Warning: Unmatched open parenthesis in meta expression {}",
self.expr
);
None
} else if !self.buf.is_empty() {
Some(Token::from(std::mem::take(&mut self.buf)))
} else {
None
}
}
}
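// Usage sketch (illustrative, not part of this commit): the lookahead around
// '&&' and '||' decides between Logical and bitwise-style Operation tokens by
// checking whether a comparator follows at a shallower nesting depth:
//
//     let t: Vec<Token> = Tokenizer::new("(A && B) > 1").collect();
//     // -> OpenParen, Tag("A"), Operation(And), Tag("B"), CloseParen,
//     //    Comparator(Gt), Number(1)
//
//     let t: Vec<Token> = Tokenizer::new("A && B > 1").collect();
//     // -> Tag("A"), Logical(And), Tag("B"), Comparator(Gt), Number(1)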