mirror of
https://github.com/stalwartlabs/mail-server.git
synced 2025-10-06 10:35:46 +08:00
AST parser for SpamAssassin meta expressions
This commit is contained in:
parent
016a5bde18
commit
dbaaff48f0
5 changed files with 612 additions and 413 deletions
177
crates/antispam/src/import/ast.rs
Normal file
177
crates/antispam/src/import/ast.rs
Normal file
|
@ -0,0 +1,177 @@
|
|||
use super::{BinaryOperator, Comparator, Expr, Logical, Operation, Token, UnaryOperator};
|
||||
|
||||
pub struct Parser<'x> {
|
||||
tokens: &'x [Token],
|
||||
position: usize,
|
||||
}
|
||||
|
||||
impl<'x> Parser<'x> {
|
||||
pub fn new(tokens: &'x [Token]) -> Self {
|
||||
Self {
|
||||
tokens,
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn consume(&mut self) -> Option<&'x Token> {
|
||||
if self.position < self.tokens.len() {
|
||||
let token = &self.tokens[self.position];
|
||||
self.position += 1;
|
||||
Some(token)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub fn peek(&self) -> Option<&'x Token> {
|
||||
if self.position < self.tokens.len() {
|
||||
Some(&self.tokens[self.position])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn primary(&mut self) -> Result<Expr, String> {
|
||||
match self.peek() {
|
||||
Some(&Token::Number(n)) => {
|
||||
self.consume();
|
||||
Ok(Expr::Literal(n))
|
||||
}
|
||||
Some(Token::Tag(ref id)) => {
|
||||
self.consume();
|
||||
Ok(Expr::Identifier(id.clone()))
|
||||
}
|
||||
Some(&Token::OpenParen) => {
|
||||
self.consume();
|
||||
let expr = self.expr();
|
||||
if let Some(&Token::CloseParen) = self.peek() {
|
||||
self.consume();
|
||||
expr
|
||||
} else {
|
||||
Err("Expected closing parenthesis".to_string())
|
||||
}
|
||||
}
|
||||
_ => Err("Unexpected token in factor".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn unary(&mut self) -> Result<Expr, String> {
|
||||
match self.peek() {
|
||||
Some(&Token::Logical(Logical::Not)) => {
|
||||
self.consume();
|
||||
let operand = self.primary()?;
|
||||
Ok(Expr::UnaryOp(UnaryOperator::Not, Box::new(operand)))
|
||||
}
|
||||
Some(&Token::Operation(Operation::Subtract)) => {
|
||||
self.consume();
|
||||
let operand = self.primary()?;
|
||||
Ok(Expr::UnaryOp(UnaryOperator::Minus, Box::new(operand)))
|
||||
}
|
||||
_ => self.primary(),
|
||||
}
|
||||
}
|
||||
|
||||
fn factor(&mut self) -> Result<Expr, String> {
|
||||
let mut left = self.unary()?;
|
||||
|
||||
while let Some(op @ Token::Operation(Operation::Multiply | Operation::Divide)) = self.peek()
|
||||
{
|
||||
self.consume();
|
||||
let right = self.unary()?;
|
||||
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
|
||||
}
|
||||
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn term(&mut self) -> Result<Expr, String> {
|
||||
let mut left = self.factor()?;
|
||||
|
||||
while let Some(op @ Token::Operation(Operation::Add | Operation::Subtract)) = self.peek() {
|
||||
self.consume();
|
||||
let right = self.factor()?;
|
||||
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
|
||||
}
|
||||
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn bitwise(&mut self) -> Result<Expr, String> {
|
||||
let mut left = self.term()?;
|
||||
while let Some(op @ Token::Operation(Operation::And | Operation::Or)) = self.peek() {
|
||||
self.consume();
|
||||
let right = self.term()?;
|
||||
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn comparison(&mut self) -> Result<Expr, String> {
|
||||
let mut left = self.bitwise()?;
|
||||
|
||||
while let Some(op @ Token::Comparator(_)) = self.peek() {
|
||||
self.consume();
|
||||
let right = self.bitwise()?;
|
||||
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
|
||||
}
|
||||
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn logical_and(&mut self) -> Result<Expr, String> {
|
||||
let mut left = self.comparison()?;
|
||||
|
||||
while let Some(Token::Logical(Logical::And)) = self.peek() {
|
||||
self.consume();
|
||||
let right = self.comparison()?;
|
||||
left = Expr::BinaryOp(Box::new(left), BinaryOperator::And, Box::new(right));
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn logical_or(&mut self) -> Result<Expr, String> {
|
||||
let mut left = self.logical_and()?;
|
||||
|
||||
while let Some(Token::Logical(Logical::Or)) = self.peek() {
|
||||
self.consume();
|
||||
let right = self.logical_and()?;
|
||||
left = Expr::BinaryOp(Box::new(left), BinaryOperator::Or, Box::new(right));
|
||||
}
|
||||
Ok(left)
|
||||
}
|
||||
|
||||
fn expr(&mut self) -> Result<Expr, String> {
|
||||
self.logical_or()
|
||||
}
|
||||
|
||||
pub fn parse(&mut self) -> Result<Expr, String> {
|
||||
let result = self.expr()?;
|
||||
if self.position < self.tokens.len() {
|
||||
println!("{result:#?}\n {} {}", self.position, self.tokens.len());
|
||||
Err("Unexpected tokens at the end of the expression".to_string())
|
||||
} else {
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Token> for BinaryOperator {
|
||||
fn from(value: &Token) -> Self {
|
||||
match value {
|
||||
Token::Operation(Operation::Add) => Self::Add,
|
||||
Token::Operation(Operation::Multiply) => Self::Multiply,
|
||||
Token::Operation(Operation::Divide) => Self::Divide,
|
||||
Token::Operation(Operation::Subtract) => Self::Subtract,
|
||||
Token::Operation(Operation::And) => Self::BitwiseAnd,
|
||||
Token::Operation(Operation::Or) => Self::BitwiseOr,
|
||||
Token::Logical(Logical::And) => Self::And,
|
||||
Token::Logical(Logical::Or) => Self::Or,
|
||||
Token::Comparator(Comparator::Gt) => Self::Greater,
|
||||
Token::Comparator(Comparator::Lt) => Self::Lesser,
|
||||
Token::Comparator(Comparator::Ge) => Self::GreaterOrEqual,
|
||||
Token::Comparator(Comparator::Le) => Self::LesserOrEqual,
|
||||
Token::Comparator(Comparator::Eq) => Self::Equal,
|
||||
_ => panic!("Invalid token"),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,336 +1,40 @@
|
|||
use std::{collections::HashMap, fmt::Display, iter::Peekable, str::Chars};
|
||||
use std::fmt::Display;
|
||||
|
||||
use super::{Comparator, Logical, Operation, Token};
|
||||
|
||||
// Parse a meta expression into a list of tokens that can be easily
|
||||
// converted into a Sieve test.
|
||||
// The parser is not very robust but works on all SpamAssassin meta expressions.
|
||||
// It might be a good idea in the future to instead build a parse tree and
|
||||
// then convert that into a Sieve expression.
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct MetaExpression {
|
||||
pub tokens: Vec<TokenDepth>,
|
||||
depth_range: HashMap<u32, DepthRange>,
|
||||
depth: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TokenDepth {
|
||||
pub token: Token,
|
||||
depth: u32,
|
||||
prefix: Vec<Token>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
struct DepthRange {
|
||||
start: usize,
|
||||
end: usize,
|
||||
expr_end: Option<(usize, bool)>,
|
||||
logic_end: bool,
|
||||
}
|
||||
use super::{
|
||||
ast::Parser, tokenizer::Tokenizer, BinaryOperator, Comparator, Expr, Logical, MetaExpression,
|
||||
Operation, Token, UnaryOperator, UnwrapResult,
|
||||
};
|
||||
|
||||
// Parse a meta expression into a list of tokens that can be converted into a Sieve test.
|
||||
impl MetaExpression {
|
||||
pub fn from_meta(expr: &str) -> Self {
|
||||
let mut meta = MetaExpression::default();
|
||||
let mut seen_comp = false;
|
||||
let mut buf = String::new();
|
||||
let mut iter = expr.chars().peekable();
|
||||
let mut tokens = Tokenizer::new(expr).collect::<Vec<_>>();
|
||||
|
||||
while let Some(ch) = iter.next() {
|
||||
match ch {
|
||||
'A'..='Z' | 'a'..='z' | '0'..='9' | '_' => {
|
||||
buf.push(ch);
|
||||
}
|
||||
_ => {
|
||||
if !buf.is_empty() {
|
||||
let token = Token::from(buf);
|
||||
buf = String::new();
|
||||
if !seen_comp && !meta.has_comparator(iter.clone()) {
|
||||
meta.push(token);
|
||||
meta.push(Token::Comparator(Comparator::Gt));
|
||||
meta.push(Token::Number(0));
|
||||
seen_comp = true;
|
||||
} else {
|
||||
meta.push(token);
|
||||
}
|
||||
}
|
||||
|
||||
match ch {
|
||||
'&' => {
|
||||
seen_comp = false;
|
||||
if matches!(iter.next(), Some('&')) {
|
||||
meta.push(Token::Logical(Logical::And));
|
||||
} else {
|
||||
eprintln!("Warning: Single & in meta expression {expr}",);
|
||||
}
|
||||
}
|
||||
'|' => {
|
||||
seen_comp = false;
|
||||
if matches!(iter.next(), Some('|')) {
|
||||
meta.push(Token::Logical(Logical::Or));
|
||||
} else {
|
||||
eprintln!("Warning: Single | in meta expression {expr}",);
|
||||
}
|
||||
}
|
||||
'!' => {
|
||||
seen_comp = false;
|
||||
meta.push(Token::Logical(Logical::Not))
|
||||
}
|
||||
'=' => {
|
||||
seen_comp = true;
|
||||
meta.push(match iter.next() {
|
||||
Some('=') => Token::Comparator(Comparator::Eq),
|
||||
Some('>') => Token::Comparator(Comparator::Ge),
|
||||
Some('<') => Token::Comparator(Comparator::Le),
|
||||
_ => {
|
||||
eprintln!("Warning: Single = in meta expression {expr}",);
|
||||
Token::Comparator(Comparator::Eq)
|
||||
}
|
||||
});
|
||||
}
|
||||
'>' => {
|
||||
seen_comp = true;
|
||||
meta.push(match iter.peek() {
|
||||
Some('=') => {
|
||||
iter.next();
|
||||
Token::Comparator(Comparator::Ge)
|
||||
}
|
||||
_ => Token::Comparator(Comparator::Gt),
|
||||
})
|
||||
}
|
||||
'<' => {
|
||||
seen_comp = true;
|
||||
meta.push(match iter.peek() {
|
||||
Some('=') => {
|
||||
iter.next();
|
||||
Token::Comparator(Comparator::Le)
|
||||
}
|
||||
_ => Token::Comparator(Comparator::Lt),
|
||||
})
|
||||
}
|
||||
'(' => meta.push(Token::OpenParen),
|
||||
')' => {
|
||||
if meta.depth == 0 {
|
||||
eprintln!(
|
||||
"Warning: Unmatched close parenthesis in meta expression {expr}"
|
||||
);
|
||||
}
|
||||
|
||||
meta.push(Token::CloseParen)
|
||||
}
|
||||
'+' => meta.push(Token::Operation(Operation::Add)),
|
||||
'*' => meta.push(Token::Operation(Operation::Multiply)),
|
||||
'/' => meta.push(Token::Operation(Operation::Divide)),
|
||||
' ' => {}
|
||||
_ => {
|
||||
eprintln!("Warning: Invalid character {ch} in meta expression {expr}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if meta.depth > 0 {
|
||||
eprintln!("Warning: Unmatched open parenthesis in meta expression {expr}");
|
||||
}
|
||||
|
||||
if !buf.is_empty() {
|
||||
meta.push(Token::from(buf));
|
||||
if !seen_comp {
|
||||
meta.push(Token::Comparator(Comparator::Gt));
|
||||
meta.push(Token::Number(0));
|
||||
}
|
||||
}
|
||||
|
||||
meta.finalize();
|
||||
meta
|
||||
}
|
||||
|
||||
fn push(&mut self, mut token: Token) {
|
||||
let pos = self.tokens.len();
|
||||
let depth_range = self
|
||||
.depth_range
|
||||
.entry(self.depth)
|
||||
.or_insert_with(|| DepthRange {
|
||||
start: pos,
|
||||
end: pos,
|
||||
..Default::default()
|
||||
});
|
||||
depth_range.end = pos;
|
||||
let mut depth = self.depth;
|
||||
let mut prefix = vec![];
|
||||
|
||||
match &token {
|
||||
Token::OpenParen => {
|
||||
if let Some((pos, true)) = depth_range.expr_end {
|
||||
depth_range.expr_end = Some((pos, false));
|
||||
}
|
||||
self.depth += 1;
|
||||
}
|
||||
Token::CloseParen => {
|
||||
if let Some((pos, is_static)) = depth_range.expr_end.take() {
|
||||
self.tokens[pos + 2]
|
||||
.prefix
|
||||
.push(Token::BeginExpression(is_static));
|
||||
prefix.push(Token::EndExpression(is_static));
|
||||
}
|
||||
if depth_range.logic_end {
|
||||
prefix.push(Token::CloseParen);
|
||||
}
|
||||
self.depth = self.depth.saturating_sub(1);
|
||||
depth = self.depth;
|
||||
}
|
||||
Token::Logical(op) => {
|
||||
if self
|
||||
.tokens
|
||||
.iter()
|
||||
.any(|t| matches!(t.token, Token::Comparator(_)) && t.depth < depth)
|
||||
{
|
||||
token = Token::Operation(match op {
|
||||
Logical::And => Operation::And,
|
||||
Logical::Or => Operation::Or,
|
||||
Logical::Not => Operation::Not,
|
||||
});
|
||||
if let Some((pos, true)) = depth_range.expr_end {
|
||||
depth_range.expr_end = Some((pos, false));
|
||||
}
|
||||
} else if matches!(op, Logical::Or | Logical::And) {
|
||||
let start_prefix = &mut self.tokens[depth_range.start].prefix;
|
||||
if !start_prefix.contains(&Token::Logical(Logical::And))
|
||||
&& !start_prefix.contains(&Token::Logical(Logical::Or))
|
||||
{
|
||||
start_prefix.insert(0, token.clone());
|
||||
}
|
||||
depth_range.logic_end = true;
|
||||
if let Some((pos, is_static)) = depth_range.expr_end.take() {
|
||||
self.tokens[pos + 2]
|
||||
.prefix
|
||||
.push(Token::BeginExpression(is_static));
|
||||
prefix.push(Token::EndExpression(is_static));
|
||||
}
|
||||
}
|
||||
}
|
||||
Token::Comparator(_) => {
|
||||
let mut is_static = true;
|
||||
let mut start_pos = usize::MAX;
|
||||
for (pos, token) in self.tokens.iter_mut().enumerate().rev() {
|
||||
if token.depth >= depth {
|
||||
start_pos = pos;
|
||||
match &token.token {
|
||||
Token::Logical(op) => {
|
||||
if token.depth == depth {
|
||||
start_pos += 1;
|
||||
break;
|
||||
} else {
|
||||
is_static = false;
|
||||
token.token = Token::Operation(match op {
|
||||
Logical::And => Operation::And,
|
||||
Logical::Or => Operation::Or,
|
||||
Logical::Not => Operation::Not,
|
||||
});
|
||||
token.prefix.clear();
|
||||
}
|
||||
}
|
||||
Token::OpenParen
|
||||
| Token::CloseParen
|
||||
| Token::Operation(_)
|
||||
| Token::Tag(_) => {
|
||||
is_static = false;
|
||||
}
|
||||
_ => {}
|
||||
// If there are no comparators, we can just turn it into an expression
|
||||
if !tokens.iter().any(|t| matches!(t, Token::Comparator(_))) {
|
||||
let prev_tokens = tokens;
|
||||
tokens = Vec::with_capacity(prev_tokens.len() + 3);
|
||||
tokens.push(Token::OpenParen);
|
||||
for token in prev_tokens {
|
||||
tokens.push(if let Token::Logical(op) = token {
|
||||
match op {
|
||||
Logical::And => Token::Operation(Operation::And),
|
||||
Logical::Or => Token::Operation(Operation::Or),
|
||||
Logical::Not => Token::Logical(Logical::Not),
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if start_pos != usize::MAX {
|
||||
self.tokens.push(TokenDepth {
|
||||
token: Token::EndExpression(is_static),
|
||||
depth,
|
||||
prefix: vec![],
|
||||
});
|
||||
self.tokens[start_pos].prefix =
|
||||
vec![token.clone(), Token::BeginExpression(is_static)];
|
||||
depth_range.expr_end = Some((pos, true));
|
||||
}
|
||||
}
|
||||
Token::Tag(_) | Token::Operation(_) => {
|
||||
if let Some((pos, true)) = depth_range.expr_end {
|
||||
depth_range.expr_end = Some((pos, false));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
self.tokens.push(TokenDepth {
|
||||
token,
|
||||
depth,
|
||||
prefix,
|
||||
})
|
||||
}
|
||||
|
||||
fn finalize(&mut self) {
|
||||
if let Some(depth_range) = self.depth_range.get(&self.depth) {
|
||||
if let Some((pos, is_static)) = depth_range.expr_end {
|
||||
self.tokens[pos + 2]
|
||||
.prefix
|
||||
.push(Token::BeginExpression(is_static));
|
||||
self.tokens.push(TokenDepth {
|
||||
token: Token::EndExpression(is_static),
|
||||
depth: self.depth,
|
||||
prefix: vec![],
|
||||
token
|
||||
});
|
||||
}
|
||||
if depth_range.logic_end {
|
||||
self.tokens.push(TokenDepth {
|
||||
token: Token::CloseParen,
|
||||
depth: self.depth,
|
||||
prefix: vec![],
|
||||
});
|
||||
}
|
||||
}
|
||||
tokens.push(Token::CloseParen);
|
||||
tokens.push(Token::Comparator(Comparator::Gt));
|
||||
tokens.push(Token::Number(0));
|
||||
}
|
||||
|
||||
fn has_comparator(&self, iter: Peekable<Chars<'_>>) -> bool {
|
||||
let mut d = self.depth;
|
||||
let mut comp_depth = None;
|
||||
let mut logic_depth = None;
|
||||
|
||||
for (pos, ch) in iter.enumerate() {
|
||||
match ch {
|
||||
'(' => {
|
||||
d += 1;
|
||||
}
|
||||
')' => {
|
||||
d = d.saturating_sub(1);
|
||||
}
|
||||
'>' | '<' | '=' => {
|
||||
comp_depth = Some((pos, d));
|
||||
break;
|
||||
}
|
||||
'&' | '|' => {
|
||||
if d <= self.depth {
|
||||
logic_depth = Some((pos, d));
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
println!("comp_depth: {comp_depth:?} {logic_depth:?}");
|
||||
|
||||
match (comp_depth, logic_depth) {
|
||||
(Some((comp_pos, comp_depth)), Some((logic_pos, logic_depth))) => {
|
||||
match comp_depth.cmp(&logic_depth) {
|
||||
std::cmp::Ordering::Less => true,
|
||||
std::cmp::Ordering::Equal => comp_pos < logic_pos,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
(Some(_), None) => true,
|
||||
_ => false,
|
||||
}
|
||||
let expr = Parser::new(&tokens)
|
||||
.parse()
|
||||
.unwrap_result("parse expression");
|
||||
MetaExpression { tokens, expr }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -344,112 +48,192 @@ impl From<String> for Token {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for MetaExpression {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("if ")?;
|
||||
|
||||
for token in &self.tokens {
|
||||
for token in &token.prefix {
|
||||
token.fmt(f)?;
|
||||
impl Expr {
|
||||
fn fmt_child(
|
||||
&self,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
parent: Option<&BinaryOperator>,
|
||||
in_comp: bool,
|
||||
) -> std::fmt::Result {
|
||||
match self {
|
||||
Expr::UnaryOp(op, expr) => {
|
||||
let add_p =
|
||||
in_comp && !matches!(expr.as_ref(), Expr::Literal(_) | Expr::Identifier(_));
|
||||
match op {
|
||||
UnaryOperator::Not => f.write_str(if in_comp { "!" } else { "not " })?,
|
||||
UnaryOperator::Minus => f.write_str("-")?,
|
||||
}
|
||||
|
||||
match &token.token {
|
||||
Token::Logical(Logical::And) | Token::Logical(Logical::Or) => f.write_str(", "),
|
||||
Token::Comparator(Comparator::Gt)
|
||||
| Token::Comparator(Comparator::Lt)
|
||||
| Token::Comparator(Comparator::Eq)
|
||||
| Token::Comparator(Comparator::Ge)
|
||||
| Token::Comparator(Comparator::Le) => f.write_str(" "),
|
||||
_ => token.token.fmt(f),
|
||||
}?;
|
||||
if add_p {
|
||||
f.write_str("(")?;
|
||||
}
|
||||
|
||||
expr.fmt_child(f, None, in_comp)?;
|
||||
if add_p {
|
||||
f.write_str(")")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Expr::BinaryOp(left, op, right) => match op {
|
||||
BinaryOperator::Or | BinaryOperator::And => {
|
||||
let add_p = parent.map_or(true, |pop| pop.precedence() != op.precedence());
|
||||
if add_p {
|
||||
write!(f, "{op}(")?;
|
||||
}
|
||||
left.fmt_child(f, op.into(), in_comp)?;
|
||||
f.write_str(", ")?;
|
||||
right.fmt_child(f, op.into(), in_comp)?;
|
||||
if add_p {
|
||||
f.write_str(")")
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
BinaryOperator::Greater
|
||||
| BinaryOperator::Lesser
|
||||
| BinaryOperator::GreaterOrEqual
|
||||
| BinaryOperator::LesserOrEqual
|
||||
| BinaryOperator::Equal => {
|
||||
write!(f, "string :value {op} :comparator \"i;ascii-numeric\" \"")?;
|
||||
let is_literal = matches!(left.as_ref(), Expr::Literal(_));
|
||||
if !is_literal {
|
||||
f.write_str("${")?;
|
||||
}
|
||||
left.fmt_child(f, None, true)?;
|
||||
if !is_literal {
|
||||
f.write_str("}")?;
|
||||
}
|
||||
f.write_str("\" \"")?;
|
||||
let is_literal = matches!(right.as_ref(), Expr::Literal(_));
|
||||
if !is_literal {
|
||||
f.write_str("${")?;
|
||||
}
|
||||
right.fmt_child(f, None, true)?;
|
||||
if !is_literal {
|
||||
f.write_str("}")?;
|
||||
}
|
||||
f.write_str("\"")
|
||||
}
|
||||
BinaryOperator::Add
|
||||
| BinaryOperator::Subtract
|
||||
| BinaryOperator::Multiply
|
||||
| BinaryOperator::Divide
|
||||
| BinaryOperator::BitwiseAnd
|
||||
| BinaryOperator::BitwiseOr => {
|
||||
let add_p = parent.map_or(false, |pop| pop.precedence() != op.precedence());
|
||||
if add_p {
|
||||
f.write_str("(")?;
|
||||
}
|
||||
left.fmt_child(f, op.into(), in_comp)?;
|
||||
op.fmt(f)?;
|
||||
right.fmt_child(f, op.into(), in_comp)?;
|
||||
if add_p {
|
||||
f.write_str(")")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
Expr::Literal(v) => {
|
||||
if !in_comp {
|
||||
write!(
|
||||
f,
|
||||
"string :value \"gt\" :comparator \"i;ascii-numeric\" \"{v}\" \"0\""
|
||||
)
|
||||
} else {
|
||||
v.fmt(f)
|
||||
}
|
||||
}
|
||||
Expr::Identifier(i) => {
|
||||
if !in_comp {
|
||||
write!(
|
||||
f,
|
||||
"string :value \"gt\" :comparator \"i;ascii-numeric\" \"${{{i}}}\" \"0\"",
|
||||
)
|
||||
} else {
|
||||
i.fmt(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Token {
|
||||
impl Display for MetaExpression {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str("if ")?;
|
||||
self.expr.fmt_child(f, None, false)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for BinaryOperator {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Token::Tag(t) => t.fmt(f),
|
||||
Token::Number(n) => n.fmt(f),
|
||||
Token::Logical(Logical::And) => f.write_str("allof("),
|
||||
Token::Logical(Logical::Or) => f.write_str("anyof("),
|
||||
Token::Logical(Logical::Not) => f.write_str("not "),
|
||||
Token::Comparator(comp) => {
|
||||
f.write_str("string :value \"")?;
|
||||
match comp {
|
||||
Comparator::Eq => f.write_str("eq")?,
|
||||
Comparator::Gt => f.write_str("gt")?,
|
||||
Comparator::Lt => f.write_str("lt")?,
|
||||
Comparator::Ge => f.write_str("ge")?,
|
||||
Comparator::Le => f.write_str("gt")?,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
f.write_str("\" :comparator \"i;ascii-numeric\" ")
|
||||
}
|
||||
|
||||
Token::OpenParen => f.write_str("("),
|
||||
Token::CloseParen => f.write_str(")"),
|
||||
Token::Operation(Operation::Add) => f.write_str(" + "),
|
||||
Token::Operation(Operation::Multiply) => f.write_str(" * "),
|
||||
Token::Operation(Operation::Divide) => f.write_str(" / "),
|
||||
Token::Operation(Operation::And) => f.write_str(" & "),
|
||||
Token::Operation(Operation::Or) => f.write_str(" | "),
|
||||
Token::Operation(Operation::Not) => f.write_str("!"),
|
||||
Token::BeginExpression(is_static) => {
|
||||
if *is_static {
|
||||
f.write_str("\"")
|
||||
} else {
|
||||
f.write_str("\"${")
|
||||
}
|
||||
}
|
||||
Token::EndExpression(is_static) => {
|
||||
if *is_static {
|
||||
f.write_str("\"")
|
||||
} else {
|
||||
f.write_str("}\"")
|
||||
}
|
||||
}
|
||||
BinaryOperator::Or => f.write_str("anyof"),
|
||||
BinaryOperator::And => f.write_str("allof"),
|
||||
BinaryOperator::BitwiseOr => f.write_str(" | "),
|
||||
BinaryOperator::BitwiseAnd => f.write_str(" & "),
|
||||
BinaryOperator::Greater => f.write_str("\"gt\""),
|
||||
BinaryOperator::Lesser => f.write_str("\"lt\""),
|
||||
BinaryOperator::GreaterOrEqual => f.write_str("\"ge\""),
|
||||
BinaryOperator::LesserOrEqual => f.write_str("\"le\""),
|
||||
BinaryOperator::Equal => f.write_str("\"eq\""),
|
||||
BinaryOperator::Add => f.write_str(" + "),
|
||||
BinaryOperator::Subtract => f.write_str(" - "),
|
||||
BinaryOperator::Multiply => f.write_str(" * "),
|
||||
BinaryOperator::Divide => f.write_str(" / "),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::MetaExpression;
|
||||
use crate::import::MetaExpression;
|
||||
|
||||
#[test]
|
||||
fn parse_meta() {
|
||||
for (expr, expected) in [
|
||||
/*(
|
||||
for (pos, (expr, expected)) in [
|
||||
(
|
||||
concat!(
|
||||
"( ! HTML_IMAGE_ONLY_16 ) && ",
|
||||
"( __LOWER_E > 20 ) && ",
|
||||
"( __E_LIKE_LETTER > ( (__LOWER_E * 14 ) / 10) ) && ",
|
||||
"( __E_LIKE_LETTER < ( 10 * __LOWER_E ) )"
|
||||
"( ! A ) && ",
|
||||
"( B > 20 ) && ",
|
||||
"( C > ( (D * 14 ) / 10) ) && ",
|
||||
"( E < ( 10 * F ) )"
|
||||
),
|
||||
"",
|
||||
),
|
||||
("(__DRUGS_ERECTILE1 || __DRUGS_ERECTILE2)", ""),
|
||||
("(__HELO_DYNAMIC_IPADDR && !HELO_STATIC_HOST)", ""),
|
||||
("__ML2 || __ML4", ""),
|
||||
("(__AT_HOTMAIL_MSGID && (!__FROM_HOTMAIL_COM && !__FROM_MSN_COM && !__FROM_YAHOO_COM))", ""),
|
||||
("(A || B)", ""),
|
||||
("(A && !B)", ""),
|
||||
("A || B", ""),
|
||||
("(A && (!B && !C && !D))", ""),
|
||||
("(0)", ""),
|
||||
("RAZOR2_CHECK + DCC_CHECK + PYZOR_CHECK > 1", ""),
|
||||
("(SUBJECT_IN_BLOCKLIST)", ""),
|
||||
("__HAS_MSGID && !(__SANE_MSGID || __MSGID_COMMENT)", ""),
|
||||
("!__CTYPE_HTML && __X_MAILER_APPLEMAIL && (__MSGID_APPLEMAIL || __MIME_VERSION_APPLEMAIL)", ""),
|
||||
("((__AUTO_GEN_MS||__AUTO_GEN_3||__AUTO_GEN_4) && !__XM_VBULLETIN && !__X_CRON_ENV)", ""),*/
|
||||
("(__WEBMAIL_ACCT + __MAILBOX_FULL + (__TVD_PH_SUBJ_META || __TVD_PH_BODY_META) > 3)", ""),
|
||||
|
||||
] {
|
||||
("A + B + C > 1", ""),
|
||||
("(A)", ""),
|
||||
("A && !(B || C)", ""),
|
||||
("!A && B && (C || D)", ""),
|
||||
("((A||B||C) && !D && !E)", ""),
|
||||
("(A + B + (C || D) > 3)", ""),
|
||||
(
|
||||
"(A || B) > 2 && (C && D) == 0 || ((E+F-G) > 0 || (H||I) <= 4)",
|
||||
"",
|
||||
),
|
||||
("(A || B) > (C && D) && E", ""),
|
||||
//("", ""),
|
||||
]
|
||||
.iter()
|
||||
.enumerate()
|
||||
{
|
||||
let meta = MetaExpression::from_meta(expr);
|
||||
//println!("{:#?}", meta.tokens);
|
||||
let result = meta.to_string();
|
||||
/*if pos != 13 {
|
||||
continue;
|
||||
}*/
|
||||
|
||||
println!("{expr}");
|
||||
println!("{}", result);
|
||||
//let tokens = Tokenizer::new(expr).collect::<Vec<_>>();
|
||||
//println!("{tokens:?}");
|
||||
//let mut p = Parser::new(&tokens);
|
||||
//let expr = p.parse().unwrap();
|
||||
|
||||
//println!("{:#?}", expr);
|
||||
|
||||
println!("{}\n------------------------------------", meta);
|
||||
|
||||
/*assert_eq!(
|
||||
result,
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use self::meta::MetaExpression;
|
||||
|
||||
pub mod ast;
|
||||
pub mod meta;
|
||||
pub mod spamassassin;
|
||||
pub mod tokenizer;
|
||||
pub mod utils;
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
|
@ -49,6 +49,12 @@ enum RuleType {
|
|||
None,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct MetaExpression {
|
||||
pub tokens: Vec<Token>,
|
||||
pub expr: Expr,
|
||||
}
|
||||
|
||||
impl RuleType {
|
||||
pub fn pattern(&mut self) -> Option<&mut String> {
|
||||
match self {
|
||||
|
@ -116,10 +122,6 @@ pub enum Token {
|
|||
|
||||
OpenParen,
|
||||
CloseParen,
|
||||
|
||||
// Sieve specific
|
||||
BeginExpression(bool),
|
||||
EndExpression(bool),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
|
@ -143,11 +145,66 @@ pub enum Operation {
|
|||
Add,
|
||||
Multiply,
|
||||
Divide,
|
||||
Subtract,
|
||||
And,
|
||||
Or,
|
||||
Not,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum Expr {
|
||||
UnaryOp(UnaryOperator, Box<Expr>),
|
||||
BinaryOp(Box<Expr>, BinaryOperator, Box<Expr>),
|
||||
Literal(u32),
|
||||
Identifier(String),
|
||||
}
|
||||
|
||||
impl Default for Expr {
|
||||
fn default() -> Self {
|
||||
Self::Literal(0)
|
||||
}
|
||||
}
|
||||
|
||||
/// Prefix operators that can appear in an expression tree.
#[derive(Debug, PartialEq, Clone)]
pub enum UnaryOperator {
    /// Negation operator.
    Not,
    /// Minus sign.
    Minus,
}
|
||||
|
||||
/// Infix operators that can appear in an expression tree.
#[derive(Debug, PartialEq, Clone)]
pub enum BinaryOperator {
    Or,
    And,
    Greater,
    Lesser,
    GreaterOrEqual,
    LesserOrEqual,
    Equal,
    Add,
    Subtract,
    Multiply,
    Divide,
    BitwiseAnd,
    BitwiseOr,
}

impl BinaryOperator {
    /// Returns the precedence class of the operator as a number from 1 to 6.
    ///
    /// Operators sharing a class return the same value: `Or` is 1, `And` is 2,
    /// all comparators are 3, `Add`/`Subtract` are 4, `Multiply`/`Divide` are 5
    /// and the bitwise operators are 6.
    pub fn precedence(&self) -> u32 {
        match *self {
            BinaryOperator::BitwiseAnd | BinaryOperator::BitwiseOr => 6,
            BinaryOperator::Multiply | BinaryOperator::Divide => 5,
            BinaryOperator::Add | BinaryOperator::Subtract => 4,
            BinaryOperator::Greater
            | BinaryOperator::Lesser
            | BinaryOperator::GreaterOrEqual
            | BinaryOperator::LesserOrEqual
            | BinaryOperator::Equal => 3,
            BinaryOperator::And => 2,
            BinaryOperator::Or => 1,
        }
    }
}
|
||||
|
||||
impl Rule {
|
||||
fn score(&self) -> f64 {
|
||||
self.scores.last().copied().unwrap_or_else(|| {
|
||||
|
|
|
@ -6,9 +6,9 @@ use std::{
|
|||
};
|
||||
|
||||
use super::{
|
||||
meta::MetaExpression,
|
||||
utils::{fix_broken_regex, replace_tags},
|
||||
Header, HeaderMatches, HeaderPart, Rule, RuleType, TestFlag, Token, UnwrapResult,
|
||||
Header, HeaderMatches, HeaderPart, MetaExpression, Rule, RuleType, TestFlag, Token,
|
||||
UnwrapResult,
|
||||
};
|
||||
|
||||
const VERSION: f64 = 4.000000;
|
||||
|
@ -1038,7 +1038,7 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
|
|||
&& !meta
|
||||
.tokens
|
||||
.iter()
|
||||
.any(|t| matches!(&t.token, Token::Tag(n) if n == &rule.name)))
|
||||
.any(|t| matches!(&t, Token::Tag(n) if n == &rule.name)))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
|
181
crates/antispam/src/import/tokenizer.rs
Normal file
181
crates/antispam/src/import/tokenizer.rs
Normal file
|
@ -0,0 +1,181 @@
|
|||
use super::{Comparator, Logical, Operation, Token};
|
||||
|
||||
pub struct Tokenizer<'x> {
|
||||
expr: &'x str,
|
||||
iter: std::iter::Peekable<std::str::Chars<'x>>,
|
||||
buf: String,
|
||||
depth: u32,
|
||||
comparator_depth: u32,
|
||||
next_token: Option<Token>,
|
||||
}
|
||||
|
||||
impl<'x> Tokenizer<'x> {
|
||||
pub fn new(expr: &'x str) -> Self {
|
||||
Self {
|
||||
expr,
|
||||
iter: expr.chars().peekable(),
|
||||
buf: String::new(),
|
||||
depth: 0,
|
||||
next_token: None,
|
||||
comparator_depth: u32::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'x> Iterator for Tokenizer<'x> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(token) = self.next_token.take() {
|
||||
return Some(token);
|
||||
}
|
||||
|
||||
while let Some(ch) = self.iter.next() {
|
||||
match ch {
|
||||
'A'..='Z' | 'a'..='z' | '0'..='9' | '_' => {
|
||||
self.buf.push(ch);
|
||||
}
|
||||
_ => {
|
||||
let mut depth = self.depth;
|
||||
let prev_token = if !self.buf.is_empty() {
|
||||
Token::from(std::mem::take(&mut self.buf)).into()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let token = match ch {
|
||||
'&' | '|' => {
|
||||
if matches!(self.iter.next(), Some(c) if c == ch) {
|
||||
let is_and = ch == '&';
|
||||
if self.depth > self.comparator_depth {
|
||||
Token::Operation(if is_and {
|
||||
Operation::And
|
||||
} else {
|
||||
Operation::Or
|
||||
})
|
||||
} else {
|
||||
let mut depth = self.depth;
|
||||
let mut found_comp = false;
|
||||
|
||||
for ch in self.iter.clone() {
|
||||
match ch {
|
||||
'(' => depth += 1,
|
||||
')' => {
|
||||
depth -= 1;
|
||||
}
|
||||
'<' | '>' | '=' => {
|
||||
found_comp = true;
|
||||
break;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
if found_comp && depth < self.depth {
|
||||
self.comparator_depth = depth;
|
||||
Token::Operation(if is_and {
|
||||
Operation::And
|
||||
} else {
|
||||
Operation::Or
|
||||
})
|
||||
} else {
|
||||
self.comparator_depth = u32::MAX;
|
||||
Token::Logical(if is_and {
|
||||
Logical::And
|
||||
} else {
|
||||
Logical::Or
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("Warning: Single {ch} in meta expression {}", self.expr);
|
||||
return None;
|
||||
}
|
||||
}
|
||||
'!' => Token::Logical(Logical::Not),
|
||||
'=' => match self.iter.next() {
|
||||
Some('=') => Token::Comparator(Comparator::Eq),
|
||||
Some('>') => Token::Comparator(Comparator::Ge),
|
||||
Some('<') => Token::Comparator(Comparator::Le),
|
||||
_ => {
|
||||
eprintln!("Warning: Single = in meta expression {}", self.expr);
|
||||
Token::Comparator(Comparator::Eq)
|
||||
}
|
||||
},
|
||||
'>' => match self.iter.peek() {
|
||||
Some('=') => {
|
||||
self.iter.next();
|
||||
Token::Comparator(Comparator::Ge)
|
||||
}
|
||||
_ => Token::Comparator(Comparator::Gt),
|
||||
},
|
||||
'<' => match self.iter.peek() {
|
||||
Some('=') => {
|
||||
self.iter.next();
|
||||
Token::Comparator(Comparator::Le)
|
||||
}
|
||||
_ => Token::Comparator(Comparator::Lt),
|
||||
},
|
||||
'(' => {
|
||||
self.depth += 1;
|
||||
Token::OpenParen
|
||||
}
|
||||
')' => {
|
||||
if self.depth == 0 {
|
||||
eprintln!(
|
||||
"Warning: Unmatched close parenthesis in meta expression {}",
|
||||
self.expr
|
||||
);
|
||||
return None;
|
||||
}
|
||||
self.depth -= 1;
|
||||
depth = self.depth;
|
||||
|
||||
Token::CloseParen
|
||||
}
|
||||
'+' => Token::Operation(Operation::Add),
|
||||
'*' => Token::Operation(Operation::Multiply),
|
||||
'/' => Token::Operation(Operation::Divide),
|
||||
'-' => Token::Operation(Operation::Subtract),
|
||||
' ' => {
|
||||
if let Some(prev_token) = prev_token {
|
||||
return Some(prev_token);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
eprintln!(
|
||||
"Warning: Invalid character {ch} in meta expression {}",
|
||||
self.expr
|
||||
);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
|
||||
if matches!(token, Token::Comparator(_)) {
|
||||
self.comparator_depth = depth;
|
||||
}
|
||||
|
||||
return Some(if let Some(prev_token) = prev_token {
|
||||
self.next_token = Some(token);
|
||||
prev_token
|
||||
} else {
|
||||
token
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.depth > 0 {
|
||||
eprintln!(
|
||||
"Warning: Unmatched open parenthesis in meta expression {}",
|
||||
self.expr
|
||||
);
|
||||
None
|
||||
} else if !self.buf.is_empty() {
|
||||
Some(Token::from(std::mem::take(&mut self.buf)))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue