Mirror of https://github.com/stalwartlabs/mail-server.git (synced 2025-09-11 22:44:29 +08:00)
Antispam: Eval meta expressions in Sieve, support for named capture groups.
This commit is contained in: parent 79599f2f9c, commit e73c82e7d8
7 changed files with 274 additions and 626 deletions
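To support named capture groups, the importer now compiles each rule's pattern with fancy_regex and records the named groups and their positions in the rule's new `captured_vars` field, so that other rules can consume them as variables. A rough standalone sketch of that collection step (the function name and the example pattern below are invented for illustration; only the `capture_names()`-based logic mirrors the change):

// Sketch only: collect (name, index) pairs for the named groups of a pattern,
// the same way the importer now fills `rule.captured_vars`.
fn capture_positions(pattern: &str) -> Vec<(String, usize)> {
    fancy_regex::Regex::new(pattern)
        .map(|re| {
            re.capture_names()
                .enumerate()
                .filter_map(|(pos, name)| name.map(|n| (n.to_string(), pos)))
                .collect()
        })
        .unwrap_or_default()
}

fn main() {
    // A hypothetical pattern exposing GB_TO_ADDR as a reusable variable;
    // group 0 is the whole match, so the named group sits at index 1.
    println!("{:?}", capture_positions(r"to=(?P<GB_TO_ADDR>[^,\s]+)"));
}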
@@ -1,177 +0,0 @@
use super::{BinaryOperator, Comparator, Expr, Logical, Operation, Token, UnaryOperator};

pub struct Parser<'x> {
    tokens: &'x [Token],
    position: usize,
}

impl<'x> Parser<'x> {
    pub fn new(tokens: &'x [Token]) -> Self {
        Self {
            tokens,
            position: 0,
        }
    }

    pub fn consume(&mut self) -> Option<&'x Token> {
        if self.position < self.tokens.len() {
            let token = &self.tokens[self.position];
            self.position += 1;
            Some(token)
        } else {
            None
        }
    }

    pub fn peek(&self) -> Option<&'x Token> {
        if self.position < self.tokens.len() {
            Some(&self.tokens[self.position])
        } else {
            None
        }
    }

    fn primary(&mut self) -> Result<Expr, String> {
        match self.peek() {
            Some(&Token::Number(n)) => {
                self.consume();
                Ok(Expr::Literal(n))
            }
            Some(Token::Tag(ref id)) => {
                self.consume();
                Ok(Expr::Identifier(id.clone()))
            }
            Some(&Token::OpenParen) => {
                self.consume();
                let expr = self.expr();
                if let Some(&Token::CloseParen) = self.peek() {
                    self.consume();
                    expr
                } else {
                    Err("Expected closing parenthesis".to_string())
                }
            }
            _ => Err("Unexpected token in factor".to_string()),
        }
    }

    fn unary(&mut self) -> Result<Expr, String> {
        match self.peek() {
            Some(&Token::Logical(Logical::Not)) => {
                self.consume();
                let operand = self.primary()?;
                Ok(Expr::UnaryOp(UnaryOperator::Not, Box::new(operand)))
            }
            Some(&Token::Operation(Operation::Subtract)) => {
                self.consume();
                let operand = self.primary()?;
                Ok(Expr::UnaryOp(UnaryOperator::Minus, Box::new(operand)))
            }
            _ => self.primary(),
        }
    }

    fn factor(&mut self) -> Result<Expr, String> {
        let mut left = self.unary()?;

        while let Some(op @ Token::Operation(Operation::Multiply | Operation::Divide)) = self.peek()
        {
            self.consume();
            let right = self.unary()?;
            left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
        }

        Ok(left)
    }

    fn term(&mut self) -> Result<Expr, String> {
        let mut left = self.factor()?;

        while let Some(op @ Token::Operation(Operation::Add | Operation::Subtract)) = self.peek() {
            self.consume();
            let right = self.factor()?;
            left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
        }

        Ok(left)
    }

    fn bitwise(&mut self) -> Result<Expr, String> {
        let mut left = self.term()?;
        while let Some(op @ Token::Operation(Operation::And | Operation::Or)) = self.peek() {
            self.consume();
            let right = self.term()?;
            left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
        }
        Ok(left)
    }

    fn comparison(&mut self) -> Result<Expr, String> {
        let mut left = self.bitwise()?;

        while let Some(op @ Token::Comparator(_)) = self.peek() {
            self.consume();
            let right = self.bitwise()?;
            left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
        }

        Ok(left)
    }

    fn logical_and(&mut self) -> Result<Expr, String> {
        let mut left = self.comparison()?;

        while let Some(Token::Logical(Logical::And)) = self.peek() {
            self.consume();
            let right = self.comparison()?;
            left = Expr::BinaryOp(Box::new(left), BinaryOperator::And, Box::new(right));
        }
        Ok(left)
    }

    fn logical_or(&mut self) -> Result<Expr, String> {
        let mut left = self.logical_and()?;

        while let Some(Token::Logical(Logical::Or)) = self.peek() {
            self.consume();
            let right = self.logical_and()?;
            left = Expr::BinaryOp(Box::new(left), BinaryOperator::Or, Box::new(right));
        }
        Ok(left)
    }

    fn expr(&mut self) -> Result<Expr, String> {
        self.logical_or()
    }

    pub fn parse(&mut self) -> Result<Expr, String> {
        let result = self.expr()?;
        if self.position < self.tokens.len() {
            println!("{result:#?}\n {} {}", self.position, self.tokens.len());
            Err("Unexpected tokens at the end of the expression".to_string())
        } else {
            Ok(result)
        }
    }
}

impl From<&Token> for BinaryOperator {
    fn from(value: &Token) -> Self {
        match value {
            Token::Operation(Operation::Add) => Self::Add,
            Token::Operation(Operation::Multiply) => Self::Multiply,
            Token::Operation(Operation::Divide) => Self::Divide,
            Token::Operation(Operation::Subtract) => Self::Subtract,
            Token::Operation(Operation::And) => Self::BitwiseAnd,
            Token::Operation(Operation::Or) => Self::BitwiseOr,
            Token::Logical(Logical::And) => Self::And,
            Token::Logical(Logical::Or) => Self::Or,
            Token::Comparator(Comparator::Gt) => Self::Greater,
            Token::Comparator(Comparator::Lt) => Self::Lesser,
            Token::Comparator(Comparator::Ge) => Self::GreaterOrEqual,
            Token::Comparator(Comparator::Le) => Self::LesserOrEqual,
            Token::Comparator(Comparator::Eq) => Self::Equal,
            _ => panic!("Invalid token"),
        }
    }
}
@@ -1,245 +0,0 @@
use std::fmt::Display;

use super::{
    ast::Parser, tokenizer::Tokenizer, BinaryOperator, Comparator, Expr, Logical, MetaExpression,
    Operation, Token, UnaryOperator, UnwrapResult,
};

// Parse a meta expression into a list of tokens that can be converted into a Sieve test.
impl MetaExpression {
    pub fn from_meta(expr: &str) -> Self {
        let mut tokens = Tokenizer::new(expr).collect::<Vec<_>>();

        // If there are no comparators, we can just turn it into am expression
        if !tokens.iter().any(|t| matches!(t, Token::Comparator(_))) {
            let prev_tokens = tokens;
            tokens = Vec::with_capacity(prev_tokens.len() + 3);
            tokens.push(Token::OpenParen);
            for token in prev_tokens {
                tokens.push(if let Token::Logical(op) = token {
                    match op {
                        Logical::And => Token::Operation(Operation::And),
                        Logical::Or => Token::Operation(Operation::Or),
                        Logical::Not => Token::Logical(Logical::Not),
                    }
                } else {
                    token
                });
            }
            tokens.push(Token::CloseParen);
            tokens.push(Token::Comparator(Comparator::Gt));
            tokens.push(Token::Number(0));
        }

        let expr = Parser::new(&tokens)
            .parse()
            .unwrap_result("parse expression");
        MetaExpression { tokens, expr }
    }
}

impl From<String> for Token {
    fn from(value: String) -> Self {
        if let Ok(value) = value.parse() {
            Token::Number(value)
        } else {
            Token::Tag(value)
        }
    }
}

impl Expr {
    fn fmt_child(
        &self,
        f: &mut std::fmt::Formatter<'_>,
        parent: Option<&BinaryOperator>,
        in_comp: bool,
    ) -> std::fmt::Result {
        match self {
            Expr::UnaryOp(op, expr) => {
                let add_p =
                    in_comp && !matches!(expr.as_ref(), Expr::Literal(_) | Expr::Identifier(_));
                match op {
                    UnaryOperator::Not => f.write_str(if in_comp { "!" } else { "not " })?,
                    UnaryOperator::Minus => f.write_str("-")?,
                }
                if add_p {
                    f.write_str("(")?;
                }
                expr.fmt_child(f, None, in_comp)?;
                if add_p {
                    f.write_str(")")?;
                }
                Ok(())
            }
            Expr::BinaryOp(left, op, right) => match op {
                BinaryOperator::Or | BinaryOperator::And => {
                    let add_p = parent.map_or(true, |pop| pop.precedence() != op.precedence());
                    if add_p {
                        write!(f, "{op}(")?;
                    }
                    left.fmt_child(f, op.into(), in_comp)?;
                    f.write_str(", ")?;
                    right.fmt_child(f, op.into(), in_comp)?;
                    if add_p {
                        f.write_str(")")
                    } else {
                        Ok(())
                    }
                }
                BinaryOperator::Greater
                | BinaryOperator::Lesser
                | BinaryOperator::GreaterOrEqual
                | BinaryOperator::LesserOrEqual
                | BinaryOperator::Equal => {
                    write!(f, "string :value {op} :comparator \"i;ascii-numeric\" \"")?;
                    let is_literal = matches!(left.as_ref(), Expr::Literal(_));
                    if !is_literal {
                        f.write_str("${")?;
                    }
                    left.fmt_child(f, None, true)?;
                    if !is_literal {
                        f.write_str("}")?;
                    }
                    f.write_str("\" \"")?;
                    let is_literal = matches!(right.as_ref(), Expr::Literal(_));
                    if !is_literal {
                        f.write_str("${")?;
                    }
                    right.fmt_child(f, None, true)?;
                    if !is_literal {
                        f.write_str("}")?;
                    }
                    f.write_str("\"")
                }
                BinaryOperator::Add
                | BinaryOperator::Subtract
                | BinaryOperator::Multiply
                | BinaryOperator::Divide
                | BinaryOperator::BitwiseAnd
                | BinaryOperator::BitwiseOr => {
                    let add_p = parent.map_or(false, |pop| pop.precedence() != op.precedence());
                    if add_p {
                        f.write_str("(")?;
                    }
                    left.fmt_child(f, op.into(), in_comp)?;
                    op.fmt(f)?;
                    right.fmt_child(f, op.into(), in_comp)?;
                    if add_p {
                        f.write_str(")")?;
                    }
                    Ok(())
                }
            },
            Expr::Literal(v) => {
                if !in_comp {
                    write!(
                        f,
                        "string :value \"gt\" :comparator \"i;ascii-numeric\" \"{v}\" \"0\""
                    )
                } else {
                    v.fmt(f)
                }
            }
            Expr::Identifier(i) => {
                if !in_comp {
                    write!(
                        f,
                        "string :value \"gt\" :comparator \"i;ascii-numeric\" \"${{{i}}}\" \"0\"",
                    )
                } else {
                    i.fmt(f)
                }
            }
        }
    }
}

impl Display for MetaExpression {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("if ")?;
        self.expr.fmt_child(f, None, false)
    }
}

impl Display for BinaryOperator {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            BinaryOperator::Or => f.write_str("anyof"),
            BinaryOperator::And => f.write_str("allof"),
            BinaryOperator::BitwiseOr => f.write_str(" | "),
            BinaryOperator::BitwiseAnd => f.write_str(" & "),
            BinaryOperator::Greater => f.write_str("\"gt\""),
            BinaryOperator::Lesser => f.write_str("\"lt\""),
            BinaryOperator::GreaterOrEqual => f.write_str("\"ge\""),
            BinaryOperator::LesserOrEqual => f.write_str("\"le\""),
            BinaryOperator::Equal => f.write_str("\"eq\""),
            BinaryOperator::Add => f.write_str(" + "),
            BinaryOperator::Subtract => f.write_str(" - "),
            BinaryOperator::Multiply => f.write_str(" * "),
            BinaryOperator::Divide => f.write_str(" / "),
        }
    }
}

#[cfg(test)]
mod test {
    use crate::import::MetaExpression;

    #[test]
    fn parse_meta() {
        for (pos, (expr, expected)) in [
            (
                concat!(
                    "( ! A ) && ",
                    "( B > 20 ) && ",
                    "( C > ( (D * 14 ) / 10) ) && ",
                    "( E < ( 10 * F ) )"
                ),
                "",
            ),
            ("(A || B)", ""),
            ("(A && !B)", ""),
            ("A || B", ""),
            ("(A && (!B && !C && !D))", ""),
            ("(0)", ""),
            ("A + B + C > 1", ""),
            ("(A)", ""),
            ("A && !(B || C)", ""),
            ("!A && B && (C || D)", ""),
            ("((A||B||C) && !D && !E)", ""),
            ("(A + B + (C || D) > 3)", ""),
            (
                "(A || B) > 2 && (C && D) == 0 || ((E+F-G) > 0 || (H||I) <= 4)",
                "",
            ),
            ("(A || B) > (C && D) && E", ""),
            //("", ""),
        ]
        .iter()
        .enumerate()
        {
            let meta = MetaExpression::from_meta(expr);
            //println!("{:#?}", meta.tokens);
            /*if pos != 13 {
                continue;
            }*/

            println!("{expr}");
            //let tokens = Tokenizer::new(expr).collect::<Vec<_>>();
            //println!("{tokens:?}");
            //let mut p = Parser::new(&tokens);
            //let expr = p.parse().unwrap();

            //println!("{:#?}", expr);

            println!("{}\n------------------------------------", meta);

            /*assert_eq!(
                result,
                expected,
                "failed for {expr}"
            );*/
        }
    }
}
@@ -1,7 +1,5 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};

pub mod ast;
pub mod meta;
pub mod spamassassin;
pub mod tokenizer;
pub mod utils;

@@ -11,6 +9,8 @@ struct Rule {
    name: String,
    t: RuleType,
    scores: Vec<f64>,
    captured_vars: Vec<(String, usize)>,
    required_vars: HashSet<String>,
    description: HashMap<String, String>,
    priority: i32,
    flags: Vec<TestFlag>,
@@ -52,7 +52,7 @@ enum RuleType {
#[derive(Debug, Clone, Default)]
pub struct MetaExpression {
    pub tokens: Vec<Token>,
    pub expr: Expr,
    pub expr: String,
}

impl RuleType {
@@ -96,7 +96,7 @@ enum Header {
    Name(String),
}

#[derive(Debug, Default, Clone)]
#[derive(Debug, Default, Clone, Copy)]
enum HeaderMatches {
    #[default]
    Matches,
@@ -151,64 +151,10 @@ pub enum Operation {
    Not,
}

#[derive(Debug, PartialEq, Clone)]
pub enum Expr {
    UnaryOp(UnaryOperator, Box<Expr>),
    BinaryOp(Box<Expr>, BinaryOperator, Box<Expr>),
    Literal(u32),
    Identifier(String),
}

impl Default for Expr {
    fn default() -> Self {
        Self::Literal(0)
    }
}

#[derive(Debug, PartialEq, Clone)]
pub enum UnaryOperator {
    Not,
    Minus,
}

#[derive(Debug, PartialEq, Clone)]
pub enum BinaryOperator {
    Or,
    And,
    Greater,
    Lesser,
    GreaterOrEqual,
    LesserOrEqual,
    Equal,
    Add,
    Subtract,
    Multiply,
    Divide,
    BitwiseAnd,
    BitwiseOr,
}

impl BinaryOperator {
    pub fn precedence(&self) -> u32 {
        match self {
            Self::Or => 1,
            Self::And => 2,
            Self::Greater
            | Self::Lesser
            | Self::GreaterOrEqual
            | Self::LesserOrEqual
            | Self::Equal => 3,
            Self::Add | Self::Subtract => 4,
            Self::Multiply | Self::Divide => 5,
            Self::BitwiseAnd | Self::BitwiseOr => 6,
        }
    }
}

impl Rule {
    fn score(&self) -> f64 {
        self.scores.last().copied().unwrap_or_else(|| {
            if self.name.starts_with("__") {
            if self.is_subrule() {
                0.0
            } else if self.name.starts_with("T_") {
                0.01
@@ -217,6 +163,10 @@ impl Rule {
            }
        })
    }

    fn is_subrule(&self) -> bool {
        self.name.starts_with("__")
    }
}

impl Ord for Rule {
@@ -6,7 +6,8 @@ use std::{
};

use super::{
    utils::{fix_broken_regex, replace_tags},
    tokenizer::Tokenizer,
    utils::{fix_broken_regex, import_regex, replace_tags},
    Header, HeaderMatches, HeaderPart, MetaExpression, Rule, RuleType, TestFlag, Token,
    UnwrapResult,
};

@@ -240,7 +241,7 @@ static SUPPORTED_FUNCTIONS: [&str; 162] = [
    "tvd_vertical_words",
];

pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, validate_regex: bool) {
pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) {
    let mut paths: Vec<_> = fs::read_dir(&path)
        .unwrap_result("read directory")
        .map(|r| r.unwrap_result("read directory entry"))

@@ -323,7 +324,6 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
        if cmd.is_empty() {
            continue;
        }
        let todo = "GB_TO_ADDR caca";

        match cmd {
            "ifplugin" => {

@@ -647,20 +647,12 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
            }
            "meta" => {
                if let Some((test_name, expression)) = params.split_once(' ') {
                    let expr = MetaExpression::from_meta(expression);
                    /*if tokens.tokens.contains(&Token::Divide) {
                        println!(
                            "->: {expression}\n{:?}\n<-: {}",
                            tokens
                                .tokens
                                .iter()
                                .zip(tokens.token_depth.iter())
                                .collect::<Vec<_>>(),
                            String::from(tokens.clone())
                        );
                        std::process::exit(1);
                    }*/
                    rules.entry(test_name.to_string()).or_default().t = RuleType::Meta { expr };
                    rules.entry(test_name.to_string()).or_default().t = RuleType::Meta {
                        expr: MetaExpression {
                            tokens: Tokenizer::new(expression).collect(),
                            expr: expression.to_string(),
                        },
                    };
                } else {
                    eprintln!(
                        "Warning: Invalid meta command on {}, line {}",

@@ -779,16 +771,6 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
            "replace_tag" | "replace_inter" | "replace_post" | "replace_pre" => {
                if let Some((tag, pattern)) = params.split_once(' ') {
                    let pattern = replace_tags(pattern, replace_start, replace_end, &tags);
                    if validate_regex {
                        if let Err(err) = fancy_regex::Regex::new(&pattern) {
                            eprintln!(
                                "Warning: Invalid regex {pattern:?} on {}, line {}: {}",
                                path.display(),
                                line_num,
                                err
                            );
                        }
                    }
                    let tag_class = cmd.strip_prefix("replace_").unwrap();
                    let tag = if tag_class != "tag" {
                        format!("{} {}", tag_class, tag)

@@ -994,13 +976,28 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
        }
    }

    let mut var_to_rule = HashMap::new();
    let mut rules = rules
        .into_iter()
        .filter_map(|(name, mut rule)| {
            if !matches!(rule.t, RuleType::None) {
                if validate_regex {
                    if let Some(pattern) = rule.t.pattern() {
                        if let Err(err) = fancy_regex::Regex::new(pattern) {
                if let Some(pattern) = rule.t.pattern() {
                    let (pattern_, variables) = import_regex(pattern);
                    *pattern = pattern_;
                    rule.required_vars = variables;
                    match fancy_regex::Regex::new(pattern) {
                        Ok(r) => {
                            rule.captured_vars = r
                                .capture_names()
                                .enumerate()
                                .filter_map(|(pos, var_name)| {
                                    let var_name = var_name?;
                                    var_to_rule.insert(var_name.to_string(), name.clone());
                                    (var_name.to_string(), pos).into()
                                })
                                .collect();
                        }
                        Err(err) => {
                            eprintln!(
                                "Warning: Invalid regex {} for test {}: {}",
                                pattern, name, err

@@ -1020,8 +1017,7 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
        .collect::<Vec<_>>();
    rules.sort_unstable();

    let no_meta = MetaExpression::default();
    let mut meta = &no_meta;
    let mut required_rests: Vec<&str> = vec![];

    let mut tests_done = HashSet::new();
    let mut tests_linked = HashSet::new();

@@ -1032,46 +1028,72 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
    // Sort rules by meta
    loop {
        while let Some(rule) = rules_iter.next() {
            let in_meta = !meta.tokens.is_empty();
            let in_linked = !required_rests.is_empty();
            if tests_done.contains(&rule.name)
                || (in_meta
                    && !meta
                        .tokens
                        .iter()
                        .any(|t| matches!(&t, Token::Tag(n) if n == &rule.name)))
                || (in_linked && !required_rests.contains(&rule.name.as_str()))
            {
                continue;
            }
            tests_done.insert(&rule.name);
            if in_meta {
            if in_linked {
                tests_linked.insert(&rule.name);
            }

            match &rule.t {
                RuleType::Meta { expr } if rule.score() != 0.0 => {
                    rules_stack.push((meta, rule, rules_iter));
                    rules_iter = rules.iter();
                    meta = expr;
                }
                _ => {
                    rules_sorted.push(rule);
                    //write!(&mut script, "{rule}").unwrap();
                }
            let new_required_tests = match &rule.t {
                RuleType::Meta { expr } if rule.score() != 0.0 || rule.is_subrule() => expr
                    .tokens
                    .iter()
                    .filter_map(|t| match &t {
                        Token::Tag(t) if !tests_done.contains(t) => Some(t.as_str()),
                        _ => None,
                    })
                    .collect::<Vec<_>>(),
                _ => rule
                    .required_vars
                    .iter()
                    .filter_map(|required_var| {
                        if let Some(required_test) = var_to_rule.get(required_var) {
                            if !tests_done.contains(required_test) {
                                Some(required_test.as_str())
                            } else {
                                None
                            }
                        } else {
                            eprintln!(
                                "Warning: Variable {required_var:?} not found for test {:?}",
                                rule.name
                            );
                            None
                        }
                    })
                    .collect::<Vec<_>>(),
            };

            if !new_required_tests.is_empty() {
                rules_stack.push((rule, rules_iter, required_rests));
                rules_iter = rules.iter();
                required_rests = new_required_tests;
            } else {
                rules_sorted.push(rule);
            }
        }

        if let Some((prev_meta, prev_rule, prev_rules_iter)) = rules_stack.pop() {
        if let Some((prev_rule, prev_rules_iter, prev_required_tests)) = rules_stack.pop() {
            rules_sorted.push(prev_rule);
            //write!(&mut script, "{prev_rule}").unwrap();
            rules_iter = prev_rules_iter;
            meta = prev_meta;
            required_rests = prev_required_tests;
        } else {
            break;
        }
    }

    // Generate script
    let mut script = String::new();
    let mut script = String::from(concat!(
        "require [\"variables\", \"include\", \"regex\", \"body\", \"vnd.stalwart.plugins\"];\n\n",
        "global \"score\";\n",
        "global \"spam_score\";\n",
        "\n"
    ));
    let mut rules_iter = rules_sorted.iter();

    while let Some(&rule) = rules_iter.next() {

@@ -1146,15 +1168,13 @@ impl Display for Rule {
        match &self.t {
            RuleType::Header {
                matches,
                header: header @ (Header::All | Header::AllExternal),
                if_unset,
                pattern,
                part,
                ..
            } => {
                write!(
                    f,
                    "if vnd.stalwart.eval(\"match_all_headers\", \"{}\", {:?})",
                    "if match_all_headers {:?} {:?}",
                    if header == &Header::All {
                        "all"
                    } else {

@@ -1170,10 +1190,49 @@ impl Display for Rule {
                pattern,
                part,
            } => {
                let is_raw = part.contains(&HeaderPart::Raw);
                let is_name = part.contains(&HeaderPart::Name);
                let is_addr = part.contains(&HeaderPart::Addr);

                let mut pattern = pattern.as_str();
                let mut matches = *matches;

                f.write_str("if ")?;

                // Map unset statements into expressions
                let mut has_unset = match if_unset {
                    Some(val) if pattern == format!("^{val}$") => {
                        // convert /^UNSET$/ [if-unset: UNSET] to exists
                        pattern = "";
                        matches = HeaderMatches::Exists;
                        f.write_str("not ")?;
                        false
                    }
                    Some(_) => true,
                    None => false,
                };

                if has_unset {
                    match header {
                        Header::MessageId => f.write_str(concat!(
                            "allof(header :contains ",
                            "[\"Message-Id\",\"Resent-Message-Id\",",
                            "\"X-Message-Id\",\"X-Original-Message-ID\"]"
                        ))?,
                        Header::ToCc => f.write_str("allof(header :contains [\"To\",\"Cc\"]")?,
                        Header::Name(name) => write!(f, "allof(header :contains {:?}", name)?,
                        Header::EnvelopeFrom | Header::All | Header::AllExternal => {
                            has_unset = false;
                        }
                    }
                    if has_unset {
                        f.write_str(" \"\", ")?;
                    }
                }

                let cmd = if matches!(header, Header::EnvelopeFrom) {
                    "envelope"
                } else if part.contains(&HeaderPart::Addr) || part.contains(&HeaderPart::Name) {
                } else if (is_name || is_addr) && !is_raw {
                    "address"
                } else {
                    "header"

@@ -1183,23 +1242,29 @@ impl Display for Rule {
                    HeaderMatches::NotMatches => write!(f, "not {cmd} :regex ")?,
                    HeaderMatches::Exists => write!(f, "{cmd} :contains ")?,
                }
                for part in part {
                    match part {
                        HeaderPart::Name => f.write_str(":name ")?,
                        HeaderPart::Addr => f.write_str(":all ")?,
                        HeaderPart::Raw => f.write_str(":raw ")?,
                if !is_raw {
                    if is_name {
                        f.write_str(":name ")?;
                    } else if is_addr {
                        f.write_str(":all ")?;
                    }
                }
                match header {
                    Header::MessageId => f.write_str("[\"Message-Id\",\"Resent-Message-Id\",\"X-Message-Id\",\"X-Original-Message-ID\"]")?,
                    Header::ToCc => f.write_str("[\"To\",\"Cc\"]")?,
                    Header::Name (name) => write!(f, "{:?}", name)?,
                    Header::EnvelopeFrom => f.write_str("\"from\"")?,
                    Header::All |
                    Header::AllExternal => unreachable!(),
                }
                    Header::MessageId => f.write_str(concat!(
                        "[\"Message-Id\",\"Resent-Message-Id\",",
                        "\"X-Message-Id\",\"X-Original-Message-ID\"]"
                    ))?,
                    Header::ToCc => f.write_str("[\"To\",\"Cc\"]")?,
                    Header::Name(name) => write!(f, "{:?}", name)?,
                    Header::EnvelopeFrom => f.write_str("\"from\"")?,
                    Header::All | Header::AllExternal => unreachable!(),
                }

                write!(f, " {:?}", pattern)?;

                if has_unset {
                    f.write_str(")")?;
                }
            }
            RuleType::Body { pattern, raw } => {
                if *raw {

@@ -1211,33 +1276,42 @@ impl Display for Rule {
                }
            }
            RuleType::Full { pattern } => {
                write!(f, "if vnd.stalwart.eval(\"match_full\", {:?})", pattern)?;
                write!(f, "if match_full {:?}", pattern)?;
            }
            RuleType::Uri { pattern } => {
                write!(f, "if vnd.stalwart.eval(\"match_uri\", {:?})", pattern)?;
                write!(f, "if match_uri {:?}", pattern)?;
            }
            RuleType::Eval { function, params } => {
                write!(f, "if vnd.stalwart.eval({function:?}")?;
                write!(f, "if {function}")?;
                for param in params {
                    write!(f, ", {param:?}")?;
                    f.write_str(" ")?;
                    if let Some(param) = param.strip_prefix('\'').and_then(|v| v.strip_suffix('\''))
                    {
                        write!(f, "\"{param}\"")?;
                    } else if param.starts_with('\"') {
                        f.write_str(param)?;
                    } else {
                        write!(f, "\"{param}\"")?;
                    }
                }
                f.write_str(")")?;
            }
            RuleType::Meta { expr } => {
                expr.fmt(f)?;
                write!(f, "if eval {:?}", expr.expr.trim())?;
            }
            RuleType::None => {
                f.write_str("if false")?;
            }
        }

        f.write_str(" {\n\tset \"")?;
        f.write_str(&self.name)?;
        f.write_str("\" \"1\";\n")?;
        let score = self.score();
        write!(f, " {{\n\tset :local \"{}\" \"1\";\n", self.name)?;

        for (var_name, pos) in &self.captured_vars {
            writeln!(f, "\tset :local \"{}\" \"${{{}}}\";", var_name, pos)?;
        }

        let score = self.score();
        if score != 0.0 {
            f.write_str("\tset \"score\" \"${score")?;
            f.write_str("\tset \"score\" \"%{score")?;
            if score > 0.0 {
                f.write_str(" + ")?;
                score.fmt(f)?;

@@ -1251,36 +1325,20 @@ impl Display for Rule {
            if self.forward_score_neg != 0.0 {
                write!(
                    f,
                    concat!(
                        "if allof(string :value \"ge\" :comparator ",
                        "\"i;ascii-numeric\" \"${{score}}\" \"${{spam_score}}\", ",
                        "string :value \"ge\" :comparator ",
                        "\"i;ascii-numeric\" \"${{score - {:.4}}}\" \"${{spam_score}}\")"
                    ),
                    "if eval \"score >= spam_score && score - {:.4} >= spam_score\"",
                    -self.forward_score_neg
                )?;
            } else {
                f.write_str(concat!(
                    "if string :value \"ge\" :comparator ",
                    "\"i;ascii-numeric\" \"${score}\" \"${spam_score}\""
                ))?;
                f.write_str("if eval \"score >= spam_score\"")?;
            }
        } else if self.forward_score_pos != 0.0 {
            write!(
                f,
                concat!(
                    "if allof(string :value \"lt\" :comparator ",
                    "\"i;ascii-numeric\" \"${{score}}\" \"${{spam_score}}\", ",
                    "string :value \"lt\" :comparator ",
                    "\"i;ascii-numeric\" \"${{score + {:.4}}}\" \"${{spam_score}}\")"
                ),
                "if eval \"score < spam_score && score + {:.4} < spam_score\"",
                self.forward_score_pos
            )?;
        } else {
            f.write_str(concat!(
                "if string :value \"lt\" :comparator ",
                "\"i;ascii-numeric\" \"${score}\" \"${spam_score}\""
            ))?;
            f.write_str("if eval \"score < spam_score\"")?;
        }
        f.write_str(" {\n\t\treturn;\n\t}\n")?;
    }
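With the Display changes above, a meta rule is now written to the generated Sieve script as a single `if eval "…"` test over the original expression, followed by a `set :local` for the rule flag and a `%{score + …}` update, instead of being expanded into nested allof/anyof and `string :value` comparisons. A minimal sketch of that per-rule output shape, with an invented rule name, expression and score, and with the exact closing of the score line assumed since it is truncated in the hunk above:

// Sketch only: approximate Sieve text emitted for one meta rule.
fn example_meta_rule_output() -> String {
    let name = "EXAMPLE_META";         // hypothetical rule name
    let expr = "RULE_A && RULE_B > 2"; // hypothetical meta expression
    let score = 0.5_f64;               // hypothetical score
    format!(
        "if eval {:?} {{\n\tset :local {:?} \"1\";\n\tset \"score\" \"%{{score + {}}}\";\n}}\n",
        expr.trim(),
        name,
        score
    )
}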
@@ -45,47 +45,7 @@ impl<'x> Iterator for Tokenizer<'x> {
        let token = match ch {
            '&' | '|' => {
                if matches!(self.iter.next(), Some(c) if c == ch) {
                    let is_and = ch == '&';
                    if self.depth > self.comparator_depth {
                        Token::Operation(if is_and {
                            Operation::And
                        } else {
                            Operation::Or
                        })
                    } else {
                        let mut depth = self.depth;
                        let mut found_comp = false;

                        for ch in self.iter.clone() {
                            match ch {
                                '(' => depth += 1,
                                ')' => {
                                    depth -= 1;
                                }
                                '<' | '>' | '=' => {
                                    found_comp = true;
                                    break;
                                }
                                _ => (),
                            }
                        }

                        if found_comp && depth < self.depth {
                            self.comparator_depth = depth;
                            Token::Operation(if is_and {
                                Operation::And
                            } else {
                                Operation::Or
                            })
                        } else {
                            self.comparator_depth = u32::MAX;
                            Token::Logical(if is_and {
                                Logical::And
                            } else {
                                Logical::Or
                            })
                        }
                    }
                    Token::Logical(if ch == '&' { Logical::And } else { Logical::Or })
                } else {
                    eprintln!("Warning: Single {ch} in meta expression {}", self.expr);
                    return None;

@@ -179,3 +139,13 @@ impl<'x> Iterator for Tokenizer<'x> {
        }
    }
}

impl From<String> for Token {
    fn from(value: String) -> Self {
        if let Ok(value) = value.parse() {
            Token::Number(value)
        } else {
            Token::Tag(value)
        }
    }
}
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};

pub fn replace_tags(
    pattern: &str,

@@ -80,3 +80,96 @@ pub fn fix_broken_regex(value: &str) -> &str {
        _ => value,
    }
}

pub fn import_regex(value: &str) -> (String, HashSet<String>) {
    // Obtain separator
    let mut iter = value.chars().peekable();
    let separator = match iter.next() {
        Some('/') => Some('/'),
        Some('m') => iter.next().map(|ch| if ch == '{' { '}' } else { ch }),
        _ => None,
    }
    .unwrap_or(char::from(0));
    let mut regex = String::with_capacity(value.len());
    let mut flags = String::new();

    let mut variables = HashSet::new();
    let mut variable_buf = String::new();
    let mut in_variable = false;

    // Obtain regex
    let mut found_separator = false;
    while let Some(mut ch) = iter.next() {
        if ch == '%' && matches!(iter.peek(), Some('{')) {
            ch = '$';
            in_variable = true;
        } else if in_variable {
            match ch {
                '{' => {}
                '}' => {
                    if !variable_buf.is_empty() {
                        variables.insert(variable_buf.clone());
                        variable_buf.clear();
                    }
                    in_variable = false;
                }
                _ => {
                    variable_buf.push(ch);
                }
            }
        }

        if ch == separator {
            if !found_separator {
                found_separator = true;
            } else {
                regex.push(ch);
                regex.push_str(&flags);
                flags.clear();
            }
        } else if !found_separator {
            regex.push(ch);
        } else {
            flags.push(ch);
        }
    }

    (
        if !flags.is_empty() {
            format!("(?{flags}){regex}")
        } else {
            regex
        },
        variables,
    )
}

#[cfg(test)]
mod test {
    use std::collections::HashSet;

    #[test]
    fn import_regex() {
        for (expr, result, vars) in [
            (
                r#"m{<img\b[^>]{0,100}\ssrc=.?https?://[^>]{6,80}(?:\?[^>]{8}|[^a-z](?![a-f]{3}|20\d\d[01]\d[0-3]\d)[0-9a-f]{8})}i"#,
                r#"(?i)<img\b[^>]{0,100}\ssrc=.?https?://[^>]{6,80}(?:\?[^>]{8}|[^a-z](?![a-f]{3}|20\d\d[01]\d[0-3]\d)[0-9a-f]{8})"#,
                vec![],
            ),
            (r#"/\bhoodia\b/i"#, r#"(?i)\bhoodia\b"#, vec![]),
            (r#"/\bCurrent Price:/"#, r#"\bCurrent Price:"#, vec![]),
            (
                r#"m|^https?://storage\.cloud\.google\.com/.{4,128}\#%{GB_TO_ADDR}|i"#,
                r#"(?i)^https?://storage\.cloud\.google\.com/.{4,128}\#${GB_TO_ADDR}"#,
                vec!["GB_TO_ADDR"],
            ),
        ] {
            let (regex, regex_vars) = super::import_regex(expr);
            assert_eq!(regex, result);
            assert_eq!(
                HashSet::from_iter(vars.iter().map(|s| s.to_string())),
                regex_vars
            );
        }
    }
}
@@ -9,7 +9,6 @@ fn main() {
        PathBuf::from("/Users/me/code/mail-server/resources/spamassassin"),
        "cf".to_string(),
        false,
        false,
    );
}