Antispam: Eval meta expressions in Sieve, support for named capture groups.

mdecimus 2023-08-26 17:20:04 +02:00
parent 79599f2f9c
commit e73c82e7d8
7 changed files with 274 additions and 626 deletions

View file

@ -1,177 +0,0 @@
use super::{BinaryOperator, Comparator, Expr, Logical, Operation, Token, UnaryOperator};
pub struct Parser<'x> {
tokens: &'x [Token],
position: usize,
}
impl<'x> Parser<'x> {
pub fn new(tokens: &'x [Token]) -> Self {
Self {
tokens,
position: 0,
}
}
pub fn consume(&mut self) -> Option<&'x Token> {
if self.position < self.tokens.len() {
let token = &self.tokens[self.position];
self.position += 1;
Some(token)
} else {
None
}
}
pub fn peek(&self) -> Option<&'x Token> {
if self.position < self.tokens.len() {
Some(&self.tokens[self.position])
} else {
None
}
}
fn primary(&mut self) -> Result<Expr, String> {
match self.peek() {
Some(&Token::Number(n)) => {
self.consume();
Ok(Expr::Literal(n))
}
Some(Token::Tag(ref id)) => {
self.consume();
Ok(Expr::Identifier(id.clone()))
}
Some(&Token::OpenParen) => {
self.consume();
let expr = self.expr();
if let Some(&Token::CloseParen) = self.peek() {
self.consume();
expr
} else {
Err("Expected closing parenthesis".to_string())
}
}
_ => Err("Unexpected token in factor".to_string()),
}
}
fn unary(&mut self) -> Result<Expr, String> {
match self.peek() {
Some(&Token::Logical(Logical::Not)) => {
self.consume();
let operand = self.primary()?;
Ok(Expr::UnaryOp(UnaryOperator::Not, Box::new(operand)))
}
Some(&Token::Operation(Operation::Subtract)) => {
self.consume();
let operand = self.primary()?;
Ok(Expr::UnaryOp(UnaryOperator::Minus, Box::new(operand)))
}
_ => self.primary(),
}
}
fn factor(&mut self) -> Result<Expr, String> {
let mut left = self.unary()?;
while let Some(op @ Token::Operation(Operation::Multiply | Operation::Divide)) = self.peek()
{
self.consume();
let right = self.unary()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn term(&mut self) -> Result<Expr, String> {
let mut left = self.factor()?;
while let Some(op @ Token::Operation(Operation::Add | Operation::Subtract)) = self.peek() {
self.consume();
let right = self.factor()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn bitwise(&mut self) -> Result<Expr, String> {
let mut left = self.term()?;
while let Some(op @ Token::Operation(Operation::And | Operation::Or)) = self.peek() {
self.consume();
let right = self.term()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn comparison(&mut self) -> Result<Expr, String> {
let mut left = self.bitwise()?;
while let Some(op @ Token::Comparator(_)) = self.peek() {
self.consume();
let right = self.bitwise()?;
left = Expr::BinaryOp(Box::new(left), op.into(), Box::new(right));
}
Ok(left)
}
fn logical_and(&mut self) -> Result<Expr, String> {
let mut left = self.comparison()?;
while let Some(Token::Logical(Logical::And)) = self.peek() {
self.consume();
let right = self.comparison()?;
left = Expr::BinaryOp(Box::new(left), BinaryOperator::And, Box::new(right));
}
Ok(left)
}
fn logical_or(&mut self) -> Result<Expr, String> {
let mut left = self.logical_and()?;
while let Some(Token::Logical(Logical::Or)) = self.peek() {
self.consume();
let right = self.logical_and()?;
left = Expr::BinaryOp(Box::new(left), BinaryOperator::Or, Box::new(right));
}
Ok(left)
}
fn expr(&mut self) -> Result<Expr, String> {
self.logical_or()
}
pub fn parse(&mut self) -> Result<Expr, String> {
let result = self.expr()?;
if self.position < self.tokens.len() {
println!("{result:#?}\n {} {}", self.position, self.tokens.len());
Err("Unexpected tokens at the end of the expression".to_string())
} else {
Ok(result)
}
}
}
impl From<&Token> for BinaryOperator {
fn from(value: &Token) -> Self {
match value {
Token::Operation(Operation::Add) => Self::Add,
Token::Operation(Operation::Multiply) => Self::Multiply,
Token::Operation(Operation::Divide) => Self::Divide,
Token::Operation(Operation::Subtract) => Self::Subtract,
Token::Operation(Operation::And) => Self::BitwiseAnd,
Token::Operation(Operation::Or) => Self::BitwiseOr,
Token::Logical(Logical::And) => Self::And,
Token::Logical(Logical::Or) => Self::Or,
Token::Comparator(Comparator::Gt) => Self::Greater,
Token::Comparator(Comparator::Lt) => Self::Lesser,
Token::Comparator(Comparator::Ge) => Self::GreaterOrEqual,
Token::Comparator(Comparator::Le) => Self::LesserOrEqual,
Token::Comparator(Comparator::Eq) => Self::Equal,
_ => panic!("Invalid token"),
}
}
}

View file

@ -1,245 +0,0 @@
use std::fmt::Display;
use super::{
ast::Parser, tokenizer::Tokenizer, BinaryOperator, Comparator, Expr, Logical, MetaExpression,
Operation, Token, UnaryOperator, UnwrapResult,
};
// Parse a meta expression into a list of tokens that can be converted into a Sieve test.
impl MetaExpression {
pub fn from_meta(expr: &str) -> Self {
let mut tokens = Tokenizer::new(expr).collect::<Vec<_>>();
// If there are no comparators, wrap the whole expression in parentheses and compare it against zero
if !tokens.iter().any(|t| matches!(t, Token::Comparator(_))) {
let prev_tokens = tokens;
tokens = Vec::with_capacity(prev_tokens.len() + 3);
tokens.push(Token::OpenParen);
for token in prev_tokens {
tokens.push(if let Token::Logical(op) = token {
match op {
Logical::And => Token::Operation(Operation::And),
Logical::Or => Token::Operation(Operation::Or),
Logical::Not => Token::Logical(Logical::Not),
}
} else {
token
});
}
tokens.push(Token::CloseParen);
tokens.push(Token::Comparator(Comparator::Gt));
tokens.push(Token::Number(0));
}
let expr = Parser::new(&tokens)
.parse()
.unwrap_result("parse expression");
MetaExpression { tokens, expr }
}
}
impl From<String> for Token {
fn from(value: String) -> Self {
if let Ok(value) = value.parse() {
Token::Number(value)
} else {
Token::Tag(value)
}
}
}
impl Expr {
fn fmt_child(
&self,
f: &mut std::fmt::Formatter<'_>,
parent: Option<&BinaryOperator>,
in_comp: bool,
) -> std::fmt::Result {
match self {
Expr::UnaryOp(op, expr) => {
let add_p =
in_comp && !matches!(expr.as_ref(), Expr::Literal(_) | Expr::Identifier(_));
match op {
UnaryOperator::Not => f.write_str(if in_comp { "!" } else { "not " })?,
UnaryOperator::Minus => f.write_str("-")?,
}
if add_p {
f.write_str("(")?;
}
expr.fmt_child(f, None, in_comp)?;
if add_p {
f.write_str(")")?;
}
Ok(())
}
Expr::BinaryOp(left, op, right) => match op {
BinaryOperator::Or | BinaryOperator::And => {
let add_p = parent.map_or(true, |pop| pop.precedence() != op.precedence());
if add_p {
write!(f, "{op}(")?;
}
left.fmt_child(f, op.into(), in_comp)?;
f.write_str(", ")?;
right.fmt_child(f, op.into(), in_comp)?;
if add_p {
f.write_str(")")
} else {
Ok(())
}
}
BinaryOperator::Greater
| BinaryOperator::Lesser
| BinaryOperator::GreaterOrEqual
| BinaryOperator::LesserOrEqual
| BinaryOperator::Equal => {
write!(f, "string :value {op} :comparator \"i;ascii-numeric\" \"")?;
let is_literal = matches!(left.as_ref(), Expr::Literal(_));
if !is_literal {
f.write_str("${")?;
}
left.fmt_child(f, None, true)?;
if !is_literal {
f.write_str("}")?;
}
f.write_str("\" \"")?;
let is_literal = matches!(right.as_ref(), Expr::Literal(_));
if !is_literal {
f.write_str("${")?;
}
right.fmt_child(f, None, true)?;
if !is_literal {
f.write_str("}")?;
}
f.write_str("\"")
}
BinaryOperator::Add
| BinaryOperator::Subtract
| BinaryOperator::Multiply
| BinaryOperator::Divide
| BinaryOperator::BitwiseAnd
| BinaryOperator::BitwiseOr => {
let add_p = parent.map_or(false, |pop| pop.precedence() != op.precedence());
if add_p {
f.write_str("(")?;
}
left.fmt_child(f, op.into(), in_comp)?;
op.fmt(f)?;
right.fmt_child(f, op.into(), in_comp)?;
if add_p {
f.write_str(")")?;
}
Ok(())
}
},
Expr::Literal(v) => {
if !in_comp {
write!(
f,
"string :value \"gt\" :comparator \"i;ascii-numeric\" \"{v}\" \"0\""
)
} else {
v.fmt(f)
}
}
Expr::Identifier(i) => {
if !in_comp {
write!(
f,
"string :value \"gt\" :comparator \"i;ascii-numeric\" \"${{{i}}}\" \"0\"",
)
} else {
i.fmt(f)
}
}
}
}
}
impl Display for MetaExpression {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("if ")?;
self.expr.fmt_child(f, None, false)
}
}
impl Display for BinaryOperator {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
BinaryOperator::Or => f.write_str("anyof"),
BinaryOperator::And => f.write_str("allof"),
BinaryOperator::BitwiseOr => f.write_str(" | "),
BinaryOperator::BitwiseAnd => f.write_str(" & "),
BinaryOperator::Greater => f.write_str("\"gt\""),
BinaryOperator::Lesser => f.write_str("\"lt\""),
BinaryOperator::GreaterOrEqual => f.write_str("\"ge\""),
BinaryOperator::LesserOrEqual => f.write_str("\"le\""),
BinaryOperator::Equal => f.write_str("\"eq\""),
BinaryOperator::Add => f.write_str(" + "),
BinaryOperator::Subtract => f.write_str(" - "),
BinaryOperator::Multiply => f.write_str(" * "),
BinaryOperator::Divide => f.write_str(" / "),
}
}
}
#[cfg(test)]
mod test {
use crate::import::MetaExpression;
#[test]
fn parse_meta() {
for (pos, (expr, expected)) in [
(
concat!(
"( ! A ) && ",
"( B > 20 ) && ",
"( C > ( (D * 14 ) / 10) ) && ",
"( E < ( 10 * F ) )"
),
"",
),
("(A || B)", ""),
("(A && !B)", ""),
("A || B", ""),
("(A && (!B && !C && !D))", ""),
("(0)", ""),
("A + B + C > 1", ""),
("(A)", ""),
("A && !(B || C)", ""),
("!A && B && (C || D)", ""),
("((A||B||C) && !D && !E)", ""),
("(A + B + (C || D) > 3)", ""),
(
"(A || B) > 2 && (C && D) == 0 || ((E+F-G) > 0 || (H||I) <= 4)",
"",
),
("(A || B) > (C && D) && E", ""),
//("", ""),
]
.iter()
.enumerate()
{
let meta = MetaExpression::from_meta(expr);
//println!("{:#?}", meta.tokens);
/*if pos != 13 {
continue;
}*/
println!("{expr}");
//let tokens = Tokenizer::new(expr).collect::<Vec<_>>();
//println!("{tokens:?}");
//let mut p = Parser::new(&tokens);
//let expr = p.parse().unwrap();
//println!("{:#?}", expr);
println!("{}\n------------------------------------", meta);
/*assert_eq!(
result,
expected,
"failed for {expr}"
);*/
}
}
}

View file

@ -1,7 +1,5 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
pub mod ast;
pub mod meta;
pub mod spamassassin;
pub mod tokenizer;
pub mod utils;
@ -11,6 +9,8 @@ struct Rule {
name: String,
t: RuleType,
scores: Vec<f64>,
captured_vars: Vec<(String, usize)>,
required_vars: HashSet<String>,
description: HashMap<String, String>,
priority: i32,
flags: Vec<TestFlag>,
@ -52,7 +52,7 @@ enum RuleType {
#[derive(Debug, Clone, Default)]
pub struct MetaExpression {
pub tokens: Vec<Token>,
pub expr: Expr,
pub expr: String,
}
impl RuleType {
@ -96,7 +96,7 @@ enum Header {
Name(String),
}
#[derive(Debug, Default, Clone)]
#[derive(Debug, Default, Clone, Copy)]
enum HeaderMatches {
#[default]
Matches,
@ -151,64 +151,10 @@ pub enum Operation {
Not,
}
#[derive(Debug, PartialEq, Clone)]
pub enum Expr {
UnaryOp(UnaryOperator, Box<Expr>),
BinaryOp(Box<Expr>, BinaryOperator, Box<Expr>),
Literal(u32),
Identifier(String),
}
impl Default for Expr {
fn default() -> Self {
Self::Literal(0)
}
}
#[derive(Debug, PartialEq, Clone)]
pub enum UnaryOperator {
Not,
Minus,
}
#[derive(Debug, PartialEq, Clone)]
pub enum BinaryOperator {
Or,
And,
Greater,
Lesser,
GreaterOrEqual,
LesserOrEqual,
Equal,
Add,
Subtract,
Multiply,
Divide,
BitwiseAnd,
BitwiseOr,
}
impl BinaryOperator {
pub fn precedence(&self) -> u32 {
match self {
Self::Or => 1,
Self::And => 2,
Self::Greater
| Self::Lesser
| Self::GreaterOrEqual
| Self::LesserOrEqual
| Self::Equal => 3,
Self::Add | Self::Subtract => 4,
Self::Multiply | Self::Divide => 5,
Self::BitwiseAnd | Self::BitwiseOr => 6,
}
}
}
impl Rule {
fn score(&self) -> f64 {
self.scores.last().copied().unwrap_or_else(|| {
if self.name.starts_with("__") {
if self.is_subrule() {
0.0
} else if self.name.starts_with("T_") {
0.01
@ -217,6 +163,10 @@ impl Rule {
}
})
}
fn is_subrule(&self) -> bool {
self.name.starts_with("__")
}
}
impl Ord for Rule {

View file

@ -6,7 +6,8 @@ use std::{
};
use super::{
utils::{fix_broken_regex, replace_tags},
tokenizer::Tokenizer,
utils::{fix_broken_regex, import_regex, replace_tags},
Header, HeaderMatches, HeaderPart, MetaExpression, Rule, RuleType, TestFlag, Token,
UnwrapResult,
};
@ -240,7 +241,7 @@ static SUPPORTED_FUNCTIONS: [&str; 162] = [
"tvd_vertical_words",
];
pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, validate_regex: bool) {
pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) {
let mut paths: Vec<_> = fs::read_dir(&path)
.unwrap_result("read directory")
.map(|r| r.unwrap_result("read directory entry"))
@ -323,7 +324,6 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
if cmd.is_empty() {
continue;
}
let todo = "GB_TO_ADDR caca";
match cmd {
"ifplugin" => {
@ -647,20 +647,12 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
}
"meta" => {
if let Some((test_name, expression)) = params.split_once(' ') {
let expr = MetaExpression::from_meta(expression);
/*if tokens.tokens.contains(&Token::Divide) {
println!(
"->: {expression}\n{:?}\n<-: {}",
tokens
.tokens
.iter()
.zip(tokens.token_depth.iter())
.collect::<Vec<_>>(),
String::from(tokens.clone())
);
std::process::exit(1);
}*/
rules.entry(test_name.to_string()).or_default().t = RuleType::Meta { expr };
rules.entry(test_name.to_string()).or_default().t = RuleType::Meta {
expr: MetaExpression {
tokens: Tokenizer::new(expression).collect(),
expr: expression.to_string(),
},
};
} else {
eprintln!(
"Warning: Invalid meta command on {}, line {}",
@ -779,16 +771,6 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
"replace_tag" | "replace_inter" | "replace_post" | "replace_pre" => {
if let Some((tag, pattern)) = params.split_once(' ') {
let pattern = replace_tags(pattern, replace_start, replace_end, &tags);
if validate_regex {
if let Err(err) = fancy_regex::Regex::new(&pattern) {
eprintln!(
"Warning: Invalid regex {pattern:?} on {}, line {}: {}",
path.display(),
line_num,
err
);
}
}
let tag_class = cmd.strip_prefix("replace_").unwrap();
let tag = if tag_class != "tag" {
format!("{} {}", tag_class, tag)
@ -994,13 +976,28 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
}
}
let mut var_to_rule = HashMap::new();
let mut rules = rules
.into_iter()
.filter_map(|(name, mut rule)| {
if !matches!(rule.t, RuleType::None) {
if validate_regex {
if let Some(pattern) = rule.t.pattern() {
if let Err(err) = fancy_regex::Regex::new(pattern) {
if let Some(pattern) = rule.t.pattern() {
let (pattern_, variables) = import_regex(pattern);
*pattern = pattern_;
rule.required_vars = variables;
match fancy_regex::Regex::new(pattern) {
Ok(r) => {
rule.captured_vars = r
.capture_names()
.enumerate()
.filter_map(|(pos, var_name)| {
let var_name = var_name?;
var_to_rule.insert(var_name.to_string(), name.clone());
(var_name.to_string(), pos).into()
})
.collect();
}
Err(err) => {
eprintln!(
"Warning: Invalid regex {} for test {}: {}",
pattern, name, err
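The hunk above is where named capture groups come in: every pattern is first rewritten by import_regex, then compiled with fancy_regex, and its named groups are stored in captured_vars together with their capture index, while var_to_rule records which rule defines each variable. Below is a standalone sketch of just that collection step, with a hypothetical pattern and helper name (captured_vars); fancy_regex is the crate already used by the importer.

fn captured_vars(pattern: &str) -> Vec<(String, usize)> {
    // Keep each named group together with its capture index; index 0 is the
    // implicit whole-match group, which has no name and is skipped.
    let re = fancy_regex::Regex::new(pattern).expect("valid regex");
    re.capture_names()
        .enumerate()
        .filter_map(|(pos, name)| name.map(|n| (n.to_string(), pos)))
        .collect()
}

fn main() {
    // Hypothetical pattern, for illustration only.
    let vars = captured_vars(r"(?i)^https?://[^/]+/(?P<GB_TO_ADDR>[0-9a-f]{8})");
    assert_eq!(vars, vec![("GB_TO_ADDR".to_string(), 1)]);
}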
@ -1020,8 +1017,7 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
.collect::<Vec<_>>();
rules.sort_unstable();
let no_meta = MetaExpression::default();
let mut meta = &no_meta;
let mut required_tests: Vec<&str> = vec![];
let mut tests_done = HashSet::new();
let mut tests_linked = HashSet::new();
@ -1032,46 +1028,72 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool, vali
// Sort rules so that dependencies (meta sub-tests and variable-defining rules) are emitted first
loop {
while let Some(rule) = rules_iter.next() {
let in_meta = !meta.tokens.is_empty();
let in_linked = !required_tests.is_empty();
if tests_done.contains(&rule.name)
|| (in_meta
&& !meta
.tokens
.iter()
.any(|t| matches!(&t, Token::Tag(n) if n == &rule.name)))
|| (in_linked && !required_tests.contains(&rule.name.as_str()))
{
continue;
}
tests_done.insert(&rule.name);
if in_meta {
if in_linked {
tests_linked.insert(&rule.name);
}
match &rule.t {
RuleType::Meta { expr } if rule.score() != 0.0 => {
rules_stack.push((meta, rule, rules_iter));
rules_iter = rules.iter();
meta = expr;
}
_ => {
rules_sorted.push(rule);
//write!(&mut script, "{rule}").unwrap();
}
let new_required_tests = match &rule.t {
RuleType::Meta { expr } if rule.score() != 0.0 || rule.is_subrule() => expr
.tokens
.iter()
.filter_map(|t| match &t {
Token::Tag(t) if !tests_done.contains(t) => Some(t.as_str()),
_ => None,
})
.collect::<Vec<_>>(),
_ => rule
.required_vars
.iter()
.filter_map(|required_var| {
if let Some(required_test) = var_to_rule.get(required_var) {
if !tests_done.contains(required_test) {
Some(required_test.as_str())
} else {
None
}
} else {
eprintln!(
"Warning: Variable {required_var:?} not found for test {:?}",
rule.name
);
None
}
})
.collect::<Vec<_>>(),
};
if !new_required_tests.is_empty() {
rules_stack.push((rule, rules_iter, required_tests));
rules_iter = rules.iter();
required_tests = new_required_tests;
} else {
rules_sorted.push(rule);
}
}
if let Some((prev_meta, prev_rule, prev_rules_iter)) = rules_stack.pop() {
if let Some((prev_rule, prev_rules_iter, prev_required_tests)) = rules_stack.pop() {
rules_sorted.push(prev_rule);
//write!(&mut script, "{prev_rule}").unwrap();
rules_iter = prev_rules_iter;
meta = prev_meta;
required_tests = prev_required_tests;
} else {
break;
}
}
// Generate script
let mut script = String::new();
let mut script = String::from(concat!(
"require [\"variables\", \"include\", \"regex\", \"body\", \"vnd.stalwart.plugins\"];\n\n",
"global \"score\";\n",
"global \"spam_score\";\n",
"\n"
));
let mut rules_iter = rules_sorted.iter();
while let Some(&rule) = rules_iter.next() {
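The rewritten loop in the hunk above no longer tracks a single pending meta expression: each rule can push a list of still-missing dependencies (sub-tests referenced by its meta expression, or rules that define one of its required_vars via var_to_rule), and the iterator is restarted so those are emitted first. Conceptually this is a depth-first dependency ordering; the following is a minimal recursive sketch of the same idea with illustrative names (visit, META_X, __SUB_A, __SUB_B), whereas the importer itself uses an explicit stack of iterators instead of recursion.

use std::collections::{HashMap, HashSet};

// Emit each rule only after the rules it depends on have been emitted.
fn visit<'a>(
    name: &'a str,
    deps: &HashMap<&'a str, Vec<&'a str>>,
    done: &mut HashSet<&'a str>,
    sorted: &mut Vec<&'a str>,
) {
    if !done.insert(name) {
        return; // already emitted (or currently being visited)
    }
    for &dep in deps.get(name).into_iter().flatten() {
        visit(dep, deps, done, sorted);
    }
    sorted.push(name);
}

fn main() {
    // META_X depends on two sub-tests; the dependencies end up first in `sorted`.
    let deps = HashMap::from([("META_X", vec!["__SUB_A", "__SUB_B"])]);
    let (mut done, mut sorted) = (HashSet::new(), Vec::new());
    for name in ["META_X", "__SUB_A", "__SUB_B"] {
        visit(name, &deps, &mut done, &mut sorted);
    }
    assert_eq!(sorted, vec!["__SUB_A", "__SUB_B", "META_X"]);
}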
@ -1146,15 +1168,13 @@ impl Display for Rule {
match &self.t {
RuleType::Header {
matches,
header: header @ (Header::All | Header::AllExternal),
if_unset,
pattern,
part,
..
} => {
write!(
f,
"if vnd.stalwart.eval(\"match_all_headers\", \"{}\", {:?})",
"if match_all_headers {:?} {:?}",
if header == &Header::All {
"all"
} else {
@ -1170,10 +1190,49 @@ impl Display for Rule {
pattern,
part,
} => {
let is_raw = part.contains(&HeaderPart::Raw);
let is_name = part.contains(&HeaderPart::Name);
let is_addr = part.contains(&HeaderPart::Addr);
let mut pattern = pattern.as_str();
let mut matches = *matches;
f.write_str("if ")?;
// Map if-unset clauses into Sieve tests
let mut has_unset = match if_unset {
Some(val) if pattern == format!("^{val}$") => {
// convert /^UNSET$/ [if-unset: UNSET] to exists
pattern = "";
matches = HeaderMatches::Exists;
f.write_str("not ")?;
false
}
Some(_) => true,
None => false,
};
if has_unset {
match header {
Header::MessageId => f.write_str(concat!(
"allof(header :contains ",
"[\"Message-Id\",\"Resent-Message-Id\",",
"\"X-Message-Id\",\"X-Original-Message-ID\"]"
))?,
Header::ToCc => f.write_str("allof(header :contains [\"To\",\"Cc\"]")?,
Header::Name(name) => write!(f, "allof(header :contains {:?}", name)?,
Header::EnvelopeFrom | Header::All | Header::AllExternal => {
has_unset = false;
}
}
if has_unset {
f.write_str(" \"\", ")?;
}
}
let cmd = if matches!(header, Header::EnvelopeFrom) {
"envelope"
} else if part.contains(&HeaderPart::Addr) || part.contains(&HeaderPart::Name) {
} else if (is_name || is_addr) && !is_raw {
"address"
} else {
"header"
@ -1183,23 +1242,29 @@ impl Display for Rule {
HeaderMatches::NotMatches => write!(f, "not {cmd} :regex ")?,
HeaderMatches::Exists => write!(f, "{cmd} :contains ")?,
}
for part in part {
match part {
HeaderPart::Name => f.write_str(":name ")?,
HeaderPart::Addr => f.write_str(":all ")?,
HeaderPart::Raw => f.write_str(":raw ")?,
if !is_raw {
if is_name {
f.write_str(":name ")?;
} else if is_addr {
f.write_str(":all ")?;
}
}
match header {
Header::MessageId => f.write_str("[\"Message-Id\",\"Resent-Message-Id\",\"X-Message-Id\",\"X-Original-Message-ID\"]")?,
Header::ToCc => f.write_str("[\"To\",\"Cc\"]")?,
Header::Name (name) => write!(f, "{:?}", name)?,
Header::EnvelopeFrom => f.write_str("\"from\"")?,
Header::All |
Header::AllExternal => unreachable!(),
}
Header::MessageId => f.write_str(concat!(
"[\"Message-Id\",\"Resent-Message-Id\",",
"\"X-Message-Id\",\"X-Original-Message-ID\"]"
))?,
Header::ToCc => f.write_str("[\"To\",\"Cc\"]")?,
Header::Name(name) => write!(f, "{:?}", name)?,
Header::EnvelopeFrom => f.write_str("\"from\"")?,
Header::All | Header::AllExternal => unreachable!(),
}
write!(f, " {:?}", pattern)?;
if has_unset {
f.write_str(")")?;
}
}
RuleType::Body { pattern, raw } => {
if *raw {
@ -1211,33 +1276,42 @@ impl Display for Rule {
}
}
RuleType::Full { pattern } => {
write!(f, "if vnd.stalwart.eval(\"match_full\", {:?})", pattern)?;
write!(f, "if match_full {:?}", pattern)?;
}
RuleType::Uri { pattern } => {
write!(f, "if vnd.stalwart.eval(\"match_uri\", {:?})", pattern)?;
write!(f, "if match_uri {:?}", pattern)?;
}
RuleType::Eval { function, params } => {
write!(f, "if vnd.stalwart.eval({function:?}")?;
write!(f, "if {function}")?;
for param in params {
write!(f, ", {param:?}")?;
f.write_str(" ")?;
if let Some(param) = param.strip_prefix('\'').and_then(|v| v.strip_suffix('\''))
{
write!(f, "\"{param}\"")?;
} else if param.starts_with('\"') {
f.write_str(param)?;
} else {
write!(f, "\"{param}\"")?;
}
}
f.write_str(")")?;
}
RuleType::Meta { expr } => {
expr.fmt(f)?;
write!(f, "if eval {:?}", expr.expr.trim())?;
}
RuleType::None => {
f.write_str("if false")?;
}
}
f.write_str(" {\n\tset \"")?;
f.write_str(&self.name)?;
f.write_str("\" \"1\";\n")?;
let score = self.score();
write!(f, " {{\n\tset :local \"{}\" \"1\";\n", self.name)?;
for (var_name, pos) in &self.captured_vars {
writeln!(f, "\tset :local \"{}\" \"${{{}}}\";", var_name, pos)?;
}
let score = self.score();
if score != 0.0 {
f.write_str("\tset \"score\" \"${score")?;
f.write_str("\tset \"score\" \"%{score")?;
if score > 0.0 {
f.write_str(" + ")?;
score.fmt(f)?;
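With these hunks a meta rule is no longer compiled into nested allof/anyof/string tests: its raw expression string is handed to a Sieve eval test, each rule sets a local flag named after itself (plus any named captures), and the score is bumped with a %{score + n} expression. A rough, simplified stand-in for the Display output above follows; render_meta and the rule names FOO_META/__FOO_A/__FOO_B are hypothetical, and the real impl additionally handles negative scores, captured variables and the forward-score shortcuts.

fn render_meta(rule_name: &str, expr: &str, score: f64) -> String {
    // Emit the meta expression as an eval test, flag the rule in a local
    // variable, and bump the running score.
    let mut out = format!("if eval {:?} {{\n", expr.trim());
    out.push_str(&format!("\tset :local {:?} \"1\";\n", rule_name));
    if score > 0.0 {
        out.push_str(&format!("\tset \"score\" \"%{{score + {score}}}\";\n"));
    }
    out.push_str("}\n");
    out
}

fn main() {
    // Prints, roughly:
    // if eval "__FOO_A && !__FOO_B" {
    //     set :local "FOO_META" "1";
    //     set "score" "%{score + 2.5}";
    // }
    print!("{}", render_meta("FOO_META", "__FOO_A && !__FOO_B", 2.5));
}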
@ -1251,36 +1325,20 @@ impl Display for Rule {
if self.forward_score_neg != 0.0 {
write!(
f,
concat!(
"if allof(string :value \"ge\" :comparator ",
"\"i;ascii-numeric\" \"${{score}}\" \"${{spam_score}}\", ",
"string :value \"ge\" :comparator ",
"\"i;ascii-numeric\" \"${{score - {:.4}}}\" \"${{spam_score}}\")"
),
"if eval \"score >= spam_score && score - {:.4} >= spam_score\"",
-self.forward_score_neg
)?;
} else {
f.write_str(concat!(
"if string :value \"ge\" :comparator ",
"\"i;ascii-numeric\" \"${score}\" \"${spam_score}\""
))?;
f.write_str("if eval \"score >= spam_score\"")?;
}
} else if self.forward_score_pos != 0.0 {
write!(
f,
concat!(
"if allof(string :value \"lt\" :comparator ",
"\"i;ascii-numeric\" \"${{score}}\" \"${{spam_score}}\", ",
"string :value \"lt\" :comparator ",
"\"i;ascii-numeric\" \"${{score + {:.4}}}\" \"${{spam_score}}\")"
),
"if eval \"score < spam_score && score + {:.4} < spam_score\"",
self.forward_score_pos
)?;
} else {
f.write_str(concat!(
"if string :value \"lt\" :comparator ",
"\"i;ascii-numeric\" \"${score}\" \"${spam_score}\""
))?;
f.write_str("if eval \"score < spam_score\"")?;
}
f.write_str(" {\n\t\treturn;\n\t}\n")?;
}

View file

@ -45,47 +45,7 @@ impl<'x> Iterator for Tokenizer<'x> {
let token = match ch {
'&' | '|' => {
if matches!(self.iter.next(), Some(c) if c == ch) {
let is_and = ch == '&';
if self.depth > self.comparator_depth {
Token::Operation(if is_and {
Operation::And
} else {
Operation::Or
})
} else {
let mut depth = self.depth;
let mut found_comp = false;
for ch in self.iter.clone() {
match ch {
'(' => depth += 1,
')' => {
depth -= 1;
}
'<' | '>' | '=' => {
found_comp = true;
break;
}
_ => (),
}
}
if found_comp && depth < self.depth {
self.comparator_depth = depth;
Token::Operation(if is_and {
Operation::And
} else {
Operation::Or
})
} else {
self.comparator_depth = u32::MAX;
Token::Logical(if is_and {
Logical::And
} else {
Logical::Or
})
}
}
Token::Logical(if ch == '&' { Logical::And } else { Logical::Or })
} else {
eprintln!("Warning: Single {ch} in meta expression {}", self.expr);
return None;
@ -179,3 +139,13 @@ impl<'x> Iterator for Tokenizer<'x> {
}
}
}
impl From<String> for Token {
fn from(value: String) -> Self {
if let Ok(value) = value.parse() {
Token::Number(value)
} else {
Token::Tag(value)
}
}
}

View file

@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
pub fn replace_tags(
pattern: &str,
@ -80,3 +80,96 @@ pub fn fix_broken_regex(value: &str) -> &str {
_ => value,
}
}
pub fn import_regex(value: &str) -> (String, HashSet<String>) {
// Determine the pattern delimiter (/.../ or m{...}, m|...|, ...)
let mut iter = value.chars().peekable();
let separator = match iter.next() {
Some('/') => Some('/'),
Some('m') => iter.next().map(|ch| if ch == '{' { '}' } else { ch }),
_ => None,
}
.unwrap_or(char::from(0));
let mut regex = String::with_capacity(value.len());
let mut flags = String::new();
let mut variables = HashSet::new();
let mut variable_buf = String::new();
let mut in_variable = false;
// Extract the pattern body and trailing flags, rewriting %{VAR} references to ${VAR}
let mut found_separator = false;
while let Some(mut ch) = iter.next() {
if ch == '%' && matches!(iter.peek(), Some('{')) {
ch = '$';
in_variable = true;
} else if in_variable {
match ch {
'{' => {}
'}' => {
if !variable_buf.is_empty() {
variables.insert(variable_buf.clone());
variable_buf.clear();
}
in_variable = false;
}
_ => {
variable_buf.push(ch);
}
}
}
if ch == separator {
if !found_separator {
found_separator = true;
} else {
regex.push(ch);
regex.push_str(&flags);
flags.clear();
}
} else if !found_separator {
regex.push(ch);
} else {
flags.push(ch);
}
}
(
if !flags.is_empty() {
format!("(?{flags}){regex}")
} else {
regex
},
variables,
)
}
#[cfg(test)]
mod test {
use std::collections::HashSet;
#[test]
fn import_regex() {
for (expr, result, vars) in [
(
r#"m{<img\b[^>]{0,100}\ssrc=.?https?://[^>]{6,80}(?:\?[^>]{8}|[^a-z](?![a-f]{3}|20\d\d[01]\d[0-3]\d)[0-9a-f]{8})}i"#,
r#"(?i)<img\b[^>]{0,100}\ssrc=.?https?://[^>]{6,80}(?:\?[^>]{8}|[^a-z](?![a-f]{3}|20\d\d[01]\d[0-3]\d)[0-9a-f]{8})"#,
vec![],
),
(r#"/\bhoodia\b/i"#, r#"(?i)\bhoodia\b"#, vec![]),
(r#"/\bCurrent Price:/"#, r#"\bCurrent Price:"#, vec![]),
(
r#"m|^https?://storage\.cloud\.google\.com/.{4,128}\#%{GB_TO_ADDR}|i"#,
r#"(?i)^https?://storage\.cloud\.google\.com/.{4,128}\#${GB_TO_ADDR}"#,
vec!["GB_TO_ADDR"],
),
] {
let (regex, regex_vars) = super::import_regex(expr);
assert_eq!(regex, result);
assert_eq!(
HashSet::from_iter(vars.iter().map(|s| s.to_string())),
regex_vars
);
}
}
}

View file

@ -9,7 +9,6 @@ fn main() {
PathBuf::from("/Users/me/code/mail-server/resources/spamassassin"),
"cf".to_string(),
false,
false,
);
}