diff --git a/crates/antispam/src/import/mod.rs b/crates/antispam/src/import/mod.rs index c4241ed5..76bd3e6c 100644 --- a/crates/antispam/src/import/mod.rs +++ b/crates/antispam/src/import/mod.rs @@ -21,8 +21,7 @@ enum RuleType { Header { matches: HeaderMatches, header: Header, - part: Vec, - if_unset: Option, + part: HeaderPart, pattern: String, }, Body { @@ -83,31 +82,45 @@ enum TestFlag { DnsBlockRule(String), } -#[derive(Debug, Default, PartialEq, Eq, Clone)] +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] enum Header { #[default] All, MessageId, - AllExternal, EnvelopeFrom, ToCc, + Received(ReceivedPart), Name(String), } +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] +enum ReceivedPart { + From, + FromIp, + FromIpRev, + By, + For, + Ident, + Id, + Protocol, +} + #[derive(Debug, Default, Clone, Copy)] enum HeaderMatches { #[default] Matches, NotMatches, Exists, + NotExists, } -#[derive(Debug, Default, PartialEq, Eq, Clone)] +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] enum HeaderPart { Name, Addr, - #[default] Raw, + #[default] + Default, } #[derive(Debug, PartialEq, Eq, Clone)] diff --git a/crates/antispam/src/import/spamassassin.rs b/crates/antispam/src/import/spamassassin.rs index 31a9098b..b2de1c7d 100644 --- a/crates/antispam/src/import/spamassassin.rs +++ b/crates/antispam/src/import/spamassassin.rs @@ -9,13 +9,13 @@ use std::{ use super::{ tokenizer::Tokenizer, utils::{fix_broken_regex, import_regex, replace_tags}, - Header, HeaderMatches, HeaderPart, MetaExpression, Rule, RuleType, TestFlag, Token, - UnwrapResult, + Header, HeaderMatches, HeaderPart, MetaExpression, ReceivedPart, Rule, RuleType, TestFlag, + Token, UnwrapResult, }; const VERSION: f64 = 4.000000; -static IF_TRUE: [&str; 54] = [ +static IF_TRUE: [&str; 53] = [ "Mail::SpamAssassin::Plugin::DKIM", "Mail::SpamAssassin::Plugin::SPF", "Mail::SpamAssassin::Plugin::ASN", @@ -46,7 +46,6 @@ static IF_TRUE: [&str; 54] = [ "Mail::SpamAssassin::Plugin::TxRep", "Mail::SpamAssassin::Plugin::URIDNSBL", "Mail::SpamAssassin::Plugin::URIEval", - "Mail::SpamAssassin::Plugin::VBounce", "Mail::SpamAssassin::Plugin::WLBLEval", "Mail::SpamAssassin::Plugin::WelcomeListSubject", "Mail::SpamAssassin::Conf::feature_bayes_stopwords", @@ -72,11 +71,12 @@ static IF_TRUE: [&str; 54] = [ "Mail::SpamAssassin::Conf::feature_dns_block_rule", ]; -static IF_FALSE: [&str; 4] = [ +static IF_FALSE: [&str; 5] = [ "Mail::SpamAssassin::Plugin::WhiteListSubject", "Mail::SpamAssassin::Plugin::AccessDB", "Mail::SpamAssassin::Plugin::AntiVirus", "Mail::SpamAssassin::Plugin::Shortcircuit", + "Mail::SpamAssassin::Plugin::VBounce", ]; static SUPPORTED_FUNCTIONS: [&str; 162] = [ @@ -261,6 +261,7 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { let mut unsupported_ifs: BTreeMap>> = BTreeMap::new(); let mut unsupported_commands: BTreeMap>> = BTreeMap::new(); + let mut common_headers = BTreeMap::new(); for path in paths { let path = path.path(); @@ -517,9 +518,8 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { rule.t = RuleType::Header { matches: HeaderMatches::Exists, header: Header::Name(exists.to_string()), - if_unset: None, pattern: String::new(), - part: vec![], + part: HeaderPart::Default, }; } else if let Some((header, (op, mut pattern))) = value .split_once(' ') @@ -543,44 +543,59 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { } }).collect::>(); - rule.t = RuleType::Header { - matches: match op { - "=~" => HeaderMatches::Matches, - "!~" => HeaderMatches::NotMatches, - _ => { - eprintln!( - "Warning: Invalid operator {op:?} on {}, line {}", - path.display(), - line_num - ); - continue; + let header = match header { + "ALL" | "ALL-EXTERNAL" => Header::All, + "MESSAGEID" => Header::MessageId, + "EnvelopeFrom" => Header::EnvelopeFrom, + "ToCc" => Header::ToCc, + _ => Header::Name(header.to_lowercase()), + }; + let part = if part.contains(&HeaderPart::Addr) { + HeaderPart::Addr + } else if part.contains(&HeaderPart::Name) { + HeaderPart::Name + } else if part.contains(&HeaderPart::Raw) { + HeaderPart::Raw + } else { + HeaderPart::Default + }; + + let mut matches = match op { + "=~" => HeaderMatches::Matches, + "!~" => HeaderMatches::NotMatches, + _ => { + panic!( + "Warning: Invalid operator {op:?} on {}, line {}", + path.display(), + line_num + ); + } + }; + + if let Some((new_pattern, if_unset)) = + pattern.rsplit_once("[if-unset:") + { + pattern = new_pattern.trim(); + if let Some(if_unset) = + if_unset.strip_suffix(']').map(|v| v.trim()) + { + if pattern == format!("^{if_unset}$") { + // convert /^UNSET$/ [if-unset: UNSET] to exists + pattern = ""; + matches = HeaderMatches::NotExists; } - }, - header: match header { - "ALL" => Header::All, - "MESSAGEID" => Header::MessageId, - "ALL-EXTERNAL" => Header::AllExternal, - "EnvelopeFrom" => Header::EnvelopeFrom, - "ToCc" => Header::ToCc, - _ => Header::Name(header.to_string()), - }, - if_unset: pattern.rsplit_once("[if-unset:").and_then( - |(new_pattern, if_unset)| { - pattern = new_pattern.trim(); - if let Some(if_unset) = - if_unset.strip_suffix(']').map(|v| v.trim()) - { - if_unset.to_string().into() - } else { - eprintln!( - "Warning: Failed to parse if_unset for header command on {}, line {}", - path.display(), - line_num - ); - None - } - }, - ), + } else { + panic!( + "Warning: Failed to parse if_unset for header command on {}, line {}", + path.display(), + line_num + ); + } + } + + rule.t = RuleType::Header { + matches, + header, pattern: fix_broken_regex(pattern).to_string(), part, }; @@ -986,16 +1001,121 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { } } - let mut var_to_rule = HashMap::new(); + let mut var_to_rule: HashMap = HashMap::new(); let mut rules = rules .into_iter() - .filter_map(|(name, mut rule)| { - if !matches!(rule.t, RuleType::None) { - if let Some(pattern) = rule.t.pattern() { - let (pattern_, variables) = import_regex(pattern); - *pattern = pattern_; + .flat_map(|(name, mut rule)| { + let mut result = vec![]; + rule.name = name; + + match &mut rule.t { + RuleType::Header { + pattern, + header: Header::Name(hdr_name), + .. + } if hdr_name.to_lowercase().starts_with("x-spam-relays-") => { + #[derive(Debug)] + enum Part { + Id(Vec), + Pattern(String), + } + let (pattern_, flags, _) = import_regex(pattern); + let mut buf = String::new(); + let mut parts = vec![]; + let mut last_ch = char::from(0); + + for ch in pattern_.chars() { + if ch == '=' && (last_ch.is_alphabetic() || last_ch == ')') { + let mut found_suffix = false; + for (ids, key) in [ + (vec![ReceivedPart::From], "helo"), + (vec![ReceivedPart::Protocol], "auth"), + (vec![ReceivedPart::FromIp], "ip"), + (vec![ReceivedPart::FromIpRev], "rdns"), + (vec![ReceivedPart::For], "envfrom"), + (vec![ReceivedPart::By], "by"), + (vec![ReceivedPart::Ident], "ident"), + ( + vec![ReceivedPart::From, ReceivedPart::FromIpRev], + "(?:rdns|helo)", + ), + (vec![ReceivedPart::Id], "id"), + (vec![ReceivedPart::By, ReceivedPart::FromIp], "(?:by|ip)"), + ] { + if let Some(v) = buf.strip_suffix(key) { + parts.push(Part::Pattern(v.trim().to_string())); + parts.push(Part::Id(ids)); + found_suffix = true; + break; + } + } + + if !found_suffix { + panic!("Failed to parse x-spam-relays- pattern {pattern_}: {buf}"); + } + buf.clear(); + } else { + buf.push(ch); + } + + last_ch = ch; + } + + if !buf.is_empty() { + parts.push(Part::Pattern(buf.trim().to_string())); + } + if matches!(parts.first(), Some(Part::Pattern(_))) { + parts.remove(0); + } + for part in parts.chunks_exact(2) { + if let (Part::Id(ids), Part::Pattern(pattern)) = (&part[0], &part[1]) { + if !pattern.is_empty() { + for id in ids { + result.push(Rule { + name: rule.name.clone(), + t: RuleType::Header { + matches: HeaderMatches::Matches, + header: Header::Received(*id), + part: HeaderPart::Default, + pattern: if !flags.is_empty() { + format!("(?{flags}){pattern}") + } else { + pattern.to_string() + }, + }, + scores: rule.scores.clone(), + captured_vars: rule.captured_vars.clone(), + required_vars: rule.required_vars.clone(), + description: rule.description.clone(), + priority: rule.priority, + flags: rule.flags.clone(), + }); + + common_headers + .entry((Header::Received(*id), HeaderPart::Default)) + .or_insert_with(HashSet::new) + .insert(rule.name.to_string()); + } + } + } else { + unreachable!(); + } + } + } + RuleType::Header { pattern, .. } + | RuleType::Body { pattern, .. } + | RuleType::Full { pattern, .. } + | RuleType::Uri { pattern, .. } => { + let (pattern_, flags, variables) = import_regex(pattern); + + *pattern = if !flags.is_empty() { + format!("(?{flags}){pattern_}") + } else { + pattern_ + }; rule.required_vars = variables; - match fancy_regex::Regex::new(pattern) { + let cococ = "fd"; + /*match fancy_regex::Regex::new(pattern) { Ok(r) => { rule.captured_vars = r .capture_names() @@ -1013,16 +1133,33 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { pattern, name, err ); } + }*/ + + if let RuleType::Header { + matches: HeaderMatches::Matches | HeaderMatches::NotMatches, + header: header @ (Header::MessageId | Header::Name(_) | Header::ToCc), + part, + .. + } = &rule.t + { + common_headers + .entry((header.clone(), *part)) + .or_insert_with(HashSet::new) + .insert(rule.name.to_string()); + } + + result.push(rule); + } + RuleType::None => { + if do_warn { + eprintln!("Warning: Test {} has no type: {rule:?}", rule.name); } } - rule.name = name; - rule.into() - } else { - if do_warn { - eprintln!("Warning: Test {name} has no type: {rule:?}"); + _ => { + result.push(rule); } - None } + result }) .collect::>(); rules.sort_unstable(); @@ -1099,13 +1236,19 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { // Generate script let mut script = String::from(concat!( - "require [\"variables\", \"include\", \"regex\", \"body\", \"vnd.stalwart.plugins\"];\n\n", + "require [\"variables\", \"include\", \"regex\", \"body\", ", + "\"vnd.stalwart.foreveryline\", \"vnd.stalwart.eval\",", + " \"vnd.stalwart.plugins\"];\n\n", "set \"score\" \"0.0\";\n", "set \"spam_score\" \"5.0\";\n", "set \"awl_factor\" \"0.5\";\n", "set \"body\" \"${body.to_text}\";\n", "set \"body_len\" \"${body.len()}\";\n", - "set \"headers_raw\" \"${headers.raw}\";\n", + "set \"body_ucc\" \"${count_uppercase(body)}\";\n", + "set \"body_lcc\" \"${count_lowercase(body)}\";\n", + "set \"body_ul_ratio\" \"${body_ucc / (body_ucc + body_lcc)}\";\n", + "set \"headers_raw\" \"${header.*.raw}\";\n", + "set \"headers_text\" \"${header.*.text}\";\n", "set \"thread_name\" \"${header.subject.thread_name()}\";\n", "set \"sent_date\" \"${header.date.date}\";\n", "set \"mail_from\" \"${envelope.from}\";\n", @@ -1119,14 +1262,168 @@ pub fn import_spamassassin(path: PathBuf, extension: String, do_warn: bool) { "\n" )); + // Print grouped rules + let mut descriptions: HashMap> = HashMap::new(); + let mut processed_rules = HashSet::new(); + for ((header, part), rule_names) in common_headers { + script.push_str("foreveryline \"${"); + let mut add_to_processed = true; + match &header { + Header::All => { + script.push_str(match part { + HeaderPart::Raw => "headers_raw", + _ => "headers_text", + }); + } + Header::MessageId => { + script.push_str(concat!( + "header.message-id:resent-message-id:", + "x-message-id:x-original-message-id[*].id[*]" + )); + } + Header::ToCc => { + script.push_str("header.to:cc[*]"); + script.push_str(match part { + HeaderPart::Name => ".name[*]", + HeaderPart::Addr => ".addr[*]", + HeaderPart::Raw => ".raw", + HeaderPart::Default => ".text", + }); + } + Header::Name(name) => { + script.push_str("header."); + script.push_str(name); + script.push_str("[*]"); + script.push_str(match part { + HeaderPart::Name => ".name[*]", + HeaderPart::Addr => ".addr[*]", + HeaderPart::Raw => ".raw", + HeaderPart::Default => "", + }); + } + Header::Received(rcvd) => { + script.push_str("header.received[*].rcvd."); + script.push_str(match rcvd { + ReceivedPart::From => "from", + ReceivedPart::FromIp => "ip", + ReceivedPart::FromIpRev => "iprev", + ReceivedPart::Protocol => "with", + ReceivedPart::By => "by", + ReceivedPart::For => "for", + ReceivedPart::Ident => "ident", + ReceivedPart::Id => "id", + }); + if matches!( + rcvd, + ReceivedPart::From | ReceivedPart::FromIp | ReceivedPart::FromIpRev + ) { + add_to_processed = false; + } + } + Header::EnvelopeFrom => unreachable!(), + } + + script.push_str("}\" {\n"); + for rule_name in rule_names { + for rule in &rules { + if rule.name == rule_name { + if rule.score() == 0.0 && !tests_linked.contains(&rule.name) { + if do_warn { + println!("Warning: Test {} is never linked to.", rule.name); + } + continue; + } + if let RuleType::Header { + matches, + pattern, + header: header_, + .. + } = &rule.t + { + if header_ != &header { + continue; + } + + write!(&mut script, "\tif allof(eval \"!{}\", ", rule_name).unwrap(); + match matches { + HeaderMatches::Matches => script.push_str("string :regex "), + HeaderMatches::NotMatches => script.push_str("not string :regex "), + HeaderMatches::Exists | HeaderMatches::NotExists => unreachable!(), + } + script.push_str("\"${line}\" "); + write!(&mut script, "{:?})", pattern).unwrap(); + let spaces = "\t".repeat(2); + writeln!( + &mut script, + " {{\n{spaces}set :local \"{}\" \"1\";", + rule.name + ) + .unwrap(); + + for (var_name, pos) in &rule.captured_vars { + writeln!( + &mut script, + "{spaces}set :local \"{}\" \"${{{}}}\";", + var_name, pos + ) + .unwrap(); + } + + let score = rule.score(); + if score != 0.0 { + script.push_str(&spaces); + script.push_str("set \"score\" \"%{score"); + if score > 0.0 { + script.push_str(" + "); + script.push_str(&score.to_string()); + } else { + script.push_str(" - "); + script.push_str((-score).to_string().as_str()); + } + script.push_str("}\";\n"); + } + script.push_str("\t}\n"); + } else { + unreachable!(); + } + + if add_to_processed { + processed_rules.insert(rule_name); + } + break; + } + } + } + script.push_str("}\n\n"); + } + for rule in rules_sorted { + for (lang, text) in &rule.description { + descriptions + .entry(lang.to_string()) + .or_default() + .insert(rule.name.to_string(), text.to_string()); + } + if rule.score() == 0.0 && !tests_linked.contains(&rule.name) { if do_warn { eprintln!("Warning: Test {} is never linked to.", rule.name); } continue; + } else if !processed_rules.contains(&rule.name) { + write!(&mut script, "{rule}").unwrap(); + } + } + + for (lang, texts) in descriptions { + let mut file = fs::File::create(format!( + "/Users/me/code/mail-server/_ignore/descriptions_{}.txt", + lang + )) + .unwrap(); + for (name, text) in texts { + //file.write_all(format!("set {:?} {:?}", name, text).as_bytes()); } - write!(&mut script, "{rule}").unwrap(); } fs::write( @@ -1172,79 +1469,54 @@ impl Display for Rule { match &self.t { RuleType::Header { - header: header @ (Header::All | Header::AllExternal), - pattern, + matches: matches @ (HeaderMatches::Exists | HeaderMatches::NotExists), + header, .. } => { - write!( - f, - "if match_all_headers {:?} {:?}", - if header == &Header::All { - "all" - } else { - "all-external" - }, - pattern - )?; + match matches { + HeaderMatches::Exists => write!(f, "if exists ")?, + HeaderMatches::NotExists => write!(f, "not exists ")?, + _ => unreachable!(), + } + match header { + Header::MessageId => f.write_str(concat!( + "[\"Message-Id\",\"Resent-Message-Id\",", + "\"X-Message-Id\",\"X-Original-Message-ID\"]" + ))?, + Header::ToCc => f.write_str("[\"To\",\"Cc\"]")?, + Header::Name(name) => write!(f, "{:?}", name)?, + _ => unreachable!(), + } } + RuleType::Header { matches, header, - if_unset, pattern, part, } => { - let is_raw = part.contains(&HeaderPart::Raw); - let is_name = part.contains(&HeaderPart::Name); - let is_addr = part.contains(&HeaderPart::Addr); + let is_raw = part == &HeaderPart::Raw; + let is_name = part == &HeaderPart::Name; + let is_addr = part == &HeaderPart::Addr; - let mut pattern = pattern.as_str(); - let mut matches = *matches; + let pattern = pattern.as_str(); + let matches = *matches; f.write_str("if ")?; - // Map unset statements into expressions - let mut has_unset = match if_unset { - Some(val) if pattern == format!("^{val}$") => { - // convert /^UNSET$/ [if-unset: UNSET] to exists - pattern = ""; - matches = HeaderMatches::Exists; - f.write_str("not ")?; - false - } - Some(_) => true, - None => false, - }; - - if has_unset { - match header { - Header::MessageId => f.write_str(concat!( - "allof(header :contains ", - "[\"Message-Id\",\"Resent-Message-Id\",", - "\"X-Message-Id\",\"X-Original-Message-ID\"]" - ))?, - Header::ToCc => f.write_str("allof(header :contains [\"To\",\"Cc\"]")?, - Header::Name(name) => write!(f, "allof(header :contains {:?}", name)?, - Header::EnvelopeFrom | Header::All | Header::AllExternal => { - has_unset = false; - } - } - if has_unset { - f.write_str(" \"\", ")?; - } - } - let cmd = if matches!(header, Header::EnvelopeFrom) { "envelope" } else if (is_name || is_addr) && !is_raw { "address" + } else if matches!(header, Header::All | Header::Received(_)) { + "string" } else { "header" }; match matches { HeaderMatches::Matches => write!(f, "{cmd} :regex ")?, HeaderMatches::NotMatches => write!(f, "not {cmd} :regex ")?, - HeaderMatches::Exists => write!(f, "{cmd} :contains ")?, + HeaderMatches::Exists | HeaderMatches::NotExists => unreachable!(), } if !is_raw { if is_name { @@ -1261,14 +1533,30 @@ impl Display for Rule { Header::ToCc => f.write_str("[\"To\",\"Cc\"]")?, Header::Name(name) => write!(f, "{:?}", name)?, Header::EnvelopeFrom => f.write_str("\"from\"")?, - Header::All | Header::AllExternal => unreachable!(), + Header::All => { + if is_raw { + f.write_str("\"${headers_raw}\"")? + } else { + f.write_str("\"${headers_text}\"")? + } + } + Header::Received(rcvd) => { + f.write_str("\"${env.")?; + f.write_str(match rcvd { + ReceivedPart::From => "helo_domain", + ReceivedPart::FromIp => "remote_ip", + ReceivedPart::FromIpRev => "iprev_ptr", + _ => unreachable!(), + })?; + f.write_str("}\"")?; + } } write!(f, " {:?}", pattern)?; - if has_unset { + /*if has_unset { f.write_str(")")?; - } + }*/ } RuleType::Body { pattern, raw } => { if *raw { @@ -1289,12 +1577,12 @@ impl Display for Rule { match function.as_str() { "check_from_in_auto_welcomelist" | "check_from_in_auto_whitelist" => { f.write_str(concat!( - "query :use \"spam\" :set [\"awl_score\", \"awl_count\"] \"SELECT score, count FROM awl WHERE sender = ? AND ip = ?\" [\"${from}\", \"%{env.remote_ip}\"];\n", + "query :use \"spam\" :set [\"awl_score\", \"awl_count\"] \"SELECT score, count ","FROM awl WHERE sender = ? AND ip = ?\" [\"${from}\", \"%{env.remote_ip}\"];\n", "if eval \"awl_count > 0\" {\n", - "\tquery :use \"spam\" \"UPDATE awl SET score = score + ?, count = count + 1 WHERE sender = ? AND ip = ?\" [\"%{score}\", \"${from}\", \"%{env.remote_ip}\"];\n", + "\tquery :use \"spam\" \"UPDATE awl SET score = score + ?, count = count + 1 WHERE ","sender = ? AND ip = ?\" [\"%{score}\", \"${from}\", \"%{env.remote_ip}\"];\n", "\tset \"score\" \"%{score + ((awl_score / awl_count) - score) * awl_factor}\";\n", "} else {\n", - "\tquery :use \"spam\" \"INSERT INTO awl (score, count, sender, ip) VALUES (?, 1, ?, ?)\" [\"%{score}\", \"${from}\", \"%{env.remote_ip}\"];\n", + "\tquery :use \"spam\" \"INSERT INTO awl (score, count, sender, ip) VALUES (?, 1, ?, ?)\""," [\"%{score}\", \"${from}\", \"%{env.remote_ip}\"];\n", "}\n\n", ))?; return Ok(()); @@ -1408,7 +1696,22 @@ impl Display for Rule { | "check_microsoft_executable" | "check_outlook_message_id" | "gated_through_received_hdr_remover" - | "check_for_faraway_charset_in_headers" => { + | "check_for_faraway_charset" + | "check_for_faraway_charset_in_headers" + | "check_all_trusted" + | "check_relays_unparseable" + | "check_welcomelist_bounce_relays" + | "check_whitelist_bounce_relays" + | "have_any_bounce_relays" + | "check_for_fake_aol_relay_in_rcvd" + | "check_for_forged_eudoramail_received_headers" + | "check_for_forged_gmail_received_headers" + | "check_for_forged_hotmail_received_headers" + | "check_for_forged_juno_received_headers" + | "check_for_forged_yahoo_received_headers" + | "check_for_no_hotmail_received_headers" + | "check_stock_info" + | "check_abundant_unicode_ratio" => { // ADSP is deprecated (see https://datatracker.ietf.org/doc/status-change-adsp-rfc5617-to-historic/) // check_body_8bits: Not really useful // check_shortcircuit: Not used @@ -1636,8 +1939,8 @@ impl Display for Rule { } "check_fromname_spoof" => { f.write_str(concat!( - "if allof(eval \"from_name.is_email()\", ", - "not string :is \"${from_name.domain_name_part()}\" \"${from.domain_name_part()}\")", + "if eval \"is_email(from_name) && ", + "domain_name_part(from_name) != domain_name_part(from)\")", ))?; } "check_header_count_range" => { @@ -1658,10 +1961,10 @@ impl Display for Rule { if range_to > 100 { write!( f, - "if eval \"header.{hdr_name}[*].raw.count() >= {range_from}\"" + "if eval \"count(header.{hdr_name}[*].raw) >= {range_from}\"" )?; } else { - write!(f, "if eval \"header.{hdr_name}[*].raw.count() >= {range_from} && header.{hdr_name}[*].raw.count() < {range_to}\"")?; + write!(f, "if eval \"count(header.{hdr_name}[*].raw) >= {range_from} && header.{hdr_name}[*].raw.count() < {range_to}\"")?; } } "check_illegal_chars" => { @@ -1722,6 +2025,45 @@ impl Display for Rule { )?; } } + "check_no_relays" => {} + "check_for_forged_received_trail" => { + f.write_str(concat!( + "if allof(string :value \"${env.iprev_ptr}\" \"\", ", + "string :value \"ne\" \"%{env.helo_domain}\" \"${env.iprev_ptr}\")" + ))?; + } + + "check_for_mime_html_only" + | "check_for_mime_html" + | "multipart_alternative_difference" + | "multipart_alternative_difference_count" + | "check_ma_non_text" + | "check_for_ascii_text_illegal" + | "check_mime_multipart_ratio" + | "check_msg_parse_flags" + | "tvd_vertical_words" + | "check_base64_length" + | "check_for_uppercase" => { + // Handled externally + return Ok(()); + //f.write_str("if eval \"header.content-type == 'text/html'\"")?; + /* + + let params = params + .iter() + .next() + .expect("missing parameter for check_msg_parse_flags"); + if params.contains("mime_epilogue_exists") { + f.write_str("if eval \"header.content-type.type == 'multipart' && !ends-with(trim(part.raw), '--')\"")?; + } else if params.contains("missing_mime_headers") + || params.contains("missing_head_body_separator") + { + let c = "implemented in loop"; + } else { + panic!("Warning: Invalid check_msg_parse_flags {:?}", params); + } + */ + } _ => { write!(f, "if {function}")?; diff --git a/crates/antispam/src/import/utils.rs b/crates/antispam/src/import/utils.rs index 6a377fb3..c00d13df 100644 --- a/crates/antispam/src/import/utils.rs +++ b/crates/antispam/src/import/utils.rs @@ -81,7 +81,7 @@ pub fn fix_broken_regex(value: &str) -> &str { } } -pub fn import_regex(value: &str) -> (String, HashSet) { +pub fn import_regex(value: &str) -> (String, String, HashSet) { // Obtain separator let mut iter = value.chars().peekable(); let separator = match iter.next() { @@ -134,14 +134,7 @@ pub fn import_regex(value: &str) -> (String, HashSet) { } } - ( - if !flags.is_empty() { - format!("(?{flags}){regex}") - } else { - regex - }, - variables, - ) + (regex, flags, variables) } #[cfg(test)] @@ -164,7 +157,11 @@ mod test { vec!["GB_TO_ADDR"], ), ] { - let (regex, regex_vars) = super::import_regex(expr); + let (mut regex, flags, regex_vars) = super::import_regex(expr); + if !flags.is_empty() { + regex = format!("(?{flags}){regex}"); + } + assert_eq!(regex, result); assert_eq!( HashSet::from_iter(vars.iter().map(|s| s.to_string())), diff --git a/crates/smtp/src/config/scripts.rs b/crates/smtp/src/config/scripts.rs index 308a4d74..4e587942 100644 --- a/crates/smtp/src/config/scripts.rs +++ b/crates/smtp/src/config/scripts.rs @@ -70,6 +70,7 @@ impl ConfigSieve for Config { ]) .with_capability(Capability::Plugins) .with_capability(Capability::ForEveryLine) + .with_capability(Capability::Eval) .with_max_variable_size(102400) .with_max_header_size(10240) .with_valid_notification_uri("mailto") diff --git a/crates/smtp/src/scripts/event_loop.rs b/crates/smtp/src/scripts/event_loop.rs index fa71d9a9..49990f94 100644 --- a/crates/smtp/src/scripts/event_loop.rs +++ b/crates/smtp/src/scripts/event_loop.rs @@ -21,6 +21,7 @@ * for more details. */ +use core::panic; use std::{sync::Arc, time::Duration}; use ahash::AHashMap; @@ -343,6 +344,31 @@ impl SMTP { } } + // Assert global variables + #[cfg(feature = "test_mode")] + if let Some(expected_variables) = params.expected_variables { + for var_name in instance.global_variable_names() { + if !expected_variables.contains_key(var_name) { + panic!( + "Unexpected variable {var_name:?} with value {:?}\nExpected {:?}\nFound: {:?}", + instance.global_variable(var_name).unwrap(), + expected_variables.keys().collect::>(), + instance.global_variable_names().collect::>() + ); + } + } + + for (name, expected) in &expected_variables { + if let Some(value) = instance.global_variable(name.as_str()) { + assert_eq!(value, expected, "Variable {name:?} has unexpected value"); + } else { + panic!("Missing variable {name:?} with value {expected:?}\nExpected {:?}\nFound: {:?}", + expected_variables.keys().collect::>(), + instance.global_variable_names().collect::>()); + } + } + } + // Keep id // 0 = use original message // MAX = implicit keep diff --git a/crates/smtp/src/scripts/functions.rs b/crates/smtp/src/scripts/functions.rs index 62afa045..bb8b4332 100644 --- a/crates/smtp/src/scripts/functions.rs +++ b/crates/smtp/src/scripts/functions.rs @@ -21,45 +21,85 @@ * for more details. */ -use mail_parser::parsers::fields::thread::thread_name; -use sieve::{runtime::Variable, FunctionMap}; +use std::{borrow::Cow, collections::HashMap}; + +use ahash::{HashSet, HashSetExt}; +use mail_parser::{ + decoders::html::html_to_text, parsers::fields::thread::thread_name, HeaderName, HeaderValue, + MimeHeaders, +}; +use sieve::{compiler::ReceivedPart, runtime::Variable, FunctionMap}; pub fn register_functions() -> FunctionMap { FunctionMap::new() - .with_function("trim", |v| v.to_cow().trim().to_string().into()) - .with_function("len", |v| v.to_cow().len().into()) - .with_function("is_empty", |v| v.to_cow().as_ref().is_empty().into()) - .with_function("to_lowercase", |v| { - v.to_cow().to_lowercase().to_string().into() + .with_function("trim", |_, v| v[0].transform(|s| Some(s.trim()))) + .with_function("len", |_, v| { + match &v[0] { + Variable::String(s) => s.len(), + Variable::StringRef(s) => s.len(), + Variable::Array(a) => a.len(), + Variable::ArrayRef(a) => a.len(), + v => v.to_string().len(), + } + .into() }) - .with_function("to_uppercase", |v| { - v.to_cow().to_uppercase().to_string().into() + .with_function("is_empty", |_, v| { + match &v[0] { + Variable::String(s) => s.is_empty(), + Variable::StringRef(s) => s.is_empty(), + Variable::Integer(_) | Variable::Float(_) => false, + Variable::Array(a) => a.is_empty(), + Variable::ArrayRef(a) => a.is_empty(), + } + .into() }) - .with_function("language", |v| { - whatlang::detect_lang(v.to_cow().as_ref()) + .with_function("is_ascii", |_, v| { + match &v[0] { + Variable::String(s) => s.chars().all(|c| c.is_ascii()), + Variable::StringRef(s) => s.chars().all(|c| c.is_ascii()), + Variable::Integer(_) | Variable::Float(_) => true, + Variable::Array(a) => a.iter().all(|v| match v { + Variable::String(s) => s.chars().all(|c| c.is_ascii()), + Variable::StringRef(s) => s.chars().all(|c| c.is_ascii()), + _ => true, + }), + Variable::ArrayRef(a) => a.iter().all(|v| match v { + Variable::String(s) => s.chars().all(|c| c.is_ascii()), + Variable::StringRef(s) => s.chars().all(|c| c.is_ascii()), + _ => true, + }), + } + .into() + }) + .with_function("to_lowercase", |_, v| { + v[0].to_cow().to_lowercase().to_string().into() + }) + .with_function("to_uppercase", |_, v| { + v[0].to_cow().to_uppercase().to_string().into() + }) + .with_function("detect_language", |_, v| { + whatlang::detect_lang(v[0].to_cow().as_ref()) .map(|l| l.code()) .unwrap_or("unknown") .into() }) - .with_function("is_email", |v| is_email_valid(v.to_cow().as_ref()).into()) - .with_function("domain_part", |v| { - v.to_cow() - .rsplit_once('@') - .map_or(Variable::default(), |(_, d)| d.trim().to_string().into()) + .with_function("is_email", |_, v| { + is_email_valid(v[0].to_cow().as_ref()).into() }) - .with_function("local_part", |v| { - v.to_cow() - .rsplit_once('@') - .map_or(Variable::default(), |(u, _)| u.trim().to_string().into()) + .with_function("domain_part", |_, v| { + v[0].transform(|s| s.rsplit_once('@').map(|(_, d)| d.trim())) }) - .with_function("domain_name_part", |v| { - v.to_cow() - .rsplit_once('@') - .and_then(|(_, d)| d.trim().split('.').rev().nth(1).map(|s| s.to_string())) - .map_or(Variable::default(), Variable::from) + .with_function("local_part", |_, v| { + v[0].transform(|s| s.rsplit_once('@').map(|(u, _)| u.trim())) }) - .with_function("subdomain_part", |v| { - v.to_cow() + .with_function("domain_name_part", |_, v| { + v[0].transform(|s| { + s.rsplit_once('@') + .and_then(|(_, d)| d.trim().split('.').rev().nth(1)) + }) + }) + .with_function("subdomain_part", |_, v| { + v[0].to_cow() .rsplit_once('@') .map_or(Variable::default(), |(_, d)| { d.split('.') @@ -75,33 +115,88 @@ pub fn register_functions() -> FunctionMap { .into() }) }) - .with_function("thread_name", |v| { - thread_name(v.to_cow().as_ref()).to_string().into() + .with_function("thread_name", |_, v| { + v[0].transform(|s| thread_name(s).into()) }) - .with_function("is_uppercase", |v| { - v.to_cow() + .with_function("html_to_text", |_, v| { + html_to_text(v[0].to_cow().as_ref()).into() + }) + .with_function("is_uppercase", |_, v| { + v[0].to_cow() .as_ref() .chars() .filter(|c| c.is_alphabetic()) .all(|c| c.is_uppercase()) .into() }) - .with_function("is_lowercase", |v| { - v.to_cow() + .with_function("is_lowercase", |_, v| { + v[0].to_cow() .as_ref() .chars() .filter(|c| c.is_alphabetic()) .all(|c| c.is_lowercase()) .into() }) - .with_function("count_words", |v| { - v.to_cow().as_ref().split_whitespace().count().into() + .with_function("tokenize_words", |_, v| { + match &v[0] { + Variable::StringRef(s) => s + .split_whitespace() + .filter(|word| word.chars().all(|c| c.is_alphanumeric())) + .map(Variable::from) + .collect::>(), + Variable::String(s) => s + .split_whitespace() + .filter(|word| word.chars().all(|c| c.is_alphanumeric())) + .map(|word| Variable::from(word.to_string())) + .collect::>(), + v => v + .to_string() + .split_whitespace() + .filter(|word| word.chars().all(|c| c.is_alphanumeric())) + .map(|word| Variable::from(word.to_string())) + .collect::>(), + } + .into() }) - .with_function("count_chars", |v| { - v.to_cow().as_ref().chars().count().into() + .with_function("max_line_len", |_, v| { + match &v[0] { + Variable::String(s) => s.lines().map(|l| l.len()).max().unwrap_or(0), + Variable::StringRef(s) => s.lines().map(|l| l.len()).max().unwrap_or(0), + Variable::Integer(_) | Variable::Float(_) => 0, + Variable::Array(a) => a.iter().map(|v| v.to_cow().len()).max().unwrap_or(0), + Variable::ArrayRef(a) => a.iter().map(|v| v.to_cow().len()).max().unwrap_or(0), + } + .into() }) - .with_function("count_control_chars", |v| { - v.to_cow() + .with_function("count_spaces", |_, v| { + v[0].to_cow() + .as_ref() + .chars() + .filter(|c| c.is_whitespace()) + .count() + .into() + }) + .with_function("count_uppercase", |_, v| { + v[0].to_cow() + .as_ref() + .chars() + .filter(|c| c.is_alphabetic() && c.is_uppercase()) + .count() + .into() + }) + .with_function("count_lowercase", |_, v| { + v[0].to_cow() + .as_ref() + .chars() + .filter(|c| c.is_alphabetic() && c.is_lowercase()) + .count() + .into() + }) + .with_function("count_chars", |_, v| { + v[0].to_cow().as_ref().chars().count().into() + }) + .with_function("count_control_chars", |_, v| { + v[0].to_cow() .as_ref() .chars() .filter(|c| { @@ -114,13 +209,183 @@ pub fn register_functions() -> FunctionMap { .count() .into() }) - .with_function("count", |v| { - if let Variable::Array(l) = v { - l.len().into() - } else { - 1.into() - } + .with_function_args( + "eq_ignore_case", + |_, v| { + v[0].to_cow() + .eq_ignore_ascii_case(v[1].to_cow().as_ref()) + .into() + }, + 2, + ) + .with_function_args( + "received_part", + |ctx, v| { + if let (Ok(part), Some(HeaderValue::Received(rcvd))) = ( + ReceivedPart::try_from(v[1].to_cow().as_ref()), + ctx.message() + .part(ctx.part()) + .and_then(|p| { + p.headers + .iter() + .filter(|h| h.name == HeaderName::Received) + .nth((v[0].to_integer() as usize).saturating_sub(1)) + }) + .map(|h| &h.value), + ) { + part.eval(rcvd).unwrap_or_default() + } else { + Variable::default() + } + }, + 2, + ) + .with_function_args( + "cosine_similarity", + |_, v| { + let mut word_freq: HashMap, [u32; 2]> = HashMap::new(); + + for (idx, var) in v.into_iter().enumerate() { + match var { + Variable::Array(l) => { + for item in l { + word_freq.entry(item.into_cow()).or_insert([0, 0])[idx] += 1; + } + } + Variable::ArrayRef(l) => { + for item in l { + word_freq.entry(item.to_cow()).or_insert([0, 0])[idx] += 1; + } + } + _ => { + for char in var.to_cow().chars() { + word_freq.entry(char.to_string().into()).or_insert([0, 0])[idx] += + 1; + } + } + } + } + + let mut dot_product = 0; + let mut magnitude_a = 0; + let mut magnitude_b = 0; + + for (_word, count) in word_freq.iter() { + dot_product += count[0] * count[1]; + magnitude_a += count[0] * count[0]; + magnitude_b += count[1] * count[1]; + } + + if magnitude_a != 0 && magnitude_b != 0 { + dot_product as f64 / (magnitude_a as f64).sqrt() / (magnitude_b as f64).sqrt() + } else { + 0.0 + } + .into() + }, + 2, + ) + .with_function_args( + "jaccard_similarity", + |_, v| { + let mut word_freq = [HashSet::new(), HashSet::new()]; + + for (idx, var) in v.into_iter().enumerate() { + match var { + Variable::Array(l) => { + for item in l { + word_freq[idx].insert(item.into_cow()); + } + } + Variable::ArrayRef(l) => { + for item in l { + word_freq[idx].insert(item.to_cow()); + } + } + _ => { + for char in var.to_cow().chars() { + word_freq[idx].insert(char.to_string().into()); + } + } + } + } + + let intersection_size = word_freq[0].intersection(&word_freq[1]).count(); + let union_size = word_freq[0].union(&word_freq[1]).count(); + + if union_size != 0 { + intersection_size as f64 / union_size as f64 + } else { + 0.0 + } + .into() + }, + 2, + ) + .with_function_no_args("var_names", |ctx, _| { + Variable::Array( + ctx.global_variable_names() + .map(|v| Variable::from(v.to_string())) + .collect(), + ) }) + .with_function_no_args("is_encoding_problem", |ctx, _| { + ctx.message() + .part(ctx.part()) + .map(|p| p.is_encoding_problem) + .unwrap_or_default() + .into() + }) + .with_function_no_args("is_attachment", |ctx, _| { + ctx.message().attachments.contains(&ctx.part()).into() + }) + .with_function_no_args("is_body", |ctx, _| { + (ctx.message().text_body.contains(&ctx.part()) + || ctx.message().html_body.contains(&ctx.part())) + .into() + }) + .with_function_no_args("attachment_name", |ctx, _| { + ctx.message() + .part(ctx.part()) + .and_then(|p| p.attachment_name()) + .unwrap_or_default() + .into() + }) +} + +trait ApplyString<'x> { + fn transform(&self, f: impl Fn(&str) -> Option<&str>) -> Variable<'x>; + fn transform_string>>( + &self, + f: impl Fn(&str) -> T, + ) -> Option>; +} + +impl<'x> ApplyString<'x> for Variable<'x> { + fn transform(&self, f: impl Fn(&str) -> Option<&str>) -> Variable<'x> { + match self { + Variable::String(s) => { + f(s).map_or(Variable::default(), |s| Variable::from(s.to_string())) + } + Variable::StringRef(s) => f(s).map_or(Variable::default(), Variable::from), + v => f(v.to_string().as_str()) + .map_or(Variable::default(), |s| Variable::from(s.to_string())), + } + } + + fn transform_string>>( + &self, + f: impl Fn(&str) -> T, + ) -> Option> { + match self { + Variable::String(s) => Some(f(s).into()), + Variable::StringRef(s) => Some(f(s).into()), + Variable::Integer(_) + | Variable::Float(_) + | Variable::Array(_) + | Variable::ArrayRef(_) => None, + } + } } fn is_email_valid(email: &str) -> bool { diff --git a/crates/smtp/src/scripts/mod.rs b/crates/smtp/src/scripts/mod.rs index c956e594..d3a9dc9c 100644 --- a/crates/smtp/src/scripts/mod.rs +++ b/crates/smtp/src/scripts/mod.rs @@ -49,6 +49,8 @@ pub struct ScriptParameters { message: Option>>, variables: AHashMap, Variable<'static>>, envelope: Vec<(Envelope, Variable<'static>)>, + #[cfg(feature = "test_mode")] + expected_variables: Option>>, } impl ScriptParameters { @@ -57,6 +59,8 @@ impl ScriptParameters { variables: AHashMap::with_capacity(10), envelope: Vec::with_capacity(6), message: None, + #[cfg(feature = "test_mode")] + expected_variables: None, } } @@ -75,6 +79,15 @@ impl ScriptParameters { self.variables.insert(name.into(), value.into()); self } + + #[cfg(feature = "test_mode")] + pub fn with_expected_variables( + mut self, + expected_variables: AHashMap>, + ) -> Self { + self.expected_variables = expected_variables.into(); + self + } } impl Default for ScriptParameters { diff --git a/crates/smtp/src/scripts/plugins/mod.rs b/crates/smtp/src/scripts/plugins/mod.rs index f4ea06a3..70d761e7 100644 --- a/crates/smtp/src/scripts/plugins/mod.rs +++ b/crates/smtp/src/scripts/plugins/mod.rs @@ -54,6 +54,13 @@ pub trait RegisterSievePlugins { impl RegisterSievePlugins for Compiler { fn register_plugins(mut self) -> Self { + #[cfg(feature = "test_mode")] + { + self.register_plugin("print") + .with_id(PLUGINS_EXEC.len() as u32) + .with_string_argument(); + } + for (i, fnc) in PLUGINS_REGISTER.iter().enumerate() { fnc(i as u32, &mut self); } @@ -63,9 +70,27 @@ impl RegisterSievePlugins for Compiler { impl SMTP { pub fn run_plugin_blocking(&self, id: u32, ctx: PluginContext<'_>) -> Input { + #[cfg(feature = "test_mode")] + if id == PLUGINS_EXEC.len() as u32 { + return test_print(ctx); + } + PLUGINS_EXEC .get(id as usize) .map(|fnc| fnc(ctx)) .unwrap_or(false.into()) } } + +#[cfg(feature = "test_mode")] +pub fn test_print(ctx: PluginContext<'_>) -> Input { + println!( + "{}", + ctx.arguments + .into_iter() + .next() + .and_then(|a| a.unwrap_string()) + .unwrap() + ); + Input::True +} diff --git a/resources/config/sieve/antispam.sieve b/resources/config/sieve/antispam.sieve new file mode 100644 index 00000000..1d0bede5 --- /dev/null +++ b/resources/config/sieve/antispam.sieve @@ -0,0 +1,196 @@ +require [ "variables", + "include", + "foreverypart", + "regex", + "body", + "reject", + "vnd.stalwart.foreveryline", + "vnd.stalwart.eval", + "vnd.stalwart.plugins"]; + +set "body" "%{body.to_text}"; +set "body_len" "%{len(body)}"; +set "headers_raw" "%{header.*.raw}"; +set "headers_text" "%{header.*.text}"; +#set "thread_name" "%{header.subject.thread_name()}"; +#set "sent_date" "%{header.date.date}"; +#set "mail_from" "%{envelope.from}"; +#if eval "mail_from.is_empty()" { +# set "mail_from" "postmaster@${env.helo_domain}"; +#} +#set "mail_from_domain" "%{mail_from.domain_part()}"; +#set "from" "%{header.from.addr}"; +#set "from_domain" "%{from.domain_part()}"; +#set "from_name" "%{header.from.name.trim()}"; + +# Message only has text/html MIME parts +if eval "header.content-type == 'text/html'" { + set "t.MIME_HTML_ONLY" "1"; + set "t.__MIME_HTML" "1"; +} + +set "mime_text_html_count" "0"; +set "mime_text_plain_count" "0"; +set "mime_part_count" "0"; + +foreverypart { + set "ct" "%{to_lowercase(header.content-type)}"; + + if eval "is_body()" { + if eval "ct == 'text/plain'" { + set "mime_text_plain_count" "%{mime_text_plain_count + 1}"; + } elsif eval "ct == 'text/html'" { + set "mime_text_html_count" "%{mime_text_html_count + 1}"; + } + } + + if eval "is_attachment()" { + # Has a MIME attachment + set "t.__MIME_ATTACHMENT" "1"; + } + + if eval "ct == 'multipart/alternative'" { + set "text_part" ""; + set "text_part_len" "0"; + set "text_part_words" ""; + set "html_part" ""; + set "html_part_words" ""; + set "html_part_len" "0"; + + foreverypart { + set "ma_ct" "%{to_lowercase(header.content-type)}"; + + if eval "text_part_len == 0 && ma_ct == 'text/plain'" { + set "text_part_len" "%{len(part.text)}"; + set "text_part" "%{part.text}"; + set "text_part_words" "%{tokenize_words(part.text)}"; + } elsif eval "html_part_len == 0 && ma_ct == 'text/html'" { + set "html_part_len" "%{len(part.text)}"; + set "html_part" "%{html_to_text(part.text)}"; + set "html_part_words" "%{tokenize_words(html_part)}"; + } + + # Multipart/alternative has a no text part + if eval "!(ma_ct == 'multipart/related' || ma_ct == 'application/rtf' || header.content-type.type == 'text')" { + set "t.MULTIPART_ALT_NON_TEXT" "1"; + } + } + + # Multipart message mostly text/html MIME + if eval "html_part_len > 0 && (text_part_len / html_part_len) >= 0.0 && (text_part_len / html_part_len) < 0.01" { + set "t.MIME_HTML_MOSTLY" "1"; + } + + # HTML and text parts are different + if eval "!is_empty(text_part_words) && !is_empty(html_part_words)" { + if eval "!t.MPART_ALT_DIFF_COUNT" { + set "ma_count_text" "%{len(text_part_words)}"; + set "ma_count_html" "%{len(html_part_words)}"; + + if eval "(ma_count_text > ma_count_html && ma_count_text / ma_count_html > 3) || + (ma_count_html > ma_count_text && ma_count_html / ma_count_text > 3)" { + set "t.MPART_ALT_DIFF_COUNT" "1"; + } + } + + if eval "!t.MPART_ALT_DIFF" { + if eval "cosine_similarity(text_part_words, html_part_words) < 0.98" { + set "t.MPART_ALT_DIFF" "1"; + } + } + } + + } + + if eval "eq_ignore_case(header.content-transfer-encoding, 'base64')" { + # Message has a Base64 encoded MIME part + set "t.__MIME_BASE64" "1"; + + # Some spammers generate base64 encoded parts with a single or a handful of + # long lines over the standard length, which hovers around 77 chars on average. + set "max_b64_len" "%{max_line_len(part.raw)}"; + if eval "max_b64_len == 78 || max_b64_len == 79" { + set "t.BASE64_LENGTH_78_79" "1"; + } elsif eval "max_b64_len > 79" { + set "t.BASE64_LENGTH_79_INF" "1"; + } + + # Message text disguised using base64 encoding + if eval "header.content-type.type == 'text' && is_body() && is_ascii(part.text)" { + set "t.MIME_BASE64_TEXT" "1"; + } + } elsif eval "eq_ignore_case(header.content-transfer-encoding, 'quoted-printable')" { + set "t.__MIME_QP" "1"; + set "t.__MIME_QPC" "%{t.__MIME_QPC + 1}"; + + # Quoted-printable line longer than 76 chars + if eval "max_line_len(part.raw) > 79" { + set "t.MIME_QP_LONG_LINE" "1"; + } + } + + # MIME text/plain claims to be ASCII but isn't + if eval "header.content-type.type == 'text' + && ( header.content-transfer-encoding == '' || + eq_ignore_case(header.content-transfer-encoding, '7bit' ) ) + && !is_ascii(part.raw)" { + set "t.PP_MIME_FAKE_ASCII_TEXT" "1"; + } + + # Message has too many MIME parts + set "mime_part_count" "%{mime_part_count + 1}"; + if eval "mime_part_count > 1000" { + set "t.MIMEPART_LIMIT_EXCEEDED" "1"; + break; + } +} + +if eval "mime_text_html_count > 0 && mime_text_plain_count == 0" { + set "t.__MIME_HTML" "1"; +} + +set "vertical_lines" "0"; +set "total_lines" "0"; + +foreveryline "${body}" { + # Vertical words in body + if eval "len(trim(line)) == 1 || (len(line) > 5 && count_spaces(line) / count_chars(line) > 0.8)" { + set "vertical_lines" "%{vertical_lines + 1}"; + } + + set "total_lines" "%{line_num}"; +} + +if eval "total_lines > 5 && vertical_lines > total_lines / 2" { + set "t.__TVD_SPACE_RATIO" "1"; +} + +# Ratio of uppercase characters in body +if eval "body_len > 200" { + set "upper_count" "%{count_uppercase(body)}"; + set "lower_count" "%{count_lowercase(body)}"; + set "upper_ratio" "%{upper_count / (upper_count + lower_count)}"; + + if eval "upper_ratio > 0.25 && upper_ratio <= 0.5" { + set "t.__UPPERCASE_25_50" "1"; + } elsif eval "upper_ratio > 0.50 && upper_ratio <= 0.75" { + set "t.__UPPERCASE_50_75" "1"; + } elsif eval "upper_ratio > 0.75" { + set "t.__UPPERCASE_75_100" "1"; + } +} + +# Check for a forged received trail +if eval "!is_empty(env.iprev_ptr) && !eq_ignore_case(env.helo_domain, env.iprev_ptr)" { + set "t.__FORGED_RCVD_TRAIL" "1"; +} else { + foreveryline "${header.received[*].rcvd.iprev}" { + if eval "!is_empty(line)" { + set "helo_domain" "%{received_part(line_num, 'from')}"; + if eval "!is_empty(helo_domain) && !eq_ignore_case(helo_domain, line)" { + set "t.__FORGED_RCVD_TRAIL" "1"; + break; + } + } + } +} diff --git a/tests/resources/smtp/antispam/base64.txt b/tests/resources/smtp/antispam/base64.txt new file mode 100644 index 00000000..c1f7bbd2 --- /dev/null +++ b/tests/resources/smtp/antispam/base64.txt @@ -0,0 +1,142 @@ +expect MIME_BASE64_TEXT __MIME_BASE64 + +Return-Path: hello@public.hzptt.sd.cn +X-Mmdf-To: jm +Received: from salmon.maths.tcd.ie by maccullagh.maths.tcd.ie with SMTP + id ; 15 Apr 2001 02:36:50 +0100 (BST) +Received: from dux1.tcd.ie by salmon.maths.tcd.ie with SMTP + id ; 15 Apr 2001 02:36:50 +0100 (BST) +Received: from qd_mail3.sd.cninfo.net ([61.156.13.71]) + by dux1.tcd.ie (8.11.1/8.11.1) with ESMTP id f3F1ans26770 + for ; Sun, 15 Apr 2001 02:36:50 +0100 (BST) +Received: from Good ([206.172.87.3]) by qd_mail3.sd.cninfo.net with SMTP + id <20010415013005.EXEK607.qd_mail3@Good> for ; + Sun, 15 Apr 2001 09:30:05 +0800 +From: keywordranking@excite.com +To: jm@maths.tcd.ie +Subject: Website traffic Building +X-Mailer: WC Mail __ty__ +MIME-Version: 1.0 +Content-Type: multipart/mixed;boundary= "Z_MULTI_PART_MAIL_BOUNDAEY_S" +Message-Id: <20010415013005.EXEK607.qd_mail3@Good> +Date: Sun, 15 Apr 2001 09:30:33 +0800 + +This is a multi-part message in MIME format. + +--Z_MULTI_PART_MAIL_BOUNDAEY_S +Content-Type: text/plain +Content-Transfer-Encoding: base64 + +V2UgaGF2ZSByYW5rZWQgdGhlIHRvcCB0ZW4gdHJhZmZpYyBidWlsZGluZyBzaXRlcyBvbiB0 +aGUgd2ViLCB0aGVzZSBzaXRlcw0KaGF2ZSBiZWVuIHJhbmtlZCBieSBvdXIgZXhwZXJ0cywg +b3VyIGNsaWVudHMgYW5kIGJ5IHlvdSBvdXIgdmlzaXRvcnMuIENvbWUNCnRvIGh0dHA6Ly9t +ZW1iZXJzLnRyaXBvZC5jby51ay90b3B0ZW5zaXRlcw0KDQpPbmUgb2YgdGhlIHRvcCB0ZW4g +c2l0ZXMsIGNhbiBldmVuIGd1YXJhbnRlZSB0b3AgcGxhY2VtZW50IG9uIHRoZSB0b3AgdGVu +DQpzZWFyY2ggZW5naW5lcy4gUmlnaHQgbm93IHRoZXkgYXJlIGFsc28gb2ZmZXJpbmcgdG8g +Y2hlY2sgeW91ciBzaXRlJ3MNCnBvc2l0aW9uDQppbiB0aGUgc2VhcmNoIGVuZ2luZXMsIEZS +RUUgT0YgQ0hBUkdFIQ0KDQpPdXIgc2l0ZSBoYXMgYmVlbiBkZXZlbG9wZWQgdG8gaGVscCB5 +b3UgZmluZCB0aGUgbW9zdCBjb3N0LWVmZmVjdGl2ZSB0cmFmZmljDQpvbiB0aGUgd2ViLiBQ +bGVhc2UgZmVlbCBmcmVlIHRvIGNvbnRhY3QgdXMgd2l0aCBhbnkgcXVlc3Rpb25zLg0KDQoN +Cg0KDQpUbyBCZSBSZW1vdmVkIFNlbmQgRS1NYWlsIHRvIHJlbW92ZWt3QGV4Y2l0ZS5jb20N +Cg0KSUYgVEhJUyBJUyBOT1QgRE9ORSBPVVIgTElTVCBNQU5BR0VNRU5UIFNPRlRXQVJFIFdJ +TEwgTk9UIEtOT1cgVE8NClJFTU9WRSBZT1UuDQoNCkFnYWluIHNlbmQgbWFpbCwgb3IgZm9y +d2FyZCBtZXNzYWdlIHRvIHJlbW92ZWt3QGV4Y2l0ZS5jb20NCnRvIGJlIHJlbW92ZWQuDQog +ICAg +--Z_MULTI_PART_MAIL_BOUNDAEY_S-- + + +expect __mime_attachment base64_length_79_inf __mime_base64 + +Return-Path: +Received: from mailhost.whitehouse.gov ([192.168.51.200]) + by heartbeat.whitehouse.gov (8.8.8/8.8.8) with ESMTP id SAA22453 + for ; + Mon, 13 Aug 1998 l8:14:23 +1000 +Received: from the_big_box.whitehouse.gov ([192.168.51.50]) + by mailhost.whitehouse.gov (8.8.8/8.8.7) with ESMTP id RAA20366 + for vice-president@whitehouse.gov; Mon, 13 Aug 1998 17:42:41 +1000 +Date: Mon, 13 Aug 1998 17:42:41 +1000 +Message-Id: <199804130742.RAA20366@mai1host.whitehouse.gov> +From: Bill Clinton +To: A1 (The Enforcer) Gore +Subject: Map of Argentina with Description +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="DC8------------DC8638F443D87A7F0726DEF7" + +This is a multi-part message in MIME format. +--DC8------------DC8638F443D87A7F0726DEF7 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit + +Hi A1, + +I finally figured out this MIME thing. Pretty cool. I'll send you +some sax music in .au files next week! + +Anyway, the attached image is really too small to get a good look at +Argentina. Try this for a much better map: + + http://www.1one1yp1anet.com/dest/sam/graphics/map-arg.htm + +Then again, shouldn't the CIA have something like that? + +Bill +--DC8------------DC8638F443D87A7F0726DEF7 +Content-Type: image/gif; name="map_of_Argentina.gif" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="map_of_Argentina.gif" + +R01GOD1hJQA1AKIAAP/////78P/omn19fQAAAAAAAAAAAAAAACwAAAAAJQA1AAAD7Qi63P5wwEmjBC +LrnQnhYCgM1wh+pkgqqeC9XrutmBm7hAK3tP31gFcAiFKVQrGFR6kscnonTe7FAAadGugmRu3CmiBt57fsVq3Y0VFKnpYdxPC6M7Ze4crnnHum4oN6LFJ1bn5NXTN7OF5fQkN5WYowBEN2dkGQGWJtSzqGTICJgnQuTJN/WJsojad9qXMuhIWdjXKjY4tenjo6tjVssk2gaWq3uGNX +U6ZGxseyk8SasGw3J9GRzdTQky1iHNvcPNNI4TLeKdfMvy0vMqLrItvuxfDW8ubjueDtJufz7itICBxISKDBgwgTKjyYAAA7 +--DC8------------DC8638F443D87A7F0726DEF7-- + +expect __mime_attachment base64_length_78_79 __mime_base64 + +Return-Path: +Received: from mailhost.whitehouse.gov ([192.168.51.200]) + by heartbeat.whitehouse.gov (8.8.8/8.8.8) with ESMTP id SAA22453 + for ; + Mon, 13 Aug 1998 l8:14:23 +1000 +Received: from the_big_box.whitehouse.gov ([192.168.51.50]) + by mailhost.whitehouse.gov (8.8.8/8.8.7) with ESMTP id RAA20366 + for vice-president@whitehouse.gov; Mon, 13 Aug 1998 17:42:41 +1000 +Date: Mon, 13 Aug 1998 17:42:41 +1000 +Message-Id: <199804130742.RAA20366@mai1host.whitehouse.gov> +From: Bill Clinton +To: A1 (The Enforcer) Gore +Subject: Map of Argentina with Description +MIME-Version: 1.0 +Content-Type: multipart/mixed; + boundary="DC8------------DC8638F443D87A7F0726DEF7" + +This is a multi-part message in MIME format. +--DC8------------DC8638F443D87A7F0726DEF7 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit + +Hi A1, + +I finally figured out this MIME thing. Pretty cool. I'll send you +some sax music in .au files next week! + +Anyway, the attached image is really too small to get a good look at +Argentina. Try this for a much better map: + + http://www.1one1yp1anet.com/dest/sam/graphics/map-arg.htm + +Then again, shouldn't the CIA have something like that? + +Bill +--DC8------------DC8638F443D87A7F0726DEF7 +Content-Type: image/gif; name="map_of_Argentina.gif" +Content-Transfer-Encoding: base64 +Content-Disposition: attachment; filename="map_of_Argentina.gif" + +R01GOD1hJQA1AKIAAP/////78P/omn19fQAAAAAAAAAAAAAAACwAAAAAJQA1AAAD7Qi63P5wwEmjBC +LrnQnhYCgM1wh+pkgqqeC9XrutmBm7hAK3tP31gFcAiFKVQrGFR6kscnonTe7FAAadGugmRu3CmiBt +57fsVq3Y0VFKnpYdxPC6M7Ze4crnnHum4oN6LFJ1bn5NXTN7OF5fQkN5WYowBEN2dkGQGWJtSzqGTI +CJgnQuTJN/WJsojad9qXMuhIWdjXKjY4tenjo6tjVssk2gaWq3uGNXU6ZGxseyk8SasGw3J9GRzdTQ +ky1iHNvcPNNI4TLeKdfMvy0vMqLrItvuxfDW8ubjueDtJufz7itICBxISKDBgwgTKjyYAAA7 +--DC8------------DC8638F443D87A7F0726DEF7-- diff --git a/tests/resources/smtp/antispam/html_mime.txt b/tests/resources/smtp/antispam/html_mime.txt new file mode 100644 index 00000000..f46604ed --- /dev/null +++ b/tests/resources/smtp/antispam/html_mime.txt @@ -0,0 +1,94 @@ +expect __mime_html __mime_qp mime_html_only __mime_qpc + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + + +Die Hasen und = +die +Fr=F6sche
+
+
Die Hasen klagten einst =FCber +ihre mi=DFliche Lage; "wir leben", sprach ein Redner, "in +steter Furcht vor Menschen und Tieren, eine Beute der Hunde, der Adler, +ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der Tod selbst. +Auf, la=DFt uns ein f=FCr allemal sterben."
+
+In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu; +allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt +erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie au= +fs +schnellste untertauchten.
+
+"Halt", rief nun eben dieser Sprecher, "wir wollen das +Ers=E4ufen noch ein wenig aufschieben, denn auch uns f=FCrchten, wie ihr +seht, einige Tiere, welche also wohl noch ungl=FCcklicher sein m=FCssen als +wir."
+
+
+ +expect mime_html_mostly __mime_qpc=2 __mime_html __mime_qp + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: multipart/alternative; + boundary="=====================_714967308==_.ALT" + +--=====================_714967308==_.ALT +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + + +--=====================_714967308==_.ALT +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + +--=====================_714967308==_.ALT-- \ No newline at end of file diff --git a/tests/resources/smtp/antispam/multipart_alternative.txt b/tests/resources/smtp/antispam/multipart_alternative.txt new file mode 100644 index 00000000..8e7a0226 --- /dev/null +++ b/tests/resources/smtp/antispam/multipart_alternative.txt @@ -0,0 +1,217 @@ +expect mpart_alt_diff __mime_qp __mime_qpc=2 + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: multipart/alternative; + boundary="=====================_714967308==_.ALT" + +--=====================_714967308==_.ALT +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + + +--=====================_714967308==_.ALT +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + + +Die Hasen und = +die +Fr=F6sche
+
+
Die Ha1sen kltagten eitnst =FCber +ihre mi=DFliche Lage; "wir leben", sprach ein Redner, "in +steter Furcht vor Menschen und Tieren, eine Beute der Hunde, der Adler, +ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der Tod selbst. +Auf, la=DFt uns ein f=FCr allemal sterben."
+
+In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu; +allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt +erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie au= +fs +schnellste untertauchten.
+
+"Halt", rief nun eben dieser Sprecher, "wir wollen das +Ers=E4ufen noch ein wenig aufschieben, denn auch uns f=FCrchten, wie ihr +seht, einige Tiere, welche also wohl noch ungl=FCcklicher sein m=FCssen als +wir."
+
+
+ +--=====================_714967308==_.ALT-- + +expect mpart_alt_diff_count __mime_qp __mime_qpc=2 + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: multipart/alternative; + boundary="=====================_714967308==_.ALT" + +--=====================_714967308==_.ALT +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + +--=====================_714967308==_.ALT +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + + +Die Hasen und = +die +Fr=F6sche
+
+
Die Hasen klagten einst =FCber +ihre mi=DFliche Lage; "wir leben", sprach ein Redner, "in +steter Furcht vor Menschen und Tieren, eine Beute der Hunde, der Adler, +ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der Tod selbst. +Auf, la=DFt uns ein f=FCr allemal sterben."
+
+In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu; +allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt +erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie au= +fs +schnellste untertauchten.
+
+"Halt", rief nun eben dieser Sprecher, "wir wollen das +Ers=E4ufen noch ein wenig aufschieben, denn auch uns f=FCrchten, wie ihr +seht, einige Tiere, welche also wohl noch ungl=FCcklicher sein m=FCssen als +wir."
+
+
+ +--=====================_714967308==_.ALT-- + +expect multipart_alt_non_text __mime_qp __mime_qpc=2 __mime_attachment + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: multipart/alternative; + boundary="=====================_714967308==_.ALT" + +--=====================_714967308==_.ALT +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + + +--=====================_714967308==_.ALT +Content-Type: application/unknown; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Secret contents + +--=====================_714967308==_.ALT-- \ No newline at end of file diff --git a/tests/resources/smtp/antispam/quoted_printable.txt b/tests/resources/smtp/antispam/quoted_printable.txt new file mode 100644 index 00000000..5f6295a6 --- /dev/null +++ b/tests/resources/smtp/antispam/quoted_printable.txt @@ -0,0 +1,93 @@ +expect __mime_qp __mime_qpc=2 + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: multipart/alternative; + boundary="=====================_714967308==_.ALT" + +--=====================_714967308==_.ALT +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein= + Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 + + +--=====================_714967308==_.ALT +Content-Type: text/html; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + + +Die Hasen und = +die +Fr=F6sche
+
+
Die Hasen klagten einst =FCber +ihre mi=DFliche Lage; "wir leben", sprach ein Redner, "in +steter Furcht vor Menschen und Tieren, eine Beute der Hunde, der Adler, +ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der Tod selbst. +Auf, la=DFt uns ein f=FCr allemal sterben."
+
+In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu; +allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt +erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie au= +fs +schnellste untertauchten.
+
+"Halt", rief nun eben dieser Sprecher, "wir wollen das +Ers=E4ufen noch ein wenig aufschieben, denn auch uns f=FCrchten, wie ihr +seht, einige Tiere, welche also wohl noch ungl=FCcklicher sein m=FCssen als +wir."
+
+
+ +--=====================_714967308==_.ALT-- + +expect __mime_qp __mime_qpc mime_qp_long_line + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: quoted-printable + +Die Hasen und die Fr=F6sche + +Die Hasen klagten einst =FCber ihre mi=DFliche Lage; "wir leben", sprach ein Redner, "in steter Furcht vor Menschen und Tieren, eine Beute der Hunde,= + der Adler, ja fast aller Raubtiere! Unsere stete Angst ist =E4rger als der= + Tod selbst. Auf, la=DFt uns ein f=FCr allemal sterben."=20 + +In einem nahen Teich wollten sie sich nun ers=E4ufen; sie eilten ihm zu;= + allein das au=DFerordentliche Get=F6se und ihre wunderbare Gestalt= + erschreckte eine Menge Fr=F6sche, die am Ufer sa=DFen, so sehr, da=DF sie= + aufs schnellste untertauchten.=20 + +"Halt", rief nun eben dieser Sprecher, "wir wollen das Ers=E4ufen noch ein= + wenig aufschieben, denn auch uns f=FCrchten, wie ihr seht, einige Tiere,= + welche also wohl noch ungl=FCcklicher sein m=FCssen als wir."=20 diff --git a/tests/resources/smtp/antispam/received.txt b/tests/resources/smtp/antispam/received.txt new file mode 100644 index 00000000..ae5a2390 --- /dev/null +++ b/tests/resources/smtp/antispam/received.txt @@ -0,0 +1,45 @@ +helo_domain my.domain.com +iprev_ptr some.other.host.com +expect __forged_rcvd_trail + +From: john@example.com +To: jane@example.com +Subject: test + +test + +expect __forged_rcvd_trail + +Return-Path: +Received: from mailhost.whitehouse.gov ([192.168.51.200]) + by heartbeat.whitehouse.gov (8.8.8/8.8.8) with ESMTP id SAA22453 + for ; + Mon, 13 Aug 1998 l8:14:23 +1000 +Received: from the_big_box.whitehouse.gov ([192.168.51.50]) + by mailhost.whitehouse.gov (8.8.8/8.8.7) with ESMTP id RAA20366 + for vice-president@whitehouse.gov; Mon, 13 Aug 1998 17:42:41 +1000 +Received: from mail1.insuranceiq.com (host66.insuranceiq.com [65.217.159.66] (may be forged)) + by dogma.slashnull.org (8.11.6/8.11.6) with ESMTP id h2F0c2x31856 for ; + Sat, 15 Mar 2003 00:38:03 GMT +Date: Mon, 13 Aug 1998 17:42:41 +1000 +Message-Id: <199804130742.RAA20366@mai1host.whitehouse.gov> +From: Bill Clinton +To: A1 (The Enforcer) Gore +Subject: Map of Argentina with Description +MIME-Version: 1.0 +Content-Type: text/plain; charset=us-ascii +Content-Transfer-Encoding: 7bit + +Hi A1, + +I finally figured out this MIME thing. Pretty cool. I'll send you +some sax music in .au files next week! + +Anyway, the attached image is really too small to get a good look at +Argentina. Try this for a much better map: + + http://www.1one1yp1anet.com/dest/sam/graphics/map-arg.htm + +Then again, shouldn't the CIA have something like that? + +Bill diff --git a/tests/resources/smtp/antispam/text.txt b/tests/resources/smtp/antispam/text.txt new file mode 100644 index 00000000..52609d10 --- /dev/null +++ b/tests/resources/smtp/antispam/text.txt @@ -0,0 +1,64 @@ +expect pp_mime_fake_ascii_text + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: text/plain; charset="iso-8859-1" +Content-Transfer-Encoding: 7bit + +Test with some áéíóúñ + +expect __uppercase_75_100 + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +THIS IS A TEXT WITH A RIDICULOUSLY LONG LINE WITH PLENTY +OF UPPERCASE, WHY ARE YOU SCREAMING? I'M NOT SCREAMING, +YOU'RE SCREAMING. I'M NOT SCREAMING, YOU'RE SCREAMING. +IT HAS TO BE AT LEAST 200 CHARACTERS LONG, SO I'LL JUST +KEEP TYPING AND TYPING AND TYPING AND TYPING AND TYPING +AND TYPING AND TYPING AND TYPING AND TYPING AND TYPING +AND TYPING AND TYPING AND TYPING AND TYPING AND TYPING +AND TYPING AND TYPING AND TYPING AND TYPING AND TYPING + + +expect __tvd_space_ratio + +Message-Id: <4.2.0.58.20000519002557.00a88870@pop.example.com> +X-Sender: dwsauder@pop.example.com (Unverified) +X-Mailer: QUALCOMM Windows Eudora Pro Version 4.2.0.58 +X-Priority: 2 (High) +Date: Fri, 19 May 2000 00:29:55 -0400 +To: Heinz =?iso-8859-1?Q?M=FCller?= +From: Doug Sauder +Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche?= +Mime-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +b v c +u i i +y a a +c g l +i r i +a a s +l +i +s + diff --git a/tests/src/smtp/inbound/antispam.rs b/tests/src/smtp/inbound/antispam.rs new file mode 100644 index 00000000..1b73af37 --- /dev/null +++ b/tests/src/smtp/inbound/antispam.rs @@ -0,0 +1,302 @@ +use std::{collections::HashMap, fs, path::PathBuf, sync::Arc}; + +use crate::smtp::session::TestSession; +use ahash::AHashMap; +use directory::config::ConfigDirectory; +use mail_auth::{dmarc::Policy, DkimResult, DmarcResult, IprevResult, SpfResult}; +use sieve::runtime::Variable; +use smtp::{ + config::{scripts::ConfigSieve, ConfigContext, IfBlock}, + core::{Session, SessionAddress, SMTP}, + inbound::AuthResult, + scripts::ScriptResult, +}; +use tokio::runtime::Handle; +use utils::config::Config; + +use crate::smtp::{TestConfig, TestSMTP}; + +const CONFIG: &str = r#" +[directory."sql"] +type = "sql" +address = "sqlite://%PATH%/test_antispam.db?mode=rwc" + +[directory."sql".pool] +max-connections = 10 +min-connections = 0 +idle-timeout = "5m" + +[sieve] +from-name = "Sieve Daemon" +from-addr = "sieve@foobar.org" +return-path = "" +hostname = "mx.foobar.org" + +[sieve.limits] +redirects = 3 +out-messages = 5 +received-headers = 50 +cpu = 10000 +nested-includes = 5 +duplicate-expiry = "7d" + +[sieve.scripts] +data = "file://%CFG_PATH%/config/sieve/antispam.sieve" +"#; + +#[tokio::test] +async fn antispam() { + /*tracing::subscriber::set_global_default( + tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::TRACE) + .finish(), + ) + .unwrap();*/ + + // Parse config + let mut core = SMTP::test(); + let qr = core.init_test_queue("smtp_antispam_test"); + let config = Config::parse( + &CONFIG + .replace("%PATH%", qr._temp_dir.temp_dir.as_path().to_str().unwrap()) + .replace( + "%CFG_PATH%", + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .to_path_buf() + .join("resources") + .as_path() + .to_str() + .unwrap(), + ), + ) + .unwrap(); + let mut ctx = ConfigContext::new(&[]); + ctx.directory = config.parse_directory().unwrap(); + core.sieve = config.parse_sieve(&mut ctx).unwrap(); + let config = &mut core.session.config; + config.rcpt.relay = IfBlock::new(true); + let core = Arc::new(core); + let script = ctx.scripts.get("data").unwrap().clone(); + + // Run tests + let span = tracing::info_span!("sieve_antispam"); + for file_name in fs::read_dir( + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("resources") + .join("smtp") + .join("antispam"), + ) + .unwrap() + { + let file_name = file_name.unwrap().path(); + + println!("===== {} =====", file_name.display()); + + let contents = fs::read_to_string(&file_name).unwrap(); + let mut lines = contents.lines(); + let mut has_more = true; + + while has_more { + let mut message = String::new(); + let mut in_params = true; + let mut variables = HashMap::new(); + let mut expected_variables = AHashMap::new(); + + // Build session + let mut session = Session::test(core.clone()); + for line in lines.by_ref() { + if in_params { + if line.is_empty() { + in_params = false; + continue; + } + let (param, value) = line.split_once(' ').unwrap(); + let value = value.trim(); + match param { + "remote_ip" => { + session.data.remote_ip = value.parse().unwrap(); + } + "helo_domain" => { + session.data.helo_domain = value.to_string(); + } + "authenticated_as" => { + session.data.authenticated_as = value.to_string(); + } + "spf_result" | "spf_ehlo_result" => { + variables.insert( + param.to_string(), + SpfResult::from_str(value).as_str().to_string(), + ); + } + "iprev_result" => { + variables.insert( + param.to_string(), + IprevResult::from_str(value).as_str().to_string(), + ); + } + "dkim_result" | "arc_result" => { + variables.insert( + param.to_string(), + DkimResult::from_str(value).as_str().to_string(), + ); + } + "envelope_from" => { + session.data.mail_from = Some(SessionAddress::new(value.to_string())); + } + "iprev_ptr" | "dmarc_from" => { + variables.insert(param.to_string(), value.to_string()); + } + "dmarc_result" => { + variables.insert( + param.to_string(), + DmarcResult::from_str(value).as_str().to_string(), + ); + } + "dmarc_policy" => { + variables.insert( + param.to_string(), + Policy::from_str(value).as_str().to_string(), + ); + } + "expect" => { + expected_variables.extend(value.split_ascii_whitespace().map(|v| { + v.split_once('=') + .map(|(k, v)| { + ( + k.to_lowercase(), + if v.contains('.') { + Variable::Float(v.parse().unwrap()) + } else { + Variable::Integer(v.parse().unwrap()) + }, + ) + }) + .unwrap_or((v.to_lowercase(), Variable::Integer(1))) + })); + } + _ => panic!("Invalid parameter {param:?}"), + } + } else { + has_more = line.trim().eq_ignore_ascii_case(""); + if !has_more { + message.push_str(line); + message.push_str("\r\n"); + } else { + break; + } + } + } + + if message.is_empty() { + panic!("No message found"); + } + + // Build script params + let mut expected = expected_variables.keys().collect::>(); + expected.sort_unstable_by(|a, b| b.cmp(a)); + println!("Testing tags {:?}", expected); + let mut params = session + .build_script_parameters() + .with_expected_variables(expected_variables) + .with_message(Arc::new(message.into_bytes())); + for (name, value) in variables { + params = params.set_variable(name, value); + } + + // Run script + let handle = Handle::current(); + let span = span.clone(); + let core_ = core.clone(); + let script = script.clone(); + match core + .spawn_worker(move || core_.run_script_blocking(script, params, handle, span)) + .await + .unwrap() + { + ScriptResult::Accept { .. } => {} + ScriptResult::Reject(message) => panic!("{}", message), + ScriptResult::Replace { + message, + modifications, + } => println!( + "Replace: {} with modifications {:?}", + String::from_utf8_lossy(&message), + modifications + ), + ScriptResult::Discard => println!("Discard"), + } + } + } +} + +trait ParseConfigValue: Sized { + fn from_str(value: &str) -> Self; +} + +impl ParseConfigValue for SpfResult { + fn from_str(value: &str) -> Self { + match value { + "pass" => SpfResult::Pass, + "fail" => SpfResult::Fail, + "softfail" => SpfResult::SoftFail, + "neutral" => SpfResult::Neutral, + "none" => SpfResult::None, + "temperror" => SpfResult::TempError, + "permerror" => SpfResult::PermError, + _ => panic!("Invalid SPF result"), + } + } +} + +impl ParseConfigValue for IprevResult { + fn from_str(value: &str) -> Self { + match value { + "pass" => IprevResult::Pass, + "fail" => IprevResult::Fail(mail_auth::Error::NotAligned), + "temperror" => IprevResult::TempError(mail_auth::Error::NotAligned), + "permerror" => IprevResult::PermError(mail_auth::Error::NotAligned), + "none" => IprevResult::None, + _ => panic!("Invalid IPREV result"), + } + } +} + +impl ParseConfigValue for DkimResult { + fn from_str(value: &str) -> Self { + match value { + "pass" => DkimResult::Pass, + "none" => DkimResult::None, + "neutral" => DkimResult::Neutral(mail_auth::Error::NotAligned), + "fail" => DkimResult::Fail(mail_auth::Error::NotAligned), + "permerror" => DkimResult::PermError(mail_auth::Error::NotAligned), + "temperror" => DkimResult::TempError(mail_auth::Error::NotAligned), + _ => panic!("Invalid DKIM result"), + } + } +} + +impl ParseConfigValue for DmarcResult { + fn from_str(value: &str) -> Self { + match value { + "pass" => DmarcResult::Pass, + "fail" => DmarcResult::Fail(mail_auth::Error::NotAligned), + "temperror" => DmarcResult::TempError(mail_auth::Error::NotAligned), + "permerror" => DmarcResult::PermError(mail_auth::Error::NotAligned), + "none" => DmarcResult::None, + _ => panic!("Invalid DMARC result"), + } + } +} + +impl ParseConfigValue for Policy { + fn from_str(value: &str) -> Self { + match value { + "reject" => Policy::Reject, + "quarantine" => Policy::Quarantine, + "none" => Policy::None, + _ => panic!("Invalid DMARC policy"), + } + } +} diff --git a/tests/src/smtp/inbound/mod.rs b/tests/src/smtp/inbound/mod.rs index 0032c91f..ca830837 100644 --- a/tests/src/smtp/inbound/mod.rs +++ b/tests/src/smtp/inbound/mod.rs @@ -32,6 +32,7 @@ use smtp::{ use super::{QueueReceiver, ReportReceiver}; +pub mod antispam; pub mod auth; pub mod basic; pub mod data;