Port Spam filter to Rust - part 9

This commit is contained in:
mdecimus 2024-12-19 18:12:47 +01:00
parent 3275fb4313
commit e7298df610
53 changed files with 1506 additions and 1352 deletions

View file

@ -7,7 +7,6 @@
use std::{str::FromStr, time::Duration};
use jmap_proto::request::capability::BaseCapabilities;
use mail_parser::HeaderName;
use nlp::language::Language;
use utils::config::{cron::SimpleCron, utils::ParseValue, Config, Rate};
@ -65,7 +64,6 @@ pub struct JmapConfig {
pub fallback_admin: Option<(String, String)>,
pub master_user: Option<(String, String)>,
pub spam_header: Option<(HeaderName<'static>, String)>,
pub default_folders: Vec<DefaultFolder>,
pub shared_folder: String,
@ -334,17 +332,6 @@ impl JmapConfig {
encrypt_append: config
.property_or_default("storage.encryption.append", "false")
.unwrap_or(false),
spam_header: config
.property_or_default::<Option<String>>("spam.header.is-spam", "X-Spam-Status: Yes")
.unwrap_or_default()
.and_then(|v| {
v.split_once(':').map(|(k, v)| {
(
mail_parser::HeaderName::parse(k.trim().to_string()).unwrap(),
v.trim().to_string(),
)
})
}),
http_use_forwarded: config
.property("server.http.use-x-forwarded")
.unwrap_or(false),

View file

@ -185,7 +185,7 @@ impl Core {
oauth: OAuthConfig::parse(config),
acme: AcmeProviders::parse(config),
metrics: Metrics::parse(config),
spam: SpamFilterConfig::parse(config),
spam: SpamFilterConfig::parse(config).await,
storage: Storage {
data,
blob,

View file

@ -8,6 +8,7 @@ use std::{net::SocketAddr, time::Duration};
use ahash::AHashSet;
use nlp::bayes::BayesClassifier;
use tokio::net::lookup_host;
use utils::{
config::{utils::ParseValue, Config},
glob::{GlobMap, GlobSet},
@ -26,6 +27,14 @@ pub struct SpamFilterConfig {
pub bayes: Option<BayesConfig>,
pub scores: SpamFilterScoreConfig,
pub expiry: SpamFilterExpiryConfig,
pub headers: SpamFilterHeaderConfig,
}
/// Names of the message headers associated with the spam filter.
///
/// Each field corresponds to one entry under the `spam-filter.header.<type>`
/// configuration prefix (`status`, `result` and `llm`).
// NOTE(review): `None` presumably means "do not add this header" — confirm
// against the code that consumes these fields.
#[derive(Debug, Clone)]
pub struct SpamFilterHeaderConfig {
/// Header name for the `status` entry (`spam-filter.header.status.name`).
pub status: Option<String>,
/// Header name for the `result` entry (`spam-filter.header.result.name`).
pub result: Option<String>,
/// Header name for the `llm` entry (`spam-filter.header.llm.name`).
pub llm: Option<String>,
}
#[derive(Debug, Clone, Default)]
@ -182,7 +191,7 @@ pub enum RemoteListFormat {
}
impl SpamFilterConfig {
pub fn parse(config: &mut Config) -> Self {
pub async fn parse(config: &mut Config) -> Self {
SpamFilterConfig {
enabled: config
.property_or_default("spam-filter.enable", "true")
@ -190,11 +199,12 @@ impl SpamFilterConfig {
dnsbl: DnsBlConfig::parse(config),
rules: parse_rules(config),
lists: SpamFilterLists::parse(config),
pyzor: PyzorConfig::parse(config),
pyzor: PyzorConfig::parse(config).await,
reputation: ReputationConfig::parse(config),
bayes: BayesConfig::parse(config),
scores: SpamFilterScoreConfig::parse(config),
expiry: SpamFilterExpiryConfig::parse(config),
headers: SpamFilterHeaderConfig::parse(config),
}
}
}
@ -217,7 +227,7 @@ fn parse_rules(config: &mut Config) -> Vec<SpamFilterRule> {
impl SpamFilterRule {
pub fn parse(config: &mut Config, id: String) -> Option<(Self, i32)> {
let id = id.as_str();
if config
if !config
.property_or_default(("spam-filter.rule", id, "enable"), "true")
.unwrap_or(true)
{
@ -249,7 +259,7 @@ impl DnsBlConfig {
pub fn parse(config: &mut Config) -> Self {
let mut servers = vec![];
for id in config
.sub_keys("spam-filter.dnsbl.server", ".url")
.sub_keys("spam-filter.dnsbl.server", ".scope")
.map(|k| k.to_string())
.collect::<Vec<_>>()
{
@ -280,7 +290,7 @@ impl DnsBlServer {
pub fn parse(config: &mut Config, id: String) -> Option<Self> {
let id_ = id.as_str();
if config
if !config
.property_or_default(("spam-filter.dnsbl.server", id_, "enable"), "true")
.unwrap_or(true)
{
@ -300,7 +310,7 @@ impl DnsBlServer {
tags: IfBlock::try_parse(
config,
("spam-filter.dnsbl.server", id_, "tag"),
&Element::Domain.token_map(),
&Element::Ip.token_map(),
)?,
id,
}
@ -308,6 +318,32 @@ impl DnsBlServer {
}
}
impl SpamFilterHeaderConfig {
    /// Builds the header configuration from the `spam-filter.header.*` settings.
    ///
    /// For each of the `status`, `result` and `llm` entries:
    ///
    /// * when `spam-filter.header.<type>.enable` is true (the default), the
    ///   header name is taken from `spam-filter.header.<type>.name`; a missing
    ///   or blank name keeps the built-in default name;
    /// * when it is false, the field is set to `None`.
    pub fn parse(config: &mut Config) -> Self {
        let mut header = SpamFilterHeaderConfig::default();

        for (typ, var) in [
            ("status", &mut header.status),
            ("result", &mut header.result),
            ("llm", &mut header.llm),
        ] {
            if config
                .property_or_default(("spam-filter.header", typ, "enable"), "true")
                .unwrap_or(true)
            {
                if let Some(value) = config.value(("spam-filter.header", typ, "name")) {
                    let value = value.trim();
                    if !value.is_empty() {
                        *var = value.to_string().into();
                    }
                }
            } else {
                // `header` starts out with every name populated, so a disabled
                // entry has to be cleared explicitly; otherwise the `enable`
                // flag would have no effect at all.
                *var = None;
            }
        }

        header
    }
}
impl SpamFilterLists {
pub fn parse(config: &mut Config) -> Self {
let mut lists = SpamFilterLists {
@ -511,7 +547,7 @@ impl SpamFilterLists {
}
impl PyzorConfig {
pub fn parse(config: &mut Config) -> Option<Self> {
pub async fn parse(config: &mut Config) -> Option<Self> {
if !config
.property_or_default("spam-filter.pyzor.enable", "true")
.unwrap_or(true)
@ -525,8 +561,18 @@ impl PyzorConfig {
let host = config
.value("spam-filter.pyzor.host")
.unwrap_or("public.pyzor.org");
let address = match format!("{host}:{port}").parse() {
Ok(address) => address,
let address = match lookup_host(format!("{host}:{port}"))
.await
.map(|mut a| a.next())
{
Ok(Some(address)) => address,
Ok(None) => {
config.new_build_error(
"spam-filter.pyzor.host",
"Invalid address: No addresses found.",
);
return None;
}
Err(err) => {
config.new_build_error(
"spam-filter.pyzor.host",
@ -716,14 +762,23 @@ impl Location {
}
}
impl Default for SpamFilterHeaderConfig {
fn default() -> Self {
SpamFilterHeaderConfig {
status: "X-Spam-Status".to_string().into(),
result: "X-Spam-Result".to_string().into(),
llm: "X-Spam-LLM".to_string().into(),
}
}
}
pub const V_SPAM_REMOTE_IP: u32 = 100;
pub const V_SPAM_REMOTE_IP_PTR: u32 = 101;
pub const V_SPAM_EHLO_DOMAIN: u32 = 102;
pub const V_SPAM_AUTH_AS: u32 = 103;
pub const V_SPAM_ASN: u32 = 104;
pub const V_SPAM_COUNTRY: u32 = 105;
pub const V_SPAM_TLS_VERSION: u32 = 106;
pub const V_SPAM_TLS_CIPHER: u32 = 107;
pub const V_SPAM_IS_TLS: u32 = 106;
pub const V_SPAM_ENV_FROM: u32 = 108;
pub const V_SPAM_ENV_FROM_LOCAL: u32 = 109;
pub const V_SPAM_ENV_FROM_DOMAIN: u32 = 110;
@ -794,8 +849,7 @@ impl Element {
("auth_as", V_SPAM_AUTH_AS),
("asn", V_SPAM_ASN),
("country", V_SPAM_COUNTRY),
("tls_version", V_SPAM_TLS_VERSION),
("tls_cipher", V_SPAM_TLS_CIPHER),
("is_tls", V_SPAM_IS_TLS),
("env_from", V_SPAM_ENV_FROM),
("env_from.local", V_SPAM_ENV_FROM_LOCAL),
("env_from.domain", V_SPAM_ENV_FROM_DOMAIN),
@ -844,6 +898,7 @@ impl Element {
]),
Element::Email => map.with_variables_map([
("email", V_RCPT_EMAIL),
("value", V_RCPT_EMAIL),
("name", V_RCPT_NAME),
("local", V_RCPT_LOCAL),
("domain", V_RCPT_DOMAIN),

View file

@ -233,7 +233,7 @@ impl SpamFilterLlmConfig {
.unwrap_or_default()
.chars()
.next()
.unwrap_or('|'),
.unwrap_or(','),
index_category: config
.property("spam-filter.llm.index.category")
.unwrap_or_default(),

View file

@ -99,7 +99,7 @@ pub enum ExpressionItem {
ArrayBuild(u32),
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub enum Variable<'x> {
String(Cow<'x, str>),
Integer(i64),

View file

@ -1,422 +0,0 @@
/*
* SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::borrow::Cow;
use mail_parser::decoders::html::{add_html_token, html_to_text};
use sieve::{runtime::Variable, Context};
/// Sieve function: converts the HTML in the first argument to text using
/// `mail_parser`'s `html_to_text`.
pub fn fn_html_to_text<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    let html = v[0].to_string();
    let text = html_to_text(html.as_ref());
    text.into()
}
/// Sieve function: reports whether any element of the array in the first
/// argument is a tag token (a string starting with `<`) whose text right
/// after the `<` begins with the second argument.
///
/// Evaluates to the default boolean (`false`) when the first argument is not
/// an array.
pub fn fn_html_has_tag<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    let Some(tokens) = v[0].as_array() else {
        return bool::default().into();
    };

    let wanted = v[1].to_string();
    let wanted: &str = wanted.as_ref();

    tokens
        .iter()
        .any(|token| {
            let token = token.to_string();
            let token: &str = token.as_ref();
            token
                .strip_prefix('<')
                .is_some_and(|name| name.starts_with(wanted))
        })
        .into()
}
/// Sieve function: resolves a size-like attribute of a tag to an integer.
///
/// Arguments: the tag token, the attribute name, and the reference size used
/// for percentage values. A value such as `50%` yields that percentage of the
/// reference size; a plain unsigned number is returned as is. Anything else
/// (missing attribute, unparsable value) yields the default `Variable`.
pub fn fn_html_attr_size<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    let tag = v[0].to_string();
    let attr = v[1].to_string();

    let size = get_attribute(tag.as_ref(), attr.as_ref()).and_then(|raw| {
        let raw = raw.trim();
        match raw.strip_suffix('%') {
            Some(pct) => pct
                .trim()
                .parse::<u32>()
                .ok()
                .map(|pct| (v[2].to_integer() * pct as i64) / 100),
            None => raw.parse::<u32>().ok().map(|px| px as i64),
        }
    });

    match size {
        Some(size) => Variable::Integer(size),
        None => Variable::default(),
    }
}
/// Sieve function: thin wrapper around [`html_attr_tokens`].
///
/// Arguments: the HTML text, the tag name to restrict the search to, and the
/// list of attribute names whose values should be collected.
pub fn fn_html_attrs<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    let html = v[0].to_string();
    let tag_name = v[1].to_string();
    let attr_names = v[2].to_string_array();

    html_attr_tokens(html.as_ref(), tag_name.as_ref(), attr_names).into()
}
/// Sieve function: returns the value of the attribute named by the second
/// argument inside the tag token given as the first argument, or the default
/// `Variable` when [`get_attribute`] finds nothing.
pub fn fn_html_attr<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    let tag = v[0].to_string();
    let attr = v[1].to_string();
    let value = get_attribute(tag.as_ref(), attr.as_ref());

    match value {
        Some(value) => Variable::from(value),
        None => Variable::default(),
    }
}
/// Splits an HTML document into a flat list of string tokens.
///
/// Two kinds of tokens are produced, in document order:
///
/// * **Tag tokens** start with `<` and hold the tag's content without the
///   closing `>`. Outside double quotes the bytes are ASCII-lowercased and
///   whitespace runs are collapsed to a single space; quoted attribute values
///   are copied verbatim. Comments (`<!-- ... -->`) are emitted the same way,
///   with their whitespace collapsed but their case preserved.
/// * **Text tokens** start with `_`, followed by the text found between tags
///   as assembled by `mail_parser`'s `add_html_token`.
pub fn html_to_tokens(input: &str) -> Vec<Variable> {
    let input = input.as_bytes();
    let mut iter = input.iter().enumerate();
    let mut tags = vec![];

    // State of the text word currently being scanned.
    let mut is_token_start = true;
    let mut is_after_space = false;
    let mut is_new_line = true;
    let mut token_start = 0;
    let mut token_end = 0;

    // Text tokens carry a `_` prefix; `len() > 1` therefore means "non-empty".
    let mut text = String::from("_");

    while let Some((pos, &ch)) = iter.next() {
        match ch {
            b'<' => {
                // Flush the pending word and the accumulated text run before
                // the tag itself is emitted.
                if !is_token_start {
                    add_html_token(
                        &mut text,
                        &input[token_start..token_end + 1],
                        is_after_space,
                    );
                    is_after_space = false;
                    is_token_start = true;
                }
                if text.len() > 1 {
                    tags.push(Variable::String(text.into()));
                    text = String::from("_");
                }

                let mut tag = vec![b'<'];
                if matches!(input.get(pos + 1..pos + 4), Some(b"!--")) {
                    // Comment: copy until `-->`, collapsing whitespace runs.
                    let mut last_ch: u8 = 0;
                    for (_, &ch) in iter.by_ref() {
                        match ch {
                            b'>' if tag.len() > 3
                                && matches!(tag.last(), Some(b'-'))
                                && matches!(tag.get(tag.len() - 2), Some(b'-')) =>
                            {
                                break;
                            }
                            b' ' | b'\t' | b'\r' | b'\n' => {
                                if last_ch != b' ' {
                                    tag.push(b' ');
                                    // Remember that a space was just emitted.
                                    // This arm skips the `last_ch = ch` at the
                                    // bottom of the loop, so without this
                                    // assignment consecutive whitespace was
                                    // never collapsed.
                                    last_ch = b' ';
                                }
                                continue;
                            }
                            _ => {
                                tag.push(ch);
                            }
                        }
                        last_ch = ch;
                    }
                } else {
                    // Regular tag: copy until the first unquoted `>`.
                    let mut in_quote = false;
                    let mut last_ch = b' ';

                    for (_, &ch) in iter.by_ref() {
                        match ch {
                            b'>' if !in_quote => {
                                break;
                            }
                            b'"' => {
                                in_quote = !in_quote;
                                tag.push(b'"');
                            }
                            b' ' | b'\t' | b'\r' | b'\n' if !in_quote => {
                                if last_ch != b' ' {
                                    tag.push(b' ');
                                    last_ch = b' ';
                                }
                                continue;
                            }
                            b'/' if !in_quote => {
                                tag.push(b'/');
                                // Treat `/` like a separator so that any
                                // whitespace following it is dropped.
                                last_ch = b' ';
                                continue;
                            }
                            _ => {
                                tag.push(if in_quote {
                                    ch
                                } else {
                                    ch.to_ascii_lowercase()
                                });
                            }
                        }
                        last_ch = ch;
                    }
                }
                tags.push(Variable::String(
                    String::from_utf8(tag).unwrap_or_default().into(),
                ));
                continue;
            }
            b' ' | b'\t' | b'\r' | b'\n' => {
                // Whitespace ends the current word.
                if !is_token_start {
                    add_html_token(
                        &mut text,
                        &input[token_start..token_end + 1],
                        is_after_space && !is_new_line,
                    );
                    is_new_line = false;
                }
                is_after_space = true;
                is_token_start = true;
                continue;
            }
            b'&' if !is_token_start => {
                // An entity begins in the middle of a word: flush the word and
                // let the `&` start a new token below.
                add_html_token(
                    &mut text,
                    &input[token_start..token_end + 1],
                    is_after_space && !is_new_line,
                );
                is_new_line = false;
                is_token_start = true;
                is_after_space = false;
            }
            b';' if !is_token_start => {
                // `;` ends the current token and is included in it.
                add_html_token(
                    &mut text,
                    &input[token_start..pos + 1],
                    is_after_space && !is_new_line,
                );
                is_token_start = true;
                is_after_space = false;
                is_new_line = false;
                continue;
            }
            _ => (),
        }

        if is_token_start {
            token_start = pos;
            is_token_start = false;
        }
        token_end = pos;
    }

    // Flush whatever is left once the input is exhausted.
    if !is_token_start {
        add_html_token(
            &mut text,
            &input[token_start..token_end + 1],
            is_after_space && !is_new_line,
        );
    }
    if text.len() > 1 {
        tags.push(Variable::String(text.into()));
    }

    tags
}
/// Collects the values of selected attributes from the tags of an HTML document.
///
/// * `input` - the HTML text to scan.
/// * `tag` - tag name to restrict the search to (compared ASCII
///   case-insensitively and required to be followed by whitespace); an empty
///   string accepts every tag.
/// * `attrs` - attribute names whose values should be returned (compared
///   ASCII case-insensitively).
///
/// Returns one `Variable::String` per non-empty attribute value found, in
/// document order. Comments (`<!-- ... -->`) are skipped.
pub fn html_attr_tokens(input: &str, tag: &str, attrs: Vec<Cow<str>>) -> Vec<Variable> {
let input = input.as_bytes();
let mut iter = input.iter().enumerate().peekable();
let mut tags = vec![];
while let Some((mut pos, &ch)) = iter.next() {
if ch == b'<' {
if !matches!(input.get(pos + 1..pos + 4), Some(b"!--")) {
let mut in_quote = false;
// Position of the most recent byte that was neither whitespace nor one of
// the specially handled bytes; used to locate the attribute name before `=`.
let mut last_ch_pos: usize = 0;
// Skip whitespace between `<` and the tag name, keeping `pos` on the byte
// right before the name.
while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace()) {
pos += 1;
iter.next();
}
// The tag qualifies when no tag filter was given, or when its name matches
// `tag` and is followed by whitespace.
let found_tag = tag.is_empty()
|| (matches!(input.get(pos + 1..pos + tag.len() + 1), Some(t) if t.eq_ignore_ascii_case(tag.as_bytes()))
&& matches!(input.get(pos + tag.len() + 1), Some(ch) if ch.is_ascii_whitespace()));
'outer: while let Some((pos, &ch)) = iter.next() {
match ch {
// An unquoted `>` closes the tag.
b'>' if !in_quote => {
break;
}
b'"' => {
in_quote = !in_quote;
}
// An unquoted `=` whose preceding bytes end with one of the requested
// attribute names: the attribute value follows.
// NOTE(review): only the bytes immediately before `=` are compared, so this
// looks like a suffix match (e.g. `data-href` would satisfy `href`) — confirm
// whether that is intended.
b'=' if found_tag
&& !in_quote
&& attrs.iter().any(|attr| matches!(input.get(last_ch_pos.saturating_sub(attr.len()) + 1..last_ch_pos + 1), Some(a) if a.eq_ignore_ascii_case(attr.as_bytes())))
&& matches!(input.get(last_ch_pos + 1), Some(ch) if ch.is_ascii_whitespace() || *ch == b'=') =>
{
// Skip whitespace between `=` and the value.
while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace())
{
iter.next();
}
// Bytes of the attribute value (shadows the `tag` parameter, which is no
// longer needed at this point).
let mut tag = vec![];
for (_, &ch) in iter.by_ref() {
match ch {
// The tag ends while reading an unquoted value: emit it and leave the tag.
b'>' if !in_quote => {
if !tag.is_empty() {
tags.push(Variable::String(
String::from_utf8(tag).unwrap_or_default().into(),
));
}
break 'outer;
}
// An opening quote starts the value, a closing quote ends it.
b'"' => {
if in_quote {
in_quote = false;
break;
} else {
in_quote = true;
}
}
// Unquoted whitespace ends the value.
b' ' | b'\t' | b'\r' | b'\n' if !in_quote => {
break;
}
_ => {
tag.push(ch);
}
}
}
if !tag.is_empty() {
tags.push(Variable::String(
String::from_utf8(tag).unwrap_or_default().into(),
));
}
}
b' ' | b'\t' | b'\r' | b'\n' => {}
_ => {
last_ch_pos = pos;
}
}
}
} else {
// Comment: consume everything up to and including the closing `-->`.
let mut last_ch: u8 = 0;
let mut before_last_ch: u8 = 0;
for (_, &ch) in iter.by_ref() {
if ch == b'>' && last_ch == b'-' && before_last_ch == b'-' {
break;
}
before_last_ch = last_ch;
last_ch = ch;
}
}
}
}
tags
}
/// Estimates the total area, in pixels, covered by the `<img` tags found in a
/// list of HTML tokens.
///
/// For every token starting with `<img`, the `width` and `height` attributes
/// are read with [`get_attribute`]:
///
/// * a plain unsigned number is used as is;
/// * a percentage is applied to a nominal 800x600 viewport;
/// * a missing or unparsable value falls back to 200.
///
/// All arithmetic saturates at `u32::MAX`: the attribute values come straight
/// from the message, so oversized numbers must not overflow (which would
/// panic in builds with overflow checks and wrap around otherwise).
pub fn html_img_area(arr: &[Variable]) -> u32 {
    arr.iter()
        .filter_map(|v| {
            let t = v.to_string();
            if t.starts_with("<img") {
                let mut dimensions = [200u32, 200u32];

                for (idx, attr) in ["width", "height"].into_iter().enumerate() {
                    if let Some(value) = get_attribute(t.as_ref(), attr) {
                        let value = value.trim();
                        if let Some(pct) = value.strip_suffix('%') {
                            if let Ok(pct) = pct.trim().parse::<u32>() {
                                let size: u32 = if idx == 0 { 800 } else { 600 };
                                // Widen to u64 so `size * pct` cannot overflow,
                                // then clamp back into the u32 range.
                                let scaled = u64::from(size) * u64::from(pct) / 100;
                                dimensions[idx] = scaled.min(u64::from(u32::MAX)) as u32;
                            }
                        } else if let Ok(value) = value.parse::<u32>() {
                            dimensions[idx] = value;
                        }
                    }
                }

                Some(dimensions[0].saturating_mul(dimensions[1]))
            } else {
                None
            }
        })
        // A saturating fold instead of `sum()`: per-image areas may already be
        // `u32::MAX`.
        .fold(0u32, u32::saturating_add)
}
/// Returns the value of attribute `attr_name` inside the tag text `tag`.
///
/// The name comparison is exact (byte for byte). The value may be wrapped in
/// double quotes, in which case the quotes are stripped and spaces are kept,
/// or unquoted, in which case it ends at the next space or at the end of the
/// input. Spaces around `=` are tolerated. Returns `None` when the attribute
/// is absent or has no value.
pub fn get_attribute<'x>(tag: &'x str, attr_name: &str) -> Option<&'x str> {
    let bytes = tag.as_bytes();
    let wanted = attr_name.as_bytes();

    let mut quoted = false;
    // Inclusive byte range of the candidate attribute name seen so far.
    let mut name: Option<(usize, usize)> = None;

    for (idx, &byte) in bytes.iter().enumerate() {
        match byte {
            b'=' if !quoted => {
                let is_match = name.is_some_and(|(first, last)| &bytes[first..=last] == wanted);
                if is_match {
                    return read_attribute_value(bytes, idx + 1);
                }
                name = None;
            }
            b'"' => quoted = !quoted,
            b' ' => {
                // A space ends the candidate name unless `=` follows directly.
                if !quoted && bytes.get(idx + 1) != Some(&b'=') {
                    name = None;
                }
            }
            _ if !quoted => {
                name = Some(match name {
                    Some((first, _)) => (first, idx),
                    None => (idx, idx),
                });
            }
            _ => {}
        }
    }

    None
}

/// Reads the attribute value that starts at byte offset `from`, i.e. right
/// after the `=` sign (so `from` is never 0, which lets 0 act as "not set").
fn read_attribute_value(bytes: &[u8], from: usize) -> Option<&str> {
    let mut quoted = false;
    let mut start = 0usize;
    let mut end = 0usize;

    for (idx, &byte) in bytes.iter().enumerate().skip(from) {
        match byte {
            b'"' if quoted => {
                end = idx;
                break;
            }
            b'"' => {
                start = idx + 1;
                quoted = true;
            }
            b' ' if !quoted => {
                // Leading spaces are skipped; a later space ends the value.
                if start != 0 {
                    end = idx;
                    break;
                }
            }
            _ => {
                if start == 0 {
                    start = idx;
                }
            }
        }
    }

    if start == 0 {
        return None;
    }
    if end == 0 {
        // The value runs to the end of the input.
        end = bytes.len();
    }
    Some(std::str::from_utf8(&bytes[start..end]).unwrap_or_default())
}

View file

@ -7,7 +7,6 @@
pub mod array;
mod email;
mod header;
pub mod html;
pub mod image;
pub mod misc;
pub mod text;
@ -16,9 +15,7 @@ pub mod url;
use sieve::{runtime::Variable, FunctionMap};
use self::{
array::*, email::*, header::*, html::*, image::*, misc::*, text::*, unicode::*, url::*,
};
use self::{array::*, email::*, header::*, image::*, misc::*, text::*, unicode::*, url::*};
pub fn register_functions_trusted() -> FunctionMap {
FunctionMap::new()
@ -70,10 +67,6 @@ pub fn register_functions_trusted() -> FunctionMap {
.with_function_args("cosine_similarity", fn_cosine_similarity, 2)
.with_function_args("jaccard_similarity", fn_jaccard_similarity, 2)
.with_function_args("levenshtein_distance", fn_levenshtein_distance, 2)
.with_function_args("html_has_tag", fn_html_has_tag, 2)
.with_function_args("html_attr", fn_html_attr, 2)
.with_function_args("html_attrs", fn_html_attrs, 3)
.with_function_args("html_attr_size", fn_html_attr_size, 3)
.with_function_args("uri_part", fn_uri_part, 2)
.with_function_args("substring", fn_substring, 3)
.with_function_args("split", fn_split, 2)

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use mail_parser::decoders::html::html_to_text;
use sieve::{runtime::Variable, Context};
use super::ApplyString;
@ -310,3 +311,7 @@ pub fn fn_detect_language<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable
.unwrap_or("unknown")
.into()
}
/// Sieve function: converts the HTML in the first argument to text using
/// `mail_parser`'s `html_to_text`.
pub fn fn_html_to_text<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
    let html = v[0].to_string();
    let text = html_to_text(html.as_ref());
    text.into()
}

View file

@ -7,7 +7,7 @@
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
use sieve::{runtime::Variable, FunctionMap};
use crate::scripts::functions::{html::html_to_tokens, text::tokenize_words, ApplyString};
use crate::scripts::functions::{text::tokenize_words, ApplyString};
use super::PluginContext;
@ -22,7 +22,6 @@ pub fn register_domain_part(plugin_id: u32, fnc_map: &mut FunctionMap) {
pub fn exec_tokenize(ctx: PluginContext<'_>) -> trc::Result<Variable> {
let mut v = ctx.arguments;
let (urls, urls_without_scheme, emails) = match v[1].to_string().as_ref() {
"html" => return Ok(html_to_tokens(v[0].to_string().as_ref()).into()),
"words" => return Ok(tokenize_words(&v[0])),
"uri" | "url" => (true, true, true),
"uri_strict" | "url_strict" => (true, false, false),

View file

@ -121,7 +121,8 @@ impl EmailIngest for Server {
// Check for Spam headers
let mut is_spam = false;
if let (IngestSource::Smtp, Some((header_name, header_value))) =
let todo = "true";
/*if let (IngestSource::Smtp, Some((header_name, header_value))) =
(params.source, &self.core.jmap.spam_header)
{
if params.mailbox_ids == [INBOX_ID]
@ -136,7 +137,7 @@ impl EmailIngest for Server {
params.mailbox_ids[0] = JUNK_ID;
is_spam = true;
}
}
}*/
// Obtain message references and thread name
let mut message_id = String::new();

View file

@ -752,7 +752,7 @@ impl<T: AsRef<str>> TokenType<T> {
TokenType::Url(url) => url.as_ref().trim().to_lowercase().into(),
TokenType::UrlNoScheme(url) if !with_scheme_only => {
let url = url.as_ref();
format!("http:s//{}", url.trim().to_lowercase())
format!("https://{}", url.trim().to_lowercase())
.to_lowercase()
.into()
}

View file

@ -20,6 +20,7 @@ pub mod mail;
pub mod milter;
pub mod rcpt;
pub mod session;
pub mod spam;
pub mod spawn;
pub mod vrfy;

View file

@ -0,0 +1,61 @@
/*
* SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
*
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use common::listener::SessionStream;
use mail_auth::{dmarc::Policy, ArcOutput, DkimOutput, DmarcResult};
use mail_parser::Message;
use spam_filter::SpamFilterInput;
use crate::core::Session;
impl<T: SessionStream> Session<T> {
    /// Assembles the [`SpamFilterInput`] for `message`.
    ///
    /// The message and its DKIM/ARC/DMARC results are supplied by the caller;
    /// everything else (SPF and iprev results, remote IP, EHLO domain,
    /// authenticated user, ASN/country, TLS state and the envelope sender and
    /// recipients) is read from this session. `account_id` is left unset and
    /// `is_test` is `false`.
    pub fn build_spam_input<'x>(
        &'x self,
        message: &'x Message<'x>,
        dkim_result: &'x [DkimOutput<'x>],
        arc_result: Option<&'x ArcOutput>,
        dmarc_result: Option<&'x DmarcResult>,
        dmarc_policy: Option<&'x Policy>,
    ) -> SpamFilterInput<'x> {
        let data = &self.data;
        let geo = &data.asn_geo_data;
        // Envelope sender, if a MAIL FROM was recorded for this session.
        let mail_from = data.mail_from.as_ref();

        SpamFilterInput {
            message,
            span_id: data.session_id,
            arc_result,
            spf_ehlo_result: data.spf_ehlo.as_ref(),
            spf_mail_from_result: data.spf_mail_from.as_ref(),
            dkim_result,
            dmarc_result,
            dmarc_policy,
            iprev_result: data.iprev.as_ref(),
            remote_ip: data.remote_ip,
            ehlo_domain: data.helo_domain.as_str().into(),
            authenticated_as: data.authenticated_as.as_ref().map(|a| a.name.as_str()),
            asn: geo.asn.as_ref().map(|a| a.id),
            country: geo.country.as_ref().map(|c| c.as_str()),
            is_tls: self.stream.is_tls(),
            env_from: mail_from
                .map(|m| m.address_lcase.as_str())
                .unwrap_or_default(),
            env_from_flags: mail_from.map(|m| m.flags).unwrap_or_default(),
            env_rcpt_to: data
                .rcpt_to
                .iter()
                .map(|r| r.address_lcase.as_str())
                .collect(),
            account_id: None,
            is_test: false,
        }
    }
}

View file

@ -359,33 +359,6 @@ impl RunScript for Server {
}
}
// Assert global variables
#[cfg(feature = "test_mode")]
if let Some(expected_variables) = params.expected_variables {
for var_name in instance.global_variable_names() {
if instance.global_variable(var_name).unwrap().to_bool()
&& !expected_variables.contains_key(var_name)
{
panic!(
"Unexpected variable {var_name:?} with value {:?}\nExpected {:?}\nFound: {:?}",
instance.global_variable(var_name).unwrap(),
expected_variables.keys().collect::<Vec<_>>(),
instance.global_variable_names().collect::<Vec<_>>()
);
}
}
for (name, expected) in &expected_variables {
if let Some(value) = instance.global_variable(name.as_str()) {
assert_eq!(value, expected, "Variable {name:?} has unexpected value");
} else {
panic!("Missing variable {name:?} with value {expected:?}\nExpected {:?}\nFound: {:?}",
expected_variables.keys().collect::<Vec<_>>(),
instance.global_variable_names().collect::<Vec<_>>());
}
}
}
// Keep id
// 0 = use original message
// MAX = implicit keep

View file

@ -38,8 +38,6 @@ pub struct ScriptParameters<'x> {
from_name: String,
return_path: String,
sign: Vec<String>,
#[cfg(feature = "test_mode")]
expected_variables: Option<AHashMap<String, Variable>>,
access_token: Option<&'x AccessToken>,
session_id: u64,
}
@ -51,8 +49,6 @@ impl<'x> ScriptParameters<'x> {
envelope: Vec::with_capacity(6),
message: None,
headers: None,
#[cfg(feature = "test_mode")]
expected_variables: None,
from_addr: Default::default(),
from_name: Default::default(),
return_path: Default::default(),
@ -123,15 +119,6 @@ impl<'x> ScriptParameters<'x> {
self.session_id = session_id;
self
}
#[cfg(feature = "test_mode")]
pub fn with_expected_variables(
mut self,
expected_variables: AHashMap<String, Variable>,
) -> Self {
self.expected_variables = expected_variables.into();
self
}
}
impl Default for ScriptParameters<'_> {

View file

@ -27,13 +27,14 @@ impl SpamFilterAnalyzeBayes for Server {
if let Some(config) = &self.core.spam.bayes {
if !ctx.result.has_tag("SPAM_TRAP") && !ctx.result.has_tag("TRUSTED_REPLY") {
match bayes_classify(self, ctx).await {
Ok(score) => {
Ok(Some(score)) => {
if score > config.score_spam {
ctx.result.add_tag("BAYES_SPAM");
} else if score < config.score_ham {
ctx.result.add_tag("BAYES_HAM");
}
}
Ok(None) => (),
Err(err) => {
trc::error!(err.span_id(ctx.input.span_id).caused_by(trc::location!()));
}

View file

@ -4,9 +4,10 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::future::Future;
use std::future::Future;
use common::Server;
use mail_parser::HeaderName;
use store::write::now;
use crate::SpamFilterContext;
@ -20,23 +21,35 @@ pub trait SpamFilterAnalyzeDate: Sync + Send {
impl SpamFilterAnalyzeDate for Server {
async fn spam_filter_analyze_date(&self, ctx: &mut SpamFilterContext<'_>) {
if let Some(date) = ctx.input.message.date() {
let date = date.to_timestamp();
if date != 0 {
let date_diff = now() as i64 - date;
match ctx
.input
.message
.header(HeaderName::Date)
.map(|h| h.as_datetime())
{
Some(Some(date)) => {
let date = date.to_timestamp();
if date != 0 {
let date_diff = now() as i64 - date;
if date_diff > 86400 {
// Older than a day
ctx.result.add_tag("DATE_IN_PAST");
} else if -date_diff > 7200 {
//# More than 2 hours in the future
ctx.result.add_tag("DATE_IN_FUTURE");
if date_diff > 86400 {
// Older than a day
ctx.result.add_tag("DATE_IN_PAST");
} else if -date_diff > 7200 {
//# More than 2 hours in the future
ctx.result.add_tag("DATE_IN_FUTURE");
}
} else {
ctx.result.add_tag("INVALID_DATE");
}
} else {
}
Some(None) => {
ctx.result.add_tag("INVALID_DATE");
}
} else {
ctx.result.add_tag("MISSING_DATE");
None => {
ctx.result.add_tag("MISSING_DATE");
}
}
}
}

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::future::Future;
use std::future::Future;
use common::Server;
use mail_auth::{
@ -22,16 +22,19 @@ pub trait SpamFilterAnalyzeDmarc: Sync + Send {
impl SpamFilterAnalyzeDmarc for Server {
async fn spam_filter_analyze_dmarc(&self, ctx: &mut SpamFilterContext<'_>) {
ctx.result
.add_tag(match ctx.input.spf_mail_from_result.result() {
SpfResult::Pass => "SPF_ALLOW",
SpfResult::Fail => "SPF_FAIL",
SpfResult::SoftFail => "SPF_SOFTFAIL",
SpfResult::Neutral => "SPF_NEUTRAL",
SpfResult::TempError => "SPF_DNSFAIL",
SpfResult::PermError => "SPF_PERMFAIL",
SpfResult::None => "SPF_NA",
});
ctx.result.add_tag(
ctx.input
.spf_mail_from_result
.map_or("SPF_NA", |r| match r.result() {
SpfResult::Pass => "SPF_ALLOW",
SpfResult::Fail => "SPF_FAIL",
SpfResult::SoftFail => "SPF_SOFTFAIL",
SpfResult::Neutral => "SPF_NEUTRAL",
SpfResult::TempError => "SPF_DNSFAIL",
SpfResult::PermError => "SPF_PERMFAIL",
SpfResult::None => "SPF_NA",
}),
);
ctx.result.add_tag(
match ctx
@ -51,25 +54,30 @@ impl SpamFilterAnalyzeDmarc for Server {
},
);
ctx.result.add_tag(match ctx.input.arc_result.result() {
DkimResult::Pass => "ARC_ALLOW",
DkimResult::Fail(_) => "ARC_REJECT",
DkimResult::PermError(_) => "ARC_INVALID",
DkimResult::TempError(_) => "ARC_DNSFAIL",
DkimResult::Neutral(_) | DkimResult::None => "ARC_NA",
});
ctx.result
.add_tag(ctx.input.arc_result.map_or("ARC_NA", |r| match r.result() {
DkimResult::Pass => "ARC_ALLOW",
DkimResult::Fail(_) => "ARC_REJECT",
DkimResult::PermError(_) => "ARC_INVALID",
DkimResult::TempError(_) => "ARC_DNSFAIL",
DkimResult::Neutral(_) | DkimResult::None => "ARC_NA",
}));
ctx.result.add_tag(match ctx.input.dmarc_result {
DmarcResult::Pass => "DMARC_POLICY_ALLOW",
DmarcResult::TempError(_) => "DMARC_DNSFAIL",
DmarcResult::PermError(_) => "DMARC_BAD_POLICY",
DmarcResult::None => "DMARC_NA",
DmarcResult::Fail(_) => match ctx.input.dmarc_policy {
Policy::Quarantine => "DMARC_POLICY_QUARANTINE",
Policy::Reject => "DMARC_POLICY_REJECT",
Policy::Unspecified | Policy::None => "DMARC_POLICY_SOFTFAIL",
},
});
ctx.result
.add_tag(ctx.input.dmarc_result.map_or("DMARC_NA", |r| match r {
DmarcResult::Pass => "DMARC_POLICY_ALLOW",
DmarcResult::TempError(_) => "DMARC_DNSFAIL",
DmarcResult::PermError(_) => "DMARC_BAD_POLICY",
DmarcResult::None => "DMARC_NA",
DmarcResult::Fail(_) => ctx.input.dmarc_policy.map_or(
"DMARC_POLICY_SOFTFAIL",
|p| match p {
Policy::Quarantine => "DMARC_POLICY_QUARANTINE",
Policy::Reject => "DMARC_POLICY_REJECT",
Policy::Unspecified | Policy::None => "DMARC_POLICY_SOFTFAIL",
},
),
}));
for header in ctx.input.message.headers() {
let header_name = header.name();
@ -83,37 +91,43 @@ impl SpamFilterAnalyzeDmarc for Server {
if self
.core
.spam
.lists.dmarc_allow
.lists
.dmarc_allow
.contains(&ctx.output.from.email.domain_part.fqdn)
{
if matches!(ctx.input.dmarc_result, DmarcResult::Pass) {
if matches!(ctx.input.dmarc_result, Some(DmarcResult::Pass)) {
ctx.result.add_tag("ALLOWLIST_DMARC");
} else {
} else if ctx.input.dmarc_result.is_some() {
ctx.result.add_tag("BLOCKLIST_DMARC");
}
} else if self
.core
.spam
.lists.spf_dkim_allow
.lists
.spf_dkim_allow
.contains(&ctx.output.from.email.domain_part.fqdn)
{
let is_dkim_pass = matches!(ctx.input.arc_result.result(), DkimResult::Pass)
|| ctx.input.dkim_result.iter().any(|r| {
matches!(r.result(), DkimResult::Pass)
&& r.signature().map_or(false, |s| {
s.domain().to_lowercase() == ctx.output.from.email.domain_part.fqdn
})
});
let is_spf_pass = matches!(ctx.input.spf_mail_from_result.result(), SpfResult::Pass);
let spf = ctx
.input
.spf_mail_from_result
.map(|r| r.result())
.unwrap_or(SpfResult::None);
let is_dkim_pass = matches!(
ctx.input.arc_result.map(|r| r.result()),
Some(DkimResult::Pass)
) || ctx.input.dkim_result.iter().any(|r| {
matches!(r.result(), DkimResult::Pass)
&& r.signature().map_or(false, |s| {
s.domain().to_lowercase() == ctx.output.from.email.domain_part.fqdn
})
});
let is_spf_pass = matches!(spf, SpfResult::Pass);
if is_dkim_pass && is_spf_pass {
ctx.result.add_tag("ALLOWLIST_SPF_DKIM");
} else if is_dkim_pass {
ctx.result.add_tag("ALLOWLIST_DKIM");
if !matches!(
ctx.input.spf_mail_from_result.result(),
SpfResult::TempError
) {
if !matches!(spf, SpfResult::TempError) {
ctx.result.add_tag("BLOCKLIST_SPF");
}
} else if is_spf_pass {
@ -126,14 +140,12 @@ impl SpamFilterAnalyzeDmarc for Server {
{
ctx.result.add_tag("BLOCKLIST_DKIM");
}
} else if !matches!(
ctx.input.spf_mail_from_result.result(),
SpfResult::TempError
) && !ctx
.input
.dkim_result
.iter()
.any(|r| matches!(r.result(), DkimResult::TempError(_)))
} else if !matches!(spf, SpfResult::TempError)
&& !ctx
.input
.dkim_result
.iter()
.any(|r| matches!(r.result(), DkimResult::TempError(_)))
{
ctx.result.add_tag("BLOCKLIST_SPF_DKIM");
}

View file

@ -11,6 +11,7 @@ use common::{
Server,
};
use mail_auth::DkimResult;
use mail_parser::{HeaderName, HeaderValue, Host};
use nlp::tokenizers::types::TokenType;
use crate::{
@ -19,7 +20,7 @@ use crate::{
expression::{SpamFilterResolver, StringResolver},
html::{HtmlToken, A, HREF},
},
Email, Recipient, SpamFilterContext, TextPart,
Email, Hostname, Recipient, SpamFilterContext, TextPart,
};
use super::{is_trusted_domain, ElementLocation};
@ -49,11 +50,31 @@ impl SpamFilterAnalyzeDomain for Server {
}
}
// Add Received headers
for header in ctx.input.message.headers() {
if let (HeaderName::Received, HeaderValue::Received(received)) =
(&header.name, &header.value)
{
for host in [&received.from, &received.helo, &received.by]
.into_iter()
.flatten()
{
if let Host::Name(name) = host {
if let Some(name) = Hostname::new(name.as_ref()).sld {
domains.insert(ElementLocation::new(name, Location::HeaderReceived));
}
}
}
}
}
// Add EHLO domain
domains.insert(ElementLocation::new(
ctx.output.ehlo_host.fqdn.clone(),
Location::Ehlo,
));
if !ctx.output.ehlo_host.fqdn.is_empty() {
domains.insert(ElementLocation::new(
ctx.output.ehlo_host.fqdn.clone(),
Location::Ehlo,
));
}
// Add PTR
if let Some(ptr) = &ctx.output.iprev_ptr {
@ -161,12 +182,13 @@ impl SpamFilterAnalyzeDomain for Server {
// Validate email
for email in &emails {
// Skip trusted domains
if is_trusted_domain(
self,
&email.element.email.domain_part.fqdn,
ctx.input.span_id,
)
.await
if !email.element.email.is_valid()
|| is_trusted_domain(
self,
&email.element.email.domain_part.fqdn,
ctx.input.span_id,
)
.await
{
continue;
}

View file

@ -125,7 +125,7 @@ impl SpamFilterAnalyzeFrom for Server {
}
if (!env_from_empty && ctx.output.env_from_addr.address == from_addr.address)
|| (!ctx.output.env_from_postmaster
|| (ctx.output.env_from_postmaster
&& from_addr_is_valid
&& from_addr.domain_part.sld == ctx.output.ehlo_host.sld)
{

View file

@ -28,7 +28,7 @@ impl SpamFilterAnalyzeHeaders for Server {
for header in ctx.input.message.headers() {
// Add header exists tag
let hdr_name = header.name();
let mut tag = String::with_capacity(hdr_name.len() + 5);
let mut tag: String = String::with_capacity(hdr_name.len() + 5);
tag.push_str("X_HDR_");
for ch in hdr_name.chars() {
if ch.is_ascii_alphanumeric() {
@ -37,6 +37,7 @@ impl SpamFilterAnalyzeHeaders for Server {
tag.push('_');
}
}
ctx.result.add_tag(tag);
match &header.name {
HeaderName::ContentType

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::future::Future;
use std::future::Future;
use common::Server;
use hyper::Uri;
@ -20,6 +20,7 @@ pub trait SpamFilterAnalyzeHtml: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
#[derive(Debug)]
struct Href {
url_parsed: Option<Uri>,
host: Option<Hostname>,
@ -119,7 +120,7 @@ impl SpamFilterAnalyzeHtml for Server {
if src.starts_with("data:") && src.contains(";base64,")
{
// Has Data URI encoding
ctx.result.add_tag("Has Data URI encoding");
ctx.result.add_tag("HAS_DATA_URI");
}
continue;
}
@ -185,7 +186,7 @@ impl SpamFilterAnalyzeHtml for Server {
has_rel_style = true;
}
} else if *attr == HREF
&& value.to_ascii_lowercase().ends_with(".css")
&& value.to_ascii_lowercase().contains(".css")
{
has_href_css = true;
}
@ -217,7 +218,7 @@ impl SpamFilterAnalyzeHtml for Server {
}
_ => (),
},
HtmlToken::Text { text } if in_head > 0 => {
HtmlToken::Text { text } if in_head == 0 => {
if let Some((href_url, href_host)) = last_href
.as_ref()
.and_then(|href| Some((href.url_parsed.as_ref()?, href.host.as_ref()?)))
@ -313,7 +314,7 @@ impl SpamFilterAnalyzeHtml for Server {
}
if (!has_link_to_img || html_text_chars >= 2048)
&& html_img_words as f64 / (html_words as f64 + html_img_words as f64) > 0.5
&& (html_img_words as f64 / (html_words as f64 + html_img_words as f64) > 0.5)
{
// Message contains more images than text
ctx.result.add_tag("HTML_TEXT_IMG_RATIO");

View file

@ -9,7 +9,7 @@ use mail_parser::{parsers::fields::thread::thread_name, HeaderName, PartType};
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
use crate::{
modules::html::{html_to_tokens, HtmlToken},
modules::html::{html_to_tokens, HtmlToken, HEAD},
Email, Hostname, Recipient, SpamFilterContext, SpamFilterInput, SpamFilterOutput,
SpamFilterResult, TextPart,
};
@ -92,7 +92,6 @@ impl SpamFilterInit for Server {
.tokenize_emails(true)
.map(|t| t.word)
.collect::<Vec<_>>();
let subject = subject.to_lowercase();
// Tokenize and convert text parts
let mut text_parts = Vec::new();
@ -124,15 +123,25 @@ impl SpamFilterInit for Server {
})
.sum();
let mut text_body = String::with_capacity(text_body_len);
let mut in_head = false;
for token in &html_tokens {
if let HtmlToken::Text { text } = token {
if !text_body.is_empty()
&& !text_body.ends_with(' ')
&& text.starts_with(' ')
{
text_body.push(' ');
match token {
HtmlToken::StartTag { name: HEAD, .. } => {
in_head = true;
}
text_body.push_str(text)
HtmlToken::EndTag { name: HEAD } => {
in_head = false;
}
HtmlToken::Text { text } if !in_head => {
if !text_body.is_empty()
&& !text_body.ends_with(' ')
&& text.starts_with(' ')
{
text_body.push(' ');
}
text_body.push_str(text)
}
_ => {}
}
}
@ -190,17 +199,17 @@ impl SpamFilterInit for Server {
}
text_parts.extend(text_parts_nested);
let subject_thread = thread_name(&subject).to_string();
let subject_thread = thread_name(subject).to_string();
let env_from_addr = Email::new(input.env_from);
SpamFilterContext {
output: SpamFilterOutput {
ehlo_host: Hostname::new(input.ehlo_domain),
iprev_ptr: input
.iprev_result
.ptr
.as_ref()
.and_then(|ptr| ptr.first())
.map(|ptr| ptr.strip_suffix('.').unwrap_or(ptr).to_lowercase()),
ehlo_host: Hostname::new(input.ehlo_domain.unwrap_or("unknown")),
iprev_ptr: input.iprev_result.and_then(|r| {
r.ptr
.as_ref()
.and_then(|ptr| ptr.first())
.map(|ptr| ptr.strip_suffix('.').unwrap_or(ptr).to_lowercase())
}),
env_from_postmaster: env_from_addr.address.is_empty()
|| POSTMASTER_ADDRESSES.contains(&env_from_addr.local_part.as_str()),
env_from_addr,
@ -215,9 +224,9 @@ impl SpamFilterInit for Server {
},
reply_to,
subject_thread_lc: subject_thread.trim().to_lowercase(),
subject_lc: subject.trim().to_lowercase(),
subject_thread,
subject,
subject_lc: subject.trim().to_lowercase(),
subject: subject.to_string(),
subject_tokens,
recipients_to,
recipients_cc,

View file

@ -44,18 +44,22 @@ impl SpamFilterAnalyzeIp for Server {
(&header.name, &header.value)
{
if let Some(ip) = received.from_ip() {
ctx.output
.ips
.insert(ElementLocation::new(ip, Location::HeaderReceived));
if !ip.is_loopback() && !self.is_ip_allowed(&ip) {
ctx.output
.ips
.insert(ElementLocation::new(ip, Location::HeaderReceived));
}
}
for host in [&received.from, &received.helo, &received.by]
.into_iter()
.flatten()
{
if let Host::IpAddr(ip) = host {
ctx.output
.ips
.insert(ElementLocation::new(*ip, Location::HeaderReceived));
if !ip.is_loopback() && !self.is_ip_allowed(ip) {
ctx.output
.ips
.insert(ElementLocation::new(*ip, Location::HeaderReceived));
}
}
}
}
@ -124,7 +128,7 @@ impl SpamFilterAnalyzeIp for Server {
if let Some(tag) = is_dnsbl(
self,
dnsbl,
SpamFilterResolver::new(ctx, &IpResolver(ip.element), ip.location),
SpamFilterResolver::new(ctx, &IpResolver::new(ip.element), ip.location),
)
.await
{
@ -139,10 +143,12 @@ impl SpamFilterAnalyzeIp for Server {
}
// Reverse DNS validation
match &ctx.input.iprev_result.result {
IprevResult::TempError(_) => ctx.result.add_tag("RDNS_DNSFAIL"),
IprevResult::Fail(_) | IprevResult::PermError(_) => ctx.result.add_tag("RDNS_DNSFAIL"),
IprevResult::Pass | IprevResult::None => (),
if let Some(iprev) = ctx.input.iprev_result {
match &iprev.result {
IprevResult::TempError(_) => ctx.result.add_tag("RDNS_DNSFAIL"),
IprevResult::Fail(_) | IprevResult::PermError(_) => ctx.result.add_tag("RDNS_NONE"),
IprevResult::Pass | IprevResult::None => (),
}
}
}
}

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::future::Future;
use std::future::Future;
use common::Server;
use mail_parser::HeaderName;
@ -20,20 +20,23 @@ pub trait SpamFilterAnalyzeMid: Sync + Send {
impl SpamFilterAnalyzeMid for Server {
async fn spam_filter_analyze_message_id(&self, ctx: &mut SpamFilterContext<'_>) {
let mid_raw = ctx
.input
.message
.header_raw(HeaderName::MessageId)
.unwrap_or_default()
.trim();
let mut mid = "";
let mut mid_raw = "";
if !mid_raw.is_empty() {
let mid = ctx
.input
.message
.message_id()
for header in ctx.input.message.headers() {
if let (HeaderName::MessageId, value) = (&header.name, &header.value) {
mid = value.as_text().unwrap_or_default();
mid_raw = std::str::from_utf8(
&ctx.input.message.raw_message()[header.offset_start..header.offset_end],
)
.unwrap_or_default()
.to_lowercase();
.trim();
break;
}
}
if !mid.is_empty() {
let mid = mid.to_lowercase();
if let Some(mid_host) = mid.rsplit_once('@').map(|(_, host)| Hostname::new(host)) {
if mid_host.ip.is_some() {
if mid_host.fqdn.starts_with('[') {
@ -81,7 +84,7 @@ impl SpamFilterAnalyzeMid for Server {
ctx.result.add_tag("INVALID_MSGID");
}
if !mid_raw.starts_with('<') || !mid_raw.ends_with('>') {
if !mid_raw.starts_with('<') || !mid_raw.contains('>') {
ctx.result.add_tag("MID_MISSING_BRACKETS");
}
} else {

View file

@ -83,7 +83,7 @@ impl SpamFilterAnalyzeMime for Server {
if !has_mime_version && (has_ct || has_cte) {
ctx.result.add_tag("MISSING_MIME_VERSION");
}
if has_ct && !is_plain_text && !has_cte && !had_cd {
if has_ct && !is_plain_text && !has_cte && !had_cd && !has_mime_version {
// Only Content-Type header without other MIME headers
ctx.result.add_tag("MIME_HEADER_CTYPE_ONLY");
}
@ -323,10 +323,7 @@ impl SpamFilterAnalyzeMime for Server {
if !is_encrypted
&& !has_content_id
&& cd.map_or(true, |cd| {
cd.attribute("type")
.unwrap_or_default()
.to_ascii_lowercase()
!= "attachment"
!cd.c_type.eq_ignore_ascii_case("attachment")
&& !cd.has_attribute("filename")
})
{
@ -346,20 +343,16 @@ impl SpamFilterAnalyzeMime for Server {
if is_attachment {
// Has a MIME attachment
ctx.result.add_tag("HAS_ATTACHMENT");
match &part.body {
PartType::Binary(bytes) | PartType::InlineBinary(bytes) => {
if let Some(t) = infer::get(bytes.as_ref()) {
if t.mime_type() != ct_full {
// Known content-type
ctx.result.add_tag("MIME_GOOD");
} else if ct_full != "application/octet-stream" {
// Known bad content-type
ctx.result.add_tag("MIME_BAD");
}
if ct_full != "application/octet-stream" {
if let Some(t) = infer::get(part.contents()) {
if t.mime_type() == ct_full {
// Known content-type
ctx.result.add_tag("MIME_GOOD");
} else {
// Known bad content-type
ctx.result.add_tag("MIME_BAD");
}
}
_ => (),
}
}

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::future::Future;
use std::future::Future;
use common::Server;
use mail_parser::{HeaderName, Host};
@ -41,10 +41,10 @@ impl SpamFilterAnalyzeReceived for Server {
}
if let Some(received) = header.value().as_received() {
let helo_domain = received.helo();
let helo_domain = received.from().or_else(|| received.helo());
let ip_rev = received.from_iprev();
if matches!(&helo_domain, Some(Host::Name(hostname)) if hostname.eq_ignore_ascii_case("localhost"))
if matches!(&helo_domain, Some(Host::Name(hostname)) if hostname.eq_ignore_ascii_case("user"))
{
// HELO domain is "user"
ctx.result.add_tag("RCVD_HELO_USER");
@ -68,7 +68,7 @@ impl SpamFilterAnalyzeReceived for Server {
}
}
if received.from_ip().is_some() {
if matches!(received.from, Some(Host::IpAddr(_))) {
// Received from an IP address rather than a FQDN
rcvd_from_ip += 1;
}
@ -102,14 +102,14 @@ impl SpamFilterAnalyzeReceived for Server {
}
// Received from an authenticated user
if !ctx.input.authenticated_as.is_empty() {
if ctx.input.authenticated_as.is_some() {
ctx.result.add_tag("RCVD_VIA_SMTP_AUTH");
}
// Received with TLS checks
if rcvd_count > 0 && rcvd_count == tls_count && !ctx.input.tls_version.is_empty() {
if rcvd_count > 0 && rcvd_count == tls_count && ctx.input.is_tls {
ctx.result.add_tag("RCVD_TLS_ALL");
} else if !ctx.input.tls_version.is_empty() {
} else if ctx.input.is_tls {
ctx.result.add_tag("RCVD_TLS_LAST");
} else {
ctx.result.add_tag("RCVD_NO_TLS_LAST");

View file

@ -242,46 +242,50 @@ impl SpamFilterAnalyzeRecipient for Server {
ctx.result.add_tag("RCPT_BOUNCEMOREONE");
}
for rcpts in [&ctx.output.recipients_to, &ctx.output.recipients_cc] {
let mut is_sorted = false;
if rcpts.len() >= 6 {
// Check if the recipients list is sorted
let mut sorted = true;
for i in 1..rcpts.len() {
if rcpts[i - 1].email.address > rcpts[i].email.address {
sorted = false;
break;
}
let rcpts = ctx
.output
.recipients_to
.iter()
.chain(ctx.output.recipients_cc.iter())
.collect::<Vec<_>>();
let mut is_sorted = false;
if rcpts.len() >= 6 {
// Check if the recipients list is sorted
let mut sorted = true;
for i in 1..rcpts.len() {
if rcpts[i - 1].email.address > rcpts[i].email.address {
sorted = false;
break;
}
if sorted {
ctx.result.add_tag("SORTED_RECIPS");
is_sorted = true;
}
if sorted {
ctx.result.add_tag("SORTED_RECIPS");
is_sorted = true;
}
}
if !is_sorted && rcpt_count >= 5 {
// Look for similar recipients
let mut hits = 0;
let mut combinations = 0;
for i in 0..rcpts.len() {
for j in i + 1..rcpts.len() {
let a = &rcpts[i].email;
let b = &rcpts[j].email;
if levenshtein_distance(&a.local_part, &b.local_part) < 3
|| (a.domain_part.fqdn != b.domain_part.fqdn
&& levenshtein_distance(&a.domain_part.fqdn, &b.domain_part.fqdn) < 4)
{
hits += 1;
}
combinations += 1;
}
}
if !is_sorted && rcpt_count >= 5 {
// Look for similar recipients
let mut hits = 0;
let mut combinations = 0;
for i in 0..rcpts.len() {
for j in i + 1..rcpts.len() {
let a = &rcpts[i].email;
let b = &rcpts[j].email;
if levenshtein_distance(&a.local_part, &b.local_part) < 3
|| (a.domain_part.fqdn != b.domain_part.fqdn
&& levenshtein_distance(&a.domain_part.fqdn, &b.domain_part.fqdn)
< 4)
{
hits += 1;
}
combinations += 1;
}
}
if hits as f64 / combinations as f64 > 0.65 {
ctx.result.add_tag("SUSPICIOUS_RECIPS");
}
if hits as f64 / combinations as f64 > 0.65 {
ctx.result.add_tag("SUSPICIOUS_RECIPS");
}
}
}

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::{borrow::Cow, future::Future};
use std::{borrow::Cow, future::Future};
use common::{
ip_to_bytes, Server, KV_REPUTATION_ASN, KV_REPUTATION_DOMAIN, KV_REPUTATION_FROM,
@ -25,6 +25,7 @@ pub trait SpamFilterAnalyzeReputation: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
#[derive(Debug)]
enum Type {
Ip,
From,
@ -48,7 +49,7 @@ impl SpamFilterAnalyzeReputation for Server {
};
// Do not penalize forged domains
let is_dmarc_pass = matches!(ctx.input.dmarc_result, DmarcResult::Pass);
let is_dmarc_pass = matches!(ctx.input.dmarc_result, Some(DmarcResult::Pass));
let mut types = vec![
(Type::Ip, Cow::Owned(ip_to_bytes(&ctx.input.remote_ip))),
@ -84,29 +85,34 @@ impl SpamFilterAnalyzeReputation for Server {
let mut reputation = 0.0;
for (rep_type, key) in types {
let mut token =
match key_get::<Reputation>(self, ctx.input.span_id, key.clone()).await {
Ok(Some(token)) => token,
Ok(None) if !ctx.input.is_test => {
key_set(
self,
ctx.input.span_id,
KeyValue::with_prefix(
rep_type.prefix(),
key.as_ref(),
Reputation {
count: 1,
score: ctx.result.score,
}
.serialize(),
)
.expires(config.expiry),
let mut token = match key_get::<Reputation>(
self,
ctx.input.span_id,
KeyValue::<()>::build_key(rep_type.prefix(), key.as_ref()),
)
.await
{
Ok(Some(token)) => token,
Ok(None) if !ctx.input.is_test => {
key_set(
self,
ctx.input.span_id,
KeyValue::with_prefix(
rep_type.prefix(),
key.as_ref(),
Reputation {
count: 1,
score: ctx.result.score,
}
.serialize(),
)
.await;
continue;
}
_ => continue,
};
.expires(config.expiry),
)
.await;
continue;
}
Ok(None) | Err(_) => continue,
};
// Update reputation
token.score = (token.count + 1) as f64
@ -130,8 +136,10 @@ impl SpamFilterAnalyzeReputation for Server {
Type::Domain => config.domain_weight,
Type::Asn => config.asn_weight,
};
let c = println!("{rep_type:?} {weight}");
reputation += token.score / token.count as f64 * weight;
let c = println!("{rep_type:?} {weight}: {reputation}");
}
// Adjust score

View file

@ -97,7 +97,11 @@ impl SpamFilterAnalyzeRules for Server {
if let Some(tag) = self
.eval_if::<String, _>(
&rule.rule,
&SpamFilterResolver::new(ctx, &IpResolver(ip.element), ip.location),
&SpamFilterResolver::new(
ctx,
&IpResolver::new(ip.element),
ip.location,
),
ctx.input.span_id,
)
.await

View file

@ -45,19 +45,24 @@ impl SpamFilterAnalyzeScore for Server {
}
}
// Sort by score
let mut header = String::with_capacity(header_len);
results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap().then_with(|| a.0.cmp(b.0)));
header.push_str("X-Spam-Result: ");
for (idx, (tag, score)) in results.into_iter().enumerate() {
if idx > 0 {
header.push_str(",\r\n\t");
// Write results header sorted by score
if let Some(header_name) = &self.core.spam.headers.result {
let mut header = String::with_capacity(header_name.len() + header_len + 2);
results.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap().then_with(|| a.0.cmp(b.0)));
header.push_str(header_name);
header.push_str(": ");
for (idx, (tag, score)) in results.into_iter().enumerate() {
if idx > 0 {
header.push_str(",\r\n\t");
}
let _ = write!(&mut header, "{} ({:.2})", tag, score);
}
let _ = write!(&mut header, "{} ({:.2})", tag, score);
}
header.push_str("\r\n");
header.push_str("\r\n");
SpamFilterAction::Allow(header)
SpamFilterAction::Allow(header)
} else {
SpamFilterAction::Allow(String::new())
}
}
async fn spam_filter_finalize(
@ -89,16 +94,19 @@ impl SpamFilterAnalyzeScore for Server {
{
SpamFilterAction::Discard
} else {
let _ = write!(
&mut header,
"X-Spam-Status: {}, score={:.2}\r\n",
if ctx.result.score >= self.core.spam.scores.spam_threshold {
"Yes"
} else {
"No"
},
ctx.result.score
);
if let Some(header_name) = &self.core.spam.headers.status {
let _ = write!(
&mut header,
"{}: {}, score={:.2}\r\n",
header_name,
if ctx.result.score >= self.core.spam.scores.spam_threshold {
"Yes"
} else {
"No"
},
ctx.result.score
);
}
SpamFilterAction::Allow(header)
}
}

View file

@ -32,6 +32,7 @@ impl SpamFilterAnalyzeSubject for Server {
.raw_message()
.get(header.offset_start..header.offset_end)
.unwrap_or_default();
break;
}
}
@ -46,7 +47,6 @@ impl SpamFilterAnalyzeSubject for Server {
let mut lower_count = 0;
let mut last_ch = ' ';
let mut last_ch_trimmed = ' ';
let mut is_ascii = true;
for ch in ctx.output.subject_thread.chars() {
@ -69,8 +69,6 @@ impl SpamFilterAnalyzeSubject for Server {
}
}
}
last_ch_trimmed = ch;
}
if !ch.is_ascii() {
@ -80,14 +78,12 @@ impl SpamFilterAnalyzeSubject for Server {
last_ch = ch;
}
if last_ch.is_whitespace() {
if last_ch_trimmed.is_whitespace() {
// Subject is empty
ctx.result.add_tag("EMPTY_SUBJECT");
} else {
// Subject ends with whitespace
ctx.result.add_tag("SUBJECT_ENDS_SPACES");
}
if ctx.output.subject_lc.is_empty() {
// Subject is empty
ctx.result.add_tag("EMPTY_SUBJECT");
} else if ctx.output.subject.ends_with(' ') {
// Subject ends with whitespace
ctx.result.add_tag("SUBJECT_ENDS_SPACES");
}
if ctx.output.subject_thread.len() >= 10

View file

@ -187,7 +187,7 @@ impl SpamFilterAnalyzeUrl for Server {
if let Some(tag) = is_dnsbl(
self,
dnsbl,
SpamFilterResolver::new(ctx, &IpResolver(ip), url.location),
SpamFilterResolver::new(ctx, &IpResolver::new(ip), url.location),
)
.await
{
@ -201,46 +201,52 @@ impl SpamFilterAnalyzeUrl for Server {
// Check for redirectors
ctx.result.add_tag("REDIRECTOR_URL");
let mut redirect_count = 0;
let mut url_redirect = Cow::Borrowed(url.element.as_str());
if !ctx.result.has_tag("URL_REDIRECTOR_NESTED") {
let mut redirect_count = 1;
let mut url_redirect = Cow::Borrowed(url.element.as_str());
while redirect_count <= 0 {
match http_get_header(url_redirect.as_ref(), LOCATION, Duration::from_secs(5))
while redirect_count <= 3 {
match http_get_header(
url_redirect.as_ref(),
LOCATION,
Duration::from_secs(5),
)
.await
{
Ok(Some(location)) => {
if let Ok(location_parsed) = location.parse::<Uri>() {
let host =
Hostname::new(location_parsed.host().unwrap_or_default());
if self
.core
.spam
.lists
.url_redirectors
.contains(host.sld_or_default())
{
url_redirect = Cow::Owned(location);
redirect_count += 1;
continue;
} else {
ctx.output.urls.insert(ElementLocation::new(
UrlParts::new(location.to_lowercase())
.with_parts(location_parsed, host),
url.location,
));
{
Ok(Some(location)) => {
if let Ok(location_parsed) = location.parse::<Uri>() {
let host =
Hostname::new(location_parsed.host().unwrap_or_default());
if self
.core
.spam
.lists
.url_redirectors
.contains(host.sld_or_default())
{
url_redirect = Cow::Owned(location);
redirect_count += 1;
continue;
} else {
ctx.output.urls.insert(ElementLocation::new(
UrlParts::new(location.to_lowercase())
.with_parts(location_parsed, host),
url.location,
));
}
}
}
Ok(None) => {}
Err(err) => {
trc::error!(err.span_id(ctx.input.span_id));
}
}
Ok(None) => {}
Err(err) => {
trc::error!(err.span_id(ctx.input.span_id));
}
break;
}
break;
}
if redirect_count > 5 {
ctx.result.add_tag("URL_REDIRECTOR_NESTED");
if redirect_count > 3 {
ctx.result.add_tag("URL_REDIRECTOR_NESTED");
}
}
}
@ -270,10 +276,10 @@ impl SpamFilterAnalyzeUrl for Server {
{
ctx.result.add_tag("HOMOGRAPH_URL");
}
}
if !cured_host.is_single_script() {
ctx.result.add_tag("MIXED_CHARSET_URL");
}
if !host.fqdn.is_single_script() {
ctx.result.add_tag("MIXED_CHARSET_URL");
}
}
@ -333,11 +339,21 @@ impl SpamFilterAnalyzeUrl for Server {
}
}
#[allow(unreachable_code)]
#[allow(unused_variables)]
async fn http_get_header(
url: &str,
header: hyper::header::HeaderName,
timeout: Duration,
) -> trc::Result<Option<String>> {
#[cfg(feature = "test_mode")]
{
return if url.contains("redirect.") {
Ok(url.split_once("/?").unwrap().1.to_string().into())
} else {
Ok(None)
};
}
reqwest::Client::builder()
.user_agent("Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0")
.timeout(timeout)

View file

@ -24,29 +24,28 @@ pub struct SpamFilterInput<'x> {
pub span_id: u64,
// Sender authentication
pub arc_result: &'x ArcOutput<'x>,
pub spf_ehlo_result: &'x SpfOutput,
pub spf_mail_from_result: &'x SpfOutput,
pub arc_result: Option<&'x ArcOutput<'x>>,
pub spf_ehlo_result: Option<&'x SpfOutput>,
pub spf_mail_from_result: Option<&'x SpfOutput>,
pub dkim_result: &'x [DkimOutput<'x>],
pub dmarc_result: &'x DmarcResult,
pub dmarc_policy: &'x Policy,
pub iprev_result: &'x IprevOutput,
pub dmarc_result: Option<&'x DmarcResult>,
pub dmarc_policy: Option<&'x Policy>,
pub iprev_result: Option<&'x IprevOutput>,
// Session details
pub remote_ip: IpAddr,
pub ehlo_domain: &'x str,
pub authenticated_as: &'x str,
pub ehlo_domain: Option<&'x str>,
pub authenticated_as: Option<&'x str>,
pub asn: Option<u32>,
pub country: Option<&'x str>,
// TLS
pub tls_version: &'x str,
pub tls_cipher: &'x str,
pub is_tls: bool,
// Envelope
pub env_from: &'x str,
pub env_from_flags: u64,
pub env_rcpt_to: &'x [&'x str],
pub env_rcpt_to: Vec<&'x str>,
pub account_id: Option<u32>,
pub is_test: bool,

View file

@ -136,11 +136,11 @@ pub(crate) async fn bayes_train(
pub(crate) async fn bayes_classify(
server: &Server,
ctx: &SpamFilterContext<'_>,
) -> trc::Result<f64> {
) -> trc::Result<Option<f64>> {
let classifier = if let Some(config) = &server.core.spam.bayes {
&config.classifier
} else {
return Ok(0.0);
return Ok(None);
};
// Obtain training counts
@ -169,7 +169,7 @@ pub(crate) async fn bayes_classify(
trc::Value::from(classifier.min_learns)
],
);
return Ok(0.0);
return Ok(None);
}
// Classify the text
@ -265,10 +265,10 @@ pub(crate) async fn bayes_classify(
trc::Value::from(ham_learns),
trc::Value::from(classifier.min_learns)
],
Result = result.unwrap_or_default()
Result = result.map(trc::Value::from).unwrap_or_default()
);
Ok(result.unwrap_or_default())
Ok(result)
}
pub(crate) async fn bayes_is_balanced(
@ -357,7 +357,7 @@ const P_REMOTE_IP: u8 = 4;
impl SpamFilterContext<'_> {
pub fn spam_tokens(&self) -> HashSet<Vec<u8>> {
let mut tokens = HashSet::new();
if matches!(self.input.dmarc_result, DmarcResult::Pass) {
if matches!(self.input.dmarc_result, Some(DmarcResult::Pass)) {
for addr in [&self.output.env_from_addr, &self.output.from.email] {
if !addr.address.is_empty() {
tokens.insert(add_prefix(P_FROM_EMAIL, addr.address.as_bytes()));

View file

@ -23,6 +23,33 @@ pub(crate) async fn is_dnsbl(
let zone = server
.eval_if::<String, _>(&config.zone, &resolver, resolver.ctx.input.span_id)
.await?;
#[cfg(feature = "test_mode")]
{
if zone.contains(".11.20.") {
let parts = zone.split('.').collect::<Vec<_>>();
return if config.tags.if_then.iter().any(|i| i.expr.items.len() == 3) && parts[0] != "2"
{
None
} else {
server
.eval_if(
&config.tags,
&SpamFilterResolver::new(
resolver.ctx,
&IpResolver::new(
format!("127.0.{}.{}", parts[1], parts[0]).parse().unwrap(),
),
resolver.location,
),
resolver.ctx.input.span_id,
)
.await
};
}
}
let todo = "use proper event error";
match server.core.smtp.resolvers.dns.ipv4_lookup(&zone).await {
@ -41,7 +68,7 @@ pub(crate) async fn is_dnsbl(
&config.tags,
&SpamFilterResolver::new(
resolver.ctx,
&IpResolver(
&IpResolver::new(
result
.iter()
.copied()

View file

@ -44,11 +44,10 @@ impl<T: ResolveVariable> ResolveVariable for SpamFilterResolver<'_, T> {
.unwrap_or_default()
.into(),
V_SPAM_EHLO_DOMAIN => self.ctx.output.ehlo_host.fqdn.as_str().into(),
V_SPAM_AUTH_AS => self.ctx.input.authenticated_as.into(),
V_SPAM_AUTH_AS => self.ctx.input.authenticated_as.unwrap_or_default().into(),
V_SPAM_ASN => self.ctx.input.asn.unwrap_or_default().into(),
V_SPAM_COUNTRY => self.ctx.input.country.unwrap_or_default().into(),
V_SPAM_TLS_VERSION => self.ctx.input.tls_version.into(),
V_SPAM_TLS_CIPHER => self.ctx.input.tls_cipher.into(),
V_SPAM_IS_TLS => self.ctx.input.is_tls.into(),
V_SPAM_ENV_FROM => self.ctx.output.env_from_addr.address.as_str().into(),
V_SPAM_ENV_FROM_LOCAL => self.ctx.output.env_from_addr.local_part.as_str().into(),
V_SPAM_ENV_FROM_DOMAIN => self
@ -442,14 +441,36 @@ impl ResolveVariable for StringListResolver<'_> {
}
}
pub struct IpResolver(pub IpAddr);
/// Variable resolver for a single IP address.
///
/// The textual form, the reverse name and the octet array are computed once
/// by `IpResolver::new`, so `resolve_variable` can return the stored values
/// instead of re-deriving them from the address on every lookup.
pub struct IpResolver {
/// The address itself; consulted for the `V_IP_IS_V4` / `V_IP_IS_V6` checks.
ip: IpAddr,
/// Result of `ip.to_string()`, returned for `V_IP`.
ip_string: String,
/// Result of `ip.to_reverse_name()`, returned for `V_IP_REVERSE`.
reverse: String,
/// Pre-built `Variable::Array` of `Variable::Integer` values derived from the
/// address octets; cloned and returned for `V_IP_OCTETS`.
octets: Variable<'static>,
}
impl ResolveVariable for IpResolver {
fn resolve_variable(&self, variable: u32) -> Variable<'_> {
match variable {
V_IP => Variable::String(self.0.to_string().into()),
V_IP_REVERSE => Variable::String(self.0.to_reverse_name().into()),
V_IP_OCTETS => Variable::Array(match self.0 {
V_IP => Variable::String(self.ip_string.as_str().into()),
V_IP_REVERSE => Variable::String(self.reverse.as_str().into()),
V_IP_OCTETS => self.octets.clone(),
V_IP_IS_V4 => Variable::Integer(self.ip.is_ipv4() as _),
V_IP_IS_V6 => Variable::Integer(self.ip.is_ipv6() as _),
_ => Variable::Integer(0),
}
}
/// Resolves a global by name. This resolver ignores the name and always
/// returns `Variable::Integer(0)`.
fn resolve_global(&self, _: &str) -> Variable<'_> {
Variable::Integer(0)
}
}
impl IpResolver {
pub fn new(ip: IpAddr) -> Self {
Self {
ip_string: ip.to_string(),
reverse: ip.to_reverse_name(),
octets: Variable::Array(match ip {
IpAddr::V4(ipv4_addr) => ipv4_addr
.octets()
.iter()
@ -461,13 +482,7 @@ impl ResolveVariable for IpResolver {
.map(|o| Variable::Integer(*o as _))
.collect(),
}),
V_IP_IS_V4 => Variable::Integer(self.0.is_ipv4() as _),
V_IP_IS_V6 => Variable::Integer(self.0.is_ipv6() as _),
_ => Variable::Integer(0),
ip,
}
}
fn resolve_global(&self, _: &str) -> Variable<'_> {
Variable::Integer(0)
}
}

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use common::Server;
use common::Server;
use store::{dispatch::lookup::KeyValue, Deserialize, Value};
pub mod bayes;

View file

@ -4,7 +4,7 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::{
use std::{
collections::HashSet,
io::{BufRead, BufReader},
time::Instant,
@ -19,12 +19,20 @@ use common::{
};
use mail_auth::flate2;
#[allow(unused_variables)]
#[allow(unreachable_code)]
pub async fn is_in_remote_list(
server: &Server,
config: &RemoteListConfig,
item: &str,
span_id: u64,
) -> bool {
#[cfg(feature = "test_mode")]
{
return (config.url.contains("open") && item.contains("open"))
|| (config.url.contains("tank") && item.contains("tank"));
}
match is_in_remote_list_(server, config, item, span_id).await {
Ok(result) => result,
Err(err) => {

View file

@ -1,11 +1,12 @@
expect SUBJ_BOUNCE_WORDS
expect SUBJ_BOUNCE_WORDS SINGLE_SHORT_PART
Subject: Delivery Status Notification (Failure)
Test
<!-- NEXT TEST -->
expect BOUNCE
expect BOUNCE SINGLE_SHORT_PART IS_DSN
MIME-Version: 1.0
Content-Type: multipart/report; report-type="delivery-status";
boundary="176e677bbd667276_87a2ed9cf1f4ecb_a49e592dab77f72e"
@ -18,8 +19,9 @@ Your message could not be delivered.
--176e677bbd667276_87a2ed9cf1f4ecb_a49e592dab77f72e--
<!-- NEXT TEST -->
envelope_from spammer@domain.com
expect
expect SINGLE_SHORT_PART IS_DSN
MIME-Version: 1.0
Content-Type: multipart/report; report-type="delivery-status";
boundary="176e677bbd667276_87a2ed9cf1f4ecb_a49e592dab77f72e"
@ -31,7 +33,7 @@ Your message could not be delivered.
--176e677bbd667276_87a2ed9cf1f4ecb_a49e592dab77f72e--
<!-- NEXT TEST -->
expect BOUNCE
expect BOUNCE SINGLE_SHORT_PART
From: MDaemon <dm@domain.com>
X-MDDSN-Message: True
@ -40,7 +42,7 @@ Subject: Something went wrong
Your message could not be delivered.
<!-- NEXT TEST -->
expect BOUNCE SUBJ_BOUNCE_WORDS
expect BOUNCE SUBJ_BOUNCE_WORDS SINGLE_SHORT_PART
From: Automated <MAILER-DAEMON@domain.com>
Subject: Delivery failure
@ -48,8 +50,9 @@ Subject: Delivery failure
Your message could not be delivered.
<!-- NEXT TEST -->
expect BOUNCE
expect BOUNCE HAS_ATTACHMENT HAS_MESSAGE_PARTS
MIME-Version: 1.0
From: Automated <POSTMASTER@domain.com>
Subject: Something unexpected happened
Content-Type: multipart/mixed;

View file

@ -6,8 +6,7 @@ spf.result none
spf_ehlo.result none
dmarc.result none
remote_ip 195.210.29.48
expect_header X-Spam-Status Yes, score=8.
expect_header X-Spam-Result
expect_score 8
expect rdns_none auth_na dmarc_na helo_nores_a_or_mx once_received mid_rhs_match_from spf_na has_data_uri arc_na subject_has_exclaim subject_ends_exclaim mime_html_only html_short_link_img_1 to_dn_none rcpt_count_one to_match_envrcpt_all fromhost_nores_a_or_mx rcvd_count_zero from_eq_envfrom dkim_na rcvd_no_tls_last from_has_dn date_in_past
From: Client Services <noreply@tetheer.com>
@ -50,8 +49,7 @@ dkim.domains tenthrevolution.com
dmarc.result pass
remote_ip 185.58.86.181
tls.version TLSv1.3
expect_header X-Spam-Status No, score=3.
expect_header X-Spam-Result
expect_score 3
expect from_eq_envfrom from_has_dn helo_nores_a_or_mx forged_rcvd_trail date_in_past arc_na uri_count_odd dkim_signed has_attachment spf_allow rcvd_tls_last rcpt_count_one mime_good subject_ends_spaces fromhost_nores_a_or_mx to_dn_eq_addr_all dkim_allow dmarc_policy_allow rcvd_count_three to_match_envrcpt_all
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=tenthrevolution.com;
@ -625,8 +623,7 @@ dmarc.result fail
dmarc.policy reject
remote_ip 51.89.165.39
tls.version TLS1_2
expect_header X-Spam-Status Yes, score=13.
expect_header X-Spam-Result
expect_score 13
expect has_replyto violated_direct_spf replyto_addr_eq_from uri_count_odd once_received r_parts_differ mid_rhs_match_from fromhost_nores_a_or_mx from_has_dn dkim_allow date_in_past to_match_envrcpt_all html_short_link_img_1 rcpt_count_one arc_na helo_nores_a_or_mx spf_softfail rcvd_tls_last rcvd_count_zero replyto_dom_eq_from_dom to_dn_none has_list_unsub dkim_signed rdns_none from_eq_envfrom dmarc_policy_reject
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; s=sectionalism; d=grupokonecta.net;

View file

@ -20,7 +20,7 @@ From: test
Test
<!-- NEXT TEST -->
envelope_from www-data@domain.org
expect FROM_SERVICE_ACCT ENVFROM_SERVICE_ACCT FROM_HAS_DN FROM_EQ_ENVFROM
expect FROM_SERVICE_ACCT FROM_HAS_DN FROM_EQ_ENVFROM
From: "WWW DATA" <www-data@domain.org>
@ -150,12 +150,11 @@ Disposition-Notification-To: <bye@domain.org>
Test
<!-- NEXT TEST -->
envelope_from hello@domain.org
envelope_from anonymous@domain.org
expect FROM_SERVICE_ACCT WWW_DOT_DOMAIN FROM_EQ_ENVFROM FROM_HAS_DN
From: "Hello" <hello@domain.org>
Sender: <info@www.domain.org>
Reply-to: <guest@domain.org>
From: "Hello" <anonymous@domain.org>
Reply-to: <info@www.domain.org>
Test
<!-- NEXT TEST -->

View file

@ -16,13 +16,11 @@ To: test@test.com
Test
<!-- NEXT TEST -->
expect XM_CASE HAS_LIST_UNSUB HAS_XOIP HAS_ORG_HEADER PRECEDENCE_BULK MULTIPLE_UNIQUE_HEADERS
expect XM_CASE HAS_LIST_UNSUB PRECEDENCE_BULK MULTIPLE_UNIQUE_HEADERS
X-mailer: my mailer 1
X-Originating-IP: 127.0.0.1
List-Unsubscribe: <unsub@list.org>
Precedence: bulk
Organization: my org
Subject: first subject
Subject: second subject
@ -38,7 +36,7 @@ Subject: test
Test
<!-- NEXT TEST -->
expect X_PHP_EVAL HAS_X_POS HAS_X_SOURCE HAS_X_PHP_SCRIPT PHP_XPS_PATTERN HIDDEN_SOURCE_OBJ HAS_X_GMSV HAS_X_ANTIABUSE HAS_X_AS HAS_XAW
expect X_PHP_EVAL HIDDEN_SOURCE_OBJ HAS_X_GMSV HAS_X_AS
X-PHP-Script: sendmail.php
X-PHP-Originating-Script: eval()

View file

@ -284,7 +284,7 @@ Content-Transfer-Encoding: 8bit
<meta http-equiv="refresh" content="5">
</head>
<body>
<p>some text</p>
<p>some text and a lovely explanation to avoid the text to image ratio tag</p>
<img src="...." alt="Red dot" />
<a href="data:other">Click me for a hello message</a>
</body>

View file

@ -45,6 +45,12 @@ expect INVALID_MSGID
Message-ID: <hello@domain.com> (hello world)
Test
<!-- NEXT TEST -->
expect MID_RHS_TOO_LONG
Message-ID: <hello@domaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomaindomain.com>
Test
<!-- NEXT TEST -->
expect MID_MISSING_BRACKETS

View file

@ -149,7 +149,7 @@ Content-Type: multipart/alternative;
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Lorem ipsum dolor sit amet, Rcnsectetur Radipiscing elit, Rsed do Reiusmod tempor
Lorem ipsum dolor sit Ramet, Rcnsectetur Radipiscing elit, Rsed do Reiusmod tempor
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud
exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore

View file

@ -1,12 +1,12 @@
remote_ip 192.168.0.1
expect RCVD_IN_DNSWL_LOW RBL_SPAMHAUS
remote_ip 20.11.0.1
expect RCVD_IN_DNSWL_LOW
Subject: test
test
<!-- NEXT TEST -->
remote_ip 192.168.0.2
remote_ip 20.11.0.2
expect RBL_SENDERSCORE RBL_NIXSPAM RBL_SEM RBL_SPAMHAUS_SBL RBL_BARRACUDA RBL_BLOCKLISTDE RBL_VIRUSFREE_BOTNET RBL_SPAMCOP RCVD_IN_DNSWL_MED
Subject: test
@ -14,14 +14,14 @@ Subject: test
test
<!-- NEXT TEST -->
remote_ip 192.168.0.14
expect RWL_MAILSPIKE_NEUTRAL RECEIVED_SPAMHAUS_SBL RBL_SPAMHAUS RECEIVED_SPAMHAUS_XBL RECEIVED_BLOCKLISTDE RCVD_IN_DNSWL_MED
remote_ip 20.11.0.14
expect RWL_MAILSPIKE_NEUTRAL RECEIVED_SPAMHAUS_SBL RECEIVED_SPAMHAUS_XBL RECEIVED_BLOCKLISTDE RCVD_IN_DNSWL_MED
Received: from Agni (localhost [192.168.0.5]) (TLS: TLSv1/SSLv3, 168bits,DES-CBC3-SHA) by agni.forevermore.net
Received: from Agni (localhost [20.11.0.5]) (TLS: TLSv1/SSLv3, 168bits,DES-CBC3-SHA) by agni.forevermore.net
with esmtp; Mon, 28 Oct 2002 14:48:52 -0800
Received: from [192.168.0.14] (79.sub-174-252-72.myvzw.com [192.168.0.8]) by mx.google.com
Received: from [20.11.0.14] (79.sub-174-252-72.myvzw.com [20.11.0.8]) by mx.google.com
with ESMTPS id m16sm345129qck.28.2011.06.15.07.42.02 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 15 Jun 2011 07:42:08 -0700 (PDT)
Received: from user (192.168.0.2) by DB6PR07MB3384.eurprd07.prod.outlook.com ([192.168.0.2])
Received: from user (20.11.0.2) by DB6PR07MB3384.eurprd07.prod.outlook.com ([20.11.0.2])
with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.1143.11; Thu, 13 Sep 2018 14:47:44 +0000
Subject: test
@ -39,6 +39,7 @@ And my website is https://sem-fresh15.com/offers.html
Try cheating with a trusted domain user@dkimtrusted.org
<!-- NEXT TEST -->
dkim.result pass
dkim.domains dkimtrusted.org
expect DWL_DNSWL_HI
@ -57,7 +58,7 @@ Subject: test
test
<!-- NEXT TEST -->
expect SURBL_HASHBL_ABUSE SURBL_HASHBL_MALWARE SURBL_HASHBL_PHISH
expect SURBL_HASHBL_ABUSE SURBL_HASHBL_MALWARE SURBL_HASHBL_PHISH URL_ONLY
From: spammer@spamcorp.net
Reply-To: User <spammer@spamcorp.net>

View file

@ -38,6 +38,8 @@ Received: from Agni (localhost [::ffff:127.0.0.1]) (TLS: TLSv1/SSLv3, 168bits,DE
with esmtp; Mon, 28 Oct 2002 14:48:52 -0800
Received: from [10.231.252.223] (79.sub-174-252-72.myvzw.com [174.252.72.79]) by mx.google.com
with ESMTPS id m16sm345129qck.28.2011.06.15.07.42.02 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 15 Jun 2011 07:42:08 -0700 (PDT)
Received: from other.myvzw.com (79.sub-174-252-72.myvzw.com [174.252.72.79]) by mx.google.com
with ESMTPS id m16sm345129qck.28.2011.06.15.07.42.02 (version=TLSv1/SSLv3 cipher=OTHER); Wed, 15 Jun 2011 07:42:08 -0700 (PDT)
Received: from user (10.175.233.33) by DB6PR07MB3384.eurprd07.prod.outlook.com (10.175.234.11)
with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.1143.11; Thu, 13 Sep 2018 14:47:44 +0000
Received: from [94.198.96.74] (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange ECDHE (P-256) server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested)
@ -55,3 +57,10 @@ X-Mailer: MUA
Subject: test
test
<!-- NEXT TEST -->
expect RCVD_UNPARSABLE RCVD_NO_TLS_LAST RCVD_COUNT_ONE
To: user@domain.com
Received: invalid
test

View file

@ -24,7 +24,7 @@ Cc: other@user.org
Test
<!-- NEXT TEST -->
expect RCPT_ADDR_IN_SUBJECT TO_DN_NONE RCPT_COUNT_ONE
expect RCPT_IN_SUBJECT TO_DN_NONE RCPT_COUNT_ONE
To: hello@world.com
Subject: Special offer for HELLO@world.com
@ -117,7 +117,7 @@ Cc: otheruser@guerrillamail.com
Test
<!-- NEXT TEST -->
envelope_from test@test.com
expect FREEMAIL_CC DISPOSABLE_TO RCPT_COUNT_THREE TO_DN_NONE
expect FREEMAIL_CC DISPOSABLE_TO DISPOSABLE_CC RCPT_COUNT_THREE TO_DN_NONE
To: otheruser@guerrillamail.com
Cc: user@gmail.com

View file

@ -72,21 +72,21 @@ Test
expect REPLYTO_EXCESS_QP REPLYTO_DOM_EQ_FROM_DOM HAS_REPLYTO
From: hello@domain.org
Reply-to: =?iso-8859-1?Q?Die_Hasen_und_die_Froesche?= <hello@domain.org>
Reply-to: =?iso-8859-1?Q?Die_Hasen_und_die_Froesche?= <bye@domain.org>
Test
<!-- NEXT TEST -->
expect REPLYTO_EXCESS_BASE64 REPLYTO_DOM_EQ_FROM_DOM HAS_REPLYTO
From: hello@domain.org
Reply-to: "=?iso-8859-1?B?RGllIEhhc2VuIHVuIGRpZSBGcm9lc2NoZQ==?=" <hello@domain.org>
Reply-to: "=?iso-8859-1?B?RGllIEhhc2VuIHVuIGRpZSBGcm9lc2NoZQ==?=" <bye@domain.org>
Test
<!-- NEXT TEST -->
expect REPLYTO_EMAIL_HAS_TITLE REPLYTO_DOM_EQ_FROM_DOM HAS_REPLYTO
From: hello@domain.org
Reply-to: "Mr. Hello" <hello@domain.org>
Reply-to: "Mr. Hello" <bye@domain.org>
Test
<!-- NEXT TEST -->

View file

@ -27,7 +27,7 @@ Subject: thís líné shóúld bé éncódéd
Test
<!-- NEXT TEST -->
param.body 8bitmime
param.8bitmime 1
expect
Subject: thís líné shóúld bé éncódéd
@ -103,15 +103,36 @@ Subject: =?iso-8859-1?Q?Die_Hasen_und_die_Fr=F6sche_?=
Test
<!-- NEXT TEST -->
param.smtputf8 1
expect SUBJECT_HAS_CURRENCY SUBJECT_ENDS_EXCLAIM SUBJECT_HAS_EXCLAIM
expect SUBJECT_HAS_CURRENCY SUBJECT_ENDS_EXCLAIM
Subject: You have won £200!
Test
<!-- NEXT TEST -->
param.smtputf8 1
expect SUBJECT_HAS_CURRENCY SUBJECT_HAS_QUESTION SUBJECT_ENDS_QUESTION
expect SUBJECT_HAS_CURRENCY SUBJECT_ENDS_QUESTION
Subject: Have you won $200?
Test
<!-- NEXT TEST -->
expect RCPT_IN_SUBJECT
To: hello@world.org
Subject: Great offers for hello@world.org
Test
<!-- NEXT TEST -->
expect RCPT_DOMAIN_IN_SUBJECT
To: hello@world.org
Subject: Great offers for world.org
Test
<!-- NEXT TEST -->
expect
To: hello@world.org
Subject: Question about other@domain.net
Test

View file

@ -46,7 +46,7 @@ Subject: redirect to omograph
login to https://www.redirect.com/?https://xn--twiter-507b.com
<!-- NEXT TEST -->
expect HAS_ONION_URI
expect HAS_ONION_URI HAS_ANON_DOMAIN
Subject: url in title darkweb.onion/login
@ -54,26 +54,36 @@ test
<!-- NEXT TEST -->
expect HAS_IPFS_GATEWAY_URL HAS_WP_URI URI_HIDDEN_PATH
Content-Type: text/html; charset="utf-8"
Subject: html test
<link href="site.com/ipfs/Qm123">
<link href="https://site.com/ipfs/Qm123">
<a href="https://web.org/../../login.php"><img src="http://site.org/wp-static/img.png"></a>
<!-- NEXT TEST -->
expect HAS_GUC_PROXY_URI HAS_GOOGLE_FIREBASE_URL HAS_GOOGLE_REDIR
expect HAS_GUC_PROXY_URI HAS_GOOGLE_FIREBASE_URL HAS_GOOGLE_REDIR HAS_ANON_DOMAIN URL_ONLY
Content-Type: text/html; charset="utf-8"
Subject: mixed urls googleusercontent.com/proxy/url
<a href="https://firebasestorage.googleapis.com/content">google.com/url?otherurl.org</a>
<!-- NEXT TEST -->
expect WP_COMPROMISED HAS_WP_URI
expect WP_COMPROMISED
Subject: plain test
http://url.com/Well-known/../assetlinks.json
http://wp.com/WP-content/content.pdf
<!-- NEXT TEST -->
expect HAS_WP_URI
Subject: plain test
http://url.com/Well-known/../assetlinks.json
http://wp.com/WP-other/content.pdf
<!-- NEXT TEST -->
expect PHISHED_OPENPHISH PHISHED_PHISHTANK

File diff suppressed because it is too large Load diff