Antispam reputation and composite rules

This commit is contained in:
mdecimus 2023-10-15 16:15:14 +02:00
parent 0b67f31220
commit 29a8ecb29b
13 changed files with 287 additions and 41 deletions

View file

@ -193,18 +193,20 @@ impl Lookup {
pub async fn lookup(&self, items: &[DatabaseColumn<'_>]) -> Option<Variable> {
match self {
Lookup::Directory { directory, query } => match directory.query(query, items).await {
Ok(mut result) => match result.len() {
1 if !matches!(result.first(), Some(DatabaseColumn::Null)) => {
result.pop().map(Variable::from).unwrap()
Ok(mut result) => {
match result.len() {
1 if !matches!(result.first(), Some(DatabaseColumn::Null)) => {
result.pop().map(Variable::from).unwrap()
}
0 => Variable::default(),
_ => Variable::Array(
result
.into_iter()
.map(Variable::from)
.collect::<Vec<_>>()
.into(),
),
}
0 => Variable::default(),
_ => Variable::Array(
result
.into_iter()
.map(Variable::from)
.collect::<Vec<_>>()
.into(),
),
}
.into(),
Err(_) => None,

View file

@ -169,7 +169,6 @@ impl Directory for SqlDirectory {
for col in row.columns() {
let idx = col.ordinal();
columns.push(match col.type_info().kind() {
AnyTypeInfoKind::Null => DatabaseColumn::Null,
AnyTypeInfoKind::Bool => {
DatabaseColumn::Bool(row.try_get(idx).unwrap_or_default())
}
@ -187,6 +186,9 @@ impl Directory for SqlDirectory {
AnyTypeInfoKind::Blob => DatabaseColumn::Blob(
row.try_get::<Vec<u8>, _>(idx).unwrap_or_default().into(),
),
AnyTypeInfoKind::Null => row
.try_get::<String, _>(idx)
.map_or(DatabaseColumn::Null, DatabaseColumn::from),
});
}
columns

View file

@ -36,6 +36,10 @@ pub fn register_map(plugin_id: u32, fnc_map: &mut FunctionMap<SieveContext>) {
fnc_map.set_external_function("lookup_map", plugin_id, 2);
}
/// Registers the `lookup_map_many` external function in `fnc_map` under
/// `plugin_id`.
///
/// The trailing `2` is presumably the number of arguments the function
/// accepts; `exec_map_many` reads exactly `arguments[0]` (the lookup id) and
/// `arguments[1]` (the item or array of items).
pub fn register_map_many(plugin_id: u32, fnc_map: &mut FunctionMap<SieveContext>) {
fnc_map.set_external_function("lookup_map_many", plugin_id, 2);
}
pub fn exec(ctx: PluginContext<'_>) -> Variable {
let lookup_id = ctx.arguments[0].to_string();
let span = ctx.span;
@ -95,3 +99,39 @@ pub fn exec_map(ctx: PluginContext<'_>) -> Variable {
Variable::default()
}
/// Runs one lookup per input item and returns the collected results.
///
/// * `arguments[0]` — identifier of the lookup to use.
/// * `arguments[1]` — either an array of items or a single item; empty
///   values are discarded before any lookup is performed.
///
/// Each remaining item is looked up on its own (one blocking call per item).
/// Lookups that yield `None` are dropped, so the returned array may be
/// shorter than the input. When the lookup id is unknown a warning is logged
/// and the default `Variable` is returned.
pub fn exec_map_many(ctx: PluginContext<'_>) -> Variable {
    let lookup_id = ctx.arguments[0].to_string();
    let span = ctx.span;

    // Normalise the second argument into a list of non-empty columns.
    let items = match &ctx.arguments[1] {
        Variable::Array(list) => list
            .iter()
            .filter(|entry| !entry.is_empty())
            .map(|entry| DatabaseColumn::from(entry))
            .collect(),
        single if !single.is_empty() => vec![DatabaseColumn::from(single)],
        _ => Vec::new(),
    };

    match ctx.core.sieve.lookup.get(lookup_id.as_ref()) {
        Some(lookup) => {
            // Query items one at a time, keeping only successful lookups.
            let mut results = Vec::with_capacity(items.len());
            for item in items {
                if let Some(value) = ctx.handle.block_on(lookup.lookup(&[item])) {
                    results.push(value);
                }
            }
            results.into()
        }
        None => {
            tracing::warn!(
                parent: span,
                context = "sieve:lookup",
                event = "failed",
                reason = "Unknown lookup id",
                lookup_id = %lookup_id,
            );
            Variable::default()
        }
    }
}

View file

@ -45,11 +45,12 @@ pub struct PluginContext<'x> {
pub arguments: Vec<Variable>,
}
const PLUGINS_EXEC: [ExecPluginFnc; 10] = [
const PLUGINS_EXEC: [ExecPluginFnc; 11] = [
query::exec,
exec::exec,
lookup::exec,
lookup::exec_map,
lookup::exec_map_many,
dns::exec,
dns::exec_exists,
http::exec_header,
@ -57,11 +58,12 @@ const PLUGINS_EXEC: [ExecPluginFnc; 10] = [
bayes::exec_untrain,
bayes::exec_classify,
];
const PLUGINS_REGISTER: [RegisterPluginFnc; 10] = [
const PLUGINS_REGISTER: [RegisterPluginFnc; 11] = [
query::register,
exec::register,
lookup::register,
lookup::register_map,
lookup::register_map_many,
dns::register,
dns::register_exists,
http::register_header,

View file

@ -1,5 +1,5 @@
if eval "!t.SPAM_TRAP && !t.TRUSTED_REPLY" {
let "bayes_result" "bayes_classify('spamdb/bayes-classify', body_and_subject)";
let "bayes_result" "bayes_classify('spamdb/token-lookup', body_and_subject)";
if eval "!is_empty(bayes_result)" {
if eval "bayes_result > 0.7" {
let "t.BAYES_SPAM" "1";

View file

@ -0,0 +1,75 @@
# Composite rules: each block below sets a new t.* tag to "1" when a
# combination of other t.* tags evaluates to true. Some rules read tags set
# by earlier rules in this same script (e.g. t.AUTH_NA), so order matters.

# Forged recipients / sender combined with mailing-list traffic
if eval "t.FORGED_RECIPIENTS && t.MAILLIST" {
let "t.FORGED_RECIPIENTS_MAILLIST" "1";
}
if eval "t.FORGED_SENDER && t.MAILLIST" {
let "t.FORGED_SENDER_MAILLIST" "1";
}
# DMARC policy allows the message although SPF or DKIM reported a failure
if eval "t.DMARC_POLICY_ALLOW && (t.R_SPF_SOFTFAIL || t.R_SPF_FAIL || t.R_DKIM_REJECT)" {
let "t.DMARC_POLICY_ALLOW_WITH_FAILURES" "1";
}
# All four authentication tags (DKIM, SPF, DMARC, ARC) are "NA"
if eval "t.R_DKIM_NA && t.R_SPF_NA && t.DMARC_NA && t.ARC_NA" {
let "t.AUTH_NA" "1";
}
# Not all four are "NA", but each one is either "NA" or one of the listed
# failure tags (mutually exclusive with t.AUTH_NA because of the leading negation)
if eval "!(t.R_DKIM_NA && t.R_SPF_NA && t.DMARC_NA && t.ARC_NA) && (t.R_DKIM_NA || t.R_DKIM_TEMPFAIL || t.R_DKIM_PERMFAIL) && (t.R_SPF_NA || t.R_SPF_DNSFAIL) && t.DMARC_NA && (t.ARC_NA || t.ARC_DNSFAIL)" {
let "t.AUTH_NA_OR_FAIL" "1";
}
# Bounce indicators without authentication results (uses the two tags set above)
if eval "(t.AUTH_NA || t.AUTH_NA_OR_FAIL) && (t.BOUNCE || t.SUBJ_BOUNCE_WORDS)" {
let "t.BOUNCE_NO_AUTH" "1";
}
# X-POS / PHPMailer signature plus a WP URI plus any phishing-related tag
if eval "(t.HAS_X_POS || t.HAS_PHPMAILER_SIG) && t.HAS_WP_URI && (t.PHISHING || t.CRACKED_SURBL || t.PH_SURBL_MULTI || t.DBL_PHISH || t.DBL_ABUSE_PHISH || t.URIBL_BLACK || t.PHISHED_OPENPHISH || t.PHISHED_PHISHTANK)" {
let "t.HACKED_WP_PHISHING" "1";
}
# NOTE(review): this rule reads t.RCVD_FROM_SMTP_AUTH while later rules in
# this script read t.RCVD_VIA_SMTP_AUTH — confirm both tags are actually set.
if eval "(t.HAS_XOIP || t.RCVD_FROM_SMTP_AUTH) && t.DCC_BULK" {
let "t.COMPROMISED_ACCT_BULK" "1";
}
# Bulk mail with missing To header or undisclosed recipients
if eval "t.DCC_BULK && (t.MISSING_TO || t.R_UNDISC_RCPT)" {
let "t.UNDISC_RCPTS_BULK" "1";
}
# Spamhaus PBL hit on a message that was not received via SMTP AUTH
if eval "t.RECEIVED_SPAMHAUS_PBL && !t.RCVD_VIA_SMTP_AUTH" {
let "t.RCVD_UNAUTH_PBL" "1";
}
# DKIM or ARC allow combined with a DNSWL medium / high listing
if eval "(t.R_DKIM_ALLOW || t.ARC_ALLOW) && t.RCVD_IN_DNSWL_MED" {
let "t.RCVD_DKIM_ARC_DNSWL_MED" "1";
}
if eval "(t.R_DKIM_ALLOW || t.ARC_ALLOW) && t.RCVD_IN_DNSWL_HI" {
let "t.RCVD_DKIM_ARC_DNSWL_HI" "1";
}
# PHP-generated mail whose subject ends in "?" / "!" or that has many invisible parts
if eval "(t.HAS_X_POS || t.HAS_PHPMAILER_SIG || t.HAS_X_PHP_SCRIPT) && (t.SUBJECT_ENDS_QUESTION || t.SUBJECT_ENDS_EXCLAIM || t.MANY_INVISIBLE_PARTS)" {
let "t.AUTOGEN_PHP_SPAMMY" "1";
}
# Phishing tag combined with a subject ending in "?" or "!"
if eval "(t.PHISHING || t.DBL_PHISH || t.PHISHED_OPENPHISH || t.PHISHED_PHISHTANK) && (t.SUBJECT_ENDS_QUESTION || t.SUBJECT_ENDS_EXCLAIM)" {
let "t.PHISH_EMOTION" "1";
}
# Any of the proxy / redirector / onion URI tags
if eval "t.HAS_GUC_PROXY_URI || t.URIBL_RED || t.DBL_ABUSE_REDIR || t.HAS_ONION_URI" {
let "t.HAS_ANON_DOMAIN" "1";
}
# SPF fail or softfail on a message with no Received headers or no TLS on the last hop
if eval "(t.R_SPF_FAIL || t.R_SPF_SOFTFAIL) && (t.RCVD_COUNT_ZERO || t.RCVD_NO_TLS_LAST)" {
let "t.VIOLATED_DIRECT_SPF" "1";
}
# Freemail sender + display-name / undisclosed recipients + title in From name
# or Reply-To freemail domain different from the From domain
if eval "(t.FREEMAIL_FROM || t.FREEMAIL_ENVFROM || t.FREEMAIL_REPLYTO) && (t.TO_DN_RECIPIENTS || t.R_UNDISC_RCPT) && (t.FROM_NAME_HAS_TITLE || t.FREEMAIL_REPLYTO_NEQ_FROM_DOM)" {
let "t.FREEMAIL_AFF" "1";
}
# URL-only message whose URL is a redirector
if eval "t.HFILTER_URL_ONLY && t.REDIRECTOR_URL" {
let "t.REDIRECTOR_URL_ONLY" "1";
}
# Fake reply received via SMTP AUTH where the PBL tag is absent or the XBL / SBL tag is present
if eval "t.FAKE_REPLY && t.RCVD_VIA_SMTP_AUTH && (!t.RECEIVED_SPAMHAUS_PBL || t.RECEIVED_SPAMHAUS_XBL || t.RECEIVED_SPAMHAUS_SBL)" {
let "t.THREAD_HIJACKING_FROM_INJECTOR" "1";
}

View file

@ -103,6 +103,9 @@ if eval "header.X-PHP-Script.exists" {
let "t.HIDDEN_SOURCE_OBJ" "1";
}
}
if eval "contains_ignore_case(header.X-Mailer, 'PHPMailer')" {
let "t.HAS_PHPMAILER_SIG" "1";
}
if eval "header.X-Source:X-Source-Args:X-Source-Dir.exists" {
let "t.HAS_X_SOURCE" "1";
if eval "contains(header.X-Source-Args, '../')" {

View file

@ -6,17 +6,3 @@ if eval "env.iprev.result != ''" {
let "t.RDNS_NONE" "1";
}
}
# Lookup ASN
let "asn_lookup" "";
if eval "len(env.remote_ip.reverse) <= 15" {
let "asn_lookup" "env.remote_ip.reverse + '.origin.asn.cymru.com'";
} else {
let "asn_lookup" "env.remote_ip.reverse + '.origin.asn6.cymru.com'";
}
let "asn_lookup" "split(dns_query(asn_lookup, 'txt'), '|')";
let "asn" "asn_lookup[0]";
let "country" "asn_lookup[2]";
#eval "print('ASN: ' + asn + ' (' + country + ')')";

View file

@ -6,6 +6,6 @@ if eval "!is_empty(message_id)" {
eval "lookup('spamdb/id-insert', message_id)";
if eval "lookup('spam/options', 'AUTOLEARN_REPLIES')" {
eval "bayes_train('spamdb/bayes-train', thread_name(header.subject) + ' ' + body.to_text, false)";
eval "bayes_train('spamdb/token-insert', thread_name(header.subject) + ' ' + body.to_text, false)";
}
}

View file

@ -0,0 +1,73 @@
# Reputation tracking: builds a set of tokens (remote IP, sender address,
# sender domain and, when available, ASN), records the current `score` for
# each token and blends the tokens' stored reputation into `score`.
# `score` is read but never initialised here, so it must be defined before
# this script runs.

# Obtain sender address and domain
let "rep_from" "envelope.from";
let "rep_from_domain" "envfrom_domain_sld";
# Fall back to the From header values when the envelope sender is empty
if eval "is_empty(rep_from)" {
let "rep_from" "from_addr";
let "rep_from_domain" "from_domain_sld";
}
if eval "env.dmarc.result != 'pass'" {
# Do not penalize forged domains
# (the '_' prefix yields token ids distinct from the DMARC-passing ones)
let "rep_from" "'_' + rep_from";
let "rep_from_domain" "'_' + rep_from_domain";
}
# Lookup ASN
# A reversed address of at most 15 characters (the maximum length of a dotted
# IPv4 address) is queried under origin.asn.cymru.com, anything longer under
# origin.asn6.cymru.com.
let "asn" "";
if eval "len(env.remote_ip.reverse) <= 15" {
let "asn" "env.remote_ip.reverse + '.origin.asn.cymru.com'";
} else {
let "asn" "env.remote_ip.reverse + '.origin.asn6.cymru.com'";
}
# Keep only the first '|'-separated field of the TXT answer
let "asn" "split(dns_query(asn, 'txt'), '|')[0]";
# Generate reputation tokens
# The 'a:' (ASN) token is only added when the ASN field compares greater than 0
let "token_ids" "";
if eval "asn > 0" {
let "token_ids" "['i:' + env.remote_ip, 'f:' + rep_from, 'd:' + rep_from_domain, 'a:' + asn ]";
} else {
let "token_ids" "['i:' + env.remote_ip, 'f:' + rep_from, 'd:' + rep_from_domain ]";
}
# Walk the token list from last to first, accumulating the weighted reputation
let "i" "len(token_ids)";
let "reputation" "0.0";
while "i > 0" {
let "i" "i - 1";
let "token_id" "token_ids[i]";
# Lookup reputation
# (must happen before the insert below so the stored value excludes this message)
let "token_rep" "lookup_map('spamdb/reputation-lookup', token_id)";
# Update reputation
# Runs before the emptiness check, so tokens seen for the first time are recorded too
eval "lookup_map('spamdb/reputation-insert', [token_id, score])";
if eval "is_empty(token_rep)" {
continue;
}
# Assign weight
# (the four weights below add up to 1.0)
let "weight" "";
if eval "starts_with(token_id, 'f:')" {
# Sender address has 50% weight
let "weight" "0.5";
} elsif eval "starts_with(token_id, 'd:')" {
# Sender domain has 20% weight
let "weight" "0.2";
} elsif eval "starts_with(token_id, 'i:')" {
# IP has 20% weight
let "weight" "0.2";
} elsif eval "starts_with(token_id, 'a:')" {
# ASN has 10% weight
let "weight" "0.1";
} else {
continue;
}
# First lookup column divided by the second, scaled by the token weight
# (presumably stored score / hit count — confirm against the lookup query)
let "reputation" "reputation + (token_rep[0] / token_rep[1] * weight)";
}
# Adjust score using a 0.5 factor
# (moves score halfway towards the reputation; skipped unless reputation > 0)
if eval "reputation > 0" {
let "score" "score + (reputation - score) * 0.5";
}

View file

@ -1,6 +1,6 @@
# Check if the message was sent to a spam trap address
if eval "lookup('spam/trap-address', envelope.to)" {
eval "bayes_train('spamdb/bayes-train', body_and_subject, true)";
eval "bayes_train('spamdb/token-insert', body_and_subject, true)";
let "t.SPAM_TRAP" "1";
}

View file

@ -0,0 +1,37 @@
remote_ip 10.0.0.1
score 1.0
final_score 1.0
expect
From: user@domain.org
Test
<!-- NEXT TEST -->
remote_ip 10.0.0.1
score 2.0
final_score 1.5
expect
From: user@domain.org
Test
<!-- NEXT TEST -->
remote_ip 10.0.0.1
score 3.0
final_score 2.2525252525252526
expect
From: user@domain.org
Test
<!-- NEXT TEST -->
remote_ip 10.0.0.1
score -5.0
final_score -1.4949494949494948
expect
From: user@domain.org
Test

View file

@ -45,6 +45,7 @@ duplicate-expiry = "7d"
[directory."spamdb"]
type = "sql"
address = "sqlite://%PATH%/test_antispam.db?mode=rwc"
#address = "sqlite:///tmp/test_antispam.db?mode=rwc"
[directory."spamdb".pool]
max-connections = 10
@ -52,11 +53,17 @@ min-connections = 0
idle-timeout = "5m"
[directory."spamdb".lookup]
bayes-train = "INSERT INTO bayes_weights (h1, h2, ws, wh) VALUES (?, ?, ?, ?) ON CONFLICT(h1, h2) DO UPDATE SET ws = ws + excluded.ws, wh = wh + excluded.wh"
bayes-classify = "SELECT ws, wh FROM bayes_weights WHERE h1 = ? AND h2 = ?"
id-insert = "INSERT INTO id_timestamps (id, timestamp) VALUES (?, CURRENT_TIMESTAMP)"
token-insert = "INSERT INTO bayes_tokens (h1, h2, ws, wh) VALUES (?, ?, ?, ?)
ON CONFLICT(h1, h2)
DO UPDATE SET ws = ws + excluded.ws, wh = wh + excluded.wh"
token-lookup = "SELECT ws, wh FROM bayes_tokens WHERE h1 = ? AND h2 = ?"
id-insert = "INSERT INTO id_timestamps (id, last_hit) VALUES (?, CURRENT_TIMESTAMP)"
id-lookup = "SELECT 1 FROM id_timestamps WHERE id = ?"
id-cleanup = "DELETE FROM id_timestamps WHERE (strftime('%s', 'now') - strftime('%s', timestamp)) < ?"
# Compare elapsed seconds via strftime('%s', ...): subtracting the raw
# timestamp values would coerce both text operands to their leading integer
# (the year) in SQLite instead of producing an age in seconds.
# NOTE(review): '<' matches rows younger than the bound; confirm that this is
# the intended direction for a cleanup query.
id-cleanup = "DELETE FROM id_timestamps WHERE (strftime('%s', 'now') - strftime('%s', last_hit)) < ?"
reputation-insert = "INSERT INTO reputation (token, score, count, last_hit) VALUES (?, ?, 1, CURRENT_TIMESTAMP)
ON CONFLICT(token)
DO UPDATE SET score = (count + 1) * (excluded.score + 0.98 * score) / (0.98 * count + 1), count = count + 1, last_hit = CURRENT_TIMESTAMP"
reputation-lookup = "SELECT score, count FROM reputation WHERE token = ?"
[directory."spam"]
type = "memory"
@ -125,8 +132,8 @@ min-learns = 10
[sieve.scripts]
"#;
const CREATE_TABLES: &[&str; 2] = &[
"CREATE TABLE IF NOT EXISTS bayes_weights (
const CREATE_TABLES: &[&str; 3] = &[
"CREATE TABLE IF NOT EXISTS bayes_tokens (
h1 INTEGER NOT NULL,
h2 INTEGER NOT NULL,
ws INTEGER,
@ -134,8 +141,14 @@ wh INTEGER,
PRIMARY KEY (h1, h2)
)",
"CREATE TABLE IF NOT EXISTS id_timestamps (
id STRING PRIMARY KEY,
timestamp DATETIME NOT NULL
id STRING NOT NULL PRIMARY KEY,
last_hit TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)",
"CREATE TABLE IF NOT EXISTS reputation (
token STRING NOT NULL PRIMARY KEY,
score FLOAT NOT NULL DEFAULT '0',
count INT(11) NOT NULL DEFAULT '0',
last_hit TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)",
];
@ -170,6 +183,7 @@ async fn antispam() {
"replies_in",
"spamtrap",
"bayes_classify",
"reputation",
];
let mut core = SMTP::test();
let qr = core.init_test_queue("smtp_antispam_test");
@ -193,7 +207,15 @@ async fn antispam() {
.join("sieve");
let prelude = fs::read_to_string(base_path.join("prelude.sieve")).unwrap();
for test_name in tests {
let script = fs::read_to_string(base_path.join(format!("{test_name}.sieve"))).unwrap();
let mut script = fs::read_to_string(base_path.join(format!("{test_name}.sieve"))).unwrap();
if test_name == "reputation" {
script = "let \"score\" \"env.score\";\n\n".to_string()
+ script.as_str()
+ concat!(
"\n\nif eval \"score != env.final_score\" ",
"{let \"t.INVALID_SCORE\" \"score\";}\n"
);
}
config.push_str(&format!("{test_name} = '''{prelude}\n{script}\n'''\n"));
}
@ -208,7 +230,7 @@ async fn antispam() {
// Create tables
let sdb = ctx.directory.directories.get("spamdb").unwrap();
for query in CREATE_TABLES {
sdb.query(query, &[]).await.unwrap();
sdb.query(query, &[]).await.expect(query);
}
// Add mock DNS entries
@ -376,6 +398,10 @@ async fn antispam() {
.unwrap_or((v.to_lowercase(), Variable::Integer(1)))
}));
}
"score" | "final_score" => {
variables
.insert(param.to_string(), value.parse::<f64>().unwrap().into());
}
_ if param.starts_with("param.") | param.starts_with("tls.") => {
variables.insert(param.to_string(), value.to_string().into());
}