Port Spam filter to Rust - part 3

mdecimus 2024-12-09 17:49:11 +01:00
parent 4453dc8f3d
commit f0d84c8e68
34 changed files with 1791 additions and 653 deletions

Cargo.lock generated
View file

@ -6447,16 +6447,21 @@ name = "spam-filter"
version = "0.10.7"
dependencies = [
"common",
"decancer",
"hyper 1.5.1",
"idna 1.0.3",
"mail-auth",
"mail-builder",
"mail-parser",
"mail-send",
"nlp",
"psl",
"reqwest 0.12.9",
"smtp-proto",
"store",
"tokio",
"trc",
"unicode-security",
"utils",
]

View file

@ -4,14 +4,65 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::time::Duration;
use utils::{config::Config, glob::GlobSet};
use super::{if_block::IfBlock, Expression};
#[derive(Debug, Clone, Default)]
pub struct SpamFilterConfig {
pub list_dmarc_allow: GlobSet,
pub list_spf_dkim_allow: GlobSet,
pub list_freemail_providers: GlobSet,
pub list_disposable_providers: GlobSet,
pub list_trusted_domains: GlobSet,
pub list_url_redirectors: GlobSet,
pub remote_lists: Vec<RemoteListConfig>,
pub dnsbls: Vec<DnsblConfig>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Target {
Url,
Domain,
Email,
Ip,
Ipv4,
Ipv6,
}
#[derive(Debug, Clone)]
pub struct RemoteListConfig {
pub id: String,
pub url: String,
pub retry: Duration, // 1 hour
pub refresh: Duration, // 12h openphish, 6h phishtank
pub timeout: Duration, // 10s
pub max_size: usize, // 10MB
pub max_entries: usize, // 100000
pub max_entry_size: usize, // 256
pub format: RemoteListFormat,
pub target: Target,
pub tag: String,
}
#[derive(Debug, Clone)]
pub struct DnsblConfig {
pub id: String,
pub zone: Expression,
pub target: Target,
pub tags: IfBlock,
}
#[derive(Debug, Clone)]
pub enum RemoteListFormat {
List,
Csv {
column: u32,
separator: char,
skip_first: bool,
},
}
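// Illustrative sketch only: one way such a remote list could be populated, using
// the defaults noted in the field comments above. The id, url and tag values are
// assumptions for the example, not configuration shipped by this commit.
fn _example_remote_list() -> RemoteListConfig {
    RemoteListConfig {
        id: "openphish".to_string(),
        url: "https://example.org/feed.txt.gz".to_string(),
        retry: Duration::from_secs(3600),        // retry failed downloads after 1 hour
        refresh: Duration::from_secs(12 * 3600), // refresh the feed every 12 hours
        timeout: Duration::from_secs(10),        // 10 second HTTP timeout
        max_size: 10 * 1024 * 1024,              // 10MB download limit
        max_entries: 100_000,                    // keep at most 100,000 entries
        max_entry_size: 256,                     // skip entries longer than 256 bytes
        format: RemoteListFormat::List,          // one entry per line
        target: Target::Url,
        tag: "URL_PHISHING".to_string(),
    }
}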
impl SpamFilterConfig {

View file

@ -680,3 +680,21 @@ impl<'x> TryFrom<Variable<'x>> for StatusCode {
}
}
}
impl<'x> ResolveVariable for &'x str {
fn resolve_variable(&self, variable: u32) -> Variable<'x> {
match variable {
0 => Variable::String((*self).into()),
_ => Variable::Integer(0),
}
}
}
impl ResolveVariable for Vec<String> {
fn resolve_variable(&self, variable: u32) -> Variable<'_> {
match variable {
0 => Variable::Array(self.iter().map(|v| Variable::String(v.into())).collect()),
_ => Variable::Integer(0),
}
}
}
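// Behaviour sketch (illustrative): variable 0 resolves to the value itself and
// any other index falls back to Integer(0), e.g.
//
//   assert!(matches!("spam".resolve_variable(0), Variable::String(_)));
//   assert!(matches!("spam".resolve_variable(1), Variable::Integer(0)));
//   assert!(matches!(vec!["a".to_string()].resolve_variable(0), Variable::Array(_)));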

View file

@ -4,15 +4,15 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
mod array;
pub mod array;
mod email;
mod header;
pub mod html;
mod image;
mod misc;
pub mod image;
pub mod misc;
pub mod text;
mod unicode;
mod url;
pub mod unicode;
pub mod url;
use sieve::{runtime::Variable, FunctionMap};

View file

@ -43,7 +43,7 @@ pub fn fn_has_obscured<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
.into()
}
trait CharUtils {
pub trait CharUtils {
fn is_zwsp(&self) -> bool;
fn is_obscured(&self) -> bool;
}

View file

@ -4,19 +4,10 @@
* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
*/
use std::{
collections::HashSet,
io::{BufRead, BufReader},
time::{Duration, Instant},
};
use mail_auth::flate2;
use sieve::{runtime::Variable, FunctionMap};
use store::{Deserialize, Value};
use crate::{
config::scripts::RemoteList, scripts::into_sieve_value, HttpLimitResponse, USER_AGENT,
};
use crate::scripts::into_sieve_value;
use super::PluginContext;
@ -32,10 +23,6 @@ pub fn register_set(plugin_id: u32, fnc_map: &mut FunctionMap) {
fnc_map.set_external_function("key_set", plugin_id, 4);
}
pub fn register_remote(plugin_id: u32, fnc_map: &mut FunctionMap) {
fnc_map.set_external_function("key_exists_http", plugin_id, 3);
}
pub fn register_local_domain(plugin_id: u32, fnc_map: &mut FunctionMap) {
fnc_map.set_external_function("is_local_domain", plugin_id, 2);
}
@ -118,242 +105,6 @@ pub async fn exec_set(ctx: PluginContext<'_>) -> trc::Result<Variable> {
.map(|_| true.into())
}
pub async fn exec_remote(ctx: PluginContext<'_>) -> trc::Result<Variable> {
match exec_remote_(&ctx).await {
Ok(result) => Ok(result),
Err(err) => {
// Something went wrong, try again in one hour
const RETRY: Duration = Duration::from_secs(3600);
let mut _lock = ctx.server.inner.data.remote_lists.write();
let list = _lock
.entry(ctx.arguments[0].to_string().to_string())
.or_insert_with(|| RemoteList {
entries: HashSet::new(),
expires: Instant::now(),
});
if list.expires > Instant::now() {
Ok(list
.entries
.contains(ctx.arguments[1].to_string().as_ref())
.into())
} else {
list.expires = Instant::now() + RETRY;
Err(err)
}
}
}
}
const MAX_RESOURCE_SIZE: usize = 10 * 1024 * 1024;
async fn exec_remote_(ctx: &PluginContext<'_>) -> trc::Result<Variable> {
let resource = ctx.arguments[0].to_string();
let item = ctx.arguments[1].to_string();
#[cfg(feature = "test_mode")]
{
if (resource.contains("open") && item.contains("open"))
|| (resource.contains("tank") && item.contains("tank"))
{
return Ok(true.into());
}
}
if resource.is_empty() || item.is_empty() {
return Ok(false.into());
}
const TIMEOUT: Duration = Duration::from_secs(45);
const MAX_ENTRY_SIZE: usize = 256;
const MAX_ENTRIES: usize = 100000;
match ctx
.server
.inner
.data
.remote_lists
.read()
.get(resource.as_ref())
{
Some(remote_list) if remote_list.expires < Instant::now() => {
return Ok(remote_list.entries.contains(item.as_ref()).into())
}
_ => {}
}
enum Format {
List,
Csv {
column: u32,
separator: char,
skip_first: bool,
},
}
// Obtain parameters
let mut format = Format::List;
let mut expires = Duration::from_secs(12 * 3600);
if let Some(arr) = ctx.arguments[2].as_array() {
// Obtain expiration
match arr.first() {
Some(Variable::Integer(v)) if *v > 0 => {
expires = Duration::from_secs(*v as u64);
}
Some(Variable::Float(v)) if *v > 0.0 => {
expires = Duration::from_secs(*v as u64);
}
_ => (),
}
// Obtain list type
if matches!(arr.get(1), Some(Variable::String(list_type)) if list_type.eq_ignore_ascii_case("csv"))
{
format = Format::Csv {
column: arr.get(2).map(|v| v.to_integer()).unwrap_or_default() as u32,
separator: arr
.get(3)
.and_then(|v| v.to_string().chars().next())
.unwrap_or(','),
skip_first: arr.get(4).map_or(false, |v| v.to_bool()),
};
}
}
let response = reqwest::Client::builder()
.timeout(TIMEOUT)
.user_agent(USER_AGENT)
.build()
.unwrap_or_default()
.get(resource.as_ref())
.send()
.await
.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.ctx(trc::Key::Url, resource.to_string())
.details("Failed to build request")
})?;
if response.status().is_success() {
let bytes = response
.bytes_with_limit(MAX_RESOURCE_SIZE)
.await
.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.ctx(trc::Key::Url, resource.to_string())
.details("Failed to fetch resource")
})?
.ok_or_else(|| {
trc::SieveEvent::RuntimeError
.into_err()
.ctx(trc::Key::Url, resource.to_string())
.details("Resource is too large")
})?;
let reader: Box<dyn std::io::Read> = if resource.ends_with(".gz") {
Box::new(flate2::read::GzDecoder::new(&bytes[..]))
} else {
Box::new(&bytes[..])
};
// Lock remote list for writing
let mut _lock = ctx.server.inner.data.remote_lists.write();
let list = _lock
.entry(resource.to_string())
.or_insert_with(|| RemoteList {
entries: HashSet::new(),
expires: Instant::now(),
});
// Make sure that the list is still expired
if list.expires > Instant::now() {
return Ok(list.entries.contains(item.as_ref()).into());
}
for (pos, line) in BufReader::new(reader).lines().enumerate() {
let line_ = line.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.ctx(trc::Key::Url, resource.to_string())
.details("Failed to read line")
})?;
// Clear list once the first entry has been successfully fetched, decompressed and UTF8-decoded
if pos == 0 {
list.entries.clear();
}
match &format {
Format::List => {
let line = line_.trim();
if !line.is_empty() {
list.entries.insert(line.to_string());
}
}
Format::Csv {
column,
separator,
skip_first,
} if pos > 0 || !*skip_first => {
let mut in_quote = false;
let mut col_num = 0;
let mut entry = String::new();
for ch in line_.chars() {
if ch != '"' {
if ch == *separator && !in_quote {
if col_num == *column {
break;
} else {
col_num += 1;
}
} else if col_num == *column {
entry.push(ch);
if entry.len() > MAX_ENTRY_SIZE {
break;
}
}
} else {
in_quote = !in_quote;
}
}
if !entry.is_empty() {
list.entries.insert(entry);
}
}
_ => (),
}
if list.entries.len() == MAX_ENTRIES {
break;
}
}
trc::event!(
Spam(trc::SpamEvent::ListUpdated),
Url = resource.as_ref().to_string(),
Total = list.entries.len(),
);
// Update expiration
list.expires = Instant::now() + expires;
Ok(list.entries.contains(item.as_ref()).into())
} else {
trc::bail!(trc::SieveEvent::RuntimeError
.into_err()
.ctx(trc::Key::Code, response.status().as_u16())
.ctx(trc::Key::Url, resource.to_string())
.details("Failed to fetch remote list"));
}
}
pub async fn exec_local_domain(ctx: PluginContext<'_>) -> trc::Result<Variable> {
let domain = ctx.arguments[1].to_string();

View file

@ -31,13 +31,12 @@ pub struct PluginContext<'x> {
pub arguments: Vec<Variable>,
}
const PLUGINS_REGISTER: [RegisterPluginFnc; 14] = [
const PLUGINS_REGISTER: [RegisterPluginFnc; 13] = [
query::register,
exec::register,
lookup::register,
lookup::register_get,
lookup::register_set,
lookup::register_remote,
lookup::register_local_domain,
dns::register,
dns::register_exists,
@ -86,15 +85,14 @@ impl Core {
2 => lookup::exec(ctx).await,
3 => lookup::exec_get(ctx).await,
4 => lookup::exec_set(ctx).await,
5 => lookup::exec_remote(ctx).await,
6 => lookup::exec_local_domain(ctx).await,
7 => dns::exec(ctx).await,
8 => dns::exec_exists(ctx).await,
9 => http::exec_header(ctx).await,
10 => headers::exec(ctx),
11 => text::exec_tokenize(ctx),
12 => text::exec_domain_part(ctx),
13 => llm_prompt::exec(ctx).await,
5 => lookup::exec_local_domain(ctx).await,
6 => dns::exec(ctx).await,
7 => dns::exec_exists(ctx).await,
8 => http::exec_header(ctx).await,
9 => headers::exec(ctx),
10 => text::exec_tokenize(ctx),
11 => text::exec_domain_part(ctx),
12 => llm_prompt::exec(ctx).await,
_ => unreachable!(),
};

View file

@ -727,6 +727,40 @@ impl<T> TokenType<T> {
}
}
impl<T: AsRef<str>> TokenType<T> {
pub fn hostname(&self) -> Option<&str> {
match self {
TokenType::Url(url) => url.as_ref().split_once("://").map(|(_, host)| {
host.split_once('/')
.map_or(host, |(h, _)| h.split_once(':').map_or(h, |(h, _)| h))
}),
TokenType::UrlNoScheme(url) => {
let url = url.as_ref();
url.split_once('/').map_or(url, |(host, _)| host).into()
}
TokenType::Email(email) => email.as_ref().rsplit_once('@').map(|(_, domain)| domain),
_ => None,
}
}
pub fn hostname_sld(&self) -> Option<&str> {
self.hostname().and_then(|host| psl::domain_str(host))
}
pub fn url_lowercase(&self, with_scheme_only: bool) -> Option<String> {
match self {
TokenType::Url(url) => url.as_ref().trim().to_lowercase().into(),
TokenType::UrlNoScheme(url) if !with_scheme_only => {
let url = url.as_ref();
format!("https://{}", url.trim().to_lowercase())
.to_lowercase()
.into()
}
_ => None,
}
}
}
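// Worked examples (illustrative, traced from the helpers above):
//
//   TokenType::Url("https://example.com/login").hostname()   == Some("example.com")
//   TokenType::UrlNoScheme("example.org/path").hostname()    == Some("example.org")
//   TokenType::Email("user@example.net").hostname()          == Some("example.net")
//   TokenType::Url("HTTPS://Example.com/A").url_lowercase(true)
//                                 == Some("https://example.com/a".to_string())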
#[cfg(test)]
mod test {

View file

@ -16,6 +16,11 @@ mail-builder = { version = "0.3", features = ["ludicrous_mode"] }
mail-auth = { version = "0.5" }
mail-send = { version = "0.4", default-features = false, features = ["cram-md5", "ring", "tls12"] }
psl = "2"
hyper = { version = "1.0.1", features = ["server", "http1", "http2"] }
idna = "1.0"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-webpki-roots", "http2", "stream"]}
decancer = "3.0.1"
unicode-security = "0.1.0"
[features]
test_mode = []

View file

@ -0,0 +1,107 @@
use std::future::Future;
use common::Server;
use mail_parser::MimeHeaders;
use crate::SpamFilterContext;
pub trait SpamFilterAnalyzeBounce: Sync + Send {
fn spam_filter_analyze_bounce(
&self,
ctx: &mut SpamFilterContext<'_>,
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeBounce for Server {
async fn spam_filter_analyze_bounce(&self, ctx: &mut SpamFilterContext<'_>) {
let mut has_delivery_word = false;
let mut has_undelivery_word = false;
let mut has_failure_word = false;
let mut has_report_word = false;
let mut has_not_word = false;
for word in ctx.output.subject.split_whitespace() {
match word {
"delivery" | "delivered" => {
has_delivery_word = true;
}
"undeliverable" | "undelivered" => {
has_undelivery_word = true;
}
"returned" | "failed" | "failure" | "warning" => {
has_failure_word = true;
}
"notice" | "report" | "status" | "mail" => {
has_report_word = true;
}
"couldn't" | "hasn't" | "not" => {
has_not_word = true;
}
_ => {}
}
}
// Subject contains words or phrases typical for DSN
let has_bounce_words = has_undelivery_word
|| (has_delivery_word && (has_failure_word || has_not_word))
|| (has_report_word && has_failure_word);
if has_bounce_words {
ctx.result.add_tag("SUBJ_BOUNCE_WORDS");
}
if !ctx.input.env_from.is_empty() {
return;
}
match ctx.input.message.content_type() {
Some(ct)
if ct.ctype().eq_ignore_ascii_case("multipart")
&& ct
.subtype()
.map_or(false, |s| s.eq_ignore_ascii_case("report"))
&& ct.attribute("report-type").map_or(false, |a| {
a.eq_ignore_ascii_case("delivery-status")
|| a.eq_ignore_ascii_case("disposition-notification")
}) =>
{
// Message is a DSN
ctx.result.add_tag("BOUNCE");
}
_ => {
let from_local = &ctx.output.from.email.local_part;
if from_local.contains("mdaemon")
&& ctx.input.message.header("X-MDDSN-Message").is_some()
{
// Message is a DSN
ctx.result.add_tag("BOUNCE");
} else if from_local.contains("postmaster") || from_local.contains("mailer-daemon")
{
if has_bounce_words {
ctx.result.add_tag("BOUNCE");
} else {
for part in &ctx.input.message.parts {
if let Some(ct) = part.content_type() {
let st = ct.subtype().unwrap_or_default();
let ct = ct.ctype();
if (ct.eq_ignore_ascii_case("message")
|| ct.eq_ignore_ascii_case("text"))
&& (st.eq_ignore_ascii_case("rfc822-headers")
|| st.eq_ignore_ascii_case("rfc822"))
{
// Message is a DSN
ctx.result.add_tag("BOUNCE");
break;
}
}
}
}
}
}
}
}
}
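// For example, a lowercased subject of "delivery failed" sets both the delivery
// and failure flags and therefore tags SUBJ_BOUNCE_WORDS, while "delivery report"
// alone does not (report words only count in combination with a failure word).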

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use store::write::now;
use crate::SpamFilterContext;
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeDate: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeDate for Core {
impl SpamFilterAnalyzeDate for Server {
async fn spam_filter_analyze_date(&self, ctx: &mut SpamFilterContext<'_>) {
if let Some(date) = ctx.input.message.date() {
let date = date.to_timestamp();

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use mail_auth::{
common::verify::VerifySignature, dmarc::Policy, DkimResult, DmarcResult, SpfResult,
};
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeDmarc: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeDmarc for Core {
impl SpamFilterAnalyzeDmarc for Server {
async fn spam_filter_analyze_dmarc(&self, ctx: &mut SpamFilterContext<'_>) {
ctx.result
.add_tag(match ctx.input.spf_mail_from_result.result() {
@ -75,6 +75,7 @@ impl SpamFilterAnalyzeDmarc for Core {
}
if self
.core
.spam
.list_dmarc_allow
.contains(&ctx.output.from.email.domain_part.fqdn)
@ -85,6 +86,7 @@ impl SpamFilterAnalyzeDmarc for Core {
ctx.result.add_tag("BLOCKLIST_DMARC");
}
} else if self
.core
.spam
.list_spf_dkim_allow
.contains(&ctx.output.from.email.domain_part.fqdn)

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use crate::SpamFilterContext;
@ -11,7 +11,7 @@ pub trait SpamFilterAnalyzeEhlo: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeEhlo for Core {
impl SpamFilterAnalyzeEhlo for Server {
async fn spam_filter_analyze_ehlo(&self, ctx: &mut SpamFilterContext<'_>) {
if let Some(ehlo_ip) = ctx.output.ehlo_host.ip {
// HELO host is a bare IP address
@ -34,8 +34,8 @@ impl SpamFilterAnalyzeEhlo for Core {
if matches!(
(
self.dns_exists_ip(&ctx.output.ehlo_host.fqdn).await,
self.dns_exists_mx(&ctx.output.ehlo_host.fqdn).await
self.core.dns_exists_ip(&ctx.output.ehlo_host.fqdn).await,
self.core.dns_exists_mx(&ctx.output.ehlo_host.fqdn).await
),
(Ok(false), Ok(false))
) {

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use mail_parser::HeaderName;
use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
@ -26,7 +26,7 @@ const SERVICE_ACCOUNTS: [&str; 9] = [
];
pub(crate) const TITLES: [&str; 7] = ["mr. ", "mrs. ", "ms. ", "dr. ", "prof. ", "rev. ", "hon. "];
impl SpamFilterAnalyzeFrom for Core {
impl SpamFilterAnalyzeFrom for Server {
async fn spam_filter_analyze_from(&self, ctx: &mut SpamFilterContext<'_>) {
let mut from_count = 0;
let mut from_raw = b"".as_slice();
@ -96,12 +96,14 @@ impl SpamFilterAnalyzeFrom for Core {
is_www_dot_domain = true;
}
if self
.core
.spam
.list_freemail_providers
.contains(from_addr.domain_part.sld.as_deref().unwrap_or_default())
{
ctx.result.add_tag("FREEMAIL_FROM");
} else if self
.core
.spam
.list_disposable_providers
.contains(from_addr.domain_part.sld.as_deref().unwrap_or_default())
@ -238,7 +240,7 @@ impl SpamFilterAnalyzeFrom for Core {
if SERVICE_ACCOUNTS.contains(&ctx.output.env_from_addr.local_part.as_str()) {
ctx.result.add_tag("ENVFROM_SERVICE_ACCT");
}
if self.spam.list_freemail_providers.contains(
if self.core.spam.list_freemail_providers.contains(
ctx.output
.env_from_addr
.domain_part
@ -247,7 +249,7 @@ impl SpamFilterAnalyzeFrom for Core {
.unwrap_or_default(),
) {
ctx.result.add_tag("FREEMAIL_ENVFROM");
} else if self.spam.list_disposable_providers.contains(
} else if self.core.spam.list_disposable_providers.contains(
ctx.output
.env_from_addr
.domain_part
@ -261,9 +263,11 @@ impl SpamFilterAnalyzeFrom for Core {
// MAIL FROM domain does not resolve to A or MX records
if matches!(
(
self.dns_exists_ip(&ctx.output.env_from_addr.domain_part.fqdn)
self.core
.dns_exists_ip(&ctx.output.env_from_addr.domain_part.fqdn)
.await,
self.dns_exists_mx(&ctx.output.env_from_addr.domain_part.fqdn)
self.core
.dns_exists_mx(&ctx.output.env_from_addr.domain_part.fqdn)
.await
),
(Ok(false), Ok(false))

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use mail_parser::HeaderName;
use store::ahash::AHashSet;
@ -13,7 +13,7 @@ pub trait SpamFilterAnalyzeHeaders: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeHeaders for Core {
impl SpamFilterAnalyzeHeaders for Server {
async fn spam_filter_analyze_headers(&self, ctx: &mut SpamFilterContext<'_>) {
let mut list_score = 0.0;
let mut unique_headers = AHashSet::new();

View file

@ -1,9 +1,13 @@
use common::Core;
use mail_parser::{parsers::fields::thread::thread_name, HeaderName};
use std::collections::HashSet;
use common::Server;
use mail_parser::{parsers::fields::thread::thread_name, HeaderName, PartType};
use nlp::tokenizers::types::{TokenType, TypesTokenizer};
use crate::{
modules::html::{html_to_tokens, HtmlToken, HREF, SRC},
Email, Hostname, Recipient, SpamFilterContext, SpamFilterInput, SpamFilterOutput,
SpamFilterResult,
SpamFilterResult, TextPart,
};
pub trait SpamFilterInit {
@ -12,9 +16,9 @@ pub trait SpamFilterInit {
const POSTMASTER_ADDRESSES: [&str; 3] = ["postmaster", "mailer-daemon", "root"];
impl SpamFilterInit for Core {
impl SpamFilterInit for Server {
fn spam_filter_init<'x>(&self, input: SpamFilterInput<'x>) -> SpamFilterContext<'x> {
let mut subject = String::new();
let mut subject = "";
let mut from = None;
let mut reply_to = None;
let mut recipients_to = Vec::new();
@ -67,7 +71,7 @@ impl SpamFilterInit for Core {
});
}
HeaderName::Subject => {
subject = header.value().as_text().unwrap_or_default().to_lowercase();
subject = header.value().as_text().unwrap_or_default();
}
HeaderName::From => {
from = header.value().as_address().and_then(|addrs| addrs.first());
@ -76,6 +80,143 @@ impl SpamFilterInit for Core {
}
}
// Tokenize subject
let subject_tokens = TypesTokenizer::new(subject)
.tokenize_numbers(false)
.tokenize_urls(true)
.tokenize_urls_without_scheme(true)
.tokenize_emails(true)
.map(|t| t.word)
.collect::<Vec<_>>();
let subject = subject.to_lowercase();
// Tokenize and convert text parts
let mut text_parts = Vec::new();
let mut text_parts_nested = Vec::new();
let mut message_stack = Vec::new();
let mut message_iter = input.message.parts.iter();
loop {
while let Some(part) = message_iter.next() {
let is_main_message = message_stack.is_empty();
let text_part = match &part.body {
PartType::Text(text) => TextPart::Plain {
text_body: text.as_ref(),
tokens: TypesTokenizer::new(text.as_ref())
.tokenize_numbers(false)
.tokenize_urls(true)
.tokenize_urls_without_scheme(true)
.tokenize_emails(true)
.map(|t| t.word)
.collect::<Vec<_>>(),
},
PartType::Html(html) => {
let html_tokens = html_to_tokens(html);
let text_body_len = html_tokens
.iter()
.filter_map(|t| match t {
HtmlToken::Text { text } => text.len().into(),
_ => None,
})
.sum();
let mut text_body = String::with_capacity(text_body_len);
for token in &html_tokens {
if let HtmlToken::Text { text } = token {
if !text_body.is_empty()
&& !text_body.ends_with(' ')
&& text.starts_with(' ')
{
text_body.push(' ');
}
text_body.push_str(text)
}
}
TextPart::Html {
tokens: TypesTokenizer::new(&text_body)
.tokenize_numbers(false)
.tokenize_urls(true)
.tokenize_urls_without_scheme(true)
.tokenize_emails(true)
.map(|t| match t.word {
TokenType::Alphabetic(s) => {
TokenType::Alphabetic(s.to_string())
}
TokenType::Alphanumeric(s) => {
TokenType::Alphanumeric(s.to_string())
}
TokenType::Integer(s) => TokenType::Integer(s.to_string()),
TokenType::Other(s) => TokenType::Other(s),
TokenType::Punctuation(s) => TokenType::Punctuation(s),
TokenType::Space => TokenType::Space,
TokenType::Url(s) => TokenType::Url(s.to_string()),
TokenType::UrlNoScheme(s) => {
TokenType::UrlNoScheme(s.to_string())
}
TokenType::UrlNoHost(s) => TokenType::UrlNoHost(s.to_string()),
TokenType::IpAddr(s) => TokenType::IpAddr(s.to_string()),
TokenType::Email(s) => TokenType::Email(s.to_string()),
TokenType::Float(s) => TokenType::Float(s.to_string()),
})
.collect::<Vec<_>>(),
html_tokens,
text_body,
}
}
PartType::Message(message) => {
message_stack.push(message_iter);
message_iter = message.parts.iter();
TextPart::None
}
_ => TextPart::None,
};
if is_main_message {
text_parts.push(text_part);
} else if !matches!(text_part, TextPart::None) {
text_parts_nested.push(text_part);
}
}
if let Some(iter) = message_stack.pop() {
message_iter = iter;
} else {
break;
}
}
text_parts.extend(text_parts_nested);
// Extract URLs
let mut urls: HashSet<String> =
HashSet::from_iter(subject_tokens.iter().filter_map(|t| t.url_lowercase(false)));
for part in &text_parts {
match part {
TextPart::Plain { tokens, .. } => {
urls.extend(tokens.iter().filter_map(|t| t.url_lowercase(false)));
}
TextPart::Html {
html_tokens,
tokens,
..
} => {
for token in html_tokens {
if let HtmlToken::StartTag { attributes, .. } = token {
for (attr, value) in attributes {
match value {
Some(value) if [HREF, SRC].contains(attr) => {
urls.insert(value.trim().to_lowercase());
}
_ => {}
}
}
}
}
urls.extend(tokens.iter().filter_map(|t| t.url_lowercase(false)));
}
TextPart::None => {}
}
}
let env_from_addr = Email::new(input.env_from);
SpamFilterContext {
output: SpamFilterOutput {
@ -101,9 +242,12 @@ impl SpamFilterInit for Core {
reply_to,
subject_thread: thread_name(&subject).to_string(),
subject,
subject_tokens,
recipients_to,
recipients_cc,
recipients_bcc,
text_parts,
urls,
},
input,
result: SpamFilterResult {
@ -117,7 +261,7 @@ impl SpamFilterInit for Core {
use std::future::Future;
use common::Core;
use common::Server;
use crate::SpamFilterContext;
@ -128,7 +272,7 @@ pub trait SpamFilterAnalyze!: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyze! for Core {
impl SpamFilterAnalyze! for Server {
async fn spam_filter_analyze_*(&self, ctx: &mut SpamFilterContext<'_>) {
todo!()
}

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use mail_auth::IprevResult;
use crate::SpamFilterContext;
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeIpRev: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeIpRev for Core {
impl SpamFilterAnalyzeIpRev for Server {
async fn spam_filter_analyze_iprev(&self, ctx: &mut SpamFilterContext<'_>) {
match &ctx.input.iprev_result.result {
IprevResult::TempError(_) => ctx.result.add_tag("RDNS_DNSFAIL"),

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use mail_parser::HeaderName;
use crate::{Hostname, SpamFilterContext};
@ -12,7 +12,7 @@ pub trait SpamFilterAnalyzeMid: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeMid for Core {
impl SpamFilterAnalyzeMid for Server {
async fn spam_filter_analyze_message_id(&self, ctx: &mut SpamFilterContext<'_>) {
let mid_raw = ctx
.input

View file

@ -4,6 +4,7 @@ use mail_parser::{parsers::MessageStream, Header};
use crate::{Recipient, SpamFilterInput, SpamFilterOutput, SpamFilterResult};
pub mod bounce;
pub mod date;
pub mod dmarc;
pub mod ehlo;
@ -12,8 +13,11 @@ pub mod headers;
pub mod init;
pub mod iprev;
pub mod messageid;
pub mod received;
pub mod recipient;
pub mod replyto;
pub mod subject;
pub mod url;
impl SpamFilterInput<'_> {
pub fn header_as_address(&self, header: &Header<'_>) -> Option<Cow<'_, str>> {
@ -27,7 +31,7 @@ impl SpamFilterInput<'_> {
}
}
impl SpamFilterOutput {
impl SpamFilterOutput<'_> {
pub fn all_recipients(&self) -> impl Iterator<Item = &Recipient> {
self.recipients_to
.iter()

View file

@ -0,0 +1,146 @@
use std::future::Future;
use common::Server;
use mail_parser::{HeaderName, Host};
use crate::SpamFilterContext;
pub trait SpamFilterAnalyzeReceived: Sync + Send {
fn spam_filter_analyze_received(
&self,
ctx: &mut SpamFilterContext<'_>,
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeReceived for Server {
async fn spam_filter_analyze_received(&self, ctx: &mut SpamFilterContext<'_>) {
let mut rcvd_count = 0;
let mut rcvd_from_ip = 0;
let mut tls_count = 0;
let mut has_ua = false;
for header in ctx.input.message.headers() {
match &header.name {
HeaderName::Received => {
if !ctx
.input
.message
.raw_message()
.get(header.offset_start..header.offset_end)
.unwrap_or_default()
.is_ascii()
{
// Received headers have non-ASCII characters
ctx.result.add_tag("RCVD_ILLEGAL_CHARS");
}
if let Some(received) = header.value().as_received() {
let helo_domain = received.helo();
let ip_rev = received.from_iprev();
if matches!(&helo_domain, Some(Host::Name(hostname)) if hostname.eq_ignore_ascii_case("user"))
{
// HELO domain is "user"
ctx.result.add_tag("RCVD_HELO_USER");
} else if let (Some(Host::Name(helo_domain)), Some(ip_rev)) =
(helo_domain, ip_rev)
{
if helo_domain.to_lowercase() != ip_rev.to_lowercase() {
// HELO domain does not match PTR record
ctx.result.add_tag("FORGED_RCVD_TRAIL");
}
}
if let Some(delivered_for) = received.for_().map(|s| s.to_lowercase()) {
if ctx
.output
.all_recipients()
.any(|r| r.email.address == delivered_for)
{
// Recipient appears on Received trail
ctx.result.add_tag("PREVIOUSLY_DELIVERED");
}
}
if received.from_ip().is_some() {
// Received from an IP address rather than a FQDN
rcvd_from_ip += 1;
}
if received.tls_version().is_some() {
// Received with TLS
tls_count += 1;
}
} else {
// Received header is not RFC 5322 compliant
ctx.result.add_tag("RCVD_UNPARSABLE");
}
rcvd_count += 1;
}
HeaderName::Other(name) => {
if !has_ua
&& (name.eq_ignore_ascii_case("User-Agent")
|| name.eq_ignore_ascii_case("X-Mailer"))
{
has_ua = true;
}
}
_ => {}
}
}
if rcvd_from_ip >= 2 || (rcvd_from_ip == 1 && ctx.output.ehlo_host.ip.is_some()) {
// Has two or more Received headers containing bare IP addresses
ctx.result.add_tag("RCVD_DOUBLE_IP_SPAM");
}
// Received from an authenticated user
if !ctx.input.authenticated_as.is_empty() {
ctx.result.add_tag("RCVD_VIA_SMTP_AUTH");
}
// Received with TLS checks
if rcvd_count > 0 && rcvd_count == tls_count && !ctx.input.tls_version.is_empty() {
ctx.result.add_tag("RCVD_TLS_ALL");
} else if !ctx.input.tls_version.is_empty() {
ctx.result.add_tag("RCVD_TLS_LAST");
} else {
ctx.result.add_tag("RCVD_NO_TLS_LAST");
}
match rcvd_count {
0 => {
ctx.result.add_tag("RCVD_COUNT_ZERO");
// One received header in a message (currently zero
// but one header will be added later by the MTA)
ctx.result.add_tag("ONCE_RECEIVED");
// Message has been directly delivered from MUA to local MX
if has_ua {
ctx.result.add_tag("DIRECT_TO_MX");
}
}
1 => {
ctx.result.add_tag("RCVD_COUNT_ONE");
}
2 => {
ctx.result.add_tag("RCVD_COUNT_TWO");
}
3 => {
ctx.result.add_tag("RCVD_COUNT_THREE");
}
4 | 5 => {
ctx.result.add_tag("RCVD_COUNT_FIVE");
}
6 | 7 => {
ctx.result.add_tag("RCVD_COUNT_SEVEN");
}
8..=12 => {
ctx.result.add_tag("RCVD_COUNT_TWELVE");
}
_ => {}
}
}
}

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::{scripts::functions::text::levenshtein_distance, Core};
use common::{scripts::functions::text::levenshtein_distance, Server};
use mail_parser::HeaderName;
use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
use store::ahash::HashSet;
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeRecipient: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeRecipient for Core {
impl SpamFilterAnalyzeRecipient for Server {
async fn spam_filter_analyze_recipient(&self, ctx: &mut SpamFilterContext<'_>) {
let mut to_raw = b"".as_slice();
let mut cc_raw = b"".as_slice();
@ -191,7 +191,7 @@ impl SpamFilterAnalyzeRecipient for Core {
// Check for freemail or disposable domains
if let Some(domain) = rcpt.email.domain_part.sld.as_deref() {
if self.spam.list_freemail_providers.contains(domain) {
if self.core.spam.list_freemail_providers.contains(domain) {
if ctx
.output
.recipients_to
@ -202,7 +202,7 @@ impl SpamFilterAnalyzeRecipient for Core {
} else {
ctx.result.add_tag("FREEMAIL_CC");
}
} else if self.spam.list_disposable_providers.contains(domain) {
} else if self.core.spam.list_disposable_providers.contains(domain) {
if ctx
.output
.recipients_to

View file

@ -1,6 +1,6 @@
use std::future::Future;
use common::Core;
use common::Server;
use mail_parser::HeaderName;
use crate::SpamFilterContext;
@ -14,7 +14,7 @@ pub trait SpamFilterAnalyzeReplyTo: Sync + Send {
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeReplyTo for Core {
impl SpamFilterAnalyzeReplyTo for Server {
async fn spam_filter_analyze_reply_to(&self, ctx: &mut SpamFilterContext<'_>) {
let mut reply_to_raw = b"".as_slice();
let mut is_from_list = false;
@ -104,7 +104,12 @@ impl SpamFilterAnalyzeReplyTo for Core {
.sld
.as_deref()
.unwrap_or_default();
if self.spam.list_freemail_providers.contains(reply_to_sld) {
if self
.core
.spam
.list_freemail_providers
.contains(reply_to_sld)
{
ctx.result.add_tag("FREEMAIL_REPLYTO");
let from_domain_sld = ctx
.output
@ -115,11 +120,20 @@ impl SpamFilterAnalyzeReplyTo for Core {
.as_deref()
.unwrap_or_default();
if reply_to_sld != from_domain_sld
&& self.spam.list_freemail_providers.contains(from_domain_sld)
&& self
.core
.spam
.list_freemail_providers
.contains(from_domain_sld)
{
ctx.result.add_tag("FREEMAIL_REPLYTO_NEQ_FROM_DOM");
}
} else if self.spam.list_disposable_providers.contains(reply_to_sld) {
} else if self
.core
.spam
.list_disposable_providers
.contains(reply_to_sld)
{
ctx.result.add_tag("DISPOSABLE_REPLYTO");
}

View file

@ -0,0 +1,190 @@
use std::future::Future;
use common::Server;
use mail_parser::HeaderName;
use nlp::tokenizers::types::TokenType;
use smtp_proto::{MAIL_BODY_8BITMIME, MAIL_BODY_BINARYMIME, MAIL_SMTPUTF8};
use crate::{Email, SpamFilterContext};
pub trait SpamFilterAnalyzeSubject: Sync + Send {
fn spam_filter_analyze_subject(
&self,
ctx: &mut SpamFilterContext<'_>,
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeSubject for Server {
async fn spam_filter_analyze_subject(&self, ctx: &mut SpamFilterContext<'_>) {
let mut subject_raw = b"".as_slice();
let mut is_reply = false;
for header in ctx.input.message.headers() {
match &header.name {
HeaderName::Subject => {
subject_raw = ctx
.input
.message
.raw_message()
.get(header.offset_start..header.offset_end)
.unwrap_or_default();
}
HeaderName::InReplyTo | HeaderName::References => {
is_reply = true;
}
_ => {}
}
}
if subject_raw.is_empty() {
// Missing subject header
ctx.result.add_tag("MISSING_SUBJECT");
return;
}
let mut word_count = 0;
let mut upper_count = 0;
let mut lower_count = 0;
let mut last_ch = ' ';
let mut last_ch_trimmed = ' ';
let mut is_ascii = true;
for ch in ctx.output.subject_thread.chars() {
if !ch.is_whitespace() {
if last_ch.is_whitespace() {
word_count += 1;
}
match ch {
'$' | '€' | '£' | '¥' | '₹' | '₽' | '₿' => {
ctx.result.add_tag("SUBJECT_HAS_CURRENCY");
}
'!' => {
ctx.result.add_tag("SUBJECT_HAS_EXCLAIM");
}
'?' => {
ctx.result.add_tag("SUBJECT_HAS_QUESTION");
}
_ => {
if ch.is_alphabetic() {
if ch.is_uppercase() {
upper_count += 1;
} else {
lower_count += 1;
}
}
}
}
last_ch_trimmed = ch;
}
if !ch.is_ascii() {
is_ascii = false;
}
last_ch = ch;
}
match last_ch_trimmed {
'?' => {
ctx.result.add_tag("SUBJECT_ENDS_QUESTION");
}
'!' => {
ctx.result.add_tag("SUBJECT_ENDS_EXCLAIM");
}
_ => {}
}
if last_ch.is_whitespace() {
if last_ch_trimmed.is_whitespace() {
// Subject is empty
ctx.result.add_tag("EMPTY_SUBJECT");
} else {
// Subject ends with whitespace
ctx.result.add_tag("SUBJECT_ENDS_SPACES");
}
}
if ctx.output.subject_thread.len() >= 10
&& word_count > 1
&& upper_count > 2
&& lower_count == 0
{
// Subject contains mostly capital letters
ctx.result.add_tag("SUBJ_ALL_CAPS");
}
if ctx.output.subject_thread.len() > 200 {
// Subject is very long
ctx.result.add_tag("LONG_SUBJ");
}
for token in &ctx.output.subject_tokens {
match token {
TokenType::Url(_) => {
// Subject contains URL
ctx.result.add_tag("URL_IN_SUBJECT");
}
TokenType::Email(address) => {
// Subject contains recipient
let email = Email::new(address);
if ctx.output.env_to_addr.contains(&email)
|| ctx
.output
.all_recipients()
.any(|r| r.email.address == email.address)
{
ctx.result.add_tag("RCPT_IN_SUBJECT");
}
continue;
}
_ => {}
}
if let Some(hostname) = token.hostname_sld() {
let hostname = Some(hostname.to_lowercase());
if ctx
.output
.all_recipients()
.any(|r| r.email.domain_part.sld == hostname)
{
ctx.result.add_tag("RCPT_DOMAIN_IN_SUBJECT");
}
}
}
// Validate encoding
let subject_raw_utf8 = std::str::from_utf8(subject_raw);
if !subject_raw.is_ascii() {
if (ctx.input.env_from_flags
& (MAIL_SMTPUTF8 | MAIL_BODY_8BITMIME | MAIL_BODY_BINARYMIME))
== 0
{
ctx.result.add_tag("SUBJECT_NEEDS_ENCODING");
}
if subject_raw_utf8.is_err() {
ctx.result.add_tag("INVALID_SUBJECT_8BIT");
}
}
// Validate unnecessary encoding
let subject_raw_utf8 = subject_raw_utf8.unwrap_or_default();
if is_ascii && subject_raw_utf8.contains("=?") && subject_raw_utf8.contains("?=") {
if subject_raw_utf8.contains("?q?") || subject_raw_utf8.contains("?Q?") {
// Subject header is unnecessarily encoded in quoted-printable
ctx.result.add_tag("SUBJ_EXCESS_QP");
} else if subject_raw_utf8.contains("?b?") || subject_raw_utf8.contains("?B?") {
// Subject header is unnecessarily encoded in base64
ctx.result.add_tag("SUBJ_EXCESS_BASE64");
}
}
if !is_reply && ctx.output.subject.trim().starts_with("re:") {
// Subject is not a reply but starts with "re:"
ctx.result.add_tag("FAKE_REPLY");
}
}
}

View file

@ -0,0 +1,324 @@
use std::{borrow::Cow, future::Future, time::Duration};
use common::Server;
use common::{config::spamfilter::Target, scripts::functions::unicode::CharUtils};
use hyper::{
header::{HeaderName, LOCATION},
Uri,
};
use nlp::tokenizers::types::TokenType;
use reqwest::redirect::Policy;
use unicode_security::MixedScript;
use crate::modules::dnsbl::is_dnsbl;
use crate::modules::remote_list::is_in_remote_list;
use crate::{
modules::html::{HtmlToken, A, HREF},
Hostname, SpamFilterContext, TextPart,
};
pub trait SpamFilterAnalyzeUrl: Sync + Send {
fn spam_filter_analyze_url(
&self,
ctx: &mut SpamFilterContext<'_>,
) -> impl Future<Output = ()> + Send;
}
impl SpamFilterAnalyzeUrl for Server {
async fn spam_filter_analyze_url(&self, ctx: &mut SpamFilterContext<'_>) {
for (part_id, part) in ctx.output.text_parts.iter().enumerate() {
if ctx.input.message.text_body.contains(&part_id)
|| ctx.input.message.html_body.contains(&part_id)
{
let is_single = match part {
TextPart::Plain { tokens, .. } => is_single_url(tokens),
TextPart::Html {
html_tokens,
tokens,
..
} => is_single_html_url(html_tokens, tokens),
TextPart::None => false,
};
if is_single {
ctx.result.add_tag("URL_ONLY");
break;
}
}
}
for url in &ctx.output.urls {
for ch in url.chars() {
if ch.is_zwsp() {
ctx.result.add_tag("ZERO_WIDTH_SPACE_URL");
}
if ch.is_obscured() {
ctx.result.add_tag("R_SUSPICIOUS_URL");
}
}
// Skip non-URLs such as 'data:' and 'mailto:'
if !url.contains("://") {
continue;
}
// Parse url
let url_parsed = match url.parse::<Uri>() {
Ok(url) if url.host().is_some() => url,
_ => {
// URL could not be parsed
ctx.result.add_tag("R_SUSPICIOUS_URL");
continue;
}
};
let host = Hostname::new(url_parsed.host().unwrap());
let host_sld = host.sld_or_default();
// Skip local and trusted domains
if self.core.spam.list_trusted_domains.contains(host_sld)
|| self
.core
.storage
.directory
.is_local_domain(host_sld)
.await
.unwrap_or_default()
{
continue;
}
// Check for redirectors
let mut redirected_urls = Vec::new();
if host.ip.is_none() && self.core.spam.list_url_redirectors.contains(host_sld) {
ctx.result.add_tag("REDIRECTOR_URL");
let mut redirect_count = 0;
let mut url_redirect = Cow::Borrowed(url);
while redirect_count <= 5 {
match http_get_header(url_redirect.as_ref(), LOCATION, Duration::from_secs(5))
.await
{
Ok(Some(location)) => {
if let Ok(location_parsed) = location.parse::<Uri>() {
let host =
Hostname::new(location_parsed.host().unwrap_or_default());
if self
.core
.spam
.list_url_redirectors
.contains(host.sld_or_default())
{
url_redirect = Cow::Owned(location);
redirect_count += 1;
continue;
} else {
let location = location.to_lowercase();
if !ctx.output.urls.contains(&location) {
redirected_urls.push((
Cow::Owned(location),
location_parsed,
host,
));
}
}
}
}
Ok(None) => {}
Err(err) => {
trc::error!(err.span_id(ctx.input.span_id));
}
}
break;
}
if redirect_count > 5 {
ctx.result.add_tag("URL_REDIRECTOR_NESTED");
}
}
for (url, url_parsed, host) in [(Cow::Borrowed(url), url_parsed, host)]
.into_iter()
.chain(redirected_urls.into_iter())
{
let query = url_parsed
.path_and_query()
.map(|pq| pq.as_str())
.unwrap_or_default();
if host.ip.is_none() {
if !host.fqdn.is_ascii() {
if let Ok(cured_host) =
decancer::cure(&host.fqdn, decancer::Options::default())
{
let cured_host = cured_host.to_string();
if cured_host != host.fqdn
&& matches!(self.core.dns_exists_ip(&cured_host).await, Ok(true))
{
ctx.result.add_tag("HOMOGRAPH_URL");
}
if !cured_host.is_single_script() {
ctx.result.add_tag("MIXED_CHARSET_URL");
}
}
} else if matches!(host.sld.as_deref(), Some("googleusercontent.com"))
&& query.starts_with("/proxy/")
{
ctx.result.add_tag("HAS_GUC_PROXY_URI");
} else if host.fqdn.ends_with("firebasestorage.googleapis.com") {
ctx.result.add_tag("HAS_GOOGLE_FIREBASE_URL");
} else if host.sld_or_default().starts_with("google.") && query.contains("url?")
{
ctx.result.add_tag("HAS_GOOGLE_REDIR");
}
if host.fqdn.contains("ipfs.")
|| (query.contains("/ipfs") && query.contains("/qm"))
{
// InterPlanetary File System (IPFS) gateway URL, likely malicious
ctx.result.add_tag("HAS_IPFS_GATEWAY_URL");
} else if host.fqdn.ends_with(".onion") {
// Onion URL
ctx.result.add_tag("HAS_ONION_URI");
}
} else {
// URL is an ip address
ctx.result.add_tag("R_SUSPICIOUS_URL");
}
if query.starts_with("/wp-") {
// Contains WordPress URIs
ctx.result.add_tag("HAS_WP_URI");
if query.starts_with("/wp-content") || query.starts_with("/wp-includes") {
// URL that is pointing to a compromised WordPress installation
ctx.result.add_tag("WP_COMPROMISED");
}
}
if query.contains("/../")
&& !query.contains("/.well-known")
&& !query.contains("/.well_known")
{
// Message contains URI with a hidden path
ctx.result.add_tag("URI_HIDDEN_PATH");
}
// Check remote lists
for remote in &self.core.spam.remote_lists {
if matches!(remote.target, Target::Url)
&& is_in_remote_list(self, remote, url.as_ref(), ctx.input.span_id).await
{
ctx.result.add_tag(&remote.tag);
}
}
// Check DNSBL
for dnsbl in &self.core.spam.dnsbls {
if matches!(dnsbl.target, Target::Url) {
if let Some(tag) =
is_dnsbl(self, dnsbl, url.as_ref(), ctx.input.span_id).await
{
ctx.result.add_tag(tag);
}
}
}
}
}
}
}
async fn http_get_header(
url: &str,
header: HeaderName,
timeout: Duration,
) -> trc::Result<Option<String>> {
reqwest::Client::builder()
.user_agent("Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0")
.timeout(timeout)
.redirect(Policy::none())
.danger_accept_invalid_certs(true)
.build()
.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.details("Failed to build request")
})?
.get(url)
.send()
.await
.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.details("Failed to send request")
})
.map(|response| {
response
.headers()
.get(header)
.and_then(|h| h.to_str().ok())
.map(|h| h.to_string())
})
}
fn is_single_url<T: AsRef<str>>(tokens: &[TokenType<T>]) -> bool {
let mut url_count = 0;
let mut word_count = 0;
for token in tokens {
match token {
TokenType::Alphabetic(_)
| TokenType::Alphanumeric(_)
| TokenType::Integer(_)
| TokenType::Email(_)
| TokenType::Float(_) => {
word_count += 1;
}
TokenType::Url(_) | TokenType::UrlNoScheme(_) => {
url_count += 1;
}
_ => {}
}
}
url_count == 1 && word_count <= 1
}
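// For example, a part whose tokens are [Url("https://example.com/x")] or
// [Alphabetic("see"), Url(...)] counts as a single-URL part, while
// [Alphabetic("click"), Alphabetic("here"), Url(...)] does not (two words).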
fn is_single_html_url<T: AsRef<str>>(html_tokens: &[HtmlToken], tokens: &[TokenType<T>]) -> bool {
let mut url_count = 0;
let mut word_count = 0;
for token in tokens {
match token {
TokenType::Alphabetic(_)
| TokenType::Alphanumeric(_)
| TokenType::Integer(_)
| TokenType::Email(_)
| TokenType::Float(_) => {
word_count += 1;
}
TokenType::Url(_) | TokenType::UrlNoScheme(_) => {
url_count += 1;
}
_ => {}
}
}
if word_count > 1 || url_count != 1 {
return false;
}
url_count = 0;
for token in html_tokens {
if matches!(token, HtmlToken::StartTag { name, attributes } if *name == A && attributes.iter().any(|(k, _)| *k == HREF))
{
url_count += 1;
}
}
url_count == 1
}

View file

@ -7,10 +7,13 @@ use std::net::IpAddr;
use mail_auth::{dmarc::Policy, ArcOutput, DkimOutput, DmarcResult, IprevOutput, SpfOutput};
use mail_parser::Message;
use modules::html::HtmlToken;
use nlp::tokenizers::types::TokenType;
use store::ahash::AHashSet;
pub struct SpamFilterInput<'x> {
pub message: &'x Message<'x>,
pub span_id: u64,
// Sender authentication
pub arc_result: &'x ArcOutput<'x>,
@ -36,7 +39,7 @@ pub struct SpamFilterInput<'x> {
pub env_rcpt_to: &'x [&'x str],
}
pub struct SpamFilterOutput {
pub struct SpamFilterOutput<'x> {
pub ehlo_host: Hostname,
pub iprev_ptr: Option<String>,
@ -51,6 +54,23 @@ pub struct SpamFilterOutput {
pub subject: String,
pub subject_thread: String,
pub subject_tokens: Vec<TokenType<&'x str>>,
pub text_parts: Vec<TextPart<'x>>,
pub urls: HashSet<String>,
}
pub enum TextPart<'x> {
Plain {
text_body: &'x str,
tokens: Vec<TokenType<&'x str>>,
},
Html {
html_tokens: Vec<HtmlToken>,
text_body: String,
tokens: Vec<TokenType<String>>,
},
None,
}
pub struct SpamFilterResult {
@ -59,7 +79,7 @@ pub struct SpamFilterResult {
pub struct SpamFilterContext<'x> {
pub input: SpamFilterInput<'x>,
pub output: SpamFilterOutput,
pub output: SpamFilterOutput<'x>,
pub result: SpamFilterResult,
}

View file

@ -0,0 +1,53 @@
use std::time::Instant;
use common::{config::spamfilter::DnsblConfig, Server};
use mail_auth::Error;
use trc::SpamEvent;
pub async fn is_dnsbl(
server: &Server,
config: &DnsblConfig,
item: &str,
span_id: u64,
) -> Option<String> {
let time = Instant::now();
let zone = server
.eval_expr::<String, _>(&config.zone, &item, &config.id, span_id)
.await?;
let todo = "use proper event error";
match server.core.smtp.resolvers.dns.ipv4_lookup(&zone).await {
Ok(result) => {
let result = result.iter().map(|ip| ip.to_string()).collect::<Vec<_>>();
trc::event!(
Spam(SpamEvent::Classify),
Result = result
.iter()
.map(|ip| trc::Value::from(ip.clone()))
.collect::<Vec<_>>(),
Elapsed = time.elapsed()
);
server.eval_if(&config.tags, &result, span_id).await
}
Err(Error::DnsRecordNotFound(_)) => {
trc::event!(
Spam(SpamEvent::Classify),
Result = trc::Value::None,
Elapsed = time.elapsed()
);
None
}
Err(err) => {
trc::event!(
Spam(SpamEvent::Classify),
Elapsed = time.elapsed(),
CausedBy = err.to_string()
);
None
}
}
}
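// Behaviour sketch: a listed entry (e.g. an answer of 127.0.0.2) is reported as
// Result = ["127.0.0.2"] and the configured `tags` if-block decides which tag, if
// any, is returned; an NXDOMAIN answer is logged with Result = None and treated
// as not listed. The 127.0.0.2 return code is only an illustrative convention.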

View file

@ -0,0 +1,382 @@
use mail_parser::decoders::html::add_html_token;
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum HtmlToken {
StartTag {
name: u64,
attributes: Vec<(u64, Option<String>)>,
},
EndTag {
name: u64,
},
Comment {
text: String,
},
Text {
text: String,
},
}
pub(crate) const A: u64 = b'a' as u64;
pub(crate) const HREF: u64 =
(b'h' as u64) | (b'r' as u64) << 8 | (b'e' as u64) << 16 | (b'f' as u64) << 24;
pub(crate) const SRC: u64 = (b's' as u64) | (b'r' as u64) << 8 | (b'c' as u64) << 16;
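// These constants pack lowercase ASCII tag and attribute names into a u64, one
// byte per character in little-endian order, which is also how html_to_tokens
// builds the `name` and attribute-key values below. For example:
//
//   A              == b'a' as u64                 == 97
//   "div" packs to    100 | 105 << 8 | 118 << 16  == 7_760_228  (see the tests)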
pub fn html_to_tokens(input: &str) -> Vec<HtmlToken> {
let input = input.as_bytes();
let mut iter = input.iter().enumerate().peekable();
let mut tags = vec![];
let mut is_token_start = true;
let mut is_after_space = false;
let mut is_new_line = true;
let mut token_start = 0;
let mut token_end = 0;
let mut text = String::new();
while let Some((mut pos, &ch)) = iter.next() {
match ch {
b'<' => {
if !is_token_start {
add_html_token(
&mut text,
&input[token_start..token_end + 1],
is_after_space,
);
is_after_space = false;
is_token_start = true;
}
if !text.is_empty() {
tags.push(HtmlToken::Text {
text: std::mem::take(&mut text),
});
}
while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace()) {
pos += 1;
iter.next();
}
if matches!(input.get(pos + 1..pos + 4), Some(b"!--")) {
let mut comment = Vec::new();
let mut last_ch: u8 = 0;
for (_, &ch) in iter.by_ref() {
match ch {
b'>' if comment.len() > 2
&& matches!(comment.last(), Some(b'-'))
&& matches!(comment.get(comment.len() - 2), Some(b'-')) =>
{
break;
}
b' ' | b'\t' | b'\r' | b'\n' => {
// Collapse runs of whitespace into a single space
if last_ch != b' ' {
comment.push(b' ');
}
last_ch = b' ';
continue;
}
_ => {
comment.push(ch);
}
}
last_ch = ch;
}
tags.push(HtmlToken::Comment {
text: String::from_utf8(comment).unwrap_or_default(),
});
} else {
let mut is_end_tag = false;
loop {
match iter.peek() {
Some((_, &b'/')) => {
is_end_tag = true;
pos += 1;
iter.next();
}
Some((_, ch)) if ch.is_ascii_whitespace() => {
pos += 1;
iter.next();
}
_ => break,
}
}
let mut in_quote = false;
let mut key: u64 = 0;
let mut shift = 0;
let mut tag = 0;
let mut attributes = vec![];
'outer: while let Some((_, &ch)) = iter.next() {
match ch {
b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' if shift < 64 => {
key |= (ch as u64) << shift;
shift += 8;
}
b'A'..=b'Z' if shift < 64 => {
key |= ((ch - b'A' + b'a') as u64) << shift;
shift += 8;
}
b'>' if !in_quote => {
if shift != 0 {
if tag == 0 {
tag = key;
} else {
attributes.push((key, None));
}
}
break;
}
b'"' => {
in_quote = !in_quote;
}
b'=' if !in_quote => {
while matches!(iter.peek(), Some((_, &ch)) if ch.is_ascii_whitespace())
{
iter.next();
}
if shift != 0 {
attributes.push((key, None));
key = 0;
shift = 0;
}
let mut value = vec![];
for (_, &ch) in iter.by_ref() {
match ch {
b'>' if !in_quote => {
if !value.is_empty() {
attributes.last_mut().unwrap().1 =
String::from_utf8(value)
.unwrap_or_default()
.into();
}
break 'outer;
}
b'"' => {
if in_quote {
in_quote = false;
break;
} else {
in_quote = true;
}
}
b' ' | b'\t' | b'\r' | b'\n' if !in_quote => {
break;
}
_ => {
value.push(ch);
}
}
}
if !value.is_empty() {
attributes.last_mut().unwrap().1 =
String::from_utf8(value).unwrap_or_default().into();
}
}
b' ' | b'\t' | b'\r' | b'\n' => {
if shift != 0 {
if tag == 0 {
tag = key;
} else {
attributes.push((key, None));
}
key = 0;
shift = 0;
}
}
_ => {}
}
}
if tag != 0 {
if is_end_tag {
tags.push(HtmlToken::EndTag { name: tag });
} else {
tags.push(HtmlToken::StartTag {
name: tag,
attributes,
});
}
}
}
continue;
}
b' ' | b'\t' | b'\r' | b'\n' => {
if !is_token_start {
add_html_token(
&mut text,
&input[token_start..token_end + 1],
is_after_space && !is_new_line,
);
is_new_line = false;
}
is_after_space = true;
is_token_start = true;
continue;
}
b'&' if !is_token_start => {
add_html_token(
&mut text,
&input[token_start..token_end + 1],
is_after_space && !is_new_line,
);
is_new_line = false;
is_token_start = true;
is_after_space = false;
}
b';' if !is_token_start => {
add_html_token(
&mut text,
&input[token_start..pos + 1],
is_after_space && !is_new_line,
);
is_token_start = true;
is_after_space = false;
is_new_line = false;
continue;
}
_ => (),
}
if is_token_start {
token_start = pos;
is_token_start = false;
}
token_end = pos;
}
if !is_token_start {
add_html_token(
&mut text,
&input[token_start..token_end + 1],
is_after_space && !is_new_line,
);
}
if !text.is_empty() {
tags.push(HtmlToken::Text { text });
}
tags
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_html_to_tokens_text() {
let input = "Hello, world!";
let tokens = html_to_tokens(input);
assert_eq!(
tokens,
vec![HtmlToken::Text {
text: "Hello, world!".to_string()
}]
);
}
#[test]
fn test_html_to_tokens_start_tag() {
let input = "<div>";
let tokens = html_to_tokens(input);
assert_eq!(
tokens,
vec![HtmlToken::StartTag {
name: 7760228,
attributes: vec![]
}]
);
}
#[test]
fn test_html_to_tokens_end_tag() {
let input = "</div>";
let tokens = html_to_tokens(input);
assert_eq!(tokens, vec![HtmlToken::EndTag { name: 7760228 }]);
}
#[test]
fn test_html_to_tokens_comment() {
let input = "<!-- This is a comment -->";
let tokens = html_to_tokens(input);
assert_eq!(
tokens,
vec![HtmlToken::Comment {
text: "!-- This is a comment --".to_string()
}]
);
}
#[test]
fn test_html_to_tokens_mixed() {
let input = "<div>Hello, <span>&quot; world &quot; </span>!</div>";
let tokens = html_to_tokens(input);
assert_eq!(
tokens,
vec![
HtmlToken::StartTag {
name: 7760228,
attributes: vec![]
},
HtmlToken::Text {
text: "Hello,".to_string()
},
HtmlToken::StartTag {
name: 1851879539,
attributes: vec![]
},
HtmlToken::Text {
text: " \" world \"".to_string()
},
HtmlToken::EndTag { name: 1851879539 },
HtmlToken::Text {
text: " !".to_string()
},
HtmlToken::EndTag { name: 7760228 }
]
);
}
#[test]
fn test_html_to_tokens_with_attributes() {
let input = r#"<input type="text" value="test"><single/><one attr/><a b=1 b c="123">"#;
let tokens = html_to_tokens(input);
assert_eq!(
tokens,
vec![
HtmlToken::StartTag {
name: 500186508905,
attributes: vec![
(1701869940, Some("text".to_string())),
(435761734006, Some("test".to_string()))
]
},
HtmlToken::StartTag {
name: 111516266162547,
attributes: vec![]
},
HtmlToken::StartTag {
name: 6647407,
attributes: vec![(1920234593, None)]
},
HtmlToken::StartTag {
name: 97,
attributes: vec![
(98, Some("1".to_string())),
(98, None),
(99, Some("123".to_string()))
]
}
]
);
}
}

View file

@ -1 +1,4 @@
pub mod dnsbl;
pub mod html;
pub mod remote_list;
pub mod sanitize;

View file

@ -0,0 +1,199 @@
use std::{
collections::HashSet,
io::{BufRead, BufReader},
time::Instant,
};
use common::{
config::{
scripts::RemoteList,
spamfilter::{RemoteListConfig, RemoteListFormat},
},
HttpLimitResponse, Server, USER_AGENT,
};
use mail_auth::flate2;
pub async fn is_in_remote_list(
server: &Server,
config: &RemoteListConfig,
item: &str,
span_id: u64,
) -> bool {
match is_in_remote_list_(server, config, item, span_id).await {
Ok(result) => result,
Err(err) => {
let mut _lock = server.inner.data.remote_lists.write();
let list = _lock
.entry(config.id.clone())
.or_insert_with(|| RemoteList {
entries: HashSet::new(),
expires: Instant::now(),
});
if list.expires > Instant::now() {
list.entries.contains(item)
} else {
list.expires = Instant::now() + config.retry;
trc::error!(err.span_id(span_id));
false
}
}
}
}
async fn is_in_remote_list_(
server: &Server,
config: &RemoteListConfig,
item: &str,
span_id: u64,
) -> trc::Result<bool> {
#[cfg(feature = "test_mode")]
{
if (config.url.contains("open") && item.contains("open"))
|| (config.url.contains("tank") && item.contains("tank"))
{
return Ok(true);
}
}
let todo = "update RuntimeError with SpamEvent error";
match server.inner.data.remote_lists.read().get(&config.id) {
Some(remote_list) if remote_list.expires > Instant::now() => {
return Ok(remote_list.entries.contains(item))
}
_ => {}
}
let response = reqwest::Client::builder()
.timeout(config.timeout)
.user_agent(USER_AGENT)
.build()
.unwrap_or_default()
.get(&config.url)
.send()
.await
.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.ctx(trc::Key::Url, config.url.to_string())
.details("Failed to build request")
})?;
if response.status().is_success() {
let bytes = response
.bytes_with_limit(config.max_size)
.await
.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.ctx(trc::Key::Url, config.url.to_string())
.details("Failed to fetch resource")
})?
.ok_or_else(|| {
trc::SieveEvent::RuntimeError
.into_err()
.ctx(trc::Key::Url, config.url.to_string())
.details("Resource is too large")
})?;
let reader: Box<dyn std::io::Read> = if config.url.ends_with(".gz") {
Box::new(flate2::read::GzDecoder::new(&bytes[..]))
} else {
Box::new(&bytes[..])
};
// Lock remote list for writing
let mut _lock = server.inner.data.remote_lists.write();
let list = _lock
.entry(config.id.to_string())
.or_insert_with(|| RemoteList {
entries: HashSet::new(),
expires: Instant::now(),
});
// If another task refreshed the list while we were fetching, use its copy
if list.expires > Instant::now() {
return Ok(list.entries.contains(item));
}
for (pos, line) in BufReader::new(reader).lines().enumerate() {
let line_ = line.map_err(|err| {
trc::SieveEvent::RuntimeError
.into_err()
.reason(err)
.ctx(trc::Key::Url, config.url.to_string())
.details("Failed to read line")
})?;
// Clear list once the first entry has been successfully fetched, decompressed and UTF8-decoded
if pos == 0 {
list.entries.clear();
}
match &config.format {
RemoteListFormat::List => {
let line = line_.trim();
if !line.is_empty() {
list.entries.insert(line.to_string());
}
}
RemoteListFormat::Csv {
column,
separator,
skip_first,
} if pos > 0 || !*skip_first => {
let mut in_quote = false;
let mut col_num = 0;
let mut entry = String::new();
for ch in line_.chars() {
if ch != '"' {
if ch == *separator && !in_quote {
if col_num == *column {
break;
} else {
col_num += 1;
}
} else if col_num == *column {
entry.push(ch);
if entry.len() > config.max_entry_size {
break;
}
}
} else {
in_quote = !in_quote;
}
}
if !entry.is_empty() {
list.entries.insert(entry);
}
}
_ => (),
}
if list.entries.len() == config.max_entries {
break;
}
}
trc::event!(
Spam(trc::SpamEvent::ListUpdated),
Url = config.url.to_string(),
Total = list.entries.len(),
SpanId = span_id
);
// Update expiration
list.expires = Instant::now() + config.refresh;
Ok(list.entries.contains(item))
} else {
trc::bail!(trc::SieveEvent::RuntimeError
.into_err()
.ctx(trc::Key::Code, response.status().as_u16())
.ctx(trc::Key::Url, config.url.to_string())
.details("Failed to fetch remote list"));
}
}
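// Worked example of the CSV branch above (illustrative feed line): with
//   RemoteListFormat::Csv { column: 1, separator: ',', skip_first: true }
// a line such as
//   123,"https://phish.example/login",2024-12-09
// contributes the entry `https://phish.example/login` (quotes stripped, other
// columns ignored), and the first line of the file is skipped as a header row.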

View file

@ -4,7 +4,30 @@ use crate::{Email, Hostname};
impl Hostname {
pub fn new(host: &str) -> Self {
let fqdn = host.to_lowercase();
let mut fqdn = host.to_lowercase();
// Decode punycode
if fqdn.contains("xn--") {
let mut decoded = String::with_capacity(fqdn.len());
for part in fqdn.split('.') {
if !decoded.is_empty() {
decoded.push('.');
}
if let Some(puny) = part
.strip_prefix("xn--")
.and_then(idna::punycode::decode_to_string)
{
decoded.push_str(&puny);
} else {
decoded.push_str(part);
}
}
fqdn = decoded;
}
let ip = fqdn
.strip_prefix('[')
.and_then(|ip| ip.strip_suffix(']'))
@ -36,3 +59,9 @@ impl Email {
}
}
}
impl Hostname {
pub fn sld_or_default(&self) -> &str {
self.sld.as_deref().unwrap_or(self.fqdn.as_str())
}
}
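// Illustrative behaviour of the punycode handling above (assuming standard IDNA
// labels): Hostname::new("xn--bcher-kva.example").fqdn == "bücher.example", while
// labels without the "xn--" prefix pass through unchanged.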

View file

@ -1,49 +0,0 @@
if eval "(contains(subject_lc, 'delivery') &&
(contains(subject_lc, 'failed') ||
contains(subject_lc, 'report') ||
contains(subject_lc, 'status') ||
contains(subject_lc, 'warning'))) ||
(contains(subject_lc, 'failure') &&
(contains(subject_lc, 'delivery') ||
contains(subject_lc, 'notice') ||
contains(subject_lc, 'mail') )) ||
(contains(subject_lc, 'delivered') &&
(contains(subject_lc, 'couldn\\'t be') ||
contains(subject_lc, 'could not be') ||
contains(subject_lc, 'hasn\\'t been') ||
contains(subject_lc, 'has not been'))) ||
contains(subject_lc, 'returned mail') ||
contains(subject_lc, 'undeliverable') ||
contains(subject_lc, 'undelivered')" {
# Subject contains words or phrases typical for DSN
let "t.SUBJ_BOUNCE_WORDS" "1";
}
if eval "is_empty(envelope.from)" {
if eval "eq_ignore_case(header.content-type, 'multipart/report') &&
( eq_ignore_case(header.content-type.attr.report-type, 'delivery-status') ||
eq_ignore_case(header.content-type.attr.report-type, 'disposition-notification'))" {
let "t.BOUNCE" "1";
} else {
let "from" "to_lowercase(header.from)";
if eval "contains(from, 'mdaemon') && !is_empty(header.X-MDDSN-Message)" {
let "t.BOUNCE" "1";
} elsif eval "contains(from, 'postmaster') || contains(from, 'mailer-daemon')" {
if eval "t.SUBJ_BOUNCE_WORDS" {
let "t.BOUNCE" "1";
} else {
foreverypart {
if eval "(eq_ignore_case(header.content-type.type, 'message') ||
eq_ignore_case(header.content-type.type, 'text')) &&
(eq_ignore_case(header.content-type.subtype, 'rfc822-headers') ||
eq_ignore_case(header.content-type.subtype, 'rfc822'))" {
let "t.BOUNCE" "1";
break;
}
}
}
}
}
}
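
This rule is dropped as part of the Rust port; a rough sketch of how the subject keyword test could be expressed natively (function name and signature are hypothetical, only the keyword combinations come from the rule above):

// Hypothetical equivalent of the SUBJ_BOUNCE_WORDS check; the caller is
// expected to pass the already lowercased subject.
fn has_bounce_subject_words(subject_lc: &str) -> bool {
    let c = |s: &str| subject_lc.contains(s);
    (c("delivery") && (c("failed") || c("report") || c("status") || c("warning")))
        || (c("failure") && (c("delivery") || c("notice") || c("mail")))
        || (c("delivered")
            && (c("couldn't be") || c("could not be") || c("hasn't been") || c("has not been")))
        || c("returned mail")
        || c("undeliverable")
        || c("undelivered")
}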

View file

@@ -1,93 +0,0 @@
let "rcvd_raw" "header.received[*].raw";
let "rcvd_count" "count(rcvd_raw)";
# Count received headers
if eval "rcvd_count == 0" {
let "t.RCVD_COUNT_ZERO" "1";
} elsif eval "rcvd_count == 1" {
let "t.RCVD_COUNT_ONE" "1";
} elsif eval "rcvd_count == 2" {
let "t.RCVD_COUNT_TWO" "1";
} elsif eval "rcvd_count == 3" {
let "t.RCVD_COUNT_THREE" "1";
} elsif eval "rcvd_count <= 5" {
let "t.RCVD_COUNT_FIVE" "1";
} elsif eval "rcvd_count <= 7" {
let "t.RCVD_COUNT_SEVEN" "1";
} elsif eval "rcvd_count <= 12" {
let "t.RCVD_COUNT_TWELVE" "1";
}
# Received from an authenticated user
if eval "!is_empty(env.authenticated_as)" {
let "t.RCVD_VIA_SMTP_AUTH" "1";
}
# Received headers have non-ASCII characters
if eval "!is_ascii(rcvd_raw)" {
let "t.RCVD_ILLEGAL_CHARS" "1";
}
let "i" "0";
let "tls_count" "0";
let "rcvd_from_ip" "0";
while "i < rcvd_count" {
let "i" "i + 1";
let "helo_domain" "received_part(i, 'from')";
# Check for a forged received trail
if eval "!t.FORGED_RCVD_TRAIL" {
let "iprev" "received_part(i, 'iprev')";
if eval "!is_empty(iprev) && !is_empty(helo_domain) && !eq_ignore_case(helo_domain, iprev)" {
let "t.FORGED_RCVD_TRAIL" "1";
}
}
if eval "!t.PREVIOUSLY_DELIVERED" {
let "for" "received_part(i, 'for')";
# Recipient appears on Received trail
if eval "!is_empty(for) && contains_ignore_case(recipients, for)" {
let "t.PREVIOUSLY_DELIVERED" "1";
}
}
if eval "!t.RCVD_HELO_USER && eq_ignore_case(helo_domain, 'user')" {
# Received: HELO contains 'user'
let "t.RCVD_HELO_USER" "1";
}
if eval "!is_empty(received_part(i, 'from.ip'))" {
# Received from an IP address rather than a FQDN
let "rcvd_from_ip" "rcvd_from_ip + 1";
}
if eval "!is_empty(received_part(i, 'tls'))" {
# Received with TLS
let "tls_count" "tls_count + 1";
}
}
if eval "rcvd_from_ip >= 2 || (rcvd_from_ip == 1 && is_ip_addr(env.helo_domain))" {
# Has two or more Received headers containing bare IP addresses
let "t.RCVD_DOUBLE_IP_SPAM" "1";
}
if eval "rcvd_count == 0" {
# Only one Received header in the message (currently zero; the local MTA will add one later)
let "t.ONCE_RECEIVED" "1";
# Message has been directly delivered from MUA to local MX
if eval "header.User-Agent.exists || header.X-Mailer.exists" {
let "t.DIRECT_TO_MX" "1";
}
}
# Received with TLS checks
if eval "rcvd_count > 0 && tls_count == rcvd_count && !is_empty(env.tls.version)" {
let "t.RCVD_TLS_ALL" "1";
} elsif eval "!is_empty(env.tls.version)" {
let "t.RCVD_TLS_LAST" "1";
} else {
let "t.RCVD_NO_TLS_LAST" "1";
}
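
Also removed in favour of native code; a possible sketch of the Received-count bucketing, keeping only the tag names from the rule above (everything else is an assumption):

// Hypothetical mapping of the number of Received headers to a single tag,
// mirroring the if/elsif chain of the removed rule.
fn received_count_tag(rcvd_count: usize) -> Option<&'static str> {
    match rcvd_count {
        0 => Some("RCVD_COUNT_ZERO"),
        1 => Some("RCVD_COUNT_ONE"),
        2 => Some("RCVD_COUNT_TWO"),
        3 => Some("RCVD_COUNT_THREE"),
        4..=5 => Some("RCVD_COUNT_FIVE"),
        6..=7 => Some("RCVD_COUNT_SEVEN"),
        8..=12 => Some("RCVD_COUNT_TWELVE"),
        _ => None,
    }
}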

View file

@@ -1,78 +0,0 @@
let "raw_subject_lc" "to_lowercase(header.subject.raw)";
let "is_ascii_subject" "is_ascii(subject_lc)";
if eval "len(subject_clean) >= 10 && count(tokenize(subject_clean, 'words')) > 1 && is_uppercase(subject_clean)" {
# Subject contains mostly capital letters
let "t.SUBJ_ALL_CAPS" "1";
}
if eval "count_chars(subject_clean) > 200" {
# Subject is very long
let "t.LONG_SUBJ" "1";
}
if eval "!is_empty(tokenize(subject_lc, 'uri_strict'))" {
# Subject contains a URL
let "t.URL_IN_SUBJECT" "1";
}
if eval "!is_ascii(raw_subject_lc) && !env.param.smtputf8 && env.param.body != '8bitmime' && env.param.body != 'binarymime'" {
# Subject needs encoding
let "t.SUBJECT_NEEDS_ENCODING" "1";
}
if eval "!header.Subject.exists" {
# Missing subject header
let "t.MISSING_SUBJECT" "1";
} elsif eval "is_empty(trim(subject_lc))" {
# Subject is empty
let "t.EMPTY_SUBJECT" "1";
}
if eval "is_ascii(subject_lc) && contains(raw_subject_lc, '=?') && contains(raw_subject_lc, '?=')" {
if eval "contains(raw_subject_lc, '?q?')" {
# Subject header is unnecessarily encoded in quoted-printable
let "t.SUBJ_EXCESS_QP" "1";
} elsif eval "contains(raw_subject_lc, '?b?')" {
# Subject header is unnecessarily encoded in base64
let "t.SUBJ_EXCESS_BASE64" "1";
}
}
if eval "starts_with(subject_lc, 're:') && is_empty(header.in-reply-to) && is_empty(header.references)" {
# Fake reply
let "t.FAKE_REPLY" "1";
}
let "subject_lc_trim" "trim_end(subject_lc)";
if eval "subject_lc != subject_lc_trim" {
# Subject ends with space characters
let "t.SUBJECT_ENDS_SPACES" "1";
}
if eval "contains(subject_lc, '$') ||
contains(subject_lc, '€') ||
contains(subject_lc, '£') ||
contains(subject_lc, '¥')" {
# Subject contains currency symbols
let "t.SUBJECT_HAS_CURRENCY" "1";
}
if eval "ends_with(subject_lc_trim, '!')" {
# Subject ends with an exclamation mark
let "t.SUBJECT_ENDS_EXCLAIM" "1";
} elsif eval "ends_with(subject_lc_trim, '?')" {
# Subject ends with a question mark
let "t.SUBJECT_ENDS_QUESTION" "1";
}
if eval "contains(subject_lc_trim, '!')" {
# Subject contains an exclamation mark
let "t.SUBJECT_HAS_EXCLAIM" "1";
}
if eval "contains(subject_lc_trim, '?')" {
# Subject contains a question mark
let "t.SUBJECT_HAS_QUESTION" "1";
}
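
A hedged sketch of two of the subject checks above in Rust (helper name, signature and the all-caps approximation are assumptions, not the actual port):

// Hypothetical port of the SUBJ_ALL_CAPS and SUBJECT_HAS_CURRENCY checks;
// operates on the raw subject, not the lowercased variant.
fn subject_tags(subject: &str, tags: &mut Vec<&'static str>) {
    // Mostly capital letters: at least 10 characters, more than one word,
    // at least one uppercase letter and no lowercase letters at all.
    if subject.chars().count() >= 10
        && subject.split_whitespace().count() > 1
        && subject.chars().any(char::is_uppercase)
        && !subject.chars().any(char::is_lowercase)
    {
        tags.push("SUBJ_ALL_CAPS");
    }
    // Currency symbols anywhere in the subject.
    if subject.chars().any(|c| matches!(c, '$' | '€' | '£' | '¥')) {
        tags.push("SUBJECT_HAS_CURRENCY");
    }
}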

View file

@@ -1,125 +0,0 @@
if eval "(count(body_urls) == 1 || count(html_body_urls) == 1) && count(tokenize(text_body, 'words')) == 0" {
let "t.URL_ONLY" "1";
}
if eval "has_zwsp(urls)" {
let "t.ZERO_WIDTH_SPACE_URL" "1";
} elsif eval "has_obscured(urls)" {
let "t.R_SUSPICIOUS_URL" "1";
}
let "i" "count(urls)";
while "i > 0" {
let "i" "i - 1";
let "url" "urls[i]";
# Skip non-URLs such as 'data:' and 'mailto:'
if eval "!contains(url, '://')" {
continue;
}
let "host" "uri_part(url, 'host')";
if eval "!is_empty(host)" {
let "is_ip" "is_ip_addr(host)";
let "host" "puny_decode(host)";
let "host_lc" "to_lowercase(host)";
let "host_sld" "domain_part(host_lc, 'sld')";
# Skip local and trusted domains
if eval "is_local_domain(DOMAIN_DIRECTORY, host_sld) || key_exists('spam-allow', host_sld)" {
continue;
}
if eval "!is_ip &&
(!t.REDIRECTOR_URL || !t.URL_REDIRECTOR_NESTED) &&
key_exists('spam-redirect', host_sld)" {
let "t.REDIRECTOR_URL" "1";
let "redir_count" "1";
while "redir_count <= 5" {
# Use a custom user-agent and a 3 second timeout
let "url_redirect" "http_header(url, 'Location', 'Mozilla/5.0 (X11; Linux i686; rv:109.0) Gecko/20100101 Firefox/118.0', 3000)";
if eval "!is_empty(url_redirect)" {
let "url" "url_redirect";
let "host" "uri_part(url, 'host')";
let "is_ip" "is_ip_addr(host)";
let "host" "puny_decode(host)";
let "host_lc" "to_lowercase(host)";
let "host_sld" "domain_part(host_lc, 'sld')";
if eval "!is_ip && key_exists('spam-redirect', host_sld)" {
let "redir_count" "redir_count + 1";
} else {
break;
}
} else {
break;
}
}
if eval "redir_count > 5" {
let "t.URL_REDIRECTOR_NESTED" "1";
}
}
let "url_lc" "to_lowercase(url)";
let "query" "uri_part(url_lc, 'path_query')";
if eval "!is_ip" {
if eval "!is_ascii(host)" {
let "host_cured" "cure_text(host)";
if eval "host_lc != host_cured && dns_exists(host_cured, 'ip')" {
let "t.HOMOGRAPH_URL" "1";
}
if eval "!is_single_script(host)" {
let "t.MIXED_CHARSET_URL" "1";
}
} else {
if eval "ends_with(host, 'googleusercontent.com') && starts_with(query, '/proxy/')" {
let "t.HAS_GUC_PROXY_URI" "1";
} elsif eval "ends_with(host, 'firebasestorage.googleapis.com')" {
let "t.HAS_GOOGLE_FIREBASE_URL" "1";
} elsif eval "starts_with(domain_part(host, 'sld'), 'google.') && contains(query, 'url?') " {
let "t.HAS_GOOGLE_REDIR" "1";
}
}
if eval "(contains(host_lc, 'ipfs.') || contains(query, '/ipfs')) && contains(query, '/qm')" {
# InterPlanetary File System (IPFS) gateway URL, likely malicious
let "t.HAS_IPFS_GATEWAY_URL" "1";
} elsif eval "ends_with(host_lc, '.onion')" {
let "t.HAS_ONION_URI" "1";
}
} else {
# URL is an ip address
let "t.R_SUSPICIOUS_URL" "1";
}
if eval "starts_with(query, '/wp-')" {
# Contains WordPress URIs
let "t.HAS_WP_URI" "1";
if eval "starts_with(query, '/wp-content') | starts_with(query, '/wp-includes')" {
# URL that is pointing to a compromised WordPress installation
let "t.WP_COMPROMISED" "1";
}
}
if eval "contains(query, '/../') && !contains(query, '/well-known') && !contains(query, '/well_known')" {
# Message contains URI with a hidden path
let "t.URI_HIDDEN_PATH" "1";
}
# Phishing checks (refresh OpenPhish every 12 hours, PhishTank every 6 hours)
if eval "key_exists_http('https://openphish.com/feed.txt', url, [43200, 'list'])" {
let "t.PHISHED_OPENPHISH" "1";
}
if eval "key_exists_http('http://data.phishtank.com/data/online-valid.csv', url, [21600, 'csv', 1, ',', true])" {
let "t.PHISHED_PHISHTANK" "1";
}
} else {
# URL could not be parsed
let "t.R_SUSPICIOUS_URL" "1";
}
}
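
To close, a rough Rust sketch of the WordPress and hidden-path checks from the rule above, assuming the caller passes the lowercased path and query of an already parsed URL (helper name and signature are hypothetical):

// Hypothetical port of the HAS_WP_URI, WP_COMPROMISED and URI_HIDDEN_PATH checks.
fn url_path_tags(path_query: &str, tags: &mut Vec<&'static str>) {
    if path_query.starts_with("/wp-") {
        // Contains WordPress URIs
        tags.push("HAS_WP_URI");
        if path_query.starts_with("/wp-content") || path_query.starts_with("/wp-includes") {
            // URL pointing to a possibly compromised WordPress installation
            tags.push("WP_COMPROMISED");
        }
    }
    if path_query.contains("/../")
        && !path_query.contains("/well-known")
        && !path_query.contains("/well_known")
    {
        // URI with a hidden path component
        tags.push("URI_HIDDEN_PATH");
    }
}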